diff --git a/ChangeLog b/ChangeLog index c6baf65369..61a1cf18b4 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,38 @@ +2010-09-22 - V3.01 + * Thread-safety! Moved all critical globals and statics to + members of the appropriate class. Tesseract is now + thread-safe (multiple instances can be used in parallel + in multiple threads.) with the minor exception that some + control parameters are still global and affect all threads. + * Added Cube, a new recognizer for Arabic. Cube can also be + used in combination with normal Tesseract for other languages + with an improvement in accuracy at the cost of (much) lower speed. + There is no training module for Cube yet. + * OcrEngineMode in Init replaces AccuracyVSpeed to control cube. + * Greatly improved segmentation search with consequent accuracy and + speed improvements, especially for Chinese. + * Added PageIterator and ResultIterator as cleaner ways to get the + full results out of Tesseract, that are not currently provided + by any of the TessBaseAPI::Get* methods. + All other methods, such as the ETEXT_STRUCT in particular are + deprecated and will be deleted in the future. + * ApplyBoxes totally rewritten to make training easier. + It can now cope with touching/overlapping training characters, + and a new boxfile format allows word boxes instead of character + boxes, BUT to use that you have to have already bootstrapped the + language with character boxes. "Cyclic dependency" on traineddata. + * Auto orientation and script detection added to page layout analysis. + * Deleted *lots* of dead code. + * Fixxht module replaced with scalable data-driven module. + * Output font characteristics accuracy improved. + * Removed the double conversion at each classification. + * Upgraded oldest structs to be classes and deprecated PBLOB. + * Removed non-deterministic baseline fit. + * Added fixed length dawgs for Chinese. + * Handling of vertical text improved. + * Handling of leader dots improved. 
+ * Table detection greatly improved. + 2010-09-21 - V3.00 * Preparations for thread safety: * Changed TessBaseAPI methods to be non-static diff --git a/Makefile.am b/Makefile.am index cd8fc777b3..8c398422b5 100644 --- a/Makefile.am +++ b/Makefile.am @@ -1,6 +1,6 @@ # TODO(luc) Add 'doc' to this list when ready ACLOCAL_AMFLAGS = -I m4 -SUBDIRS = ccstruct ccutil classify cutil dict image textord viewer wordrec ccmain training tessdata testing java api vs2008 +SUBDIRS = ccstruct ccutil classify cube cutil dict image neural_networks/runtime textord viewer wordrec ccmain training tessdata testing java api #if USING_GETTEXT #SUBDIRS += po #AM_CPPFLAGS = -DLOCALEDIR=\"$(localedir)\" diff --git a/Makefile.in b/Makefile.in index dfeebc4cc8..4184e84f69 100644 --- a/Makefile.in +++ b/Makefile.in @@ -234,7 +234,7 @@ top_srcdir = @top_srcdir@ # TODO(luc) Add 'doc' to this list when ready ACLOCAL_AMFLAGS = -I m4 -SUBDIRS = ccstruct ccutil classify cutil dict image textord viewer wordrec ccmain training tessdata testing java api vs2008 +SUBDIRS = ccstruct ccutil classify cube cutil dict image neural_networks/runtime textord viewer wordrec ccmain training tessdata testing java api #if USING_GETTEXT #SUBDIRS += po #AM_CPPFLAGS = -DLOCALEDIR=\"$(localedir)\" diff --git a/ReleaseNotes b/ReleaseNotes index f07c6973c3..2f62500cbe 100644 --- a/ReleaseNotes +++ b/ReleaseNotes @@ -1,3 +1,38 @@ +Tesseract release notes Oct 1 2010 - V3.01 + * Thread-safety! Moved all critical globals and statics to + members of the appropriate class. Tesseract is now + thread-safe (multiple instances can be used in parallel + in multiple threads.) with the minor exception that some + control parameters are still global and affect all threads. + * Added Cube, a new recognizer for Arabic. Cube can also be + used in combination with normal Tesseract for other languages + with an improvement in accuracy at the cost of (much) lower speed. + There is no training module for Cube yet. 
+ * OcrEngineMode in Init replaces AccuracyVSpeed to control cube. + * Greatly improved segmentation search with consequent accuracy and + speed improvements, especially for Chinese. + * Added PageIterator and ResultIterator as cleaner ways to get the + full results out of Tesseract, that are not currently provided + by any of the TessBaseAPI::Get* methods. + All other methods, such as the ETEXT_STRUCT in particular are + deprecated and will be deleted in the future. + * ApplyBoxes totally rewritten to make training easier. + It can now cope with touching/overlapping training characters, + and a new boxfile format allows word boxes instead of character + boxes, BUT to use that you have to have already bootstrapped the + language with character boxes. "Cyclic dependency" on traineddata. + * Auto orientation and script detection added to page layout analysis. + * Deleted *lots* of dead code. + * Fixxht module replaced with scalable data-driven module. + * Output font characteristics accuracy improved. + * Removed the double conversion at each classification. + * Upgraded oldest structs to be classes and deprecated PBLOB. + * Removed non-deterministic baseline fit. + * Added fixed length dawgs for Chinese. + * Handling of vertical text improved. + * Handling of leader dots improved. + * Table detection greatly improved. 
+ Tesseract release notes Sep 30 2010 - V3.00 * Preparations for thread safety: * Changed TessBaseAPI methods to be non-static diff --git a/api/Makefile.am b/api/Makefile.am index 4c149f6102..c8f848cc2d 100644 --- a/api/Makefile.am +++ b/api/Makefile.am @@ -8,13 +8,15 @@ AM_CPPFLAGS = -DLOCALEDIR=\"$(localedir)\"\ -I$(top_srcdir)/textord include_HEADERS = \ - baseapi.h tesseractmain.h + apitypes.h baseapi.h pageiterator.h resultiterator.h tesseractmain.h lib_LTLIBRARIES = libtesseract_api.la -libtesseract_api_la_SOURCES = baseapi.cpp +libtesseract_api_la_SOURCES = baseapi.cpp pageiterator.cpp resultiterator.cpp libtesseract_api_la_LDFLAGS = -version-info $(GENERIC_LIBRARY_VERSION) libtesseract_api_la_LIBADD = \ ../ccmain/libtesseract_main.la \ + ../cube/libtesseract_cube.la \ + ../neural_networks/runtime/libtesseract_neural.la \ ../textord/libtesseract_textord.la \ ../wordrec/libtesseract_wordrec.la \ ../classify/libtesseract_classify.la \ diff --git a/api/Makefile.in b/api/Makefile.in index f79f728ef8..4353ea80a2 100644 --- a/api/Makefile.in +++ b/api/Makefile.in @@ -74,6 +74,8 @@ am__installdirs = "$(DESTDIR)$(libdir)" "$(DESTDIR)$(bindir)" \ "$(DESTDIR)$(includedir)" LTLIBRARIES = $(lib_LTLIBRARIES) libtesseract_api_la_DEPENDENCIES = ../ccmain/libtesseract_main.la \ + ../cube/libtesseract_cube.la \ + ../neural_networks/runtime/libtesseract_neural.la \ ../textord/libtesseract_textord.la \ ../wordrec/libtesseract_wordrec.la \ ../classify/libtesseract_classify.la \ @@ -82,7 +84,8 @@ libtesseract_api_la_DEPENDENCIES = ../ccmain/libtesseract_main.la \ ../image/libtesseract_image.la ../cutil/libtesseract_cutil.la \ ../viewer/libtesseract_viewer.la \ ../ccutil/libtesseract_ccutil.la -am_libtesseract_api_la_OBJECTS = baseapi.lo +am_libtesseract_api_la_OBJECTS = baseapi.lo pageiterator.lo \ + resultiterator.lo libtesseract_api_la_OBJECTS = $(am_libtesseract_api_la_OBJECTS) libtesseract_api_la_LINK = $(LIBTOOL) --tag=CXX $(AM_LIBTOOLFLAGS) \ $(LIBTOOLFLAGS) --mode=link 
$(CXXLD) $(AM_CXXFLAGS) \ @@ -294,13 +297,15 @@ AM_CPPFLAGS = -DLOCALEDIR=\"$(localedir)\"\ -I$(top_srcdir)/textord include_HEADERS = \ - baseapi.h tesseractmain.h + apitypes.h baseapi.h pageiterator.h resultiterator.h tesseractmain.h lib_LTLIBRARIES = libtesseract_api.la -libtesseract_api_la_SOURCES = baseapi.cpp +libtesseract_api_la_SOURCES = baseapi.cpp pageiterator.cpp resultiterator.cpp libtesseract_api_la_LDFLAGS = -version-info $(GENERIC_LIBRARY_VERSION) libtesseract_api_la_LIBADD = \ ../ccmain/libtesseract_main.la \ + ../cube/libtesseract_cube.la \ + ../neural_networks/runtime/libtesseract_neural.la \ ../textord/libtesseract_textord.la \ ../wordrec/libtesseract_wordrec.la \ ../classify/libtesseract_classify.la \ @@ -446,6 +451,8 @@ distclean-compile: -rm -f *.tab.c @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/baseapi.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/pageiterator.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/resultiterator.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/tesseractmain.Po@am__quote@ .cpp.o: diff --git a/api/apitypes.h b/api/apitypes.h new file mode 100644 index 0000000000..3527a9c62c --- /dev/null +++ b/api/apitypes.h @@ -0,0 +1,31 @@ +/////////////////////////////////////////////////////////////////////// +// File: apitypes.h +// Description: Types used in both the API and internally +// Author: Ray Smith +// Created: Wed Mar 03 09:22:53 PST 2010 +// +// (C) Copyright 2010, Google Inc. +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. +// +/////////////////////////////////////////////////////////////////////// + +#ifndef TESSERACT_API_APITYPES_H__ +#define TESSERACT_API_APITYPES_H__ + +#include "publictypes.h" + +// The types used by the API and Page/ResultIterator can be found in +// ccstruct/publictypes.h. +// API interfaces and API users should be sure to include this file, rather +// than the lower-level one, and lower-level code should be sure to include +// only the lower-level file. + +#endif // TESSERACT_API_APITYPES_H__ diff --git a/api/baseapi.cpp b/api/baseapi.cpp index ed1ea6d7ac..d7e6de6435 100644 --- a/api/baseapi.cpp +++ b/api/baseapi.cpp @@ -25,38 +25,31 @@ #ifdef HAVE_LIBLEPT // Include leptonica library only if autoconf (or makefile etc) tell us to. #include "allheaders.h" +#else +#error "Sorry: Tesseract no longer compiles without leptonica!" #endif #include "baseapi.h" +#include "resultiterator.h" #include "thresholder.h" #include "tesseractmain.h" #include "tesseractclass.h" -#include "tessedit.h" -#include "ocrclass.h" #include "pageres.h" #include "tessvars.h" #include "control.h" -#include "applybox.h" #include "pgedit.h" -#include "varabled.h" +#include "paramsd.h" #include "output.h" -#include "mainblk.h" #include "globals.h" -#include "adaptmatch.h" #include "edgblob.h" #include "tessbox.h" -#include "tordvars.h" #include "imgs.h" #include "makerow.h" #include "tstruct.h" -#include "tessout.h" -#include "tface.h" #include "permute.h" #include "otsuthr.h" #include "osdetect.h" -#include "chopper.h" -#include "matchtab.h" namespace tesseract { @@ -74,17 +67,19 @@ const char* kInputFile = "noname.tif"; TessBaseAPI::TessBaseAPI() : tesseract_(NULL), + osd_tesseract_(NULL), // Thresholder is initialized to NULL here, but will be set before use by: // A constructor of a derived API, SetThresholder(), or // created implicitly when used in InternalSetImage. 
thresholder_(NULL), - threshold_done_(false), block_list_(NULL), page_res_(NULL), input_file_(NULL), output_file_(NULL), datapath_(NULL), language_(NULL), + last_oem_requested_(OEM_DEFAULT), + recognition_done_(false), rect_left_(0), rect_top_(0), rect_width_(0), rect_height_(0), image_width_(0), image_height_(0) { } @@ -110,17 +105,49 @@ void TessBaseAPI::SetOutputName(const char* name) { *output_file_ = name; } -// Set the value of an internal "variable" (of either old or new types). -// Supply the name of the variable and the value as a string, just as -// you would in a config file. -// Returns false if the name lookup failed. -// SetVariable may be used before Init, to set things that control -// initialization, but note that on End all settings are lost and -// the next Init will use the defaults unless SetVariable is used again. -bool TessBaseAPI::SetVariable(const char* variable, const char* value) { - if (tesseract_ == NULL) - tesseract_ = new Tesseract; - return set_variable(variable, value); +bool TessBaseAPI::SetVariable(const char* name, const char* value) { + if (tesseract_ == NULL) tesseract_ = new Tesseract; + return ParamUtils::SetParam(name, value, false, tesseract_->params()); +} + +bool TessBaseAPI::SetVariableIfInit(const char* name, const char* value) { + if (tesseract_ == NULL) tesseract_ = new Tesseract; + return ParamUtils::SetParam(name, value, true, tesseract_->params()); +} + +bool TessBaseAPI::GetIntVariable(const char *name, int *value) const { + IntParam *p = ParamUtils::FindParam( + name, GlobalParams()->int_params, tesseract_->params()->int_params); + if (p == NULL) return false; + *value = (inT32)(*p); + return true; +} + +bool TessBaseAPI::GetBoolVariable(const char *name, bool *value) const { + BoolParam *p = ParamUtils::FindParam( + name, GlobalParams()->bool_params, tesseract_->params()->bool_params); + if (p == NULL) return false; + *value = (BOOL8)(*p); + return true; +} + +const char *TessBaseAPI::GetStringVariable(const 
char *name) const { + StringParam *p = ParamUtils::FindParam( + name, GlobalParams()->string_params, tesseract_->params()->string_params); + return (p != NULL) ? p->string() : NULL; +} + +bool TessBaseAPI::GetDoubleVariable(const char *name, double *value) const { + DoubleParam *p = ParamUtils::FindParam( + name, GlobalParams()->double_params, tesseract_->params()->double_params); + if (p == NULL) return false; + *value = (double)(*p); + return true; +} + +// Print Tesseract parameters to the given file. +void TessBaseAPI::PrintVariables(FILE *fp) const { + ParamUtils::PrintParams(fp, tesseract_->params()); } // The datapath must be the name of the data directory (no ending /) or @@ -130,16 +157,17 @@ bool TessBaseAPI::SetVariable(const char* variable, const char* value) { // be returned. // Returns 0 on success and -1 on initialization failure. int TessBaseAPI::Init(const char* datapath, const char* language, - char **configs, int configs_size, - bool configs_global_only) { - // If the datapath or the language have changed, then start again. + OcrEngineMode oem, char **configs, int configs_size, + bool configs_init_only) { + // If the datapath, OcrEngineMode or the language have changed - start again. // Note that the language_ field stores the last requested language that was // initialized successfully, while tesseract_->lang stores the language // actually used. They differ only if the requested language was NULL, in // which case tesseract_->lang is set to the Tesseract default ("eng"). 
if (tesseract_ != NULL && - (datapath_ == NULL || language_ == NULL || *datapath_ != datapath - || (*language_ != language && tesseract_->lang != language))) { + (datapath_ == NULL || language_ == NULL || + *datapath_ != datapath || last_oem_requested_ != oem || + (*language_ != language && tesseract_->lang != language))) { tesseract_->end_tesseract(); delete tesseract_; tesseract_ = NULL; @@ -151,7 +179,7 @@ int TessBaseAPI::Init(const char* datapath, const char* language, tesseract_ = new Tesseract; if (tesseract_->init_tesseract( datapath, output_file_ != NULL ? output_file_->string() : NULL, - language, configs, configs_size, configs_global_only) != 0) { + language, oem, configs, configs_size, configs_init_only) != 0) { return -1; } } @@ -164,6 +192,7 @@ int TessBaseAPI::Init(const char* datapath, const char* language, language_ = new STRING(language); else *language_ = language; + last_oem_requested_ = oem; // For same language and datapath, just reset the adaptive classifier. if (reset_classifier) tesseract_->ResetAdaptiveClassifier(); @@ -181,46 +210,24 @@ int TessBaseAPI::InitLangMod(const char* datapath, const char* language) { return tesseract_->init_tesseract_lm(datapath, NULL, language); } -// Init only the classifer component of Tesseract. Used to initialize the -// specified language when no dawg models are available. -int TessBaseAPI::InitWithoutLangModel(const char* datapath, - const char* language) { - // If the datapath or the language have changed, then start again. - if (tesseract_ != NULL && - (datapath_ == NULL || language_ == NULL || - *datapath_ != datapath || *language_ != language)) { - tesseract_->end_tesseract(); - delete tesseract_; - tesseract_ = NULL; - } - if (datapath_ == NULL) - datapath_ = new STRING(datapath); - else - *datapath_ = datapath; - if (language_ == NULL) - language_ = new STRING(language); - else - *language_ = language; +// Init only for page layout analysis. Use only for calls to SetImage and +// AnalysePage. 
Calls that attempt recognition will generate an error. +void TessBaseAPI::InitForAnalysePage() { if (tesseract_ == NULL) { tesseract_ = new Tesseract; - return tesseract_->init_tesseract_classifier( - datapath, output_file_ != NULL ? output_file_->string() : NULL, - language, NULL, 0, false); + tesseract_->InitAdaptiveClassifier(false); } - // For same language and datapath, just reset the adaptive classifier. - tesseract_->ResetAdaptiveClassifier(); - return 0; } -// Read a "config" file containing a set of variable, value pairs. +// Read a "config" file containing a set of parameter name, value pairs. // Searches the standard places: tessdata/configs, tessdata/tessconfigs // and also accepts a relative or absolute path name. -void TessBaseAPI::ReadConfigFile(const char* filename, bool global_only) { - tesseract_->read_config_file(filename, global_only); +void TessBaseAPI::ReadConfigFile(const char* filename, bool init_only) { + tesseract_->read_config_file(filename, init_only); } // Set the current page segmentation mode. Defaults to PSM_AUTO. -// The mode is stored as an INT_VARIABLE so it can also be modified by +// The mode is stored as an IntParam so it can also be modified by // ReadConfigFile or SetVariable("tessedit_pageseg_mode", mode as string). void TessBaseAPI::SetPageSegMode(PageSegMode mode) { if (tesseract_ == NULL) @@ -236,21 +243,6 @@ PageSegMode TessBaseAPI::GetPageSegMode() const { static_cast(tesseract_->tessedit_pageseg_mode)); } -// Set the hint for trading accuracy against speed. -// Default is AVS_FASTEST, which is the old behaviour. -// Note that this is only a hint. Depending on the language and/or -// build configuration, speed and accuracy may not be tradeable. -// Also note that despite being an enum, any value in the range -// AVS_FASTEST to AVS_MOST_ACCURATE can be provided, and may or may not -// have an effect, depending on the implementation. 
-// The mode is stored as an INT_VARIABLE so it can also be modified by -// ReadConfigFile or SetVariable("tessedit_accuracyvspeed", mode as string). -void TessBaseAPI::SetAccuracyVSpeed(AccuracyVSpeed mode) { - if (tesseract_ == NULL) - tesseract_ = new Tesseract; - tesseract_->tessedit_accuracyvspeed.set_value(mode); -} - // Recognize a rectangle from an image and return the result as a string. // May be called many times for a single Init. // Currently has no error checking. @@ -312,10 +304,8 @@ void TessBaseAPI::SetImage(const unsigned char* imagedata, // Because of that, an implementation that sources and targets Pix may end up // with less copies than an implementation that does not. void TessBaseAPI::SetImage(const Pix* pix) { -#ifdef HAVE_LIBLEPT if (InternalSetImage()) thresholder_->SetImage(pix); -#endif } // Restrict recognition to a sub-rectangle of the image. Call after SetImage. @@ -331,280 +321,224 @@ void TessBaseAPI::SetRectangle(int left, int top, int width, int height) { // ONLY available if you have Leptonica installed. // Get a copy of the internal thresholded image from Tesseract. Pix* TessBaseAPI::GetThresholdedImage() { -#ifdef HAVE_LIBLEPT if (tesseract_ == NULL) return NULL; if (tesseract_->pix_binary() == NULL) Threshold(tesseract_->mutable_pix_binary()); return pixClone(tesseract_->pix_binary()); -#else - return NULL; -#endif } // Get the result of page layout analysis as a leptonica-style // Boxa, Pixa pair, in reading order. // Can be called before or after Recognize. -// For now only gets text regions. 
Boxa* TessBaseAPI::GetRegions(Pixa** pixa) { -#ifdef HAVE_LIBLEPT - if (block_list_ == NULL || block_list_->empty()) { - FindLines(); - } - int im_height = pixGetHeight(tesseract_->pix_binary()); - Boxa* boxa = boxaCreate(block_list_->length()); - if (pixa != NULL) { - *pixa = pixaCreate(boxaGetCount(boxa)); - } - BLOCK_IT it(block_list_); - for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { - BLOCK* block = it.data(); - POLY_BLOCK* poly = block->poly_block(); - TBOX box; - if (poly != NULL) { - if (!poly->IsText()) - continue; // Use only text blocks. - POLY_BLOCK image_block(poly->points(), poly->isA()); - image_block.rotate(block->re_rotation()); - box = *image_block.bounding_box(); - if (pixa != NULL) { - Pix* pix = pixCreate(box.width(), box.height(), 1); - PB_LINE_IT *lines; - // Block outline is a polygon, so use a PC_LINE_IT to get the - // rasterized interior. (Runs of interior pixels on a line.) - lines = new PB_LINE_IT(&image_block); - for (int y = box.bottom(); y < box.top(); ++y) { - ICOORDELT_LIST* segments = lines->get_line(y); - if (!segments->empty()) { - ICOORDELT_IT s_it(segments); - // Each element of segments is a start x and x size of the - // run of interior pixels. - for (s_it.mark_cycle_pt(); !s_it.cycled_list(); s_it.forward()) { - int start = s_it.data()->x(); - int xext = s_it.data()->y(); - // Copy the run from the source image to the block image. - pixRasterop(pix, start - box.left(), - box.height() - 1 - (y - box.bottom()), - xext, 1, PIX_SRC, tesseract_->pix_binary(), - start, im_height - 1 - y); - } - } - delete segments; - } - delete lines; - pixaAddPix(*pixa, pix, L_INSERT); - } - } else { - if (!block_list_->singleton()) - continue; // A null poly block can only be used if it is the only block. - box = block->bounding_box(); - if (pixa != NULL) { - Pix* pix = pixCreate(box.width(), box.height(), 1); - // Just copy the whole block as there is only a bounding box. 
- pixRasterop(pix, 0, 0, box.width(), box.height(), - PIX_SRC, tesseract_->pix_binary(), - box.left(), im_height - box.top()); - pixaAddPix(*pixa, pix, L_INSERT); - } - } - Box* lbox = boxCreate(box.left(), im_height - box.top(), - box.width(), box.height()); - boxaAddBox(boxa, lbox, L_INSERT); - } - return boxa; -#else - return NULL; -#endif + return GetComponentImages(RIL_BLOCK, pixa, NULL); } -// Get the textlines as a leptonica-style -// Boxa, Pixa pair, in reading order. +// Get the textlines as a leptonica-style Boxa, Pixa pair, in reading order. // Can be called before or after Recognize. // If blockids is not NULL, the block-id of each line is also returned as an // array of one element per line. delete [] after use. Boxa* TessBaseAPI::GetTextlines(Pixa** pixa, int** blockids) { -#ifdef HAVE_LIBLEPT - if (block_list_ == NULL || block_list_->empty()) { - FindLines(); - } - // A local PAGE_RES prevents the clear if Recognize is called after. - PAGE_RES page_res(block_list_); - PAGE_RES_IT page_res_it(page_res_ != NULL ? page_res_ : &page_res); - // Count the lines to get a size for the arrays. - int line_count = 0; - for (page_res_it.restart_page(); page_res_it.word() != NULL; - page_res_it.forward()) { - if (page_res_it.row() != page_res_it.next_row()) { - ++line_count; - } - } + return GetComponentImages(RIL_TEXTLINE, pixa, blockids); +} - int im_height = pixGetHeight(tesseract_->pix_binary()); - Boxa* boxa = boxaCreate(line_count); - if (pixa != NULL) - *pixa = pixaCreate(line_count); - if (blockids != NULL) - *blockids = new int[line_count]; - int blockid = 0; - int lineindex = 0; - for (page_res_it.restart_page(); page_res_it.word() != NULL; - page_res_it.forward(), ++lineindex) { - WERD_RES *word = page_res_it.word(); - BLOCK* block = page_res_it.block()->block; - // Get the line bounding box. - PAGE_RES_IT word_it(page_res_it); // Save start of line. 
- TBOX line_box = word->word->bounding_box(); - while (page_res_it.next_row() == page_res_it.row()) { - page_res_it.forward(); - word = page_res_it.word(); - TBOX word_box = word->word->bounding_box(); - word_box.rotate(block->re_rotation()); - line_box += word_box; - } - Box* lbox = boxCreate(line_box.left(), im_height - line_box.top(), - line_box.width(), line_box.height()); - boxaAddBox(boxa, lbox, L_INSERT); - if (pixa != NULL) { - Pix* pix = pixCreate(line_box.width(), line_box.height(), 1); - // Copy all the words to the output pix. - while (word_it.row() == page_res_it.row()) { - word = word_it.word(); - TBOX word_box = word->word->bounding_box(); - word_box.rotate(block->re_rotation()); - pixRasterop(pix, word_box.left() - line_box.left(), - line_box.top() - word_box.top(), - word_box.width(), word_box.height(), - PIX_SRC, tesseract_->pix_binary(), - word_box.left(), im_height - word_box.top()); - word_it.forward(); - } - pixaAddPix(*pixa, pix, L_INSERT); - pixaAddBox(*pixa, lbox, L_CLONE); - } - if (blockids != NULL) { - (*blockids)[lineindex] = blockid; - if (page_res_it.block() != page_res_it.next_block()) - ++blockid; - } - } - return boxa; -#else - return NULL; -#endif +// Gets the individual connected (text) components (created +// after pages segmentation step, but before recognition) +// as a leptonica-style Boxa, Pixa pair, in reading order. +// Can be called before or after Recognize. +Boxa* TessBaseAPI::GetConnectedComponents(Pixa** pixa) { + return GetComponentImages(RIL_SYMBOL, pixa, NULL); } // Get the words as a leptonica-style // Boxa, Pixa pair, in reading order. // Can be called before or after Recognize. Boxa* TessBaseAPI::GetWords(Pixa** pixa) { -#ifdef HAVE_LIBLEPT - if (block_list_ == NULL || block_list_->empty()) { - FindLines(); - } - // A local PAGE_RES prevents the clear if Recognize is called after. - PAGE_RES page_res(block_list_); - PAGE_RES_IT page_res_it(page_res_ != NULL ? 
page_res_ : &page_res); - // Count the words to get a size for the arrays. - int word_count = 0; - for (page_res_it.restart_page(); page_res_it.word () != NULL; - page_res_it.forward()) - ++word_count; + return GetComponentImages(RIL_WORD, pixa, NULL); +} - int im_height = pixGetHeight(tesseract_->pix_binary()); - Boxa* boxa = boxaCreate(word_count); - if (pixa != NULL) { - *pixa = pixaCreate(word_count); - } - for (page_res_it.restart_page(); page_res_it.word () != NULL; - page_res_it.forward()) { - WERD_RES *word = page_res_it.word(); - BLOCK* block = page_res_it.block()->block; - TBOX box = word->word->bounding_box(); - box.rotate(block->re_rotation()); - Box* lbox = boxCreate(box.left(), im_height - box.top(), - box.width(), box.height()); - boxaAddBox(boxa, lbox, L_INSERT); - if (pixa != NULL) { - Pix* pix = pixCreate(box.width(), box.height(), 1); - // Copy the whole word bounding box to the output pix. - pixRasterop(pix, 0, 0, box.width(), box.height(), - PIX_SRC, tesseract_->pix_binary(), - box.left(), im_height - box.top()); - pixaAddPix(*pixa, pix, L_INSERT); - pixaAddBox(*pixa, lbox, L_CLONE); +// Get the given level kind of components (block, textline, word etc.) as a +// leptonica-style Boxa, Pixa pair, in reading order. +// Can be called before or after Recognize. +// If blockids is not NULL, the block-id of each component is also returned +// as an array of one element per component. delete [] after use. +Boxa* TessBaseAPI::GetComponentImages(PageIteratorLevel level, + Pixa** pixa, int** blockids) { + PageIterator* page_it = GetIterator(); + if (page_it == NULL) + page_it = AnalyseLayout(); + if (page_it == NULL) + return NULL; // Failed. + + // Count the components to get a size for the arrays. 
+ int component_count = 0; + int left, top, right, bottom; + do { + if (page_it->BoundingBox(level, &left, &top, &right, &bottom)) + ++component_count; + } while (page_it->Next(level)); + + Boxa* boxa = boxaCreate(component_count); + if (pixa != NULL) + *pixa = pixaCreate(component_count); + if (blockids != NULL) + *blockids = new int[component_count]; + + int blockid = 0; + int component_index = 0; + page_it->Begin(); + do { + if (page_it->BoundingBox(level, &left, &top, &right, &bottom)) { + Box* lbox = boxCreate(left, top, right - left, bottom - top); + boxaAddBox(boxa, lbox, L_INSERT); + if (pixa != NULL) { + Pix* pix = page_it->GetBinaryImage(level); + pixaAddPix(*pixa, pix, L_INSERT); + pixaAddBox(*pixa, lbox, L_CLONE); + } + if (blockids != NULL) { + (*blockids)[component_index] = blockid; + if (page_it->IsAtFinalElement(RIL_BLOCK, level)) + ++blockid; + } + ++component_index; } - } + } while (page_it->Next(level)); + delete page_it; return boxa; -#else - return NULL; -#endif // HAVE_LIBLEPT } // Dump the internal binary image to a PGM file. void TessBaseAPI::DumpPGM(const char* filename) { if (tesseract_ == NULL) return; - IMAGELINE line; - line.init(page_image.get_xsize()); FILE *fp = fopen(filename, "w"); - fprintf(fp, "P5 " INT32FORMAT " " INT32FORMAT " 255\n", - page_image.get_xsize(), page_image.get_ysize()); - for (int j = page_image.get_ysize()-1; j >= 0 ; --j) { - page_image.get_line(0, j, page_image.get_xsize(), &line, 0); - for (int i = 0; i < page_image.get_xsize(); ++i) { - uinT8 b = line.pixels[i] ? 255 : 0; + Pix* pix = tesseract_->pix_binary(); + int width = pixGetWidth(pix); + int height = pixGetHeight(pix); + l_uint32* data = pixGetData(pix); + fprintf(fp, "P5 %d %d 255\n", width, height); + for (int y = 0; y < height; ++y, data += pixGetWpl(pix)) { + for (int x = 0; x < width; ++x) { + uinT8 b = GET_DATA_BIT(data, x) ? 0 : 255; fwrite(&b, 1, 1, fp); } } fclose(fp); } +// Placeholder for call to Cube and test that the input data is correct. 
+// reskew is the direction of baselines in the skewed image in +// normalized (cos theta, sin theta) form, so (0.866, 0.5) would represent +// a 30 degree anticlockwise skew. +int CubeAPITest(Boxa* boxa_blocks, Pixa* pixa_blocks, + Boxa* boxa_words, Pixa* pixa_words, + const FCOORD& reskew, Pix* page_pix, + PAGE_RES* page_res) { + int block_count = boxaGetCount(boxa_blocks); + ASSERT_HOST(block_count == pixaGetCount(pixa_blocks)); + // Write each block to the current directory as junk_write_display.nnn.png. + for (int i = 0; i < block_count; ++i) { + Pix* pix = pixaGetPix(pixa_blocks, i, L_CLONE); + pixDisplayWrite(pix, 1); + } + int word_count = boxaGetCount(boxa_words); + ASSERT_HOST(word_count == pixaGetCount(pixa_words)); + int pr_word = 0; + PAGE_RES_IT page_res_it(page_res); + for (page_res_it.restart_page(); page_res_it.word () != NULL; + page_res_it.forward(), ++pr_word) { + WERD_RES *word = page_res_it.word(); + WERD_CHOICE* choice = word->best_choice; + // Write the first 100 words to files names wordims/.tif. + if (pr_word < 100) { + STRING filename("wordims/"); + if (choice != NULL) { + filename += choice->unichar_string(); + } else { + char numbuf[32]; + filename += "unclassified"; + snprintf(numbuf, 32, "%03d", pr_word); + filename += numbuf; + } + filename += ".tif"; + Pix* pix = pixaGetPix(pixa_words, pr_word, L_CLONE); + pixWrite(filename.string(), pix, IFF_TIFF_G4); + } + } + ASSERT_HOST(pr_word == word_count); + return 0; +} + +// Runs page layout analysis in the mode set by SetPageSegMode. +// May optionally be called prior to Recognize to get access to just +// the page layout results. Returns an iterator to the results. +// Returns NULL on error or an empty page. +// The returned iterator must be deleted after use. +// WARNING! 
This class points to data held within the TessBaseAPI class, and +// therefore can only be used while the TessBaseAPI class still exists and +// has not been subjected to a call of Init, SetImage, Recognize, Clear, End +// DetectOS, or anything else that changes the internal PAGE_RES. +PageIterator* TessBaseAPI::AnalyseLayout() { + if (FindLines() == 0) { + if (block_list_->empty()) + return NULL; // The page was empty. + page_res_ = new PAGE_RES(block_list_, NULL); + // TODO(rays) Support transmission of image scaling and resolution through + // ImageThresholder, so it can be used here in place of literal 1, 300. + return new PageIterator(page_res_, tesseract_, 1, 300, + rect_left_, rect_top_, rect_width_, rect_height_); + } + return NULL; +} + // Recognize the tesseract global image and return the result as Tesseract // internal structures. -int TessBaseAPI::Recognize(struct ETEXT_STRUCT* monitor) { +int TessBaseAPI::Recognize(ETEXT_DESC* monitor) { if (tesseract_ == NULL) return -1; - if (thresholder_ == NULL || thresholder_->IsEmpty()) { - tprintf("Please call SetImage before attempting recognition."); - return -1; - } - if (page_res_ != NULL) - ClearResults(); if (FindLines() != 0) return -1; - if (tesseract_->tessedit_resegment_from_boxes) - tesseract_->apply_boxes(*input_file_, block_list_); - tesseract_->SetBlackAndWhitelist(); + if (page_res_ != NULL) + delete page_res_; - page_res_ = new PAGE_RES(block_list_); - int result = 0; - if (interactive_mode) { - tesseract_->pgeditor_main(block_list_); + tesseract_->SetBlackAndWhitelist(); + recognition_done_ = true; + if (tesseract_->tessedit_resegment_from_line_boxes) + page_res_ = tesseract_->ApplyBoxes(*input_file_, true, block_list_); + else if (tesseract_->tessedit_resegment_from_boxes) + page_res_ = tesseract_->ApplyBoxes(*input_file_, false, block_list_); + else + page_res_ = new PAGE_RES(block_list_, &tesseract_->prev_word_best_choice_); + if (tesseract_->tessedit_make_boxes_from_boxes) { + 
tesseract_->CorrectClassifyWords(page_res_); + return 0; + } + if (tesseract_->interactive_mode) { + tesseract_->pgeditor_main(rect_width_, rect_height_, page_res_); // The page_res is invalid after an interactive session, so cleanup // in a way that lets us continue to the next page without crashing. delete page_res_; page_res_ = NULL; return -1; } else if (tesseract_->tessedit_train_from_boxes) { - apply_box_training(*output_file_, block_list_); - } else if (tesseract_->global_tessedit_ambigs_training) { - FILE *ambigs_output_file = tesseract_->init_ambigs_training(*input_file_); + tesseract_->ApplyBoxTraining(*output_file_, page_res_); + } else if (tesseract_->tessedit_ambigs_training) { + FILE *training_output_file = tesseract_->init_recog_training(*input_file_); // OCR the page segmented into words by tesseract. - tesseract_->ambigs_training_segmented( - *input_file_, page_res_, monitor, ambigs_output_file); - fclose(ambigs_output_file); + tesseract_->recog_training_segmented( + *input_file_, page_res_, monitor, training_output_file); + fclose(training_output_file); } else { // Now run the main recognition. - // Running base tesseract if the inttemp for the current language loaded. - if (tesseract_->inttemp_loaded_) { - tesseract_->recog_all_words(page_res_, monitor); - } + tesseract_->recog_all_words(page_res_, monitor, NULL, NULL, 0); } - return result; + return 0; } // Tests the chopper by exhaustively running chop_one_blob. 
-int TessBaseAPI::RecognizeForChopTest(struct ETEXT_STRUCT* monitor) { +int TessBaseAPI::RecognizeForChopTest(ETEXT_DESC* monitor) { if (tesseract_ == NULL) return -1; if (thresholder_ == NULL || thresholder_->IsEmpty()) { @@ -616,78 +550,43 @@ int TessBaseAPI::RecognizeForChopTest(struct ETEXT_STRUCT* monitor) { if (FindLines() != 0) return -1; // Additional conditions under which chopper test cannot be run - if (tesseract_->tessedit_train_from_boxes_word_level || interactive_mode) - return -1; - ASSERT_HOST(tesseract_->inttemp_loaded_); + if (tesseract_->interactive_mode) return -1; - page_res_ = new PAGE_RES(block_list_); + recognition_done_ = true; - PAGE_RES_IT page_res_it(page_res_); + page_res_ = new PAGE_RES(block_list_, &(tesseract_->prev_word_best_choice_)); - tesseract_->tess_matcher = &Tesseract::tess_default_matcher; - tesseract_->tess_tester = NULL; - tesseract_->tess_trainer = NULL; + PAGE_RES_IT page_res_it(page_res_); while (page_res_it.word() != NULL) { WERD_RES *word_res = page_res_it.word(); - WERD *word = word_res->word; - if (word->cblob_list()->empty()) { - page_res_it.forward(); - continue; - } - WERD *bln_word = make_bln_copy(word, page_res_it.row()->row, - page_res_it.block()->block, - word_res->x_height, &word_res->denorm); - ASSERT_HOST(!bln_word->blob_list()->empty()); - TWERD *tessword = make_tess_word(bln_word, NULL); - if (tessword->blobs == NULL) { - make_tess_word(bln_word, NULL); - } - TBLOB *pblob; - TBLOB *blob; - init_match_table(); - BLOB_CHOICE_LIST *match_result; - BLOB_CHOICE_LIST_VECTOR *char_choices = new BLOB_CHOICE_LIST_VECTOR(); - tesseract_->tess_denorm = &word_res->denorm; - tesseract_->tess_word = bln_word; - ASSERT_HOST(tessword->blobs != NULL); - for (blob = tessword->blobs, pblob = NULL; - blob != NULL; blob = blob->next) { - match_result = tesseract_->classify_blob(pblob, blob, blob->next, NULL, - "chop_word:", Green); - if (match_result == NULL) - tprintf("Null classifier output!\n"); - 
tesseract_->modify_blob_choice(match_result, 0); - ASSERT_HOST(!match_result->empty()); - *char_choices += match_result; - pblob = blob; - } - inT32 blob_number; - SEAMS seam_list = start_seam_list(tessword->blobs); - int right_chop_index = 0; - while (tesseract_->chop_one_blob(tessword, char_choices, - &blob_number, &seam_list, - &right_chop_index)) { - } - - word_res->best_choice = new WERD_CHOICE(); - word_res->raw_choice = new WERD_CHOICE(); - word_res->best_choice->make_bad(); - word_res->raw_choice->make_bad(); - tesseract_->getDict().permute_characters(*char_choices, 1000.0, - word_res->best_choice, - word_res->raw_choice); - - word_res->outword = make_ed_word(tessword, bln_word); + tesseract_->MaximallyChopWord(page_res_it.block()->block, + page_res_it.row()->row, + word_res); page_res_it.forward(); } return 0; } +// Get an iterator to the results of LayoutAnalysis and/or Recognize. +// The returned iterator must be deleted after use. +// WARNING! This class points to data held within the TessBaseAPI class, and +// therefore can only be used while the TessBaseAPI class still exists and +// has not been subjected to a call of Init, SetImage, Recognize, Clear, End +// DetectOS, or anything else that changes the internal PAGE_RES. +ResultIterator* TessBaseAPI::GetIterator() { + if (tesseract_ == NULL || page_res_ == NULL) + return NULL; + // TODO(rays) Support transmission of image scaling and resolution through + // ImageThresholder, so it can be used here in place of literal 1, 300. + return new ResultIterator(page_res_, tesseract_, 1, 300, + rect_left_, rect_top_, rect_width_, rect_height_); +} + // Make a text string from the internal data structures. 
char* TessBaseAPI::GetUTF8Text() { if (tesseract_ == NULL || - (page_res_ == NULL && Recognize(NULL) < 0)) + (!recognition_done_ && Recognize(NULL) < 0)) return NULL; int total_length = TextLength(NULL); PAGE_RES_IT page_res_it(page_res_); @@ -755,9 +654,9 @@ static void AddBoxTohOCR(const TBOX& box, int image_height, STRING* hocr_str) { // Make a HTML-formatted string with hOCR markup from the internal // data structures. +// page_number is 0-based but will appear in the output as 1-based. // STL removed from original patch submission and refactored by rays. -// page_id is 1-based and will appear in the output. -char* TessBaseAPI::GetHOCRText(int page_id) { +char* TessBaseAPI::GetHOCRText(int page_number) { if (tesseract_ == NULL || (page_res_ == NULL && Recognize(NULL) < 0)) return NULL; @@ -768,6 +667,7 @@ char* TessBaseAPI::GetHOCRText(int page_id) { BLOCK_RES *block = NULL; // current row BLOCK *real_block = NULL; int lcnt = 1, bcnt = 1, wcnt = 1; + int page_id = page_number + 1; // hOCR uses 1-based page numbers. STRING hocr_str; @@ -782,13 +682,12 @@ char* TessBaseAPI::GetHOCRText(int page_id) { for (page_res_it.restart_page(); page_res_it.word () != NULL; page_res_it.forward()) { - if (block != page_res_it.block ()) { - + if (block != page_res_it.block()) { if (block != NULL) { hocr_str += "\n

\n\n"; } - block = page_res_it.block (); // current row + block = page_res_it.block(); // current row real_block = block->block; real_row = NULL; row = NULL; @@ -798,14 +697,13 @@ char* TessBaseAPI::GetHOCRText(int page_id) { AddBoxTohOCR(real_block->bounding_box(), image_height_, &hocr_str); hocr_str += "\n

\n"; } - if (row != page_res_it.row ()) { - + if (row != page_res_it.row()) { if (row != NULL) { hocr_str += "\n"; } prev_row = real_row; - row = page_res_it.row (); // current row + row = page_res_it.row(); // current row real_row = row->row; if (prev_row != NULL && @@ -832,18 +730,18 @@ char* TessBaseAPI::GetHOCRText(int page_id) { if (word->bold > 0) hocr_str += ""; if (word->italic > 0) - hocr_str += ""; - int i; - // escape special characters - for (i = 0; - choice->unichar_string()[i] != '\0'; - i++) { - if (choice->unichar_string()[i] == '<') { hocr_str += "<"; } - else if (choice->unichar_string()[i] == '>') { hocr_str += ">"; } - else if (choice->unichar_string()[i] == '&') { hocr_str += "&"; } - else if (choice->unichar_string()[i] == '"') { hocr_str += """; } - else if (choice->unichar_string()[i] == '\'') { hocr_str += "'"; } - else { hocr_str += choice->unichar_string()[i]; } + hocr_str += ""; + int i; + // escape special characters + for (i = 0; + choice->unichar_string()[i] != '\0'; + i++) { + if (choice->unichar_string()[i] == '<') { hocr_str += "<"; } + else if (choice->unichar_string()[i] == '>') { hocr_str += ">"; } + else if (choice->unichar_string()[i] == '&') { hocr_str += "&"; } + else if (choice->unichar_string()[i] == '"') { hocr_str += """; } + else if (choice->unichar_string()[i] == '\'') { hocr_str += "'"; } + else { hocr_str += choice->unichar_string()[i]; } } if (word->italic > 0) hocr_str += ""; @@ -854,10 +752,10 @@ char* TessBaseAPI::GetHOCRText(int page_id) { hocr_str += " "; } } - if (block != NULL) - hocr_str += "\n

\n\n"; - hocr_str += "\n"; - + if (block != NULL) + hocr_str += "\n

\n\n"; + hocr_str += "\n"; + char *ret = new char[hocr_str.length() + 1]; strcpy(ret, hocr_str.string()); return ret; @@ -872,30 +770,25 @@ static int ConvertWordToBoxText(WERD_RES *word, int page_number, char* word_str) { // Copy the output word and denormalize it back to image coords. - WERD copy_outword; - copy_outword = *(word->outword); - copy_outword.baseline_denormalise(&word->denorm); - PBLOB_IT blob_it; - blob_it.set_to_list(copy_outword.blob_list()); - int length = copy_outword.blob_list()->length(); + // Can box_word be NULL? + ASSERT_HOST(word->box_word != NULL); + int length = word->box_word->length(); int output_size = 0; if (length > 0) { for (int index = 0, offset = 0; index < length; - offset += word->best_choice->unichar_lengths()[index++], - blob_it.forward()) { - PBLOB* blob = blob_it.data(); - TBOX blob_box = blob->bounding_box(); + offset += word->best_choice->unichar_lengths()[index++]) { + TBOX blob_box = word->box_word->BlobBox(index); if (word->tess_failed || blob_box.left() < 0 || blob_box.right() > image_width || blob_box.bottom() < 0 || blob_box.top() > image_height) { // Bounding boxes can be illegal when tess fails on a word. - blob_box = word->word->bounding_box(); // Use original word as backup. - tprintf("Using substitute bounding box at (%d,%d)->(%d,%d)\n", - blob_box.left(), blob_box.bottom(), - blob_box.right(), blob_box.top()); + blob_box -= word->word->bounding_box(); // Intersect with original. + if (blob_box.null_box()) { + blob_box = word->word->bounding_box(); // Use original as backup. + } } // A single classification unit can be composed of several UTF-8 @@ -919,14 +812,24 @@ static int ConvertWordToBoxText(WERD_RES *word, return output_size; } -// Multiplier for max expected textlength assumes typically 5 numbers @ -// (5 digits and a space) plus the newline = 5*(5+1)+1. Add to this the -// orginal UTF8 characters, and one kMaxCharsPerChar. 
-const int kCharsPerChar = 31; -// A maximal single box could occupy 5 numbers at 20 digits (for 64 bit) and a -// space plus the newline 5*(20+1)+1 and the maximum length of a UNICHAR. +// The 5 numbers output for each box (the usual 4 and a page number.) +const int kNumbersPerBlob = 5; +// The number of bytes taken by each number. Since we use inT16 for ICOORD, +// assume only 5 digits max. +const int kBytesPerNumber = 5; +// Multiplier for max expected textlength assumes (kBytesPerNumber + space) +// * kNumbersPerBlob plus the newline. Add to this the +// original UTF8 characters, and one kMaxBytesPerLine for safety. +const int kBytesPerBlob = kNumbersPerBlob * (kBytesPerNumber + 1) + 1; +const int kBytesPerBoxFileLine = (kBytesPerNumber + 1) * kNumbersPerBlob + 1; +// Max bytes in the decimal representation of inT64. +const int kBytesPer64BitNumber = 20; +// A maximal single box could occupy kNumbersPerBlob numbers at +// kBytesPer64BitNumber digits (if someone sneaks in a 64 bit value) and a +// space plus the newline and the maximum length of a UNICHAR. // Test against this on each iteration for safety. -const int kMaxCharsPerChar = 106 + UNICHAR_LEN; +const int kMaxBytesPerLine = kNumbersPerBlob * (kBytesPer64BitNumber + 1) + 1 + + UNICHAR_LEN; // The recognized text is returned as a char* which is coded // as a UTF8 box file and must be freed with the delete [] operator. 
@@ -934,11 +837,12 @@ const int kMaxCharsPerChar = 106 + UNICHAR_LEN; char* TessBaseAPI::GetBoxText(int page_number) { int bottom = image_height_ - (rect_top_ + rect_height_); if (tesseract_ == NULL || - (page_res_ == NULL && Recognize(NULL) < 0)) + (!recognition_done_ && Recognize(NULL) < 0)) return NULL; int blob_count; int utf8_length = TextLength(&blob_count); - int total_length = blob_count*kCharsPerChar + utf8_length + kMaxCharsPerChar; + int total_length = blob_count * kBytesPerBoxFileLine + utf8_length + + kMaxBytesPerLine; PAGE_RES_IT page_res_it(page_res_); char* result = new char[total_length]; char* ptr = result; @@ -949,7 +853,7 @@ char* TessBaseAPI::GetBoxText(int page_number) { image_width_, image_height_, page_number, ptr); // Just in case... - if (ptr - result + kMaxCharsPerChar > total_length) + if (ptr - result + kMaxBytesPerLine > total_length) break; } *ptr = '\0'; @@ -972,7 +876,7 @@ const int kLatinChs[] = { // and must be freed with the delete [] operator. char* TessBaseAPI::GetUNLVText() { if (tesseract_ == NULL || - (page_res_ == NULL && Recognize(NULL) < 0)) + (!recognition_done_ && Recognize(NULL) < 0)) return NULL; bool tilde_crunch_written = false; bool last_char_was_newline = true; @@ -1012,10 +916,6 @@ char* TessBaseAPI::GetUNLVText() { } else { // NORMAL PROCESSING of non tilde crunched words. tilde_crunch_written = false; - - if (word->word->flag(W_REP_CHAR) && tessedit_consistent_reps) - ensure_rep_chars_are_consistent(word); - tesseract_->set_unlv_suspects(word); const char* wordstr = word->best_choice->unichar_string().string(); const STRING& lengths = word->best_choice->unichar_lengths(); @@ -1090,7 +990,7 @@ int TessBaseAPI::MeanTextConf() { // Returns an array of all word confidences, terminated by -1. 
int* TessBaseAPI::AllWordConfidences() { if (tesseract_ == NULL || - (page_res_ == NULL && Recognize(NULL) < 0)) + (!recognition_done_ && Recognize(NULL) < 0)) return NULL; int n_word = 0; PAGE_RES_IT res_it(page_res_); @@ -1120,7 +1020,6 @@ void TessBaseAPI::Clear() { if (thresholder_ != NULL) thresholder_->Clear(); ClearResults(); - page_image.destroy(); } // Close down tesseract and free up all memory. End() is equivalent to @@ -1143,8 +1042,15 @@ void TessBaseAPI::End() { if (tesseract_ != NULL) { tesseract_->end_tesseract(); delete tesseract_; + if (osd_tesseract_ == tesseract_) + osd_tesseract_ = NULL; tesseract_ = NULL; } + if (osd_tesseract_ != NULL) { + osd_tesseract_->end_tesseract(); + delete osd_tesseract_; + osd_tesseract_ = NULL; + } if (input_file_ != NULL) { delete input_file_; input_file_ = NULL; @@ -1197,13 +1103,20 @@ bool TessBaseAPI::GetTextDirection(int* out_offset, float* out_slope) { return true; } -// Set the letter_is_okay function to point somewhere else. +// Sets Dict::letter_is_okay_ function to point to the given function. void TessBaseAPI::SetDictFunc(DictFunc f) { if (tesseract_ != NULL) { tesseract_->getDict().letter_is_okay_ = f; } } +// Sets Dict::probability_in_context_ function to point to the given function. +void TessBaseAPI::SetProbabilityInContextFunc(ProbabilityInContextFunc f) { + if (tesseract_ != NULL) { + tesseract_->getDict().probability_in_context_ = f; + } +} + // Common code for setting the image. bool TessBaseAPI::InternalSetImage() { if (tesseract_ == NULL) { @@ -1216,57 +1129,83 @@ bool TessBaseAPI::InternalSetImage() { return true; } -// Run the thresholder to make the thresholded image. If pix is not NULL, -// the source is thresholded to pix instead of the internal IMAGE. +// Run the thresholder to make the thresholded image, returned in pix, +// which must not be NULL. *pix must be initialized to NULL, or point +// to an existing pixDestroyable Pix. 
+// The usual argument to Threshold is Tesseract::mutable_pix_binary(). void TessBaseAPI::Threshold(Pix** pix) { -#ifdef HAVE_LIBLEPT - if (pix != NULL) - thresholder_->ThresholdToPix(pix); - else - thresholder_->ThresholdToIMAGE(&page_image); -#else - thresholder_->ThresholdToIMAGE(&page_image); -#endif + ASSERT_HOST(pix != NULL); + if (!thresholder_->IsBinary()) { + tesseract_->set_pix_grey(thresholder_->GetPixRectGrey()); + } + if (*pix != NULL) + pixDestroy(pix); + thresholder_->ThresholdToPix(pix); thresholder_->GetImageSizes(&rect_left_, &rect_top_, &rect_width_, &rect_height_, &image_width_, &image_height_); - threshold_done_ = true; } // Find lines from the image making the BLOCK_LIST. int TessBaseAPI::FindLines() { + if (thresholder_ == NULL || thresholder_->IsEmpty()) { + tprintf("Please call SetImage before attempting recognition."); + return -1; + } + if (recognition_done_) + ClearResults(); if (!block_list_->empty()) { return 0; } if (tesseract_ == NULL) { tesseract_ = new Tesseract; - tesseract_->InitAdaptiveClassifier(); + tesseract_->InitAdaptiveClassifier(false); } -#ifdef HAVE_LIBLEPT if (tesseract_->pix_binary() == NULL) Threshold(tesseract_->mutable_pix_binary()); -#endif - if (!threshold_done_) - Threshold(NULL); + if (tesseract_->ImageWidth() > MAX_INT16 || + tesseract_->ImageHeight() > MAX_INT16) { + tprintf("Image too large: (%d, %d)\n", + tesseract_->ImageWidth(), tesseract_->ImageHeight()); + return -1; + } - if (tesseract_->SegmentPage(input_file_, &page_image, block_list_) < 0) + Tesseract* osd_tess = osd_tesseract_; + OSResults osr; + if (PSM_OSD_ENABLED(tesseract_->tessedit_pageseg_mode) && osd_tess == NULL) { + if (strcmp(language_->string(), "osd") == 0) { + osd_tess = tesseract_; + } else { + osd_tesseract_ = new Tesseract; + if (osd_tesseract_->init_tesseract( + datapath_->string(), NULL, "osd", OEM_TESSERACT_ONLY, + NULL, 0, false) == 0) { + osd_tess = osd_tesseract_; + } else { + tprintf("Warning: Auto orientation and script 
detection requested," + " but osd language failed to load\n"); + delete osd_tesseract_; + osd_tesseract_ = NULL; + } + } + } + + if (tesseract_->SegmentPage(input_file_, block_list_, osd_tess, &osr) < 0) return -1; - ASSERT_HOST(page_image.get_xsize() == rect_width_ || - page_image.get_xsize() == rect_width_ - 1); - ASSERT_HOST(page_image.get_ysize() == rect_height_ || - page_image.get_ysize() == rect_height_ - 1); return 0; } // Delete the pageres and clear the block list ready for a new page. void TessBaseAPI::ClearResults() { - threshold_done_ = false; - if (tesseract_ != NULL) + if (tesseract_ != NULL) { tesseract_->Clear(); + tesseract_->ResetFeaturesHaveBeenExtracted(); + } if (page_res_ != NULL) { delete page_res_; page_res_ = NULL; } + recognition_done_ = false; if (block_list_ == NULL) block_list_ = new BLOCK_LIST; else @@ -1309,12 +1248,73 @@ bool TessBaseAPI::DetectOS(OSResults* osr) { if (tesseract_ == NULL) return false; ClearResults(); - Threshold(NULL); + if (tesseract_->pix_binary() == NULL) + Threshold(tesseract_->mutable_pix_binary()); if (input_file_ == NULL) input_file_ = new STRING(kInputFile); return orientation_and_script_detection(*input_file_, osr, tesseract_); } +void TessBaseAPI::set_min_orientation_margin(double margin) { + tesseract_->min_orientation_margin.set_value(margin); +} + +// Return text orientation of each block as determined in an earlier page layout +// analysis operation. Orientation is returned as the number of ccw 90-degree +// rotations (in [0..3]) required to make the text in the block upright +// (readable). Note that this may not necessary be the block orientation +// preferred for recognition (such as the case of vertical CJK text). +// +// Also returns whether the text in the block is believed to have vertical +// writing direction (when in an upright page orientation). +// +// The returned array is of length equal to the number of text blocks, which may +// be less than the total number of blocks. 
The ordering is intended to be +// consistent with GetTextLines(). +void TessBaseAPI::GetBlockTextOrientations(int** block_orientation, + bool** vertical_writing) { + delete[] *block_orientation; + *block_orientation = NULL; + delete[] *vertical_writing; + *vertical_writing = NULL; + BLOCK_IT block_it(block_list_); + + block_it.move_to_first(); + int num_blocks = 0; + for (block_it.mark_cycle_pt(); !block_it.cycled_list(); block_it.forward()) { + if (!block_it.data()->poly_block()->IsText()) { + continue; + } + ++num_blocks; + } + if (!num_blocks) { + tprintf("WARNING: Found no blocks\n"); + return; + } + *block_orientation = new int[num_blocks]; + *vertical_writing = new bool[num_blocks]; + block_it.move_to_first(); + int i = 0; + for (block_it.mark_cycle_pt(); !block_it.cycled_list(); + block_it.forward()) { + if (!block_it.data()->poly_block()->IsText()) { + continue; + } + FCOORD re_rotation = block_it.data()->re_rotation(); + float re_theta = re_rotation.angle(); + FCOORD classify_rotation = block_it.data()->classify_rotation(); + float classify_theta = classify_rotation.angle(); + double rot_theta = - (re_theta - classify_theta) * 2.0 / PI; + if (rot_theta < 0) rot_theta += 4; + int num_rotations = static_cast(rot_theta + 0.5); + (*block_orientation)[i] = num_rotations; + // The classify_rotation is non-zero only if the text has vertical + // writing direction. + (*vertical_writing)[i] = classify_rotation.y() != 0.0f; + ++i; + } +} + // ____________________________________________________________________________ // Ocropus add-ons. 
@@ -1334,10 +1334,10 @@ void TessBaseAPI::DeleteBlockList(BLOCK_LIST *block_list) { } -static ROW *make_tess_ocrrow(float baseline, - float xheight, - float descender, - float ascender) { +ROW *TessBaseAPI::MakeTessOCRRow(float baseline, + float xheight, + float descender, + float ascender) { inT32 xstarts[] = {-32000}; double quad_coeffs[] = {0, 0, baseline}; return new ROW(1, @@ -1350,72 +1350,62 @@ static ROW *make_tess_ocrrow(float baseline, 0); } -// Almost a copy of make_tess_row() from ccmain/tstruct.cpp. -static void fill_dummy_row(float baseline, float xheight, - float descender, float ascender, - TEXTROW* tessrow) { - tessrow->baseline.segments = 1; - tessrow->baseline.xstarts[0] = -32767; - tessrow->baseline.xstarts[1] = 32767; - tessrow->baseline.quads[0].a = 0; - tessrow->baseline.quads[0].b = 0; - tessrow->baseline.quads[0].c = bln_baseline_offset; - tessrow->xheight.segments = 1; - tessrow->xheight.xstarts[0] = -32767; - tessrow->xheight.xstarts[1] = 32767; - tessrow->xheight.quads[0].a = 0; - tessrow->xheight.quads[0].b = 0; - tessrow->xheight.quads[0].c = bln_baseline_offset + bln_x_height; - tessrow->lineheight = bln_x_height; - tessrow->ascrise = bln_x_height * (ascender - (xheight + baseline)) / xheight; - tessrow->descdrop = bln_x_height * (descender - baseline) / xheight; -} - - -// Return a TBLOB * from the whole page_image. -// To be freed later with free_blob(). -TBLOB *make_tesseract_blob(float baseline, float xheight, - float descender, float ascender) { - BLOCK *block = new BLOCK("a character", - TRUE, - 0, 0, - 0, 0, - page_image.get_xsize(), - page_image.get_ysize()); +// Creates a TBLOB* from the whole pix. 
+TBLOB *TessBaseAPI::MakeTBLOB(Pix *pix) { + int width = pixGetWidth(pix); + int height = pixGetHeight(pix); + BLOCK block("a character", TRUE, 0, 0, 0, 0, width, height); // Create C_BLOBs from the page - extract_edges(NULL, &page_image, &page_image, - ICOORD(page_image.get_xsize(), page_image.get_ysize()), - block); + extract_edges(pix, &block); - // Create one PBLOB from all C_BLOBs - C_BLOB_LIST *list = block->blob_list(); + // Merge all C_BLOBs + C_BLOB_LIST *list = block.blob_list(); C_BLOB_IT c_blob_it(list); - PBLOB *pblob = new PBLOB; // will be (hopefully) deleted by the pblob_list - for (c_blob_it.mark_cycle_pt(); - !c_blob_it.cycled_list(); + if (c_blob_it.empty()) + return NULL; + // Move all the outlines to the first blob. + C_OUTLINE_IT ol_it(c_blob_it.data()->out_list()); + for (c_blob_it.forward(); + !c_blob_it.at_first(); c_blob_it.forward()) { C_BLOB *c_blob = c_blob_it.data(); - PBLOB c_as_p(c_blob, baseline + xheight); - merge_blobs(pblob, &c_as_p); + ol_it.add_list_after(c_blob->out_list()); } - PBLOB_LIST *pblob_list = new PBLOB_LIST; // will be deleted by the word - PBLOB_IT pblob_it(pblob_list); - pblob_it.add_after_then_move(pblob); - - // Normalize PBLOB - WERD word(pblob_list, 0, " "); - ROW *row = make_tess_ocrrow(baseline, xheight, descender, ascender); - word.baseline_normalise(row); - delete row; + // Convert the first blob to the output TBLOB. + return TBLOB::PolygonalCopy(c_blob_it.data()); +} - // Create a TBLOB from PBLOB - return make_tess_blob(pblob, /* flatten: */ TRUE); +// This method baseline normalizes a TBLOB in-place. The input row is used +// for normalization. The denorm is an optional parameter in which the +// normalization-antidote is returned. +void TessBaseAPI::NormalizeTBLOB(TBLOB *tblob, ROW *row, + bool numeric_mode, DENORM *denorm) { + TWERD word; + word.blobs = tblob; + word.Normalize(row, row->x_height(), numeric_mode, denorm); + word.blobs = NULL; +} + +// Return a TBLOB * from the whole pix. 
+// To be freed later with delete. +TBLOB *make_tesseract_blob(float baseline, float xheight, + float descender, float ascender, + bool numeric_mode, Pix* pix) { + TBLOB *tblob = TessBaseAPI::MakeTBLOB(pix); + + // Normalize TBLOB + ROW *row = + TessBaseAPI::MakeTessOCRRow(baseline, xheight, descender, ascender); + TessBaseAPI::NormalizeTBLOB(tblob, row, numeric_mode, NULL); + delete row; + return tblob; } // Adapt to recognize the current image as the given character. -// The image must be preloaded and be just an image of a single character. +// The image must be preloaded into pix_binary_ and be just an image +// of a single character. void TessBaseAPI::AdaptToCharacter(const char *unichar_repr, int length, float baseline, @@ -1423,12 +1413,9 @@ void TessBaseAPI::AdaptToCharacter(const char *unichar_repr, float descender, float ascender) { UNICHAR_ID id = tesseract_->unicharset.unichar_to_id(unichar_repr, length); - LINE_STATS LineStats; - TEXTROW row; - fill_dummy_row(baseline, xheight, descender, ascender, &row); - GetLineStatsFromRow(&row, &LineStats); - - TBLOB *blob = make_tesseract_blob(baseline, xheight, descender, ascender); + TBLOB *blob = make_tesseract_blob(baseline, xheight, descender, ascender, + tesseract_->classify_bln_numeric_mode, + tesseract_->pix_binary()); float threshold; UNICHAR_ID best_class = 0; float best_rating = -100; @@ -1436,7 +1423,9 @@ void TessBaseAPI::AdaptToCharacter(const char *unichar_repr, // Classify to get a raw choice. 
BLOB_CHOICE_LIST choices; - tesseract_->AdaptiveClassifier(blob, NULL, &row, &choices, NULL); + DENORM denorm; + tesseract_->set_denorm(&denorm); + tesseract_->AdaptiveClassifier(blob, &choices, NULL); BLOB_CHOICE_IT choice_it; choice_it.set_to_list(&choices); for (choice_it.mark_cycle_pt(); !choice_it.cycled_list(); @@ -1448,13 +1437,13 @@ void TessBaseAPI::AdaptToCharacter(const char *unichar_repr, } if (id == best_class) { - threshold = matcher_good_threshold; + threshold = tesseract_->matcher_good_threshold; } else { /* the blob was incorrectly classified - find the rating threshold needed to create a template which will correct the error with some margin. However, don't waste time trying to make templates which are too tight. */ - threshold = tesseract_->GetBestRatingFor(blob, &LineStats, id); + threshold = tesseract_->GetBestRatingFor(blob, id); threshold *= .9; const float max_threshold = .125; const float min_threshold = .02; @@ -1469,22 +1458,24 @@ void TessBaseAPI::AdaptToCharacter(const char *unichar_repr, } if (blob->outlines) - tesseract_->AdaptToChar(blob, &LineStats, id, threshold); - free_blob(blob); + tesseract_->AdaptToChar(blob, id, threshold); + delete blob; } PAGE_RES* TessBaseAPI::RecognitionPass1(BLOCK_LIST* block_list) { - PAGE_RES *page_res = new PAGE_RES(block_list); - tesseract_->recog_all_words(page_res, NULL, NULL, 1); + PAGE_RES *page_res = new PAGE_RES(block_list, + &(tesseract_->prev_word_best_choice_)); + tesseract_->recog_all_words(page_res, NULL, NULL, NULL, 1); return page_res; } PAGE_RES* TessBaseAPI::RecognitionPass2(BLOCK_LIST* block_list, PAGE_RES* pass1_result) { if (!pass1_result) - pass1_result = new PAGE_RES(block_list); - tesseract_->recog_all_words(pass1_result, NULL, NULL, 2); + pass1_result = new PAGE_RES(block_list, + &(tesseract_->prev_word_best_choice_)); + tesseract_->recog_all_words(pass1_result, NULL, NULL, NULL, 2); return pass1_result; } @@ -1517,7 +1508,7 @@ static void add_space(TESS_CHAR_IT* it) { static 
float rating_to_cost(float rating) { - rating = 100 + 5*rating; + rating = 100 + rating; // cuddled that to save from coverage profiler // (I have never seen ratings worse than -100, // but the check won't hurt) @@ -1536,54 +1527,19 @@ static void extract_result(TESS_CHAR_IT* out, WERD_RES *word = page_res_it.word(); const char *str = word->best_choice->unichar_string().string(); const char *len = word->best_choice->unichar_lengths().string(); + TBOX real_rect = word->word->bounding_box(); if (word_count) add_space(out); - TBOX bln_rect; - PBLOB_LIST *blobs = word->outword->blob_list(); - PBLOB_IT it(blobs); int n = strlen(len); - TBOX** boxes_to_fix = new TBOX*[n]; for (int i = 0; i < n; i++) { - PBLOB *blob = it.data(); - TBOX current = blob->bounding_box(); - bln_rect = bln_rect.bounding_union(current); - TESS_CHAR *tc = new TESS_CHAR(rating_to_cost(word->best_choice->certainty()), + TESS_CHAR *tc = new TESS_CHAR(rating_to_cost(word->best_choice->rating()), str, *len); - tc->box = current; - boxes_to_fix[i] = &tc->box; - + tc->box = real_rect.intersection(word->box_word->BlobBox(i)); out->add_after_then_move(tc); - it.forward(); - str += *len; + str += *len; len++; } - - // Find the word bbox before normalization. - // Here we can't use the C_BLOB bboxes directly, - // since connected letters are not yet cut. - TBOX real_rect = word->word->bounding_box(); - - // Denormalize boxes by transforming the bbox of the whole bln word - // into the denorm bbox (`real_rect') of the whole word. 
- double x_stretch = static_cast(real_rect.width()) - / bln_rect.width(); - double y_stretch = static_cast(real_rect.height()) - / bln_rect.height(); - for (int j = 0; j < n; j++) { - TBOX *box = boxes_to_fix[j]; - int x0 = static_cast(real_rect.left() + - x_stretch * (box->left() - bln_rect.left()) + 0.5); - int x1 = static_cast(real_rect.left() + - x_stretch * (box->right() - bln_rect.left()) + 0.5); - int y0 = static_cast(real_rect.bottom() + - y_stretch * (box->bottom() - bln_rect.bottom()) + 0.5); - int y1 = static_cast(real_rect.bottom() + - y_stretch * (box->top() - bln_rect.bottom()) + 0.5); - *box = TBOX(ICOORD(x0, y0), ICOORD(x1, y1)); - } - delete [] boxes_to_fix; - page_res_it.forward(); word_count++; } @@ -1637,91 +1593,77 @@ int TessBaseAPI::TesseractExtractResult(char** text, return n; } -// This method returns the features associated with the current image. -// Make sure setimage has been called before calling this method. -void TessBaseAPI::GetFeatures(INT_FEATURE_ARRAY int_features, - int* num_features) { - if (page_res_ != NULL) - ClearResults(); - if (!threshold_done_) - Threshold(NULL); - // We have only one block, which is of the size of the page. - BLOCK_LIST* blocks = new BLOCK_LIST; - BLOCK *block = new BLOCK("", // filename. - TRUE, // proportional. - 0, // kerning. - 0, // spacing. - 0, // Left. - 0, // Bottom. - page_image.get_xsize(), // Right. - page_image.get_ysize()); // Top. - ICOORD bleft, tright; - block->bounding_box (bleft, tright); - - BLOCK_IT block_it_add = blocks; - block_it_add.add_to_end(block); - - ICOORD page_tr(page_image.get_xsize(), page_image.get_ysize()); - TEXTROW tessrow; - make_tess_row(NULL, // Denormalizer. - &tessrow); // Output row. - LINE_STATS line_stats; - GetLineStatsFromRow(&tessrow, &line_stats); - - // Perform a CC analysis to detect the blobs. 
- BLOCK_IT block_it = blocks; - for (block_it.mark_cycle_pt (); !block_it.cycled_list (); - block_it.forward ()) { - BLOCK* block = block_it.data(); -#ifndef GRAPHICS_DISABLED - extract_edges(NULL, // Scrollview window. - &page_image, // Image. - &page_image, // Thresholded image. - page_tr, // corner of page. - block); // block. -#else - extract_edges(&page_image, // Image. - &page_image, // Thresholded image. - page_tr, // corner of page. - block); // block. -#endif - C_BLOB_IT blob_it = block->blob_list(); - PBLOB *pblob = new PBLOB; - // Iterate over all blobs found and get their features. - for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); - blob_it.forward()) { - C_BLOB* blob = blob_it.data(); - blob = blob; - PBLOB c_as_p(blob, page_image.get_ysize()); - merge_blobs(pblob, &c_as_p); - } +// This method returns the features associated with the input blob. +void TessBaseAPI::GetFeaturesForBlob(TBLOB* blob, const DENORM& denorm, + INT_FEATURE_ARRAY int_features, + int* num_features, + int* FeatureOutlineIndex) { + if (tesseract_) { + tesseract_->ResetFeaturesHaveBeenExtracted(); + } + tesseract_->set_denorm(&denorm); + CLASS_NORMALIZATION_ARRAY norm_array; + inT32 len; + *num_features = tesseract_->GetIntCharNormFeatures( + blob, tesseract_->PreTrainedTemplates, + int_features, norm_array, &len, FeatureOutlineIndex); +} - PBLOB_LIST *pblob_list = new PBLOB_LIST; - PBLOB_IT pblob_it(pblob_list); - pblob_it.add_after_then_move(pblob); - WERD word(pblob_list, // Blob list. - 0, // Blanks in front. - " "); // Correct text. - ROW *row = make_tess_ocrrow(0, // baseline. - page_image.get_ysize(), // xheight. - 0, // ascent. - 0); // descent. - word.baseline_normalise(row); - delete row; - if (pblob->out_list () == NULL) { - tprintf("Blob list is empty"); +// This method returns the row to which a box of specified dimensions would +// belong. If no good match is found, it returns NULL. 
+ROW* TessBaseAPI::FindRowForBox(BLOCK_LIST* blocks, + int left, int top, int right, int bottom) { + TBOX box(left, bottom, right, top); + BLOCK_IT b_it(blocks); + for (b_it.mark_cycle_pt(); !b_it.cycled_list(); b_it.forward()) { + BLOCK* block = b_it.data(); + if (!box.major_overlap(block->bounding_box())) + continue; + ROW_IT r_it(block->row_list()); + for (r_it.mark_cycle_pt(); !r_it.cycled_list(); r_it.forward()) { + ROW* row = r_it.data(); + if (!box.major_overlap(row->bounding_box())) + continue; + WERD_IT w_it(row->word_list()); + for (w_it.mark_cycle_pt(); !w_it.cycled_list(); w_it.forward()) { + WERD* word = w_it.data(); + if (box.major_overlap(word->bounding_box())) + return row; + } } - TBLOB* tblob = make_tess_blob(pblob, // Blob. - TRUE); // Flatten. - - CLASS_NORMALIZATION_ARRAY norm_array; - inT32 len; - *num_features = tesseract_->GetCharNormFeatures( - tblob, &line_stats, - tesseract_->PreTrainedTemplates, - int_features, norm_array, &len); } - delete blocks; + return NULL; +} + +// Method to run adaptive classifier on a blob. +void TessBaseAPI::RunAdaptiveClassifier(TBLOB* blob, const DENORM& denorm, + int num_max_matches, + int* unichar_ids, + char* configs, + float* ratings, + int* num_matches_returned) { + BLOB_CHOICE_LIST* choices = new BLOB_CHOICE_LIST; + tesseract_->set_denorm(&denorm); + tesseract_->AdaptiveClassifier(blob, choices, NULL); + BLOB_CHOICE_IT choices_it(choices); + int& index = *num_matches_returned; + index = 0; + for (choices_it.mark_cycle_pt(); + !choices_it.cycled_list() && index < num_max_matches; + choices_it.forward()) { + BLOB_CHOICE* choice = choices_it.data(); + unichar_ids[index] = choice->unichar_id(); + configs[index] = choice->config(); + ratings[index] = choice->rating(); + ++index; + } + *num_matches_returned = index; + delete choices; +} + +// This method returns the string form of the specified unichar. 
+const char* TessBaseAPI::GetUnichar(int unichar_id) { + return tesseract_->unicharset.id_to_unichar(unichar_id); } // Return the pointer to the i-th dawg loaded into tesseract_ object. @@ -1740,4 +1682,9 @@ const char* TessBaseAPI::GetLastInitLanguage() const { return (tesseract_ == NULL || tesseract_->lang.string() == NULL) ? "" : tesseract_->lang.string(); } + +// Return a pointer to underlying CubeRecoContext object if present. +CubeRecoContext *TessBaseAPI::GetCubeRecoContext() const { + return (tesseract_ == NULL) ? NULL : tesseract_->GetCubeRecoContext(); +} } // namespace tesseract. diff --git a/api/baseapi.h b/api/baseapi.h index 39e307fac6..d06a0ee5f3 100644 --- a/api/baseapi.h +++ b/api/baseapi.h @@ -17,28 +17,38 @@ // /////////////////////////////////////////////////////////////////////// -#ifndef TESSERACT_CCMAIN_BASEAPI_H__ -#define TESSERACT_CCMAIN_BASEAPI_H__ +#ifndef TESSERACT_API_BASEAPI_H__ +#define TESSERACT_API_BASEAPI_H__ +// To avoid collision with other typenames include the ABSOLUTE MINIMUM +// complexity of includes here. Use forward declarations wherever possible +// and hide includes of complex types in baseapi.cpp. 
+#include "apitypes.h" #include "thresholder.h" +#include "unichar.h" class PAGE_RES; class PAGE_RES_IT; class BLOCK_LIST; +class DENORM; class IMAGE; +class PBLOB; +class ROW; class STRING; +class WERD; struct Pix; struct Box; struct Pixa; struct Boxa; -struct ETEXT_STRUCT; +class ETEXT_DESC; struct OSResults; -struct TBOX; +class TBOX; #define MAX_NUM_INT_FEATURES 512 struct INT_FEATURE_STRUCT; typedef INT_FEATURE_STRUCT *INT_FEATURE; typedef INT_FEATURE_STRUCT INT_FEATURE_ARRAY[MAX_NUM_INT_FEATURES]; +struct TBLOB; #ifdef TESSDLL_EXPORTS #define TESSDLL_API __declspec(dllexport) @@ -51,37 +61,21 @@ typedef INT_FEATURE_STRUCT INT_FEATURE_ARRAY[MAX_NUM_INT_FEATURES]; namespace tesseract { +class CubeRecoContext; +class Dawg; class Dict; +class PageIterator; +class ResultIterator; class Tesseract; class Trie; -class CubeRecoContext; -class TesseractCubeCombiner; -class CubeObject; -class CubeLineObject; -class Dawg; -typedef int (Dict::*DictFunc)(void* void_dawg_args, int char_index, - const void *word, bool word_end); - -enum PageSegMode { - PSM_AUTO, ///< Fully automatic page segmentation. - PSM_SINGLE_COLUMN, ///< Assume a single column of text of variable sizes. - PSM_SINGLE_BLOCK, ///< Assume a single uniform block of text. (Default.) - PSM_SINGLE_LINE, ///< Treat the image as a single text line. - PSM_SINGLE_WORD, ///< Treat the image as a single word. - PSM_SINGLE_CHAR, ///< Treat the image as a single character. - - PSM_COUNT ///< Number of enum entries. -}; +typedef int (Dict::*DictFunc)(void* void_dawg_args, + UNICHAR_ID unichar_id, bool word_end); +typedef double (Dict::*ProbabilityInContextFunc)(const char* context, + int context_bytes, + const char* character, + int character_bytes); -/** - * The values in the AccuracyVSpeed enum provide hints for how the engine - * should trade speed for accuracy. There is no guarantee of any effect. - */ -enum AccuracyVSpeed { - AVS_FASTEST = 0, ///< Fastest speed, but lowest accuracy. 
- AVS_MOST_ACCURATE = 100 ///< Greatest accuracy, but slowest speed. -}; /** * Base class for all tesseract APIs. @@ -106,47 +100,66 @@ class TESSDLL_API TessBaseAPI { void SetOutputName(const char* name); /** - * Set the value of an internal "variable" (of either old or new types). - * Supply the name of the variable and the value as a string, just as + * Set the value of an internal "parameter." + * Supply the name of the parameter and the value as a string, just as * you would in a config file. * Returns false if the name lookup failed. * Eg SetVariable("tessedit_char_blacklist", "xyz"); to ignore x, y and z. * Or SetVariable("bln_numericmode", "1"); to set numeric-only mode. * SetVariable may be used before Init, but settings will revert to * defaults on End(). - */ - bool SetVariable(const char* variable, const char* value); - - /** - * Eventually instances will be thread-safe and totally independent, - * but for now, they all point to the same underlying engine, - * and are NOT RE-ENTRANT OR THREAD-SAFE. For now: - * it is safe to Init multiple TessBaseAPIs in the same language, use them - * sequentially, and End or delete them all, but once one is Ended, you can't - * do anything other than End the others. After End, it is safe to Init - * again on the same one. + * TODO(rays) Add a command-line option to dump the parameters to stdout + * and add a pointer to it in the FAQ + */ + bool SetVariable(const char* name, const char* value); + // Same as above, but the parameter is set only if it is one of the "init" + // parameters (defined with *_INIT_* macro). + bool SetVariableIfInit(const char *name, const char *value); + + // Returns true if the parameter was found among Tesseract parameters. + // Fills in value with the value of the parameter. 
+ bool GetIntVariable(const char *name, int *value) const; + bool GetBoolVariable(const char *name, bool *value) const; + bool GetDoubleVariable(const char *name, double *value) const; + // Returns the pointer to the string that represents the value of the + // parameter if it was found among Tesseract parameters. + const char *GetStringVariable(const char *name) const; + + // Print Tesseract parameters to the given file. + void PrintVariables(FILE *fp) const; + + /** + * Instances are now mostly thread-safe and totally independent, + * but some global parameters remain. Basically it is safe to use multiple + * TessBaseAPIs in different threads in parallel, UNLESS: + * you use SetVariable on some of the Params in classify and textord. + * If you do, then the effect will be to change it for all your instances. * * Start tesseract. Returns zero on success and -1 on failure. * NOTE that the only members that may be called before Init are those * listed above here in the class definition. * - * The datapath must be the name of the data directory (no ending /) or - * some other file in which the data directory resides (for instance argv[0].) + * The datapath must be the name of the parent directory of tessdata and + * must end in / . Any name after the last / will be stripped. * The language is (usually) an ISO 639-3 string or NULL will default to eng. * It is entirely safe (and eventually will be efficient too) to call * Init multiple times on the same instance to change language, or just * to reset the classifier. - * WARNING: On changing languages, all Variables are reset back to their - * default values. If you have a rare need to set a Variable that controls + * WARNING: On changing languages, all Tesseract parameters are reset + * back to their default values. (Which may vary between languages.) 
+ * If you have a rare need to set a Variable that controls * initialization for a second call to Init you should explicitly * call End() and then use SetVariable before Init. This is only a very - * rare use case, since there are very few uses that require any variables + * rare use case, since there are very few uses that require any parameters * to be set before Init. */ - int Init(const char* datapath, const char* language, - char **configs, int configs_size, bool configs_global_only); + int Init(const char* datapath, const char* language, OcrEngineMode mode, + char **configs, int configs_size, bool configs_init_only); + int Init(const char* datapath, const char* language, OcrEngineMode oem) { + return Init(datapath, language, oem, NULL, 0, false); + } int Init(const char* datapath, const char* language) { - return Init(datapath, language, 0, 0, false); + return Init(datapath, language, OEM_DEFAULT, NULL, 0, false); } /** @@ -157,22 +170,24 @@ class TESSDLL_API TessBaseAPI { */ int InitLangMod(const char* datapath, const char* language); - /** - * Init everything except the language model. Used to allow initialization for - * the specified language without any available dawg models. - */ - int InitWithoutLangModel(const char* datapath, const char* language); + // Init only for page layout analysis. Use only for calls to SetImage and + // AnalysePage. Calls that attempt recognition will generate an error. + void InitForAnalysePage(); /** * Read a "config" file containing a set of variable, value pairs. * Searches the standard places: tessdata/configs, tessdata/tessconfigs * and also accepts a relative or absolute path name. + * If init_only is true, only sets the parameters marked with a special + * INIT flag, which are typically of functional/algorithmic effect + * rather than debug effect. Used to separate debug settings from + * working settings. 
*/ - void ReadConfigFile(const char* filename, bool global_only); + void ReadConfigFile(const char* filename, bool init_only); /** * Set the current page segmentation mode. Defaults to PSM_SINGLE_BLOCK. - * The mode is stored as an INT_VARIABLE so it can also be modified by + * The mode is stored as an IntParam so it can also be modified by * ReadConfigFile or SetVariable("tessedit_pageseg_mode", mode as string). */ void SetPageSegMode(PageSegMode mode); @@ -180,19 +195,6 @@ class TESSDLL_API TessBaseAPI { /** Return the current page segmentation mode. */ PageSegMode GetPageSegMode() const; - /** - * Set the hint for trading accuracy against speed. - * Default is AVS_FASTEST, which is the old behaviour. - * Note that this is only a hint. Depending on the language and/or - * build configuration, speed and accuracy may not be tradeable. - * Also note that despite being an enum, any value in the range - * AVS_FASTEST to AVS_MOST_ACCURATE can be provided, and may or may not - * have an effect, depending on the implementation. - * The mode is stored as an INT_VARIABLE so it can also be modified by - * ReadConfigFile or SetVariable("tessedit_accuracyvspeed", mode as string). - */ - void SetAccuracyVSpeed(AccuracyVSpeed mode); - /** * Recognize a rectangle from an image and return the result as a string. * May be called many times for a single Init. @@ -267,7 +269,7 @@ class TESSDLL_API TessBaseAPI { * delete it when it it is replaced or the API is destructed. */ void SetThresholder(ImageThresholder* thresholder) { - if (thresholder_ != 0) + if (thresholder_ != NULL) delete thresholder_; thresholder_ = thresholder; ClearResults(); @@ -291,8 +293,8 @@ class TESSDLL_API TessBaseAPI { * Get the textlines as a leptonica-style * Boxa, Pixa pair, in reading order. * Can be called before or after Recognize. - * If blockids is not NULL, the block-id of each line is also returned as an - * array of one element per line. delete [] after use. 
+ * If blockids is not NULL, the block-id of each line is also returned + * as an array of one element per line. delete [] after use. */ Boxa* GetTextlines(Pixa** pixa, int** blockids); @@ -303,6 +305,22 @@ */ Boxa* GetWords(Pixa** pixa); + + // Gets the individual connected (text) components (created + // after the page segmentation step, but before recognition) + // as a leptonica-style Boxa, Pixa pair, in reading order. + // Can be called before or after Recognize. + // Note: the caller is responsible for calling boxaDestroy() + // on the returned Boxa array and pixaDestroy() on cc array. + Boxa* GetConnectedComponents(Pixa** cc); + + // Get the given level kind of components (block, textline, word etc.) as a + // leptonica-style Boxa, Pixa pair, in reading order. + // Can be called before or after Recognize. + // If blockids is not NULL, the block-id of each component is also returned + // as an array of one element per component. delete [] after use. + Boxa* GetComponentImages(PageIteratorLevel level, + Pixa** pixa, int** blockids); + /** * Dump the internal binary image to a PGM file. * @deprecated Use GetThresholdedImage and write the image using pixWrite @@ -310,13 +328,24 @@ */ void DumpPGM(const char* filename); + + // Runs page layout analysis in the mode set by SetPageSegMode. + // May optionally be called prior to Recognize to get access to just + // the page layout results. Returns an iterator to the results. + // Returns NULL on error. + // The returned iterator must be deleted after use. + // WARNING! This class points to data held within the TessBaseAPI class, and + // therefore can only be used while the TessBaseAPI class still exists and + // has not been subjected to a call of Init, SetImage, Recognize, Clear, End, + // DetectOS, or anything else that changes the internal PAGE_RES. 
+ PageIterator* AnalyseLayout(); + /** * Recognize the image from SetAndThresholdImage, generating Tesseract * internal structures. Returns 0 on success. * Optional. The Get*Text functions below will call Recognize if needed. * After Recognize, the output is kept internally until the next SetImage. */ - int Recognize(ETEXT_STRUCT* monitor); + int Recognize(ETEXT_DESC* monitor); /** * Methods to retrieve information after SetAndThresholdImage(), @@ -324,7 +353,15 @@ */ /** Variant on Recognize used for testing chopper. */ - int RecognizeForChopTest(struct ETEXT_STRUCT* monitor); + int RecognizeForChopTest(ETEXT_DESC* monitor); + + // Get an iterator to the results of LayoutAnalysis and/or Recognize. + // The returned iterator must be deleted after use. + // WARNING! This class points to data held within the TessBaseAPI class, and + // therefore can only be used while the TessBaseAPI class still exists and + // has not been subjected to a call of Init, SetImage, Recognize, Clear, End, + // DetectOS, or anything else that changes the internal PAGE_RES. + ResultIterator* GetIterator(); /** * The recognized text is returned as a char* which is coded @@ -334,16 +371,15 @@ /** * Make a HTML-formatted string with hOCR markup from the internal * data structures. - STL removed from original patch submission and refactored by rays. - * page_id is 1-based and will appear in the output. + * page_number is 0-based but will appear in the output as 1-based. */ - char* GetHOCRText(int page_id); + char* GetHOCRText(int page_number); /** * The recognized text is returned as a char* which is coded in the same * format as a box file used in training. Returned string must be freed with * the delete [] operator. * Constructs coordinates in the original image - not just the rectangle. - * page_number is a 0-base page index that will appear in the box file. 
+ * page_number is a 0-based page index that will appear in the box file. */ char* GetBoxText(int page_number); /** @@ -388,9 +424,14 @@ class TESSDLL_API TessBaseAPI { bool GetTextDirection(int* out_offset, float* out_slope); - /** Set the letter_is_okay function to point somewhere else. */ + /** Sets Dict::letter_is_okay_ function to point to the given function. */ void SetDictFunc(DictFunc f); + /** Sets Dict::probability_in_context_ function to point to the given + * function. + */ + void SetProbabilityInContextFunc(ProbabilityInContextFunc f); + /** * Estimates the Orientation And Script of the image. * @return true if the image was processed successfully. @@ -398,8 +439,26 @@ class TESSDLL_API TessBaseAPI { bool DetectOS(OSResults*); /** This method returns the features associated with the input image. */ - void GetFeatures(INT_FEATURE_ARRAY int_features, - int* num_features); + void GetFeaturesForBlob(TBLOB* blob, const DENORM& denorm, + INT_FEATURE_ARRAY int_features, + int* num_features, int* FeatureOutlineIndex); + + // This method returns the row to which a box of specified dimensions would + // belong. If no good match is found, it returns NULL. + static ROW* FindRowForBox(BLOCK_LIST* blocks, int left, int top, + int right, int bottom); + + // Method to run adaptive classifier on a blob. + // It returns at max num_max_matches results. + void RunAdaptiveClassifier(TBLOB* blob, const DENORM& denorm, + int num_max_matches, + int* unichar_ids, + char* configs, + float* ratings, + int* num_matches_returned); + + // This method returns the string form of the specified unichar. + const char* GetUnichar(int unichar_id); /** Return the pointer to the i-th dawg loaded into tesseract_ object. */ const Dawg *GetDawg(int i) const; @@ -410,6 +469,42 @@ class TESSDLL_API TessBaseAPI { /** Return the language used in the last valid initialization. */ const char* GetLastInitLanguage() const; + // Returns a ROW object created from the input row specification. 
+ static ROW *MakeTessOCRRow(float baseline, float xheight, + float descender, float ascender); + + // Returns a TBLOB corresponding to the entire input image. + static TBLOB *MakeTBLOB(Pix *pix); + + // This method baseline normalizes a TBLOB in-place. The input row is used + // for normalization. The denorm is an optional parameter in which the + // normalization-antidote is returned. + static void NormalizeTBLOB(TBLOB *tblob, ROW *row, + bool numeric_mode, DENORM *denorm); + + Tesseract* const tesseract() const { + return tesseract_; + } + + // Return a pointer to underlying CubeRecoContext object if present. + CubeRecoContext *GetCubeRecoContext() const; + + void set_min_orientation_margin(double margin); + + // Return text orientation of each block as determined by an earlier run + // of layout analysis. + void GetBlockTextOrientations(int** block_orientation, + bool** vertical_writing); + + /** Find lines from the image making the BLOCK_LIST. */ + BLOCK_LIST* FindLinesCreateBlockList(); + + /** + * Delete a block list. + * This is to keep BLOCK_LIST pointer opaque + * and let go of including the other headers. + */ + static void DeleteBlockList(BLOCK_LIST* block_list); /* @} */ protected: @@ -441,17 +536,7 @@ class TESSDLL_API TessBaseAPI { int TextLength(int* blob_count); /** @defgroup ocropusAddOns ocropus add-ons */ - /* @{ */ - /** Find lines from the image making the BLOCK_LIST. */ - BLOCK_LIST* FindLinesCreateBlockList(); - - /** - * Delete a block list. - * This is to keep BLOCK_LIST pointer opaque - * and let go of including the other headers. - */ - static void DeleteBlockList(BLOCK_LIST* block_list); /** * Adapt to recognize the current image as the given character. @@ -465,9 +550,8 @@ class TESSDLL_API TessBaseAPI { float ascender); /** Recognize text doing one pass only, using settings for a given pass. 
*/ - /*static*/ PAGE_RES* RecognitionPass1(BLOCK_LIST* block_list); - /*static*/ PAGE_RES* RecognitionPass2(BLOCK_LIST* block_list, - PAGE_RES* pass1_result); + PAGE_RES* RecognitionPass1(BLOCK_LIST* block_list); + PAGE_RES* RecognitionPass2(BLOCK_LIST* block_list, PAGE_RES* pass1_result); /** * Extract the OCR results, costs (penalty points for uncertainty), @@ -482,67 +566,25 @@ class TESSDLL_API TessBaseAPI { int** y1, PAGE_RES* page_res); - /** - * Call the Cube OCR engine. Takes the Region, line and word segmentation - * information from Tesseract as inputs. Makes changes or populates the - * output PAGE_RES object which contains the recogntion results. - * The behavior of this function depends on the - * current language and the value of the tessedit_accuracyvspeed: - * For English (and other Latin based scripts): - * If the accuracyvspeed flag is set to any value other than AVS_FASTEST, - * Cube uses the word information passed by Tesseract. - * Cube will run on a subset of the words segmented and recognized by - * Tesseract. The value of the accuracyvspeed and the Tesseract - * confidence of a word determines whether Cube runs on it or not and - * whether Cube's results override Tesseract's - * For Arabic & Hindi: - * Cube uses the Region information passed by Tesseract. It then performs - * its own line segmentation. This will change once Tesseract's line - * segmentation works for Arabic. Cube then segments each line into - * phrases. Each phrase is then recognized in phrase mode which allows - * spaces in the results. - * Note that at this point, the line segmentation algorithm might have - * some problems with ill spaced Arabic document. - */ - int Cube(); - /** Run Cube on the lines extracted by Tesseract. 
*/ - int RunCubeOnLines(); - /** - * Run Cube on a subset of the words already present in the page_res_ object - * The subset, and whether Cube overrides the results is determined by - * the SpeedVsAccuracy flag - */ - int CubePostProcessWords(); - /** Create a Cube line object for each line */ - CubeLineObject **CreateLineObjects(Pixa* pixa_lines); - /** - * Create a TBox array corresponding to the phrases in the array of - * line objects - */ - TBOX *CreatePhraseBoxes(Boxa* boxa_lines, CubeLineObject **line_objs, - int *phrase_cnt); - /** Recognize the phrases saving the results to the page_res_ object */ - bool RecognizePhrases(int line_cnt, int phrase_cnt, - CubeLineObject **line_objs, TBOX *phrase_boxes); - /** Recognize a single phrase saving the results to the page_res_ object */ - bool RecognizePhrase(CubeObject *phrase, PAGE_RES_IT *result); - /** Create the necessary Cube Objects */ - bool CreateCubeObjects(); - /* @} */ + const PAGE_RES* GetPageRes() const { + return page_res_; + }; protected: - Tesseract* tesseract_; ///< The underlying data object. - ImageThresholder* thresholder_; ///< Image thresholding module. - bool threshold_done_; ///< Image has been passed to page_image. - BLOCK_LIST* block_list_; ///< The page layout. - PAGE_RES* page_res_; ///< The page-level data. - STRING* input_file_; ///< Name used by training code. - STRING* output_file_; ///< Name used by debug code. - STRING* datapath_; ///< Current location of tessdata. - STRING* language_; ///< Last initialized language. - - /** - * @defgroup ThresholderParams + Tesseract* tesseract_; ///< The underlying data object. + Tesseract* osd_tesseract_; ///< For orientation & script detection. + ImageThresholder* thresholder_; ///< Image thresholding module. + BLOCK_LIST* block_list_; ///< The page layout. + PAGE_RES* page_res_; ///< The page-level data. + STRING* input_file_; ///< Name used by training code. + STRING* output_file_; ///< Name used by debug code. 
+ STRING* datapath_; ///< Current location of tessdata. + STRING* language_; ///< Last initialized language. + OcrEngineMode last_oem_requested_; ///< Last ocr language mode requested. + bool recognition_done_; ///< page_res_ contains recognition data. + + /** + * @defgroup ThresholderParams * Parameters saved from the Thresholder. Needed to rebuild coordinates. */ /* @{ */ @@ -555,6 +597,6 @@ class TESSDLL_API TessBaseAPI { /* @} */ }; -} // namespace tesseract. +} // namespace tesseract. -#endif // TESSERACT_CCMAIN_BASEAPI_H__ +#endif // TESSERACT_API_BASEAPI_H__ diff --git a/api/pageiterator.cpp b/api/pageiterator.cpp new file mode 100644 index 0000000000..030c091f21 --- /dev/null +++ b/api/pageiterator.cpp @@ -0,0 +1,388 @@ +/////////////////////////////////////////////////////////////////////// +// File: pageiterator.cpp +// Description: Iterator for tesseract page structure that avoids using +// tesseract internal data structures. +// Author: Ray Smith +// Created: Fri Feb 26 14:32:09 PST 2010 +// +// (C) Copyright 2010, Google Inc. +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +/////////////////////////////////////////////////////////////////////// + +#include "pageiterator.h" +#include "allheaders.h" +#include "helpers.h" +#include "pageres.h" +#include "tesseractclass.h" + +namespace tesseract { + +PageIterator::PageIterator(PAGE_RES* page_res, Tesseract* tesseract, + int scale, int scaled_yres, + int rect_left, int rect_top, + int rect_width, int rect_height) + : page_res_(page_res), tesseract_(tesseract), + word_(NULL), word_length_(0), blob_index_(0), cblob_it_(NULL), + scale_(scale), scaled_yres_(scaled_yres), + rect_left_(rect_left), rect_top_(rect_top), + rect_width_(rect_width), rect_height_(rect_height) { + it_ = new PAGE_RES_IT(page_res); + Begin(); +} + +PageIterator::~PageIterator() { + delete it_; + delete cblob_it_; +} + +// PageIterators may be copied! This makes it possible to iterate over +// all the objects at a lower level, while maintaining an iterator to +// objects at a higher level. +PageIterator::PageIterator(const PageIterator& src) + : page_res_(src.page_res_), tesseract_(src.tesseract_), + word_(NULL), word_length_(src.word_length_), + blob_index_(src.blob_index_), cblob_it_(NULL), + scale_(src.scale_), scaled_yres_(src.scaled_yres_), + rect_left_(src.rect_left_), rect_top_(src.rect_top_), + rect_width_(src.rect_width_), rect_height_(src.rect_height_) { + it_ = new PAGE_RES_IT(*src.it_); + BeginWord(src.blob_index_); +} + +const PageIterator& PageIterator::operator=(const PageIterator& src) { + page_res_ = src.page_res_; + tesseract_ = src.tesseract_; + scale_ = src.scale_; + scaled_yres_ = src.scaled_yres_; + rect_left_ = src.rect_left_; + rect_top_ = src.rect_top_; + rect_width_ = src.rect_width_; + rect_height_ = src.rect_height_; + if (it_ != NULL) delete it_; + it_ = new PAGE_RES_IT(*src.it_); + BeginWord(src.blob_index_); + return *this; +} + +// ============= Moving around within the page ============. + +// Resets the iterator to point to the start of the page. 
+void PageIterator::Begin() { + it_->restart_page_with_empties(); + BeginWord(0); +} + +// Moves to the start of the next object at the given level in the +// page hierarchy, and returns false if the end of the page was reached. +// NOTE that RIL_SYMBOL will skip non-text blocks, but all other +// PageIteratorLevel level values will visit each non-text block once. +// Think of non text blocks as containing a single para, with a single line, +// with a single imaginary word. +// Calls to Next with different levels may be freely intermixed. +// This function iterates words in right-to-left scripts correctly, if +// the appropriate language has been loaded into Tesseract. +bool PageIterator::Next(PageIteratorLevel level) { + if (it_->block() == NULL) return false; // Already at the end! + if (it_->word() == NULL) + level = RIL_BLOCK; + + switch (level) { + case RIL_BLOCK: + case RIL_PARA: + it_->forward_block(); + break; + case RIL_TEXTLINE: + for (it_->forward_with_empties(); it_->row() == it_->prev_row(); + it_->forward_with_empties()); + break; + case RIL_WORD: + it_->forward_with_empties(); + break; + case RIL_SYMBOL: + if (cblob_it_ != NULL) + cblob_it_->forward(); + ++blob_index_; + if (blob_index_ >= word_length_) + it_->forward(); + else + return true; + break; + } + BeginWord(0); + return it_->block() != NULL; +} + +// Returns true if the iterator is at the start of an object at the given +// level. Possible uses include determining if a call to Next(RIL_WORD) +// moved to the start of a RIL_PARA. +bool PageIterator::IsAtBeginningOf(PageIteratorLevel level) const { + if (it_->block() == NULL) return false; // Already at the end! + if (it_->word() == NULL) return true; // In an image block. 
+  switch (level) {
+    case RIL_BLOCK:
+    case RIL_PARA:
+      return it_->block() != it_->prev_block();
+    case RIL_TEXTLINE:
+      return it_->row() != it_->prev_row();
+    case RIL_WORD:
+      return blob_index_ == 0;
+    case RIL_SYMBOL:
+      return true;
+  }
+  return false;
+}
+
+// Returns whether the iterator is positioned at the last element in a
+// given level. (e.g. the last word in a line, the last line in a block)
+bool PageIterator::IsAtFinalElement(PageIteratorLevel level,
+                                    PageIteratorLevel element) const {
+  if (it_->word() == NULL) return true;  // Already at the end!
+  // The result is true if we step forward by element and find we are
+  // at the end of the page or at beginning of *all* levels in:
+  // [level, element).
+  // When there is more than one level difference between element and level,
+  // we could for instance move forward one symbol and still be at the first
+  // word on a line, so we also have to be at the first symbol in a word.
+  PageIterator next(*this);
+  next.Next(element);
+  if (next.it_->word() == NULL) return true;  // Reached the end of the page.
+  while (element > level) {
+    element = static_cast<PageIteratorLevel>(element - 1);
+    if (!next.IsAtBeginningOf(element))
+      return false;
+  }
+  return true;
+}
+
+// ============= Accessing data ==============.
+// Coordinate system:
+// Integer coordinates are at the cracks between the pixels.
+// The top-left corner of the top-left pixel in the image is at (0,0).
+// The bottom-right corner of the bottom-right pixel in the image is at
+// (width, height).
+// Every bounding box goes from the top-left of the top-left contained
+// pixel to the bottom-right of the bottom-right contained pixel, so
+// the bounding box of the single top-left pixel in the image is:
+// (0,0)->(1,1).
+// If an image rectangle has been set in the API, then returned coordinates
+// relate to the original (full) image, rather than the rectangle.
+
+// Returns the bounding rectangle of the current object at the given level.
+// See comment on coordinate system above. +// Returns false if there is no such object at the current position. +bool PageIterator::BoundingBox(PageIteratorLevel level, + int* left, int* top, + int* right, int* bottom) const { + if (it_->block() == NULL) return false; // Already at the end! + if (it_->word() == NULL && level != RIL_BLOCK) return false; + if (level == RIL_SYMBOL && blob_index_ >= word_length_) + return false; // Zero length word, or already at the end of it. + TBOX box; + switch (level) { + case RIL_BLOCK: + case RIL_PARA: + box = it_->block()->block->bounding_box(); + break; + case RIL_TEXTLINE: + box = it_->row()->row->bounding_box(); + break; + case RIL_WORD: + box = it_->word()->word->bounding_box(); + break; + case RIL_SYMBOL: + if (cblob_it_ == NULL) + box = it_->word()->box_word->BlobBox(blob_index_); + else + box = cblob_it_->data()->bounding_box(); + // Intersect with the word box. + const TBOX& word_box = it_->word()->word->bounding_box(); + if (box.overlap(word_box)) + box -= word_box; + else + box = word_box; + } + if (level != RIL_SYMBOL || cblob_it_ != NULL) + box.rotate(it_->block()->block->re_rotation()); + // Now we have a box in tesseract coordinates relative to the image rectangle, + // we have to convert the coords to global page coords in a top-down system. + *left = ClipToRange(box.left() / scale_ + rect_left_, + rect_left_, rect_left_ + rect_width_); + *top = ClipToRange((rect_height_ - box.top()) / scale_ + rect_top_, + rect_top_, rect_top_ + rect_height_); + *right = ClipToRange((box.right() + scale_ - 1) / scale_ + rect_left_, + *left, rect_left_ + rect_width_); + *bottom = ClipToRange((rect_height_ - box.bottom() + scale_ - 1) / scale_ + + rect_top_, + *top, rect_top_ + rect_height_); + return true; +} + +// Returns the type of the current block. See apitypes.h for PolyBlockType. 
+PolyBlockType PageIterator::BlockType() const {
+  if (it_->block() == NULL || it_->block()->block == NULL)
+    return PT_UNKNOWN;  // Already at the end!
+  if (it_->block()->block->poly_block() == NULL)
+    return PT_FLOWING_TEXT;  // No layout analysis used - assume text.
+  return it_->block()->block->poly_block()->isA();
+}
+
+// Returns a binary image of the current object at the given level.
+// The position and size match the return from BoundingBox.
+// Use pixDestroy to delete the image after use.
+// The following methods are used to generate the images:
+// RIL_BLOCK: mask the page image with the block polygon.
+// RIL_TEXTLINE: Clip the rectangle of the line box from the page image.
+// TODO(rays) fix this to generate and use a line polygon.
+// RIL_WORD: Clip the rectangle of the word box from the page image.
+// RIL_SYMBOL: Render the symbol outline to an image for cblobs (prior
+// to recognition) or the bounding box otherwise.
+// A reconstruction of the original image (using xor to check for double
+// representation) should be reasonably accurate,
+// apart from removed noise, at the block level. Below the block level, the
+// reconstruction will be missing images and line separators.
+// At the symbol level, kerned characters will invade the bounding box
+// if rendered after recognition, making an xor reconstruction inaccurate, but
+// an or construction better. Before recognition, symbol-level reconstruction
+// should be good, even with xor, since the images come from the connected
+// components.
+Pix* PageIterator::GetBinaryImage(PageIteratorLevel level) const {
+  int left, top, right, bottom;
+  if (!BoundingBox(level, &left, &top, &right, &bottom))
+    return NULL;
+  Pix* pix = NULL;
+  switch (level) {
+    case RIL_BLOCK:
+    case RIL_PARA:
+      pix = it_->block()->block->render_mask();
+      // AND the mask and the image.
+ pixRasterop(pix, 0, 0, pixGetWidth(pix), pixGetHeight(pix), + PIX_SRC & PIX_DST, tesseract_->pix_binary(), + left, top); + break; + case RIL_TEXTLINE: + case RIL_WORD: + case RIL_SYMBOL: + if (level == RIL_SYMBOL && cblob_it_ != NULL) + return cblob_it_->data()->render(); + // Just clip from the bounding box. + Box* box = boxCreate(left, top, right - left, bottom - top); + pix = pixClipRectangle(tesseract_->pix_binary(), box, NULL); + boxDestroy(&box); + break; + } + return pix; +} + +// Returns an image of the current object at the given level in greyscale +// if available in the input. To guarantee a binary image use BinaryImage. +// NOTE that in order to give the best possible image, the bounds are +// expanded slightly over the binary connected component, by the supplied +// padding, so the top-left position of the returned image is returned +// in (left,top). These will most likely not match the coordinates +// returned by BoundingBox. +// Use pixDestroy to delete the image after use. +Pix* PageIterator::GetImage(PageIteratorLevel level, int padding, + int* left, int* top) const { + int right, bottom; + if (!BoundingBox(level, left, top, &right, &bottom)) + return NULL; + Pix* pix = tesseract_->pix_grey(); + if (pix == NULL) + return GetBinaryImage(level); + + // Expand the box. 
+  *left = MAX(*left - padding, 0);
+  *top = MAX(*top - padding, 0);
+  right = MIN(right + padding, rect_width_);
+  bottom = MIN(bottom + padding, rect_height_);
+  Box* box = boxCreate(*left, *top, right - *left, bottom - *top);
+  Pix* grey_pix = pixClipRectangle(pix, box, NULL);
+  boxDestroy(&box);
+  if (level == RIL_BLOCK || level == RIL_PARA) {
+    Pix* mask = it_->block()->block->render_mask();
+    Pix* expanded_mask = pixCreate(right - *left, bottom - *top, 1);
+    pixRasterop(expanded_mask, padding, padding,
+                pixGetWidth(mask), pixGetHeight(mask),
+                PIX_SRC, mask, 0, 0);
+    pixDestroy(&mask);
+    pixDilateBrick(expanded_mask, expanded_mask, 2*padding + 1, 2*padding + 1);
+    pixInvert(expanded_mask, expanded_mask);
+    pixSetMasked(grey_pix, expanded_mask, 255);
+    pixDestroy(&expanded_mask);
+  }
+  return grey_pix;
+}
+
+
+// Returns the baseline of the current object at the given level.
+// The baseline is the line that passes through (x1, y1) and (x2, y2).
+// WARNING: with vertical text, baselines may be vertical!
+bool PageIterator::Baseline(PageIteratorLevel level,
+                            int* x1, int* y1, int* x2, int* y2) const {
+  if (it_->word() == NULL) return false;  // Already at the end!
+  ROW* row = it_->row()->row;
+  WERD* word = it_->word()->word;
+  TBOX box = (level == RIL_WORD || level == RIL_SYMBOL)
+           ? word->bounding_box()
+           : row->bounding_box();
+  int left = box.left();
+  ICOORD startpt(left, static_cast<int>(row->base_line(left) + 0.5));
+  int right = box.right();
+  ICOORD endpt(right, static_cast<int>(row->base_line(right) + 0.5));
+  // Rotate to image coordinates and convert to global image coords.
+ startpt.rotate(it_->block()->block->re_rotation()); + endpt.rotate(it_->block()->block->re_rotation()); + *x1 = startpt.x() / scale_ + rect_left_; + *y1 = (rect_height_ - startpt.y()) / scale_ + rect_top_; + *x2 = endpt.x() / scale_ + rect_left_; + *y2 = (rect_height_ - endpt.y()) / scale_ + rect_top_; + return true; +} + +// Sets up the internal data for iterating the blobs of a new word, then +// moves the iterator to the given offset. +void PageIterator::BeginWord(int offset) { + WERD_RES* word_res = it_->word(); + if (word_res == NULL) { + // This is a non-text block, so there is no word. + word_length_ = 0; + blob_index_ = 0; + word_ = NULL; + return; + } + if (word_res->best_choice != NULL) { + // Recognition has been done, so we are using the box_word, which + // is already baseline denormalized. + word_length_ = word_res->best_choice->length(); + ASSERT_HOST(word_res->box_word != NULL); + ASSERT_HOST(word_res->box_word->length() == word_length_); + word_ = NULL; + // We will be iterating the box_word. + if (cblob_it_ != NULL) { + delete cblob_it_; + cblob_it_ = NULL; + } + } else { + // No recognition yet, so a "symbol" is a cblob. + word_ = word_res->word; + ASSERT_HOST(word_->cblob_list() != NULL); + word_length_ = word_->cblob_list()->length(); + if (cblob_it_ == NULL) cblob_it_ = new C_BLOB_IT; + cblob_it_->set_to_list(word_->cblob_list()); + } + for (blob_index_ = 0; blob_index_ < offset; ++blob_index_) { + if (cblob_it_ != NULL) + cblob_it_->forward(); + } +} + +} // namespace tesseract. diff --git a/api/pageiterator.h b/api/pageiterator.h new file mode 100644 index 0000000000..6d6d3498c4 --- /dev/null +++ b/api/pageiterator.h @@ -0,0 +1,184 @@ +/////////////////////////////////////////////////////////////////////// +// File: pageiterator.h +// Description: Iterator for tesseract page structure that avoids using +// tesseract internal data structures. 
+// Author: Ray Smith +// Created: Fri Feb 26 11:01:06 PST 2010 +// +// (C) Copyright 2010, Google Inc. +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +/////////////////////////////////////////////////////////////////////// + +#ifndef TESSERACT_API_PAGEITERATOR_H__ +#define TESSERACT_API_PAGEITERATOR_H__ + +#include "apitypes.h" + +class C_BLOB_IT; +class PBLOB_IT; +class PAGE_RES; +class PAGE_RES_IT; +class WERD; +struct Pix; + +namespace tesseract { + +class Tesseract; + +// Class to iterate over tesseract page structure, providing access to all +// levels of the page hierarchy, without including any tesseract headers or +// having to handle any tesseract structures. +// WARNING! This class points to data held within the TessBaseAPI class, and +// therefore can only be used while the TessBaseAPI class still exists and +// has not been subjected to a call of Init, SetImage, Recognize, Clear, End +// DetectOS, or anything else that changes the internal PAGE_RES. +// See apitypes.h for the definition of PageIteratorLevel. +// See also ResultIterator, derived from PageIterator, which adds in the +// ability to access OCR output with text-specific methods. + +class PageIterator { + public: + // page_res and tesseract come directly from the BaseAPI. + // The rectangle parameters are copied indirectly from the Thresholder, + // via the BaseAPI. 
They represent the coordinates of some rectangle in an + // original image (in top-left-origin coordinates) and therefore the top-left + // needs to be added to any output boxes in order to specify coordinates + // in the original image. See TessBaseAPI::SetRectangle. + // The scale and scaled_yres are in case the Thresholder scaled the image + // rectangle prior to thresholding. Any coordinates in tesseract's image + // must be divided by scale before adding (rect_left, rect_top). + // The scaled_yres indicates the effective resolution of the binary image + // that tesseract has been given by the Thresholder. + // After the constructor, Begin has already been called. + PageIterator(PAGE_RES* page_res, Tesseract* tesseract, + int scale, int scaled_yres, + int rect_left, int rect_top, + int rect_width, int rect_height); + virtual ~PageIterator(); + + // Page/ResultIterators may be copied! This makes it possible to iterate over + // all the objects at a lower level, while maintaining an iterator to + // objects at a higher level. These constructors DO NOT CALL Begin, so + // iterations will continue from the location of src. + PageIterator(const PageIterator& src); + const PageIterator& operator=(const PageIterator& src); + + // ============= Moving around within the page ============. + + // Moves the iterator to point to the start of the page to begin an iteration. + void Begin(); + + // Moves to the start of the next object at the given level in the + // page hierarchy, and returns false if the end of the page was reached. + // NOTE that RIL_SYMBOL will skip non-text blocks, but all other + // PageIteratorLevel level values will visit each non-text block once. + // Think of non text blocks as containing a single para, with a single line, + // with a single imaginary word. + // Calls to Next with different levels may be freely intermixed. 
+ // This function iterates words in right-to-left scripts correctly, if + // the appropriate language has been loaded into Tesseract. + bool Next(PageIteratorLevel level); + + // Returns true if the iterator is at the start of an object at the given + // level. Possible uses include determining if a call to Next(RIL_WORD) + // moved to the start of a RIL_PARA. + bool IsAtBeginningOf(PageIteratorLevel level) const; + + // Returns whether the iterator is positioned at the last element in a + // given level. (e.g. the last word in a line, the last line in a block) + bool IsAtFinalElement(PageIteratorLevel level, + PageIteratorLevel element) const; + + // ============= Accessing data ==============. + // Coordinate system: + // Integer coordinates are at the cracks between the pixels. + // The top-left corner of the top-left pixel in the image is at (0,0). + // The bottom-right corner of the bottom-right pixel in the image is at + // (width, height). + // Every bounding box goes from the top-left of the top-left contained + // pixel to the bottom-right of the bottom-right contained pixel, so + // the bounding box of the single top-left pixel in the image is: + // (0,0)->(1,1). + // If an image rectangle has been set in the API, then returned coordinates + // relate to the original (full) image, rather than the rectangle. + + // Returns the bounding rectangle of the current object at the given level. + // See comment on coordinate system above. + // Returns false if there is no such object at the current position. + // The returned bounding box is guaranteed to match the size and position + // of the image returned by GetBinaryImage, but may clip foreground pixels + // from a grey image. The padding argument to GetImage can be used to expand + // the image to include more foreground pixels. See GetImage below. + bool BoundingBox(PageIteratorLevel level, + int* left, int* top, int* right, int* bottom) const; + + // Returns the type of the current block. 
See apitypes.h for PolyBlockType. + PolyBlockType BlockType() const; + + // Returns a binary image of the current object at the given level. + // The position and size match the return from BoundingBox. + // Use pixDestroy to delete the image after use. + Pix* GetBinaryImage(PageIteratorLevel level) const; + + // Returns an image of the current object at the given level in greyscale + // if available in the input. To guarantee a binary image use BinaryImage. + // NOTE that in order to give the best possible image, the bounds are + // expanded slightly over the binary connected component, by the supplied + // padding, so the top-left position of the returned image is returned + // in (left,top). These will most likely not match the coordinates + // returned by BoundingBox. + // Use pixDestroy to delete the image after use. + Pix* GetImage(PageIteratorLevel level, int padding, + int* left, int* top) const; + + // Returns the baseline of the current object at the given level. + // The baseline is the line that passes through (x1, y1) and (x2, y2). + // WARNING: with vertical text, baselines may be vertical! + // Returns false if there is no baseline at the current position. + bool Baseline(PageIteratorLevel level, + int* x1, int* y1, int* x2, int* y2) const; + + protected: + // Sets up the internal data for iterating the blobs of a new word, then + // moves the iterator to the given offset. + void BeginWord(int offset); + + // Pointer to the page_res owned by the API. + PAGE_RES* page_res_; + // Pointer to the Tesseract object owned by the API. + Tesseract* tesseract_; + // The iterator to the page_res_. Owned by this ResultIterator. + // A pointer just to avoid dragging in Tesseract includes. + PAGE_RES_IT* it_; + // The current input WERD being iterated. If there is an output from OCR, + // then word_ is NULL. Owned by the API. + WERD* word_; + // The length of the current word_. + int word_length_; + // The current blob index within the word. 
+ int blob_index_; + // Iterator to the blobs within the word. If NULL, then we are iterating + // OCR results in the box_word. + // Owned by this ResultIterator. + C_BLOB_IT* cblob_it_; + // Parameters saved from the Thresholder. Needed to rebuild coordinates. + int scale_; + int scaled_yres_; + int rect_left_; + int rect_top_; + int rect_width_; + int rect_height_; +}; + +} // namespace tesseract. + +#endif // TESSERACT_API_PAGEITERATOR_H__ diff --git a/api/resultiterator.cpp b/api/resultiterator.cpp new file mode 100644 index 0000000000..6c9c1b6ed1 --- /dev/null +++ b/api/resultiterator.cpp @@ -0,0 +1,249 @@ +/////////////////////////////////////////////////////////////////////// +// File: resultiterator.cpp +// Description: Iterator for tesseract results that avoids using tesseract +// internal data structures +// Author: Ray Smith +// Created: Fri Feb 26 14:32:09 PST 2010 +// +// (C) Copyright 2010, Google Inc. +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +/////////////////////////////////////////////////////////////////////// + +#include "resultiterator.h" +#include "allheaders.h" +#include "pageres.h" +#include "tesseractclass.h" + +namespace tesseract { + +ResultIterator::ResultIterator(PAGE_RES* page_res, Tesseract* tesseract, + int scale, int scaled_yres, + int rect_left, int rect_top, + int rect_width, int rect_height) + : PageIterator(page_res, tesseract, scale, scaled_yres, + rect_left, rect_top, rect_width, rect_height) { +} + +ResultIterator::~ResultIterator() { +} + +// Returns the null terminated UTF-8 encoded text string for the current +// object at the given level. Use delete [] to free after use. +char* ResultIterator::GetUTF8Text(PageIteratorLevel level) const { + if (it_->word() == NULL) return NULL; // Already at the end! + STRING text; + PAGE_RES_IT res_it(*it_); + WERD_CHOICE* best_choice = res_it.word()->best_choice; + ASSERT_HOST(best_choice != NULL); + switch (level) { + case RIL_BLOCK: + case RIL_PARA: + do { + best_choice = res_it.word()->best_choice; + ASSERT_HOST(best_choice != NULL); + text += best_choice->unichar_string(); + text += res_it.word()->word->flag(W_EOL) ? "\n" : " "; + res_it.forward(); + } while (res_it.block() == res_it.prev_block()); + break; + case RIL_TEXTLINE: + do { + best_choice = res_it.word()->best_choice; + ASSERT_HOST(best_choice != NULL); + text += best_choice->unichar_string(); + text += res_it.word()->word->flag(W_EOL) ? "\n" : " "; + res_it.forward(); + } while (res_it.row() == res_it.prev_row()); + break; + case RIL_WORD: + text = best_choice->unichar_string(); + break; + case RIL_SYMBOL: + text = tesseract_->unicharset.id_to_unichar( + best_choice->unichar_id(blob_index_)); + } + int length = text.length() + 1; + char* result = new char[length]; + strncpy(result, text.string(), length); + return result; +} + +// Returns the mean confidence of the current object at the given level. +// The number should be interpreted as a percent probability. 
(0.0f-100.0f) +float ResultIterator::Confidence(PageIteratorLevel level) const { + if (it_->word() == NULL) return 0.0f; // Already at the end! + float mean_certainty = 0.0f; + int certainty_count = 0; + PAGE_RES_IT res_it(*it_); + WERD_CHOICE* best_choice = res_it.word()->best_choice; + ASSERT_HOST(best_choice != NULL); + switch (level) { + case RIL_BLOCK: + case RIL_PARA: + do { + best_choice = res_it.word()->best_choice; + ASSERT_HOST(best_choice != NULL); + mean_certainty += best_choice->certainty(); + ++certainty_count; + res_it.forward(); + } while (res_it.block() == res_it.prev_block()); + break; + case RIL_TEXTLINE: + do { + best_choice = res_it.word()->best_choice; + ASSERT_HOST(best_choice != NULL); + mean_certainty += best_choice->certainty(); + ++certainty_count; + res_it.forward(); + } while (res_it.row() == res_it.prev_row()); + break; + case RIL_WORD: + mean_certainty += best_choice->certainty(); + ++certainty_count; + break; + case RIL_SYMBOL: + BLOB_CHOICE_LIST_CLIST* choices = best_choice->blob_choices(); + if (choices != NULL) { + BLOB_CHOICE_LIST_C_IT blob_choices_it(choices); + for (int blob = 0; blob < blob_index_; ++blob) + blob_choices_it.forward(); + BLOB_CHOICE_IT choice_it(blob_choices_it.data()); + for (choice_it.mark_cycle_pt(); + !choice_it.cycled_list(); + choice_it.forward()) { + if (choice_it.data()->unichar_id() == + best_choice->unichar_id(blob_index_)) + break; + } + mean_certainty += choice_it.data()->certainty(); + } else { + mean_certainty += best_choice->certainty(); + } + ++certainty_count; + } + if (certainty_count > 0) { + mean_certainty /= certainty_count; + float confidence = 100 + 5 * mean_certainty; + if (confidence < 0.0f) confidence = 0.0f; + if (confidence > 100.0f) confidence = 100.0f; + return confidence; + } + return 0.0f; +} + +// Returns the font attributes of the current word. 
If iterating at a higher
+// level object than words, eg textlines, then this will return the
+// attributes of the first word in that textline.
+// The actual return value is a string representing a font name. It points
+// to an internal table and SHOULD NOT BE DELETED. Lifespan is the same as
+// the iterator itself, ie rendered invalid by various members of
+// TessBaseAPI, including Init, SetImage, End or deleting the TessBaseAPI.
+// Pointsize is returned in printers points (1/72 inch.)
+const char* ResultIterator::WordFontAttributes(bool* is_bold,
+                                               bool* is_italic,
+                                               bool* is_underlined,
+                                               bool* is_monospace,
+                                               bool* is_serif,
+                                               int* pointsize,
+                                               int* font_id) const {
+  if (it_->word() == NULL) return NULL;  // Already at the end!
+  *font_id = it_->word()->font1;
+  if (*font_id < 0) return NULL;  // No font available.
+  const UnicityTable<FontInfo> &font_table = tesseract_->get_fontinfo_table();
+  FontInfo font_info = font_table.get(*font_id);
+  *is_bold = font_info.is_bold();
+  *is_italic = font_info.is_italic();
+  *is_underlined = false;  // TODO(rays) fix this!
+  *is_monospace = font_info.is_fixed_pitch();
+  *is_serif = font_info.is_serif();
+  // The font size is calculated from a multiple of the x-height
+  // that came from the block.
+  float row_height = it_->row()->row->x_height() *
+      it_->block()->block->cell_over_xheight();
+  // Convert from pixels to printers points.
+  *pointsize = scaled_yres_ > 0
+      ? static_cast<int>(row_height * kPointsPerInch / scaled_yres_ + 0.5)
+      : 0;
+
+  return font_info.name;
+}
+
+// Returns true if the current word was found in a dictionary.
+bool ResultIterator::WordIsFromDictionary() const {
+  if (it_->word() == NULL) return false;  // Already at the end!
+  int permuter = it_->word()->best_choice->permuter();
+  return permuter == SYSTEM_DAWG_PERM || permuter == FREQ_DAWG_PERM ||
+         permuter == USER_DAWG_PERM;
+}
+
+// Returns true if the current word is numeric.
+bool ResultIterator::WordIsNumeric() const { + if (it_->word() == NULL) return false; // Already at the end! + int permuter = it_->word()->best_choice->permuter(); + return permuter == NUMBER_PERM; +} + +ChoiceIterator::ChoiceIterator(const ResultIterator& result_it) { + ASSERT_HOST(result_it.it_->word() != NULL); + tesseract_ = result_it.tesseract_; + PAGE_RES_IT res_it(*result_it.it_); + WERD_CHOICE* best_choice = res_it.word()->best_choice; + BLOB_CHOICE_LIST_CLIST* choices = best_choice->blob_choices(); + if (choices != NULL) { + BLOB_CHOICE_LIST_C_IT blob_choices_it(choices); + for (int blob = 0; blob < result_it.blob_index_; ++blob) + blob_choices_it.forward(); + choice_it_ = new BLOB_CHOICE_IT(blob_choices_it.data()); + choice_it_->mark_cycle_pt(); + } else { + choice_it_ = NULL; + } +} + +ChoiceIterator::~ChoiceIterator() { + delete choice_it_; +} + +// Moves to the next choice for the symbol and returns false if there +// are none left. +bool ChoiceIterator::Next() { + if (choice_it_ == NULL) + return false; + choice_it_->forward(); + return !choice_it_->cycled_list(); +} + +// Returns the null terminated UTF-8 encoded text string for the current +// choice. Use delete [] to free after use. +const char* ChoiceIterator::GetUTF8Text() const { + if (choice_it_ == NULL) + return NULL; + UNICHAR_ID id = choice_it_->data()->unichar_id(); + if (id < 0 || id >= tesseract_->unicharset.size() || + id == INVALID_UNICHAR_ID) + return NULL; + return tesseract_->unicharset.id_to_unichar(id); +} + +// Returns the confidence of the current choice. +// The number should be interpreted as a percent probability. (0.0f-100.0f) +float ChoiceIterator::Confidence() const { + if (choice_it_ == NULL) + return 0.0f; + float confidence = 100 + 5 * choice_it_->data()->certainty(); + if (confidence < 0.0f) confidence = 0.0f; + if (confidence > 100.0f) confidence = 100.0f; + return confidence; +} + + +} // namespace tesseract. 
diff --git a/api/resultiterator.h b/api/resultiterator.h new file mode 100644 index 0000000000..ba8957be37 --- /dev/null +++ b/api/resultiterator.h @@ -0,0 +1,144 @@ +/////////////////////////////////////////////////////////////////////// +// File: resultiterator.h +// Description: Iterator for tesseract results that avoids using tesseract +// internal data structures. +// Author: Ray Smith +// Created: Fri Feb 26 11:01:06 PST 2010 +// +// (C) Copyright 2010, Google Inc. +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +/////////////////////////////////////////////////////////////////////// + +#ifndef TESSERACT_API_RESULTITERATOR_H__ +#define TESSERACT_API_RESULTITERATOR_H__ + +#include "pageiterator.h" + +class BLOB_CHOICE_IT; + +namespace tesseract { + +class Tesseract; + +// Class to iterate over tesseract results, providing access to all levels +// of the page hierarchy, without including any tesseract headers or having +// to handle any tesseract structures. +// WARNING! This class points to data held within the TessBaseAPI class, and +// therefore can only be used while the TessBaseAPI class still exists and +// has not been subjected to a call of Init, SetImage, Recognize, Clear, End +// DetectOS, or anything else that changes the internal PAGE_RES. +// See apitypes.h for the definition of PageIteratorLevel. +// See also base class PageIterator, which contains the bulk of the interface. +// ResultIterator adds text-specific methods for access to OCR output. 
+ +class ResultIterator : public PageIterator { + friend class ChoiceIterator; + public: + // page_res and tesseract come directly from the BaseAPI. + // The rectangle parameters are copied indirectly from the Thresholder, + // via the BaseAPI. They represent the coordinates of some rectangle in an + // original image (in top-left-origin coordinates) and therefore the top-left + // needs to be added to any output boxes in order to specify coordinates + // in the original image. See TessBaseAPI::SetRectangle. + // The scale and scaled_yres are in case the Thresholder scaled the image + // rectangle prior to thresholding. Any coordinates in tesseract's image + // must be divided by scale before adding (rect_left, rect_top). + // The scaled_yres indicates the effective resolution of the binary image + // that tesseract has been given by the Thresholder. + // After the constructor, Begin has already been called. + ResultIterator(PAGE_RES* page_res, Tesseract* tesseract, + int scale, int scaled_yres, + int rect_left, int rect_top, + int rect_width, int rect_height); + virtual ~ResultIterator(); + + // ResultIterators may be copied! This makes it possible to iterate over + // all the objects at a lower level, while maintaining an iterator to + // objects at a higher level. These constructors DO NOT CALL Begin, so + // iterations will continue from the location of src. + // TODO: For now the copy constructor and operator= only need the base class + // versions, but if new data members are added, don't forget to add them! + + // ============= Moving around within the page ============. + + // See PageIterator. + + // ============= Accessing data ==============. + + // Returns the null terminated UTF-8 encoded text string for the current + // object at the given level. Use delete [] to free after use. + char* GetUTF8Text(PageIteratorLevel level) const; + + // Returns the mean confidence of the current object at the given level. 
+ // The number should be interpreted as a percent probability. (0.0f-100.0f) + float Confidence(PageIteratorLevel level) const; + + // ============= Functions that refer to words only ============. + + // Returns the font attributes of the current word. If iterating at a higher + // level object than words, eg textlines, then this will return the + // attributes of the first word in that textline. + // The actual return value is a string representing a font name. It points + // to an internal table and SHOULD NOT BE DELETED. Lifespan is the same as + // the iterator itself, ie rendered invalid by various members of + // TessBaseAPI, including Init, SetImage, End or deleting the TessBaseAPI. + // Pointsize is returned in printers points (1/72 inch.) + const char* WordFontAttributes(bool* is_bold, + bool* is_italic, + bool* is_underlined, + bool* is_monospace, + bool* is_serif, + int* pointsize, + int* font_id) const; + + // Returns true if the current word was found in a dictionary. + bool WordIsFromDictionary() const; + + // Returns true if the current word is numeric. + bool WordIsNumeric() const; +}; + +// Class to iterate over the classifier choices for a single RIL_SYMBOL. +class ChoiceIterator { + public: + // Construction is from a ResultIterator that points to the symbol of + // interest. The ChoiceIterator allows a one-shot iteration over the + // choices for this symbol and after that is is useless. + explicit ChoiceIterator(const ResultIterator& result_it); + ~ChoiceIterator(); + + // Moves to the next choice for the symbol and returns false if there + // are none left. + bool Next(); + + // ============= Accessing data ==============. + + // Returns the null terminated UTF-8 encoded text string for the current + // choice. + // NOTE: Unlike ResultIterator::GetUTF8Text, the return points to an + // internal structure and should NOT be delete[]ed to free after use. + const char* GetUTF8Text() const; + + // Returns the confidence of the current choice. 
+ // The number should be interpreted as a percent probability. (0.0f-100.0f) + float Confidence() const; + + private: + // Pointer to the Tesseract object owned by the API. + Tesseract* tesseract_; + // Iterator over the blob choices. + BLOB_CHOICE_IT* choice_it_; +}; + +} // namespace tesseract. + +#endif // TESSERACT_API_RESULT_ITERATOR_H__ diff --git a/api/tesseractmain.cpp b/api/tesseractmain.cpp index 4fa2b11c55..88aae4884f 100644 --- a/api/tesseractmain.cpp +++ b/api/tesseractmain.cpp @@ -1,21 +1,21 @@ /********************************************************************** - * File: tessedit.cpp (Formerly tessedit.c) - * Description: Main program for merge of tess and editor. - * Author: Ray Smith - * Created: Tue Jan 07 15:21:46 GMT 1992 - * - * (C) Copyright 1992, Hewlett-Packard Ltd. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. - * - **********************************************************************/ +* File: tessedit.cpp (Formerly tessedit.c) +* Description: Main program for merge of tess and editor. +* Author: Ray Smith +* Created: Tue Jan 07 15:21:46 GMT 1992 +* +* (C) Copyright 1992, Hewlett-Packard Ltd. +** Licensed under the Apache License, Version 2.0 (the "License"); +** you may not use this file except in compliance with the License. 
+** You may obtain a copy of the License at +** http://www.apache.org/licenses/LICENSE-2.0 +** Unless required by applicable law or agreed to in writing, software +** distributed under the License is distributed on an "AS IS" BASIS, +** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +** See the License for the specific language governing permissions and +** limitations under the License. +* +**********************************************************************/ #include "mfcpch.h" //#define USE_VLD //Uncomment for Visual Leak Detector. @@ -23,7 +23,6 @@ #include #endif #include -#include "applybox.h" #include "control.h" #include "tessvars.h" #include "tessedit.h" @@ -31,18 +30,16 @@ #include "thresholder.h" #include "pageres.h" #include "imgs.h" -#include "varabled.h" +#include "params.h" +#include "paramsd.h" #include "tprintf.h" #include "tesseractmain.h" #include "stderr.h" #include "notdll.h" -#include "mainblk.h" #include "output.h" #include "globals.h" -#include "helpers.h" #include "blread.h" #include "tfacep.h" -#include "callnet.h" // Include automatically generated configuration file if running autoconf #ifdef HAVE_CONFIG_H @@ -55,33 +52,15 @@ #else #define _(x) (x) #endif -#ifdef HAVE_LIBTIFF -#include "tiffio.h" +#ifndef HAVE_LIBLEPT +#error "Sorry: Tesseract no longer compiles or runs without Leptonica!"; #endif -#ifdef HAVE_LIBLEPT #include "allheaders.h" -#else -class Pix; -#endif -#ifdef _TIFFIO_ -void read_tiff_image(TIFF* tif, IMAGE* image); -#endif #define VARDIR "configs/" /*variables files */ //config under api #define API_CONFIG "configs/api_config" -#define EXTERN - -BOOL_VAR(tessedit_create_boxfile, FALSE, "Output text with boxes"); -BOOL_VAR(tessedit_create_hocr, FALSE, "Output HTML with hOCR markup"); -BOOL_VAR(tessedit_read_image, TRUE, "Ensure the image is read"); -INT_VAR(tessedit_serial_unlv, 0, - "0->Whole page, 1->serial no adapt, 2->serial with adapt"); -INT_VAR(tessedit_page_number, -1, - "-1 -> All 
pages, else specific page to process"); -BOOL_VAR(tessedit_write_images, FALSE, "Capture the image from the IPE"); -BOOL_VAR(tessedit_debug_to_screen, FALSE, "Dont use debug file"); const int kMaxIntSize = 22; char szAppName[] = "Tessedit"; //app name @@ -112,444 +91,373 @@ char szAppName[] = "Tessedit"; //app name // the value of input_file is ignored - ugly, but true - a consequence of // the way that unlv zone file reading takes the place of a page layout // analyzer. -void TesseractImage(const char* input_file, IMAGE* image, Pix* pix, int page_index, +void TesseractImage(const char* input_file, Pix* pix, int page_index, tesseract::TessBaseAPI* api, STRING* text_out) { - api->SetInputName(input_file); -#ifdef HAVE_LIBLEPT - if (pix != NULL) { - api->SetImage(pix); - } else { -#endif - int bytes_per_line = check_legal_image_size(image->get_xsize(), - image->get_ysize(), - image->get_bpp()); - api->SetImage(image->get_buffer(), image->get_xsize(), image->get_ysize(), - image->get_bpp() / 8, bytes_per_line); -#ifdef HAVE_LIBLEPT - } -#endif - if (tessedit_serial_unlv == 0) { - char* text; - if (tessedit_create_boxfile) - text = api->GetBoxText(page_index); - else if (tessedit_write_unlv) - text = api->GetUNLVText(); - else if (tessedit_create_hocr) - text = api->GetHOCRText(page_index + 1); - else - text = api->GetUTF8Text(); - *text_out += text; - delete [] text; - } else { - BLOCK_LIST blocks; - STRING filename = input_file; - const char* lastdot = strrchr(filename.string(), '.'); - if (lastdot != NULL) { - filename[lastdot - filename.string()] = '\0'; - } - if (!read_unlv_file(filename, image->get_xsize(), image->get_ysize(), - &blocks)) { - fprintf(stderr, _("Error: Must have a unlv zone file %s to read!\n"), - filename.string()); - return; - } - BLOCK_IT b_it = &blocks; - for (b_it.mark_cycle_pt(); !b_it.cycled_list(); b_it.forward()) { - BLOCK* block = b_it.data(); - TBOX box = block->bounding_box(); - api->SetRectangle(box.left(), image->get_ysize() - 
box.top(), - box.width(), box.height()); - char* text = api->GetUNLVText(); - *text_out += text; - delete [] text; - if (tessedit_serial_unlv == 1) - api->ClearAdaptiveClassifier(); - } - } - if (tessedit_write_images) { - page_image.write("tessinput.tif"); - } + api->SetInputName(input_file); + api->SetImage(pix); + int serial_unlv; + ASSERT_HOST(api->GetIntVariable("tessedit_serial_unlv", &serial_unlv)); + if (serial_unlv == 0) { + char* text; + bool bool_value; + if ((api->GetBoolVariable("tessedit_create_boxfile", &bool_value) && + bool_value) || + (api->GetBoolVariable("tessedit_make_boxes_from_boxes", &bool_value) && + bool_value)) { + text = api->GetBoxText(page_index); + } else if (api->GetBoolVariable("tessedit_write_unlv", &bool_value) && + bool_value) { + text = api->GetUNLVText(); + } else if (api->GetBoolVariable("tessedit_create_hocr", &bool_value) + && bool_value) { + text = api->GetHOCRText(page_index); + } else { + text = api->GetUTF8Text(); + } + *text_out += text; + delete [] text; + } else { + BLOCK_LIST blocks; + STRING filename = input_file; + const char* lastdot = strrchr(filename.string(), '.'); + if (lastdot != NULL) { + filename[lastdot - filename.string()] = '\0'; + } + if (!read_unlv_file(filename, pixGetWidth(pix), pixGetHeight(pix), + &blocks)) { + fprintf(stderr, _("Error: Must have a unlv zone file %s to read!\n"), + filename.string()); + return; + } + BLOCK_IT b_it = &blocks; + for (b_it.mark_cycle_pt(); !b_it.cycled_list(); b_it.forward()) { + BLOCK* block = b_it.data(); + TBOX box = block->bounding_box(); + api->SetRectangle(box.left(), pixGetHeight(pix) - box.top(), + box.width(), box.height()); + char* text = api->GetUNLVText(); + *text_out += text; + delete [] text; + if (serial_unlv == 1) + api->ClearAdaptiveClassifier(); + } + } + bool bool_value; + if (api->GetBoolVariable("tessedit_write_images", + &bool_value) && bool_value) { + Pix* page_pix = api->GetThresholdedImage(); + pixWrite("tessinput.tif", page_pix, IFF_TIFF_G4); 
+ } } /********************************************************************** - * main() - * - **********************************************************************/ +* main() +* +**********************************************************************/ int main(int argc, char **argv) { - STRING outfile; //output file + STRING outfile; //output file #ifdef USING_GETTEXT - setlocale (LC_ALL, ""); - bindtextdomain (PACKAGE, LOCALEDIR); - textdomain (PACKAGE); + setlocale (LC_ALL, ""); + bindtextdomain (PACKAGE, LOCALEDIR); + textdomain (PACKAGE); #endif - // Detect incorrectly placed -l option. - for (int arg = 0; arg < argc; ++arg) { - if (arg != 3 && strcmp(argv[arg], "-l") == 0) { - fprintf(stderr, _("Error: -l must be arg3, not %d\n"), arg); - argc = 0; - } - } + // Detect incorrectly placed -l option. + for (int arg = 0; arg < argc; ++arg) { + if (arg != 3 && strcmp(argv[arg], "-l") == 0) { + fprintf(stderr, _("Error: -l must be arg3, not %d\n"), arg); + argc = 0; + } + } #ifdef HAVE_CONFIG_H /* Assume that only Unix users care about -v */ - if (argc == 2 && strcmp(argv[1], "-v") == 0) { - fprintf(stderr, "tesseract %s\n", PACKAGE_VERSION); - exit(1); - } + if (argc == 2 && strcmp(argv[1], "-v") == 0) { + fprintf(stderr, "tesseract %s\n", PACKAGE_VERSION); + exit(1); + } #endif - if (argc < 3) { - fprintf(stderr, "Usage:%s imagename outputbase [-l lang]" - " [configfile [[+|-]varfile]...]\n" + if (argc < 3) { + fprintf(stderr, "Usage:%s imagename outputbase [-l lang]" + " [configfile [[+|-]varfile]...]\n" #if !defined(HAVE_LIBLEPT) && !defined(_TIFFIO_) - "Warning - no liblept or libtiff - cannot read compressed" - " tiff files.\n" -#endif - , argv[0]); - exit(1); - } - // Find the required language. 
- const char* lang = "eng"; - int arg = 3; - if (argc >= 5 && strcmp(argv[3], "-l") == 0) { - lang = argv[4]; - arg = 5; - } - - tesseract::TessBaseAPI api; - - api.SetOutputName(argv[2]); - api.SetPageSegMode(tesseract::PSM_AUTO); - api.Init(argv[0], lang, &(argv[arg]), argc-arg, false); - - tprintf (_("Tesseract Open Source OCR Engine")); -#if defined(HAVE_LIBLEPT) - tprintf (_(" with Leptonica\n")); -#elif defined(_TIFFIO_) - tprintf (_(" with LibTiff\n")); -#else - tprintf ("\n"); -#endif - - IMAGE image; - STRING text_out; - int page_number = tessedit_page_number; - if (page_number < 0) - page_number = 0; - FILE* fp = fopen(argv[1], "rb"); - if (fp == NULL) { - tprintf(_("Image file %s cannot be opened!\n"), argv[1]); - fclose(fp); - exit(1); - } -#ifdef HAVE_LIBLEPT - int page = page_number; - int npages = 0; - bool is_tiff = fileFormatIsTiff(fp); - if (is_tiff) - { - int tiffstat = tiffGetCount(fp, &npages); - if (tiffstat == 1) - { - fprintf(stderr, _("Error reading file %s!\n"), argv[1]); - fclose(fp); - exit(1); - } - else - { - fprintf(stderr, _("Number of found pages: %d.\n"), npages); - } - } - fclose(fp); - fp = NULL; - - Pix *pix; - if (is_tiff) - { - for (; page < npages; ++page) - { - pix = pixReadTiff(argv[1], page); - if (!pix) - continue; - if (npages > 1) - { - tprintf(_("Page %d\n"), page); - } - char page_str[kMaxIntSize]; - snprintf(page_str, kMaxIntSize - 1, "%d", page); - api.SetVariable("applybox_page", page_str); - // Run tesseract on the page! - TesseractImage(argv[1], NULL, pix, page, &api, &text_out); - pixDestroy(&pix); - if (tessedit_page_number >= 0 || npages == 1) - { - break; - } - } - } - else - { - // The file is not a tiff file, so use the general pixRead function. - // If the image fails to read, try it as a list of filenames. 
- PIX* pix = pixRead(argv[1]); - if (pix == NULL) { - FILE* fimg = fopen(argv[1], "r"); - if (fimg == NULL) { - tprintf(_("File %s cannot be opened!\n"), argv[1]); - fclose(fimg); - exit(1); - } - char filename[MAX_PATH]; - while (fgets(filename, sizeof(filename), fimg) != NULL) { - chomp_string(filename); - pix = pixRead(filename); - if (pix == NULL) { - tprintf(_("Image file %s cannot be read!\n"), filename); - fclose(fimg); - exit(1); - } - tprintf(_("Page %d : %s\n"), page, filename); - TesseractImage(filename, NULL, pix, page, &api, &text_out); - pixDestroy(&pix); - ++page; - } - fclose(fimg); - } else { - TesseractImage(argv[1], NULL, pix, 0, &api, &text_out); - pixDestroy(&pix); - } - } -#else -#ifdef _TIFFIO_ - int len = strlen(argv[1]); - char* ext = new char[5]; - for (int i=4; i>=0; i--) - ext[4-i] = (char) tolower((int) argv[1][len - i]); - if (len > 3 && (strcmp("tif", ext + 1) == 0 || strcmp("tiff", ext) == 0)) { - // Use libtiff to read a tif file so multi-page can be handled. - // The page number so the tiff file can be closed and reopened. - TIFF* archive = NULL; - do { - // Since libtiff keeps all read images in memory we have to close the - // file and reopen it for every page, and seek to the appropriate page. - if (archive != NULL) - TIFFClose(archive); - archive = TIFFOpen(argv[1], "r"); - if (archive == NULL) { - tprintf(_("Read of file %s failed.\n"), argv[1]); - exit(1); - } - if (page_number > 0) - tprintf(_("Page %d\n"), page_number); - - // Seek to the appropriate page. - for (int i = 0; i < page_number; ++i) { - TIFFReadDirectory(archive); - } - char page_str[kMaxIntSize]; - snprintf(page_str, kMaxIntSize - 1, "%d", page_number); - api.SetVariable("applybox_page", page_str); - // Read the current page into the Tesseract image. - IMAGE image; - read_tiff_image(archive, &image); - - // Run tesseract on the page! 
- TesseractImage(argv[1], &image, NULL, page_number, &api, &text_out); - ++page_number; - // Do this while there are more pages in the tiff file. - } while (TIFFReadDirectory(archive) && - (page_number <= tessedit_page_number || tessedit_page_number < 0)); - TIFFClose(archive); - } else { -#endif - // Using built-in image library to read bmp, or tiff without libtiff. - if (image.read_header(argv[1]) < 0) { - tprintf(_("Read of file %s failed.\n"), argv[1]); - exit(1); - } - if (image.read(image.get_ysize ()) < 0) - MEMORY_OUT.error(argv[0], EXIT, _("Read of image %s"), argv[1]); - invert_image(&image); - TesseractImage(argv[1], &image, NULL, 0, &api, &text_out); -#ifdef _TIFFIO_ - } - delete[] ext; + "Warning - no liblept or libtiff - cannot read compressed" + " tiff files.\n" #endif -#endif // HAVE_LIBLEPT - - //no longer using fp - if (fp != NULL) fclose(fp); - - bool output_hocr = tessedit_create_hocr; - outfile = argv[2]; - outfile += output_hocr ? ".html" : tessedit_create_boxfile ? ".box" : ".txt"; - FILE* fout = fopen(outfile.string(), "w"); - if (fout == NULL) { - tprintf(_("Cannot create output file %s\n"), outfile.string()); - fclose(fout); - exit(1); - } - if (output_hocr) { - const char html_header[] = - "\n" - "\n\n" - " OCR Output\n" - " \n \n \n \n\n"; - fprintf(fout, "%s", html_header); - } - fwrite(text_out.string(), 1, text_out.length(), fout); - if (output_hocr) - fprintf(fout, "\n\n"); - fclose(fout); - - return 0; //Normal exit + , argv[0]); + exit(1); + } + // Find the required language. 
+ const char* lang = "eng"; + int arg = 3; + if (argc >= 5 && strcmp(argv[3], "-l") == 0) { + lang = argv[4]; + arg = 5; + } + + tesseract::TessBaseAPI api; + + api.SetOutputName(argv[2]); + api.Init(argv[0], lang, tesseract::OEM_DEFAULT, &(argv[arg]), argc-arg, false); + + tprintf (_("Tesseract Open Source OCR Engine with Leptonica\n")); + + STRING text_out; + int tessedit_page_number; + ASSERT_HOST(api.GetIntVariable("tessedit_page_number", + &tessedit_page_number)); + int page_number = tessedit_page_number; + if (page_number < 0) + page_number = 0; + FILE* fp = fopen(argv[1], "rb"); + if (fp == NULL) { + tprintf(_("Image file %s cannot be opened!\n"), argv[1]); + fclose(fp); + exit(1); + } + int page = page_number; + int npages = 0; + bool is_tiff = fileFormatIsTiff(fp); + if (is_tiff) + { + int tiffstat = tiffGetCount(fp, &npages); + if (tiffstat == 1) + { + fprintf (stderr, _("Error reading file %s!\n"), argv[1]); + fclose(fp); + exit(1); + } + else + fprintf(stderr, _("Number of found pages: %d.\n"), npages); + } + fclose(fp); + fp = NULL; + + Pix *pix; + if (is_tiff) { + for (; page < npages; ++page) + { + pix = pixReadTiff(argv[1], page); + if (!pix) + continue; + if (npages > 1) + tprintf(_("Page %d\n"), page); + char page_str[kMaxIntSize]; + snprintf(page_str, kMaxIntSize - 1, "%d", page); + api.SetVariable("applybox_page", page_str); + + // Run tesseract on the page! + TesseractImage(argv[1], pix, page, &api, &text_out); + pixDestroy(&pix); + if (tessedit_page_number >= 0 || npages == 1) + { + break; + } + } + } else + { + // The file is not a tiff file, so use the general pixRead function. + // If the image fails to read, try it as a list of filenames. 
+ pix = pixRead(argv[1]); + if (pix == NULL) { + FILE* fimg = fopen(argv[1], "r"); + if (fimg == NULL) { + tprintf(_("File %s cannot be opened!\n"), argv[1]); + fclose(fimg); + exit(1); + } + char filename[MAX_PATH]; + while (fgets(filename, sizeof(filename), fimg) != NULL) { + chomp_string(filename); + pix = pixRead(filename); + if (pix == NULL) { + tprintf(_("Image file %s cannot be read!\n"), filename); + fclose(fimg); + exit(1); + } + tprintf(_("Page %d : %s\n"), page, filename); + TesseractImage(filename, pix, page, &api, &text_out); + pixDestroy(&pix); + ++page; + } + fclose(fimg); + } else { + TesseractImage(argv[1], pix, 0, &api, &text_out); + pixDestroy(&pix); + } + } + + bool output_hocr = false; + api.GetBoolVariable("tessedit_create_hocr", &output_hocr); + bool output_box = false; + api.GetBoolVariable("tessedit_create_boxfile", &output_box); + outfile = argv[2]; + outfile += output_hocr ? ".html" : output_box ? ".box" : ".txt"; + FILE* fout = fopen(outfile.string(), "w"); + if (fout == NULL) { + tprintf(_("Cannot create output file %s\n"), outfile.string()); + fclose(fout); + exit(1); + } + if (output_hocr) { + const char html_header[] = + "\n" + "\n\n\n" + "\n\n" + " \n\n"; + fprintf(fout, "%s", html_header); + } + fwrite(text_out.string(), 1, text_out.length(), fout); + if (output_hocr) + fprintf(fout, "\n\n"); + fclose(fout); + + return 0; //Normal exit } #ifdef __MSW32__ int initialized = 0; /********************************************************************** - * WinMain - * - * Main function for a windows program. - **********************************************************************/ +* WinMain +* +* Main function for a windows program. 
+**********************************************************************/ int WINAPI WinMain( //main for windows //command line - HINSTANCE hInstance, - HINSTANCE hPrevInstance, - LPSTR lpszCmdLine, - int nCmdShow) { - WNDCLASS wc; - HWND hwnd; - MSG msg; - - char **argv; - char *argsin[2]; - int argc; - int exit_code; - - wc.style = CS_NOCLOSE | CS_OWNDC; - wc.lpfnWndProc = (WNDPROC) WndProc; - wc.cbClsExtra = 0; - wc.cbWndExtra = 0; - wc.hInstance = hInstance; - wc.hIcon = NULL; //LoadIcon (NULL, IDI_APPLICATION); - wc.hCursor = NULL; //LoadCursor (NULL, IDC_ARROW); - wc.hbrBackground = (HBRUSH) (COLOR_WINDOW + 1); - wc.lpszMenuName = NULL; - wc.lpszClassName = szAppName; - - RegisterClass(&wc); - - hwnd = CreateWindow (szAppName, szAppName, - WS_OVERLAPPEDWINDOW | WS_DISABLED, - CW_USEDEFAULT, CW_USEDEFAULT, CW_USEDEFAULT, - CW_USEDEFAULT, HWND_DESKTOP, NULL, hInstance, NULL); - - argsin[0] = strdup (szAppName); - argsin[1] = strdup (lpszCmdLine); - /*allocate memory for the args. There can never be more than half*/ - /*the total number of characters in the arguments.*/ - argv = - (char **) malloc (((strlen (argsin[0]) + strlen (argsin[1])) / 2 + 1) * - sizeof (char *)); - - /*now construct argv as it should be for C.*/ - argc = parse_args (2, argsin, argv); - - // ShowWindow (hwnd, nCmdShow); - // UpdateWindow (hwnd); - - if (initialized) { - exit_code = main (argc, argv); - free (argsin[0]); - free (argsin[1]); - free(argv); - return exit_code; - } - while (GetMessage (&msg, NULL, 0, 0)) { - TranslateMessage(&msg); - DispatchMessage(&msg); - if (initialized) { - exit_code = main (argc, argv); - break; - } - else - exit_code = msg.wParam; - } - free (argsin[0]); - free (argsin[1]); - free(argv); - return exit_code; + HINSTANCE hInstance, + HINSTANCE hPrevInstance, + LPSTR lpszCmdLine, + int nCmdShow) { + WNDCLASS wc; + HWND hwnd; + MSG msg; + + char **argv; + char *argsin[2]; + int argc; + int exit_code; + + wc.style = CS_NOCLOSE | CS_OWNDC; + wc.lpfnWndProc = 
(WNDPROC) WndProc; + wc.cbClsExtra = 0; + wc.cbWndExtra = 0; + wc.hInstance = hInstance; + wc.hIcon = NULL; //LoadIcon (NULL, IDI_APPLICATION); + wc.hCursor = NULL; //LoadCursor (NULL, IDC_ARROW); + wc.hbrBackground = (HBRUSH) (COLOR_WINDOW + 1); + wc.lpszMenuName = NULL; + wc.lpszClassName = szAppName; + + RegisterClass(&wc); + + hwnd = CreateWindow (szAppName, szAppName, + WS_OVERLAPPEDWINDOW | WS_DISABLED, + CW_USEDEFAULT, CW_USEDEFAULT, CW_USEDEFAULT, + CW_USEDEFAULT, HWND_DESKTOP, NULL, hInstance, NULL); + + argsin[0] = strdup (szAppName); + argsin[1] = strdup (lpszCmdLine); + /*allocate memory for the args. There can never be more than half*/ + /*the total number of characters in the arguments.*/ + argv = + (char **) malloc (((strlen (argsin[0]) + strlen (argsin[1])) / 2 + 1) * + sizeof (char *)); + + /*now construct argv as it should be for C.*/ + argc = parse_args (2, argsin, argv); + + // ShowWindow (hwnd, nCmdShow); + // UpdateWindow (hwnd); + + if (initialized) { + exit_code = main (argc, argv); + free (argsin[0]); + free (argsin[1]); + free(argv); + return exit_code; + } + while (GetMessage (&msg, NULL, 0, 0)) { + TranslateMessage(&msg); + DispatchMessage(&msg); + if (initialized) { + exit_code = main (argc, argv); + break; + } + else + exit_code = msg.wParam; + } + free (argsin[0]); + free (argsin[1]); + free(argv); + return exit_code; } /********************************************************************** - * WndProc - * - * Function to respond to messages. - **********************************************************************/ +* WndProc +* +* Function to respond to messages. +**********************************************************************/ LONG WINAPI WndProc( //message handler - HWND hwnd, //window with message - UINT msg, //message typ - WPARAM wParam, - LPARAM lParam) { - HDC hdc; - - if (msg == WM_CREATE) { - // - // Create a rendering context. 
- // - hdc = GetDC (hwnd); - ReleaseDC(hwnd, hdc); - initialized = 1; - return 0; - } - return DefWindowProc (hwnd, msg, wParam, lParam); + HWND hwnd, //window with message + UINT msg, //message typ + WPARAM wParam, + LPARAM lParam) { + HDC hdc; + + if (msg == WM_CREATE) { + // + // Create a rendering context. + // + hdc = GetDC (hwnd); + ReleaseDC(hwnd, hdc); + initialized = 1; + return 0; + } + return DefWindowProc (hwnd, msg, wParam, lParam); } /********************************************************************** - * parse_args - * - * Turn a list of args into a new list of args with each separate - * whitespace spaced string being an arg. - **********************************************************************/ +* parse_args +* +* Turn a list of args into a new list of args with each separate +* whitespace spaced string being an arg. +**********************************************************************/ int parse_args ( /*refine arg list */ -int argc, /*no of input args */ -char *argv[], /*input args */ -char *arglist[] /*output args */ -) { - int argcount; /*converted argc */ - char *testchar; /*char in option string */ - int arg; /*current argument */ - - argcount = 0; /*no of options */ - for (arg = 0; arg < argc; arg++) { - testchar = argv[arg]; /*start of arg */ - do { - while (*testchar - && (*testchar == ' ' || *testchar == '\n' - || *testchar == '\t')) - testchar++; /*skip white space */ - if (*testchar) { - /*new arg */ - arglist[argcount++] = testchar; - /*skip to white space */ - for (testchar++; *testchar && *testchar != ' ' && *testchar != '\n' && *testchar != '\t'; testchar++); - if (*testchar) - *testchar++ = '\0'; /*turn to separate args */ - } - } - while (*testchar); - } - return argcount; /*new number of args */ + int argc, /*no of input args */ + char *argv[], /*input args */ + char *arglist[] /*output args */ + ) { + int argcount; /*converted argc */ + char *testchar; /*char in option string */ + int arg; /*current argument */ + + 
argcount = 0; /*no of options */ + for (arg = 0; arg < argc; arg++) { + testchar = argv[arg]; /*start of arg */ + do { + while (*testchar + && (*testchar == ' ' || *testchar == '\n' + || *testchar == '\t')) + testchar++; /*skip white space */ + if (*testchar) { + /*new arg */ + arglist[argcount++] = testchar; + /*skip to white space */ + for (testchar++; *testchar && *testchar != ' ' && *testchar != '\n' && *testchar != '\t'; testchar++) ; + if (*testchar) + *testchar++ = '\0'; /*turn to separate args */ + } + } + while (*testchar); + } + return argcount; /*new number of args */ } #endif diff --git a/api/tesseractmain.h b/api/tesseractmain.h index f6c2bbcaa7..a8a043a1ec 100644 --- a/api/tesseractmain.h +++ b/api/tesseractmain.h @@ -20,41 +20,10 @@ #ifndef TESSERACTMAIN_H #define TESSERACTMAIN_H -#include "varable.h" -#include "tessclas.h" +#include "params.h" +#include "blobs.h" #include "notdll.h" -extern BOOL_VAR_H(tessedit_create_boxfile, FALSE, "Output text with boxes"); -extern BOOL_VAR_H(tessedit_read_image, TRUE, "Ensure the image is read"); -extern INT_VAR_H(tessedit_serial_unlv, 0, - "0->Whole page, 1->serial no adapt, 2->serial with adapt"); -extern INT_VAR_H(tessedit_page_number, -1, - "-1 -> All pages, else specific page to process"); -extern BOOL_VAR_H(tessedit_write_images, FALSE, - "Capture the image from the IPE"); -extern BOOL_VAR_H(tessedit_debug_to_screen, FALSE, "Dont use debug file"); - -/** - * run from api - * @param arg0 program name - * @param lang language - */ -inT32 api_main(const char *arg0, - uinT16 lang); -/** - * setup dummy engine info - * @param lang user language - * @param name of engine - * @param version of engine - */ -inT16 setup_info(uinT16 lang, - const char *name, - const char *version); -/** - * read dummy image info - * @param im_out read dummy image info - */ -inT16 read_image(IMAGE *im_out); #ifdef __MSW32__ /** * main for windows command line diff --git a/ccmain/Makefile.am b/ccmain/Makefile.am index 
4de8f3f210..c2a79ed441 100644 --- a/ccmain/Makefile.am +++ b/ccmain/Makefile.am @@ -1,35 +1,37 @@ SUBDIRS = AM_CPPFLAGS = \ + -DUSE_STD_NAMESPACE \ -I$(top_srcdir)/ccutil -I$(top_srcdir)/ccstruct \ -I$(top_srcdir)/image -I$(top_srcdir)/viewer \ -I$(top_srcdir)/ccops -I$(top_srcdir)/dict \ -I$(top_srcdir)/classify \ -I$(top_srcdir)/wordrec -I$(top_srcdir)/cutil \ + -I$(top_srcdir)/neural_networks/runtime -I$(top_srcdir)/cube \ -I$(top_srcdir)/textord -EXTRA_DIST = tessembedded.cpp ccmain.vcproj +EXTRA_DIST = tessembedded.cpp include_HEADERS = \ - adaptions.h applybox.h blobcmp.h \ - callnet.h charcut.h charsample.h control.h \ - docqual.h expandblob.h fixspace.h fixxht.h \ - imgscale.h matmatch.h osdetect.h output.h \ - pagewalk.h paircmp.h pgedit.h reject.h scaleimg.h \ + charcut.h control.h cube_reco_context.h \ + docqual.h fixspace.h \ + imgscale.h osdetect.h output.h \ + paramsd.h pgedit.h reject.h scaleimg.h \ tessbox.h tessedit.h tessembedded.h tesseractclass.h \ - tessio.h tessvars.h tfacep.h tfacepp.h thresholder.h tstruct.h \ - varabled.h werdit.h + tesseract_cube_combiner.h \ + tessvars.h tfacep.h tfacepp.h thresholder.h tstruct.h \ + werdit.h lib_LTLIBRARIES = libtesseract_main.la libtesseract_main_la_SOURCES = \ - adaptions.cpp ambigsrecog.cpp applybox.cpp \ - blobcmp.cpp \ - callnet.cpp charcut.cpp charsample.cpp control.cpp \ - docqual.cpp expandblob.cpp fixspace.cpp fixxht.cpp \ - imgscale.cpp matmatch.cpp osdetect.cpp output.cpp \ - pagewalk.cpp paircmp.cpp pgedit.cpp reject.cpp scaleimg.cpp \ + adaptions.cpp applybox.cpp \ + charcut.cpp control.cpp cube_control.cpp cube_reco_context.cpp \ + docqual.cpp fixspace.cpp fixxht.cpp \ + imgscale.cpp osdetect.cpp output.cpp pagesegmain.cpp \ + pagewalk.cpp paramsd.cpp pgedit.cpp reject.cpp scaleimg.cpp \ + recogtraining.cpp tesseract_cube_combiner.cpp \ tessbox.cpp tessedit.cpp tesseractclass.cpp tessvars.cpp \ tfacepp.cpp thresholder.cpp tstruct.cpp \ - varabled.cpp werdit.cpp + werdit.cpp 
libtesseract_main_la_LIBADD = \ ../wordrec/libtesseract_wordrec.la libtesseract_main_la_LDFLAGS = -version-info $(GENERIC_LIBRARY_VERSION) diff --git a/ccmain/Makefile.in b/ccmain/Makefile.in index 50455eae6b..a2b1e649bc 100644 --- a/ccmain/Makefile.in +++ b/ccmain/Makefile.in @@ -72,13 +72,13 @@ am__installdirs = "$(DESTDIR)$(libdir)" "$(DESTDIR)$(includedir)" LTLIBRARIES = $(lib_LTLIBRARIES) libtesseract_main_la_DEPENDENCIES = \ ../wordrec/libtesseract_wordrec.la -am_libtesseract_main_la_OBJECTS = adaptions.lo ambigsrecog.lo \ - applybox.lo blobcmp.lo callnet.lo charcut.lo charsample.lo \ - control.lo docqual.lo expandblob.lo fixspace.lo fixxht.lo \ - imgscale.lo matmatch.lo osdetect.lo output.lo pagewalk.lo \ - paircmp.lo pgedit.lo reject.lo scaleimg.lo tessbox.lo \ - tessedit.lo tesseractclass.lo tessvars.lo tfacepp.lo \ - thresholder.lo tstruct.lo varabled.lo werdit.lo +am_libtesseract_main_la_OBJECTS = adaptions.lo applybox.lo charcut.lo \ + control.lo cube_control.lo cube_reco_context.lo docqual.lo \ + fixspace.lo fixxht.lo imgscale.lo osdetect.lo output.lo \ + pagesegmain.lo pagewalk.lo paramsd.lo pgedit.lo reject.lo \ + scaleimg.lo recogtraining.lo tesseract_cube_combiner.lo \ + tessbox.lo tessedit.lo tesseractclass.lo tessvars.lo \ + tfacepp.lo thresholder.lo tstruct.lo werdit.lo libtesseract_main_la_OBJECTS = $(am_libtesseract_main_la_OBJECTS) libtesseract_main_la_LINK = $(LIBTOOL) --tag=CXX $(AM_LIBTOOLFLAGS) \ $(LIBTOOLFLAGS) --mode=link $(CXXLD) $(AM_CXXFLAGS) \ @@ -251,7 +251,6 @@ libdir = @libdir@ libexecdir = @libexecdir@ localedir = @localedir@ localstatedir = @localstatedir@ -lt_ECHO = @lt_ECHO@ mandir = @mandir@ mkdir_p = @mkdir_p@ oldincludedir = @oldincludedir@ @@ -269,35 +268,37 @@ top_builddir = @top_builddir@ top_srcdir = @top_srcdir@ SUBDIRS = AM_CPPFLAGS = \ + -DUSE_STD_NAMESPACE \ -I$(top_srcdir)/ccutil -I$(top_srcdir)/ccstruct \ -I$(top_srcdir)/image -I$(top_srcdir)/viewer \ -I$(top_srcdir)/ccops -I$(top_srcdir)/dict \ 
-I$(top_srcdir)/classify \ -I$(top_srcdir)/wordrec -I$(top_srcdir)/cutil \ + -I$(top_srcdir)/neural_networks/runtime -I$(top_srcdir)/cube \ -I$(top_srcdir)/textord -EXTRA_DIST = tessembedded.cpp ccmain.vcproj +EXTRA_DIST = tessembedded.cpp include_HEADERS = \ - adaptions.h applybox.h blobcmp.h \ - callnet.h charcut.h charsample.h control.h \ - docqual.h expandblob.h fixspace.h fixxht.h \ - imgscale.h matmatch.h osdetect.h output.h \ - pagewalk.h paircmp.h pgedit.h reject.h scaleimg.h \ + charcut.h control.h cube_reco_context.h \ + docqual.h fixspace.h \ + imgscale.h osdetect.h output.h \ + paramsd.h pgedit.h reject.h scaleimg.h \ tessbox.h tessedit.h tessembedded.h tesseractclass.h \ - tessio.h tessvars.h tfacep.h tfacepp.h thresholder.h tstruct.h \ - varabled.h werdit.h + tesseract_cube_combiner.h \ + tessvars.h tfacep.h tfacepp.h thresholder.h tstruct.h \ + werdit.h lib_LTLIBRARIES = libtesseract_main.la libtesseract_main_la_SOURCES = \ - adaptions.cpp ambigsrecog.cpp applybox.cpp \ - blobcmp.cpp \ - callnet.cpp charcut.cpp charsample.cpp control.cpp \ - docqual.cpp expandblob.cpp fixspace.cpp fixxht.cpp \ - imgscale.cpp matmatch.cpp osdetect.cpp output.cpp \ - pagewalk.cpp paircmp.cpp pgedit.cpp reject.cpp scaleimg.cpp \ + adaptions.cpp applybox.cpp \ + charcut.cpp control.cpp cube_control.cpp cube_reco_context.cpp \ + docqual.cpp fixspace.cpp fixxht.cpp \ + imgscale.cpp osdetect.cpp output.cpp pagesegmain.cpp \ + pagewalk.cpp paramsd.cpp pgedit.cpp reject.cpp scaleimg.cpp \ + recogtraining.cpp tesseract_cube_combiner.cpp \ tessbox.cpp tessedit.cpp tesseractclass.cpp tessvars.cpp \ tfacepp.cpp thresholder.cpp tstruct.cpp \ - varabled.cpp werdit.cpp + werdit.cpp libtesseract_main_la_LIBADD = \ ../wordrec/libtesseract_wordrec.la @@ -378,34 +379,32 @@ distclean-compile: -rm -f *.tab.c @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/adaptions.Plo@am__quote@ -@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ambigsrecog.Plo@am__quote@ @AMDEP_TRUE@@am__include@ 
@am__quote@./$(DEPDIR)/applybox.Plo@am__quote@ -@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/blobcmp.Plo@am__quote@ -@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/callnet.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/charcut.Plo@am__quote@ -@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/charsample.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/control.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cube_control.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cube_reco_context.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/docqual.Plo@am__quote@ -@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/expandblob.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/fixspace.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/fixxht.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/imgscale.Plo@am__quote@ -@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/matmatch.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/osdetect.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/output.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/pagesegmain.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/pagewalk.Plo@am__quote@ -@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/paircmp.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/paramsd.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/pgedit.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/recogtraining.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/reject.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/scaleimg.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/tessbox.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/tessedit.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/tesseract_cube_combiner.Plo@am__quote@ 
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/tesseractclass.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/tessvars.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/tfacepp.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/thresholder.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/tstruct.Plo@am__quote@ -@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/varabled.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/werdit.Plo@am__quote@ .cpp.o: diff --git a/ccmain/adaptions.cpp b/ccmain/adaptions.cpp index 95cf6c00f8..0c0d13975c 100644 --- a/ccmain/adaptions.cpp +++ b/ccmain/adaptions.cpp @@ -33,16 +33,12 @@ #include "tessbox.h" #include "tessvars.h" #include "memry.h" -#include "mainblk.h" #include "charcut.h" #include "imgs.h" #include "scaleimg.h" #include "reject.h" #include "control.h" -#include "adaptions.h" #include "stopper.h" -#include "charsample.h" -#include "matmatch.h" #include "secname.h" #include "tesseractclass.h" @@ -51,61 +47,6 @@ #include "config_auto.h" #endif -inT32 demo_word = 0; - -#define WINDOWNAMESIZE 13 /*max size of name */ - -#define EXTERN - -EXTERN BOOL_VAR (tessedit_reject_ems, FALSE, "Reject all m's"); -EXTERN BOOL_VAR (tessedit_reject_suspect_ems, FALSE, "Reject suspect m's"); - -EXTERN double_VAR (tessedit_cluster_t1, 0.20, -"t1 threshold for clustering samples"); -EXTERN double_VAR (tessedit_cluster_t2, 0.40, -"t2 threshold for clustering samples"); -EXTERN double_VAR (tessedit_cluster_t3, 0.12, -"Extra threshold for clustering samples, only keep a new sample if best score greater than this value"); -EXTERN double_VAR (tessedit_cluster_accept_fraction, 0.80, -"Largest fraction of characters in cluster for it to be used for adaption"); -EXTERN INT_VAR (tessedit_cluster_min_size, 3, -"Smallest number of samples in a cluster for it to be used for adaption"); -EXTERN BOOL_VAR (tessedit_cluster_debug, FALSE, -"Generate and print debug information for adaption 
by clustering"); -EXTERN BOOL_VAR (tessedit_use_best_sample, FALSE, -"Use best sample from cluster when adapting"); -EXTERN BOOL_VAR (tessedit_test_cluster_input, FALSE, -"Set reject map to enable cluster input to be measured"); - -EXTERN BOOL_VAR (tessedit_matrix_match, TRUE, "Use matrix matcher"); -EXTERN BOOL_VAR (tessedit_mm_use_non_adaption_set, FALSE, -"Don't try to adapt to characters on this list"); -EXTERN STRING_VAR (tessedit_non_adaption_set, ",.;:'~@*", -"Characters to be avoided when adapting"); -EXTERN BOOL_VAR (tessedit_mm_adapt_using_prototypes, TRUE, -"Use prototypes when adapting"); -EXTERN BOOL_VAR (tessedit_mm_use_prototypes, TRUE, -"Use prototypes as clusters are built"); -EXTERN BOOL_VAR (tessedit_mm_use_rejmap, FALSE, -"Adapt to characters using reject map"); -EXTERN BOOL_VAR (tessedit_mm_all_rejects, FALSE, -"Adapt to all characters using, matrix matcher"); -EXTERN BOOL_VAR (tessedit_mm_only_match_same_char, FALSE, -"Only match samples against clusters for the same character"); -EXTERN BOOL_VAR (tessedit_process_rns, FALSE, "Handle m - rn ambigs"); - -EXTERN BOOL_VAR (tessedit_demo_adaption, FALSE, -"Display cut images and matrix match for demo purposes"); -EXTERN INT_VAR (tessedit_demo_word1, 62, -"Word number of first word to display"); -EXTERN INT_VAR (tessedit_demo_word2, 64, -"Word number of second word to display"); -EXTERN STRING_VAR (tessedit_demo_file, "academe", -"Name of document containing demo words"); -EXTERN BOOL_VAR(tessedit_adapt_to_char_fragments, TRUE, - "Adapt to words that contain " - " a character composed form fragments"); - namespace tesseract { BOOL8 Tesseract::word_adaptable( //should we adapt? WERD_RES *word, @@ -201,938 +142,6 @@ BOOL8 Tesseract::word_adaptable( //should we adapt? 
tprintf("returning status %d\n", status); } return status; - } - -void Tesseract::collect_ems_for_adaption(WERD_RES *word, - CHAR_SAMPLES_LIST *char_clusters, - CHAR_SAMPLE_LIST *chars_waiting) { - PBLOB_LIST *blobs = word->outword->blob_list (); - PBLOB_IT blob_it(blobs); - inT16 i; - CHAR_SAMPLE *sample; - PIXROW_LIST *pixrow_list; - PIXROW_IT pixrow_it; - IMAGELINE *imlines; // lines of the image - TBOX pix_box; // box of imlines - // extent - WERD copy_outword; // copy to denorm - PBLOB_IT copy_blob_it; - OUTLINE_IT copy_outline_it; - inT32 resolution = page_image.get_res (); - - if (tessedit_reject_ems || tessedit_reject_suspect_ems) - return; // Do nothing - - if (word->word->bounding_box ().height () > resolution / 3) - return; - - if (tessedit_demo_adaption) - // Make sure not set - tessedit_display_mm.set_value (FALSE); - - if (word_adaptable (word, tessedit_em_adaption_mode) - && word->reject_map.reject_count () == 0 - && (strchr (word->best_choice->unichar_string().string (), 'm') != NULL - || (tessedit_process_rns - && strstr (word->best_choice->unichar_string().string (), - "rn") != NULL))) { - if (tessedit_process_rns - && strstr (word->best_choice->unichar_string().string (), "rn") != NULL) { - copy_outword = *(word->outword); - copy_blob_it.set_to_list (copy_outword.blob_list ()); - i = 0; - while (word->best_choice->unichar_string()[i] != '\0') { - if (word->best_choice->unichar_string()[i] == 'r' - && word->best_choice->unichar_string()[i + 1] == 'n') { - copy_outline_it.set_to_list (copy_blob_it.data ()-> - out_list ()); - copy_outline_it.add_list_after (copy_blob_it. 
- data_relative (1)-> - out_list ()); - copy_blob_it.forward (); - delete (copy_blob_it.extract ()); - i++; - } - copy_blob_it.forward (); - i++; - } - } - else - copy_outword = *(word->outword); - - copy_outword.baseline_denormalise (&word->denorm); - char_clip_word(©_outword, page_image, pixrow_list, imlines, pix_box); - pixrow_it.set_to_list (pixrow_list); - pixrow_it.move_to_first (); - - blob_it.move_to_first (); - for (i = 0; - word->best_choice->unichar_string()[i] != '\0'; - i++, pixrow_it.forward (), blob_it.forward ()) { - - if (word->best_choice->unichar_string()[i] == 'm' - || (word->best_choice->unichar_string()[i] == 'r' - && word->best_choice->unichar_string()[i + 1] == 'n')) { - #ifndef SECURE_NAMES - if (tessedit_cluster_debug) - tprintf ("Sample %c for adaption found in %s, index %d\n", - word->best_choice->unichar_string()[i], - word->best_choice->unichar_string().string (), i); - #endif - if (tessedit_matrix_match) { - sample = clip_sample (pixrow_it.data (), - imlines, - pix_box, - copy_outword.flag (W_INVERSE), - word->best_choice->unichar_string()[i]); - - if (sample == NULL) { //Clip failed - #ifndef SECURE_NAMES - tprintf ("Unable to clip sample from %s, index %d\n", - word->best_choice->unichar_string().string (), i); - #endif - if (word->best_choice->unichar_string()[i] == 'r') - i++; - - continue; - } - } - else - sample = new CHAR_SAMPLE (blob_it.data (), - &word->denorm, - word->best_choice->unichar_string()[i]); - - cluster_sample(sample, char_clusters, chars_waiting); - - if (word->best_choice->unichar_string()[i] == 'r') - i++; // Skip next character - } - } - delete[]imlines; // Free array of imlines - delete pixrow_list; - } -} - - -void Tesseract::collect_characters_for_adaption( - WERD_RES *word, - CHAR_SAMPLES_LIST *char_clusters, - CHAR_SAMPLE_LIST *chars_waiting) { - PBLOB_LIST *blobs = word->outword->blob_list (); - PBLOB_IT blob_it(blobs); - inT16 i; - CHAR_SAMPLE *sample; - PIXROW_LIST *pixrow_list; - PIXROW_IT pixrow_it; 
- IMAGELINE *imlines; // lines of the image - TBOX pix_box; // box of imlines - // extent - WERD copy_outword; // copy to denorm - inT32 resolution = page_image.get_res (); - - if (word->word->bounding_box ().height () > resolution / 3) - return; - - if (tessedit_demo_adaption) - // Make sure not set - tessedit_display_mm.set_value (FALSE); - - if ((word_adaptable (word, tessedit_cluster_adaption_mode) - && word->reject_map.reject_count () == 0) || tessedit_mm_use_rejmap) { - if (tessedit_test_cluster_input && !tessedit_mm_use_rejmap) - return; // Reject map set to acceptable - /* Collect information about good matches */ - copy_outword = *(word->outword); - copy_outword.baseline_denormalise (&word->denorm); - char_clip_word(©_outword, page_image, pixrow_list, imlines, pix_box); - pixrow_it.set_to_list (pixrow_list); - pixrow_it.move_to_first (); - - blob_it.move_to_first (); - for (i = 0; - word->best_choice->unichar_string()[i] != '\0'; - i++, pixrow_it.forward (), blob_it.forward ()) { - - if (!(tessedit_mm_use_non_adaption_set - && STRING(tessedit_non_adaption_set).contains( - word->best_choice->unichar_string()[i])) - || (tessedit_mm_use_rejmap && word->reject_map[i].accepted ())) { - #ifndef SECURE_NAMES - if (tessedit_cluster_debug) - tprintf ("Sample %c for adaption found in %s, index %d\n", - word->best_choice->unichar_string()[i], - word->best_choice->unichar_string().string (), i); - #endif - sample = clip_sample (pixrow_it.data (), - imlines, - pix_box, - copy_outword.flag (W_INVERSE), - word->best_choice->unichar_string()[i]); - - if (sample == NULL) { //Clip failed - #ifndef SECURE_NAMES - tprintf ("Unable to clip sample from %s, index %d\n", - word->best_choice->unichar_string().string (), i); - #endif - continue; - } - cluster_sample(sample, char_clusters, chars_waiting); - } - } - delete[]imlines; // Free array of imlines - delete pixrow_list; - } - else if (tessedit_test_cluster_input && !tessedit_mm_use_rejmap) - // Set word to all rejects - 
word->reject_map.rej_word_tess_failure (); - -} - - -void Tesseract::cluster_sample(CHAR_SAMPLE *sample, - CHAR_SAMPLES_LIST *char_clusters, - CHAR_SAMPLE_LIST *chars_waiting) { - CHAR_SAMPLES *best_cluster = NULL; - CHAR_SAMPLES_IT c_it = char_clusters; - CHAR_SAMPLE_IT cw_it = chars_waiting; - float score; - float best_score = MAX_INT32; - - if (c_it.empty ()) - c_it.add_to_end (new CHAR_SAMPLES (sample)); - else { - for (c_it.mark_cycle_pt (); !c_it.cycled_list (); c_it.forward ()) { - score = c_it.data ()->match_score (sample, this); - if (score < best_score) { - best_score = score; - best_cluster = c_it.data (); - } - } - - if (tessedit_cluster_debug) - tprintf ("Sample's best score %f\n", best_score); - - if (best_score < tessedit_cluster_t1) { - if (best_score > tessedit_cluster_t3 || tessedit_mm_use_prototypes) { - best_cluster->add_sample (sample, this); - check_wait_list(chars_waiting, sample, best_cluster); - #ifndef SECURE_NAMES - if (tessedit_cluster_debug) - tprintf ("Sample added to an existing cluster\n"); - #endif - } - else { - #ifndef SECURE_NAMES - if (tessedit_cluster_debug) - tprintf - ("Sample dropped, good match to an existing cluster\n"); - #endif - } - } - else if (best_score > tessedit_cluster_t2) { - c_it.add_to_end (new CHAR_SAMPLES (sample)); - #ifndef SECURE_NAMES - if (tessedit_cluster_debug) - tprintf ("New cluster created for this sample\n"); - #endif - } - else { - cw_it.add_to_end (sample); - if (tessedit_cluster_debug) - tprintf ("Sample added to the wait list\n"); - } - } -} - -void Tesseract::check_wait_list(CHAR_SAMPLE_LIST *chars_waiting, - CHAR_SAMPLE *sample, - CHAR_SAMPLES *best_cluster) { - CHAR_SAMPLE *wait_sample; - CHAR_SAMPLE *test_sample = sample; - CHAR_SAMPLE_IT cw_it = chars_waiting; - CHAR_SAMPLE_LIST add_list; //Samples added to best cluster - CHAR_SAMPLE_IT add_it = &add_list; - float score; - - add_list.clear (); - - if (!cw_it.empty ()) { - do { - if (!add_list.empty ()) { - add_it.forward (); - test_sample 
= add_it.extract (); - best_cluster->add_sample (test_sample, this); - } - - for (cw_it.mark_cycle_pt (); - !cw_it.cycled_list (); cw_it.forward ()) { - wait_sample = cw_it.data (); - if (tessedit_mm_use_prototypes) - score = best_cluster->match_score (wait_sample, this); - else - score = sample->match_sample (wait_sample, FALSE, this); - if (score < tessedit_cluster_t1) { - if (score > tessedit_cluster_t3 - || tessedit_mm_use_prototypes) { - add_it.add_after_stay_put (cw_it.extract ()); - #ifndef SECURE_NAMES - if (tessedit_cluster_debug) - tprintf - ("Wait sample added to an existing cluster\n"); - #endif - } - else { - #ifndef SECURE_NAMES - if (tessedit_cluster_debug) - tprintf - ("Wait sample dropped, good match to an existing cluster\n"); - #endif - } - } - } - } - while (!add_list.empty ()); - } -} - - -void Tesseract::complete_clustering(CHAR_SAMPLES_LIST *char_clusters, - CHAR_SAMPLE_LIST *chars_waiting) { - CHAR_SAMPLES *best_cluster; - CHAR_SAMPLES_IT c_it = char_clusters; - CHAR_SAMPLE_IT cw_it = chars_waiting; - CHAR_SAMPLE *sample; - inT32 total_sample_count = 0; - - while (!cw_it.empty ()) { - cw_it.move_to_first (); - sample = cw_it.extract (); - best_cluster = new CHAR_SAMPLES (sample); - c_it.add_to_end (best_cluster); - check_wait_list(chars_waiting, sample, best_cluster); - } - - for (c_it.mark_cycle_pt (); !c_it.cycled_list (); c_it.forward ()) { - c_it.data ()->assign_to_char (); - if (tessedit_use_best_sample) - c_it.data ()->find_best_sample (); - else if (tessedit_mm_adapt_using_prototypes) - c_it.data ()->build_prototype (); - - if (tessedit_cluster_debug) - total_sample_count += c_it.data ()->n_samples (); - } - #ifndef SECURE_NAMES - if (tessedit_cluster_debug) - tprintf ("Clustering completed, %d samples in all\n", total_sample_count); - #endif - -#ifndef GRAPHICS_DISABLED - if (tessedit_demo_adaption) - display_cluster_prototypes(char_clusters); -#endif - -} - -void Tesseract::adapt_to_good_ems(WERD_RES *word, - CHAR_SAMPLES_LIST 
*char_clusters, - CHAR_SAMPLE_LIST *chars_waiting) { - PBLOB_LIST *blobs = word->outword->blob_list (); - PBLOB_IT blob_it(blobs); - inT16 i; - CHAR_SAMPLE *sample; - CHAR_SAMPLES_IT c_it = char_clusters; - CHAR_SAMPLE_IT cw_it = chars_waiting; - float score; - float best_score; - char best_char; - CHAR_SAMPLES *best_cluster; - PIXROW_LIST *pixrow_list; - PIXROW_IT pixrow_it; - IMAGELINE *imlines; // lines of the image - TBOX pix_box; // box of imlines - // extent - WERD copy_outword; // copy to denorm - TBOX b_box; - PBLOB_IT copy_blob_it; - OUTLINE_IT copy_outline_it; - PIXROW *pixrow = NULL; - - static inT32 word_number = 0; - -#ifndef GRAPHICS_DISABLED - ScrollView* demo_win = NULL; -#endif - - inT32 resolution = page_image.get_res (); - - if (word->word->bounding_box ().height () > resolution / 3) - return; - - word_number++; - - if (strchr (word->best_choice->unichar_string().string (), 'm') == NULL - && (tessedit_process_rns - && strstr (word->best_choice->unichar_string().string (), "rn") == NULL)) - return; - - if (tessedit_reject_ems) - reject_all_ems(word); - else if (tessedit_reject_suspect_ems) - reject_suspect_ems(word); - else { - if (char_clusters->length () == 0) { - #ifndef SECURE_NAMES - if (tessedit_cluster_debug) - tprintf ("No clusters to use for em adaption\n"); - #endif - return; - } - - if (!cw_it.empty ()) { - complete_clustering(char_clusters, chars_waiting); - print_em_stats(char_clusters, chars_waiting); - } - - if ((!word_adaptable (word, tessedit_em_adaption_mode) || - word->reject_map.reject_count () != 0) - && (strchr (word->best_choice->unichar_string().string (), 'm') != NULL - || (tessedit_process_rns - && strstr (word->best_choice->unichar_string().string (), - "rn") != NULL))) { - if (tessedit_process_rns - && strstr (word->best_choice->unichar_string().string (), - "rn") != NULL) { - copy_outword = *(word->outword); - copy_blob_it.set_to_list (copy_outword.blob_list ()); - i = 0; - while (word->best_choice->unichar_string()[i] 
!= '\0') { - if (word->best_choice->unichar_string()[i] == 'r' - && word->best_choice->unichar_string()[i + 1] == 'n') { - copy_outline_it.set_to_list (copy_blob_it.data ()-> - out_list ()); - copy_outline_it.add_list_after (copy_blob_it. - data_relative (1)-> - out_list ()); - copy_blob_it.forward (); - delete (copy_blob_it.extract ()); - i++; - } - copy_blob_it.forward (); - i++; - } - } - else - copy_outword = *(word->outword); - - copy_outword.baseline_denormalise (&word->denorm); - copy_blob_it.set_to_list (copy_outword.blob_list ()); - char_clip_word(©_outword, page_image, pixrow_list, imlines, pix_box); - pixrow_it.set_to_list (pixrow_list); - pixrow_it.move_to_first (); - - // For debugging only - b_box = copy_outword.bounding_box (); - pixrow = pixrow_it.data (); - - blob_it.move_to_first (); - copy_blob_it.move_to_first (); - for (i = 0; - word->best_choice->unichar_string()[i] != '\0'; - i++, pixrow_it.forward (), blob_it.forward (), - copy_blob_it.forward ()) { - if ((word->best_choice->unichar_string()[i] == 'm' - || (word->best_choice->unichar_string()[i] == 'r' - && word->best_choice->unichar_string()[i + 1] == 'n')) - && !word->reject_map[i].perm_rejected ()) { - if (tessedit_cluster_debug) - tprintf ("Sample %c to check found in %s, index %d\n", - word->best_choice->unichar_string()[i], - word->best_choice->unichar_string().string (), i); - - if (tessedit_demo_adaption) - tprintf - ("Sample %c to check found in %s (%d), index %d\n", - word->best_choice->unichar_string()[i], - word->best_choice->unichar_string().string (), word_number, - i); - - if (tessedit_matrix_match) { - TBOX copy_box = copy_blob_it.data ()->bounding_box (); - - sample = clip_sample (pixrow_it.data (), - imlines, - pix_box, - copy_outword.flag (W_INVERSE), - word->best_choice->unichar_string()[i]); - - //Clip failed - if (sample == NULL) { - tprintf - ("Unable to clip sample from %s, index %d\n", - word->best_choice->unichar_string().string (), i); - #ifndef SECURE_NAMES - if 
(tessedit_cluster_debug) - tprintf ("Sample rejected (no sample)\n"); - #endif - word->reject_map[i].setrej_mm_reject (); - if (word->best_choice->unichar_string()[i] == 'r') { - word->reject_map[i + 1].setrej_mm_reject (); - i++; - } - continue; - } - } - else - sample = new CHAR_SAMPLE(blob_it.data(), - &word->denorm, - word->best_choice->unichar_string()[i]); - - best_score = MAX_INT32; - best_char = '\0'; - best_cluster = NULL; - - for (c_it.mark_cycle_pt (); - !c_it.cycled_list (); c_it.forward ()) { - if (c_it.data ()->character () != '\0') { - score = c_it.data ()->match_score (sample, this); - if (score < best_score) { - best_cluster = c_it.data (); - best_score = score; - best_char = c_it.data ()->character (); - } - } - } - - if (best_score > tessedit_cluster_t1) { - #ifndef SECURE_NAMES - if (tessedit_cluster_debug) - tprintf ("Sample rejected (score %f)\n", best_score); - if (tessedit_demo_adaption) - tprintf ("Sample rejected (score %f)\n", best_score); - #endif - word->reject_map[i].setrej_mm_reject (); - if (word->best_choice->unichar_string()[i] == 'r') - word->reject_map[i + 1].setrej_mm_reject (); - } - else { - if (word->best_choice->unichar_string()[i] == best_char) { - #ifndef SECURE_NAMES - if (tessedit_cluster_debug) - tprintf ("Sample accepted (score %f)\n", - best_score); - if (tessedit_demo_adaption) - tprintf ("Sample accepted (score %f)\n", - best_score); - #endif - word->reject_map[i].setrej_mm_accept (); - if (word->best_choice->unichar_string()[i] == 'r') - word->reject_map[i + 1].setrej_mm_accept (); - } - else { - #ifndef SECURE_NAMES - if (tessedit_cluster_debug) - tprintf ("Sample rejected (char %c, score %f)\n", - best_char, best_score); - if (tessedit_demo_adaption) - tprintf ("Sample rejected (char %c, score %f)\n", - best_char, best_score); - #endif - word->reject_map[i].setrej_mm_reject (); - if (word->best_choice->unichar_string()[i] == 'r') - word->reject_map[i + 1].setrej_mm_reject (); - } - } - - if 
(tessedit_demo_adaption) { - if (strcmp (imagebasename.string (), - tessedit_demo_file.string ()) != 0 - || word_number == tessedit_demo_word1 - || word_number == tessedit_demo_word2) { -#ifndef GRAPHICS_DISABLED - demo_win = - display_clip_image(©_outword, - page_image, - pixrow_list, - pix_box); -#endif - demo_word = word_number; - best_cluster->match_score (sample, this); - demo_word = 0; - } - } - if (word->best_choice->unichar_string()[i] == 'r') - i++; // Skip next character - } - } - delete[]imlines; // Free array of imlines - delete pixrow_list; - } - } -} - - - -void Tesseract::adapt_to_good_samples(WERD_RES *word, - CHAR_SAMPLES_LIST *char_clusters, - CHAR_SAMPLE_LIST *chars_waiting) { - PBLOB_LIST *blobs = word->outword->blob_list (); - PBLOB_IT blob_it(blobs); - inT16 i; - CHAR_SAMPLE *sample; - CHAR_SAMPLES_IT c_it = char_clusters; - CHAR_SAMPLE_IT cw_it = chars_waiting; - float score; - float best_score; - char best_char; - CHAR_SAMPLES *best_cluster; - PIXROW_LIST *pixrow_list; - PIXROW_IT pixrow_it; - IMAGELINE *imlines; // lines of the image - TBOX pix_box; // box of imlines - // extent - WERD copy_outword; // copy to denorm - TBOX b_box; - PBLOB_IT copy_blob_it; - PIXROW *pixrow = NULL; - - static inT32 word_number = 0; - -#ifndef GRAPHICS_DISABLED - ScrollView* demo_win = NULL; -#endif - - inT32 resolution = page_image.get_res (); - - word_number++; - - if (tessedit_test_cluster_input) - return; - - if (word->word->bounding_box ().height () > resolution / 3) - return; - - if (char_clusters->length () == 0) { - #ifndef SECURE_NAMES - if (tessedit_cluster_debug) - tprintf ("No clusters to use for adaption\n"); - #endif - return; - } - - if (!cw_it.empty ()) { - complete_clustering(char_clusters, chars_waiting); - print_em_stats(char_clusters, chars_waiting); - } - - if ((!word_adaptable (word, tessedit_cluster_adaption_mode) - && word->reject_map.reject_count () != 0) || tessedit_mm_use_rejmap) { - if (tessedit_cluster_debug) { - tprintf 
("\nChecking: \"%s\" MAP ", - word->best_choice->unichar_string().string ()); - word->reject_map.print (debug_fp); - tprintf ("\n"); - } - - copy_outword = *(word->outword); - copy_outword.baseline_denormalise (&word->denorm); - copy_blob_it.set_to_list (copy_outword.blob_list ()); - char_clip_word(©_outword, page_image, pixrow_list, imlines, pix_box); - pixrow_it.set_to_list (pixrow_list); - pixrow_it.move_to_first (); - - // For debugging only - b_box = copy_outword.bounding_box (); - pixrow = pixrow_it.data (); - - blob_it.move_to_first (); - copy_blob_it.move_to_first (); - for (i = 0; - word->best_choice->unichar_string()[i] != '\0'; - i++, pixrow_it.forward (), blob_it.forward (), - copy_blob_it.forward ()) { - if (word->reject_map[i].recoverable () - || (tessedit_mm_all_rejects && word->reject_map[i].rejected ())) { - TBOX copy_box = copy_blob_it.data ()->bounding_box (); - - if (tessedit_cluster_debug) - tprintf ("Sample %c to check found in %s, index %d\n", - word->best_choice->unichar_string()[i], - word->best_choice->unichar_string().string (), i); - - if (tessedit_demo_adaption) - tprintf ("Sample %c to check found in %s (%d), index %d\n", - word->best_choice->unichar_string()[i], - word->best_choice->unichar_string().string (), - word_number, i); - - sample = clip_sample (pixrow_it.data (), - imlines, - pix_box, - copy_outword.flag (W_INVERSE), - word->best_choice->unichar_string()[i]); - - if (sample == NULL) { //Clip failed - tprintf ("Unable to clip sample from %s, index %d\n", - word->best_choice->unichar_string().string (), i); - #ifndef SECURE_NAMES - if (tessedit_cluster_debug) - tprintf ("Sample rejected (no sample)\n"); - #endif - word->reject_map[i].setrej_mm_reject (); - - continue; - } - - best_score = MAX_INT32; - best_char = '\0'; - best_cluster = NULL; - - for (c_it.mark_cycle_pt (); - !c_it.cycled_list (); c_it.forward ()) { - if (c_it.data ()->character () != '\0') { - score = c_it.data ()->match_score (sample, this); - if (score < 
best_score) { - best_cluster = c_it.data (); - best_score = score; - best_char = c_it.data ()->character (); - } - } - } - - if (best_score > tessedit_cluster_t1) { - #ifndef SECURE_NAMES - if (tessedit_cluster_debug) - tprintf ("Sample rejected (score %f)\n", best_score); - if (tessedit_demo_adaption) - tprintf ("Sample rejected (score %f)\n", best_score); - #endif - word->reject_map[i].setrej_mm_reject (); - } - else { - if (word->best_choice->unichar_string()[i] == best_char) { - #ifndef SECURE_NAMES - if (tessedit_cluster_debug) - tprintf ("Sample accepted (score %f)\n", best_score); - if (tessedit_demo_adaption) - tprintf ("Sample accepted (score %f)\n", best_score); - #endif - if (tessedit_test_adaption) - word->reject_map[i].setrej_minimal_rej_accept (); - else - word->reject_map[i].setrej_mm_accept (); - } - else { - #ifndef SECURE_NAMES - if (tessedit_cluster_debug) - tprintf ("Sample rejected (char %c, score %f)\n", - best_char, best_score); - if (tessedit_demo_adaption) - tprintf ("Sample rejected (char %c, score %f)\n", - best_char, best_score); - #endif - word->reject_map[i].setrej_mm_reject (); - } - } - - if (tessedit_demo_adaption) { - if (strcmp (imagebasename.string (), - tessedit_demo_file.string ()) != 0 - || word_number == tessedit_demo_word1 - || word_number == tessedit_demo_word2) { -#ifndef GRAPHICS_DISABLED - demo_win = - display_clip_image(©_outword, - page_image, - pixrow_list, - pix_box); -#endif - demo_word = word_number; - best_cluster->match_score (sample, this); - demo_word = 0; - } - } - } - } - delete[]imlines; // Free array of imlines - delete pixrow_list; - - if (tessedit_cluster_debug) { - tprintf ("\nFinal: \"%s\" MAP ", - word->best_choice->unichar_string().string ()); - word->reject_map.print (debug_fp); - tprintf ("\n"); - } - } -} -} // namespace tesseract - - -void print_em_stats(CHAR_SAMPLES_LIST *char_clusters, - CHAR_SAMPLE_LIST *chars_waiting) { - CHAR_SAMPLES_IT c_it = char_clusters; - - if (!tessedit_cluster_debug) - 
return; - #ifndef SECURE_NAMES - tprintf ("There are %d clusters and %d samples waiting\n", - char_clusters->length (), chars_waiting->length ()); - - for (c_it.mark_cycle_pt (); !c_it.cycled_list (); c_it.forward ()) - c_it.data ()->print (debug_fp); - #endif - tprintf ("\n"); -} - - -CHAR_SAMPLE *clip_sample( //lines of the image - PIXROW *pixrow, - IMAGELINE *imlines, - TBOX pix_box, //box of imlines extent - BOOL8 white_on_black, - char c) { - TBOX b_box = pixrow->bounding_box (); - float baseline_pos = 0; - inT32 resolution = page_image.get_res (); - - if (!b_box.null_box ()) { - ASSERT_HOST (b_box.width () < page_image.get_xsize () && - b_box.height () < page_image.get_ysize ()); - - if (b_box.width () > resolution || b_box.height () > resolution) { - tprintf ("clip sample: sample too big (%d x %d)\n", - b_box.width (), b_box.height ()); - - return NULL; - } - - IMAGE *image = new (IMAGE); - if (image->create (b_box.width (), b_box.height (), 1) == -1) { - tprintf ("clip sample: create image failed (%d x %d)\n", - b_box.width (), b_box.height ()); - - delete image; - return NULL; - } - - if (!white_on_black) - invert_image(image); // Set background to white - pixrow->char_clip_image (imlines, pix_box, NULL, *image, baseline_pos); - if (white_on_black) - invert_image(image); //invert white on black for scaling &NN - return new CHAR_SAMPLE (image, c); - } - else - return NULL; -} - - -#ifndef GRAPHICS_DISABLED -void display_cluster_prototypes(CHAR_SAMPLES_LIST *char_clusters) { - inT16 proto_number = 0; - CHAR_SAMPLES_IT c_it = char_clusters; - char title[WINDOWNAMESIZE]; - - for (c_it.mark_cycle_pt (); !c_it.cycled_list (); c_it.forward ()) { - proto_number++; - - #ifndef SECURE_NAMES - tprintf ("Displaying proto number %d\n", proto_number); - #endif - - if (c_it.data ()->prototype () != NULL) { - sprintf (title, "Proto - %d", proto_number); - display_image (c_it.data ()->prototype ()->make_image (), - title, (proto_number - 1) * 400, 0, FALSE); - } - } -} 
-#endif - -// ********************************************************************* -// Simplistic routines to test the effect of rejecting ems and fullstops -// ********************************************************************* - -void reject_all_ems(WERD_RES *word) { - inT16 i; - - for (i = 0; word->best_choice->unichar_string()[i] != '\0'; i++) { - if (word->best_choice->unichar_string()[i] == 'm') - // reject all ems - word->reject_map[i].setrej_mm_reject (); - } -} - - -void reject_all_fullstops(WERD_RES *word) { - inT16 i; - - for (i = 0; word->best_choice->unichar_string()[i] != '\0'; i++) { - if (word->best_choice->unichar_string()[i] == '.') - // reject all fullstops - word->reject_map[i].setrej_mm_reject (); - } -} - -namespace tesseract { -void Tesseract::reject_suspect_ems(WERD_RES *word) { - inT16 i; - - if (!word_adaptable (word, tessedit_cluster_adaption_mode)) - for (i = 0; word->best_choice->unichar_string()[i] != '\0'; i++) { - if (word->best_choice->unichar_string()[i] == 'm' && suspect_em (word, i)) - // reject all ems - word->reject_map[i].setrej_mm_reject (); - } -} } // namespace tesseract - - -void reject_suspect_fullstops(WERD_RES *word) { - inT16 i; - - for (i = 0; word->best_choice->unichar_string()[i] != '\0'; i++) { - if (word->best_choice->unichar_string()[i] == '.' 
- && suspect_fullstop (word, i)) - // reject all commas - word->reject_map[i].setrej_mm_reject (); - } -} - - -BOOL8 suspect_em(WERD_RES *word, inT16 index) { - PBLOB_LIST *blobs = word->outword->blob_list (); - PBLOB_IT blob_it(blobs); - inT16 j; - - for (j = 0; j < index; j++) - blob_it.forward (); - - return (blob_it.data ()->out_list ()->length () != 1); -} - - -BOOL8 suspect_fullstop(WERD_RES *word, inT16 i) { - float aspect_ratio; - PBLOB_LIST *blobs = word->outword->blob_list (); - PBLOB_IT blob_it(blobs); - inT16 j; - TBOX box; - inT16 width; - inT16 height; - - for (j = 0; j < i; j++) - blob_it.forward (); - - box = blob_it.data ()->bounding_box (); - - width = box.width (); - height = box.height (); - - aspect_ratio = ((width > height) ? ((float) width) / height : - ((float) height) / width); - - return (aspect_ratio > tessed_fullstop_aspect_ratio); -} diff --git a/ccmain/adaptions.h b/ccmain/adaptions.h deleted file mode 100644 index 7033045511..0000000000 --- a/ccmain/adaptions.h +++ /dev/null @@ -1,89 +0,0 @@ -/********************************************************************** - * File: adaptions.h (Formerly adaptions.h) - * Description: Functions used to adapt to blobs already confidently - * identified - * Author: Chris Newton - * Created: Thu Oct 7 10:17:28 BST 1993 - * - * (C) Copyright 1992, Hewlett-Packard Ltd. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. 
- * - **********************************************************************/ - -#ifndef ADAPTIONS_H -#define ADAPTIONS_H - -#include "charsample.h" -#include "charcut.h" -#include "notdll.h" - -extern BOOL_VAR_H (tessedit_reject_ems, FALSE, "Reject all m's"); -extern BOOL_VAR_H (tessedit_reject_suspect_ems, FALSE, "Reject suspect m's"); -extern double_VAR_H (tessedit_cluster_t1, 0.20, -"t1 threshold for clustering samples"); -extern double_VAR_H (tessedit_cluster_t2, 0.40, -"t2 threshold for clustering samples"); -extern double_VAR_H (tessedit_cluster_t3, 0.12, -"Extra threshold for clustering samples, only keep a new sample if best score greater than this value"); -extern double_VAR_H (tessedit_cluster_accept_fraction, 0.80, -"Largest fraction of characters in cluster for it to be used for adaption"); -extern INT_VAR_H (tessedit_cluster_min_size, 3, -"Smallest number of samples in a cluster for it to be used for adaption"); -extern BOOL_VAR_H (tessedit_cluster_debug, FALSE, -"Generate and print debug information for adaption by clustering"); -extern BOOL_VAR_H (tessedit_use_best_sample, FALSE, -"Use best sample from cluster when adapting"); -extern BOOL_VAR_H (tessedit_test_cluster_input, FALSE, -"Set reject map to enable cluster input to be measured"); -extern BOOL_VAR_H (tessedit_matrix_match, TRUE, "Use matrix matcher"); -extern BOOL_VAR_H (tessedit_old_matrix_match, FALSE, "Use matrix matcher"); -extern BOOL_VAR_H (tessedit_mm_use_non_adaption_set, FALSE, -"Don't try to adapt to characters on this list"); -extern STRING_VAR_H (tessedit_non_adaption_set, ",.;:'~@*", -"Characters to be avoided when adapting"); -extern BOOL_VAR_H (tessedit_mm_adapt_using_prototypes, TRUE, -"Use prototypes when adapting"); -extern BOOL_VAR_H (tessedit_mm_use_prototypes, TRUE, -"Use prototypes as clusters are built"); -extern BOOL_VAR_H (tessedit_mm_use_rejmap, FALSE, -"Adapt to characters using reject map"); -extern BOOL_VAR_H (tessedit_mm_all_rejects, FALSE, -"Adapt to all 
characters using, matrix matcher"); -extern BOOL_VAR_H (tessedit_mm_only_match_same_char, FALSE, -"Only match samples against clusters for the same character"); -extern BOOL_VAR_H (tessedit_process_rns, FALSE, "Handle m - rn ambigs"); -extern BOOL_VAR_H (tessedit_demo_adaption, FALSE, -"Display cut images and matrix match for demo purposes"); -extern INT_VAR_H (tessedit_demo_word1, 62, -"Word number of first word to display"); -extern INT_VAR_H (tessedit_demo_word2, 64, -"Word number of second word to display"); -extern STRING_VAR_H (tessedit_demo_file, "academe", -"Name of document containing demo words"); -extern BOOL_VAR_H(tessedit_adapt_to_char_fragments, TRUE, - "Adapt to words that contain " - " a character composed form fragments"); - -void print_em_stats(CHAR_SAMPLES_LIST *char_clusters, - CHAR_SAMPLE_LIST *chars_waiting); - //lines of the image -CHAR_SAMPLE *clip_sample(PIXROW *pixrow, - IMAGELINE *imlines, - TBOX pix_box, //box of imlines extent - BOOL8 white_on_black, - char c); -void display_cluster_prototypes(CHAR_SAMPLES_LIST *char_clusters); -void reject_all_ems(WERD_RES *word); -void reject_all_fullstops(WERD_RES *word); -void reject_suspect_fullstops(WERD_RES *word); -BOOL8 suspect_em(WERD_RES *word, inT16 index); -BOOL8 suspect_fullstop(WERD_RES *word, inT16 i); -#endif diff --git a/ccmain/ambigsrecog.cpp b/ccmain/ambigsrecog.cpp deleted file mode 100644 index 9ffa2555d6..0000000000 --- a/ccmain/ambigsrecog.cpp +++ /dev/null @@ -1,179 +0,0 @@ -/////////////////////////////////////////////////////////////////////// -// File: genericvector.h -// Description: Functions for producing classifications -// for the input to ambigstraining. -// Author: Daria Antonova -// Created: Mon Jun 23 11:26:43 PDT 2008 -// -// (C) Copyright 2007, Google Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -/////////////////////////////////////////////////////////////////////// - -#include "ambigs.h" - -#include "applybox.h" -#include "boxread.h" -#include "control.h" -#include "permute.h" -#include "ratngs.h" -#include "reject.h" -#include "stopper.h" -#include "tesseractclass.h" - -namespace tesseract { - -// Sets flags necessary for ambigs training mode. -// Opens and returns the pointer to the output file. -FILE *Tesseract::init_ambigs_training(const STRING &fname) { - permute_only_top = 1; // use only top choice permuter - tessedit_tess_adaption_mode.set_value(0); // turn off adaption - tessedit_ok_mode.set_value(0); // turn off context checking - tessedit_enable_doc_dict.set_value(0); // turn off document dictionary - save_best_choices.set_value(1); // save individual char choices - stopper_no_acceptable_choices.set_value(1); // explore all segmentations - save_raw_choices.set_value(1); // save raw choices - - // Open ambigs output file. 
- STRING output_fname = fname; - const char *lastdot = strrchr(output_fname.string(), '.'); - if (lastdot != NULL) { - output_fname[lastdot - output_fname.string()] = '\0'; - } - output_fname += ".txt"; - FILE *output_file; - if (!(output_file = fopen(output_fname.string(), "a+"))) { - CANTOPENFILE.error("ambigs_training", EXIT, - "Can't open box file %s\n", output_fname.string()); - } - return output_file; -} - -// This function takes tif/box pair of files and runs recognition on the image, -// while making sure that the word bounds that tesseract identified roughly -// match to those specified by the input box file. For each word (ngram in a -// single bounding box from the input box file) it outputs the ocred result, -// the correct label, rating and certainty. -void Tesseract::ambigs_training_segmented(const STRING &fname, - PAGE_RES *page_res, - volatile ETEXT_DESC *monitor, - FILE *output_file) { - STRING box_fname = fname; - const char *lastdot = strrchr(box_fname.string(), '.'); - if (lastdot != NULL) { - box_fname[lastdot - box_fname.string()] = '\0'; - } - box_fname += ".box"; - FILE *box_file; - if (!(box_file = fopen(box_fname.string(), "r"))) { - CANTOPENFILE.error("ambigs_training", EXIT, - "Can't open box file %s\n", box_fname.string()); - } - - static PAGE_RES_IT page_res_it; - page_res_it.page_res = page_res; - page_res_it.restart_page(); - int x_min, y_min, x_max, y_max; - char label[UNICHAR_LEN * 10]; - - // Process all the words on this page. - while (page_res_it.word() != NULL && - read_next_box(applybox_page, box_file, label, - &x_min, &y_min, &x_max, &y_max)) { - // Init bounding box of the current word bounding box and from box file. - TBOX box = TBOX(ICOORD(x_min, y_min), ICOORD(x_max, y_max)); - TBOX word_box(page_res_it.word()->word->bounding_box()); - bool one_word = true; - // Check whether the bounding box of the next word overlaps with the - // current box from box file. 
- while (page_res_it.next_word() != NULL && - box.x_overlap(page_res_it.next_word()->word->bounding_box())) { - word_box = word_box.bounding_union( - page_res_it.next_word()->word->bounding_box()); - page_res_it.forward(); - one_word = false; - } - if (!word_box.major_overlap(box)) { - if (!word_box.x_overlap(box)) { - // We must be looking at the word that belongs in the "next" bounding - // box from the box file. The ngram that was supposed to appear in - // the current box read from the box file must have been dropped by - // tesseract as noise. - tprintf("Word %s was dropped as noise.\n", label); - continue; // stay on this blob, but read next box from box file - } else { - tprintf("Error: Insufficient overlap for word box" - " and box from file for %s\n", label); - word_box.print(); - box.print(); - exit(1); - } - } - // Skip recognizing the ngram if tesseract is sure it's not - // one word, otherwise run one recognition pass on this word. - if (!one_word) { - tprintf("Tesseract segmented %s as multiple words\n", label); - } else { - ambigs_classify_and_output(&page_res_it, label, output_file); - } - page_res_it.forward(); - } - fclose(box_file); -} - -// Run classify_word_pass1() on the current word. Output tesseract's raw choice -// as a result of the classification. For words labeled with a single unichar -// also output all alternatives from blob_choices of the best choice. -void Tesseract::ambigs_classify_and_output(PAGE_RES_IT *page_res_it, - const char *label, - FILE *output_file) { - int offset; - // Classify word. - classify_word_pass1(page_res_it->word(), page_res_it->row()->row, - page_res_it->block()->block, - FALSE, NULL, NULL); - WERD_CHOICE *best_choice = page_res_it->word()->best_choice; - ASSERT_HOST(best_choice != NULL); - ASSERT_HOST(best_choice->blob_choices() != NULL); - - // Compute the number of unichars in the label. 
- int label_num_unichars = 0; - int step = 1; // should be non-zero on the first iteration - for (offset = 0; label[offset] != '\0' && step > 0; - step = getDict().getUnicharset().step(label + offset), - offset += step, ++label_num_unichars); - if (step == 0) { - tprintf("Not outputting illegal unichar %s\n", label); - return; - } - - // Output all classifier choices for the unigrams (1-1 classifications). - if (label_num_unichars == 1 && best_choice->blob_choices()->length() == 1) { - BLOB_CHOICE_LIST_C_IT outer_blob_choice_it; - outer_blob_choice_it.set_to_list(best_choice->blob_choices()); - BLOB_CHOICE_IT blob_choice_it; - blob_choice_it.set_to_list(outer_blob_choice_it.data()); - for (blob_choice_it.mark_cycle_pt(); - !blob_choice_it.cycled_list(); - blob_choice_it.forward()) { - BLOB_CHOICE *blob_choice = blob_choice_it.data(); - if (blob_choice->unichar_id() != INVALID_UNICHAR_ID) { - fprintf(output_file, "%s\t%s\t%.4f\t%.4f\n", - unicharset.id_to_unichar(blob_choice->unichar_id()), - label, blob_choice->rating(), blob_choice->certainty()); - } - } - } - // Output the raw choice for succesful non 1-1 classifications. 
- getDict().PrintAmbigAlternatives(output_file, label, label_num_unichars); -} - -} // namespace tesseract diff --git a/ccmain/applybox.cpp b/ccmain/applybox.cpp index b83ab5bdbc..8aa7a3c915 100644 --- a/ccmain/applybox.cpp +++ b/ccmain/applybox.cpp @@ -22,1084 +22,675 @@ #pragma warning(disable:4244) // Conversion warnings #endif -#include "applybox.h" #include #include #ifdef __UNIX__ #include #include #endif +#include "allheaders.h" #include "boxread.h" -#include "control.h" -#include "genblob.h" -#include "globals.h" -#include "fixxht.h" -#include "mainblk.h" -#include "matchdefs.h" -#include "secname.h" -#include "tessbox.h" +#include "chopper.h" +#include "pageres.h" #include "unichar.h" #include "unicharset.h" -#include "matchdefs.h" #include "tesseractclass.h" -#define SECURE_NAMES -#ifndef SECURE_NAMES -#include "wordstats.h" -#endif - -// Include automatically generated configuration file if running autoconf. -#ifdef HAVE_CONFIG_H -#include "config_auto.h" -#endif - -#ifdef HAVE_LIBLEPT -// Include leptonica library only if autoconf (or makefile etc) tell us to. -#include "allheaders.h" -#endif - -#define EXTERN -EXTERN BOOL_VAR (applybox_rebalance, TRUE, "Drop dead"); -EXTERN INT_VAR (applybox_debug, 5, "Debug level"); -EXTERN INT_VAR (applybox_page, 0, "Page number to apply boxes from"); -EXTERN STRING_VAR (applybox_test_exclusions, "", - "Chars ignored for testing"); -EXTERN double_VAR (applybox_error_band, 0.15, "Err band as fract of xht"); - -EXTERN STRING_VAR(exposure_pattern, ".exp", - "Exposure value follows this pattern in the image" - " filename. 
The name of the image files are expected" - " to be in the form [lang].[fontname].exp[num].tif"); - -EXTERN BOOL_VAR(learn_chars_and_char_frags_mode, FALSE, - "Learn both character fragments (as is done in the" - " special low exposure mode) as well as unfragmented" - " characters."); - -extern IMAGE page_image; - -// The unicharset used during box training -static UNICHARSET unicharset_boxes; - -static void clear_any_old_text(BLOCK_LIST *block_list); - -// Register uch with unicharset_boxes. -static UNICHAR_ID register_char(const char *uch); - -static BOOL8 read_next_box(int page, - FILE* box_file, - TBOX *box, - UNICHAR_ID *uch_id); - +// Max number of blobs to classify together in FindSegmentation. +const int kMaxGroupSize = 4; /************************************************************************* - * The code re-assigns outlines to form words each with ONE labelled blob. - * Noise is left in UNLABELLED words. The chars on the page are checked crudely - * for sensible position relative to baseline and xht. Failed boxes are - * compensated for by duplicating other believable instances of the character. - * * The box file is assumed to contain box definitions, one per line, of the - * following format: - * ... arbitrary trailing fields unused - * - * The approach taken is to search the WHOLE page for stuff overlapping each box. - * - This is not too inefficient and is SAFE. - * - We can detect overlapping blobs as we will be attempting to put a blob - * from a LABELLED word into the current word. - * - When all the boxes have been processed we can detect any stuff which is - * being ignored - it is the unlabelled words left on the page. - * - * A box should only overlap one row. + * following format for blob-level boxes: + * + * and for word/line-level boxes: + * WordStr # + * NOTES: + * The boxes use tesseract coordinates, i.e. 0,0 is at BOTTOM-LEFT. 
* - * A warning is given if the box is on the same row as the previous box, but NOT - * on the same row as the previous blob. + * is 0-based, and the page number is used for multipage input (tiff). * - * Any OUTLINE which overlaps the box is put into the new word. - * - * ascender chars must ascend above xht significantly - * xht chars must not rise above row xht significantly - * bl chars must not descend below baseline significantly - * descender chars must descend below baseline significantly - * - * ?? Certain chars are DROPPED - to limit the training data. + * In the blob-level form, each line represents a recognizable unit, which may + * be several UTF-8 bytes, but there is a bounding box around each recognizable + * unit, and no classifier is needed to train in this mode (bootstrapping.) * + * In the word/line-level form, the line begins with the literal "WordStr", and + * the bounding box bounds either a whole line or a whole word. The recognizable + * units in the word/line are listed after the # at the end of the line and + * are space delimited, ignoring any original spaces on the line. + * Eg. + * word -> #w o r d + * multi word line -> #m u l t i w o r d l i n e + * The recognizable units must be space-delimited in order to allow multiple + * unicodes to be used for a single recognizable unit, eg Hindi. + * In this mode, the classifier must have been pre-trained with the desired + * character set, or it will not be able to find the character segmentations. 
*************************************************************************/ -namespace tesseract { -void Tesseract::apply_boxes(const STRING& fname, - BLOCK_LIST *block_list //real blocks - ) { - inT16 boxfile_lineno = 0; - inT16 boxfile_charno = 0; - TBOX box; //boxfile box - UNICHAR_ID uch_id; //correct ch from boxfile - ROW *row; - ROW *prev_row = NULL; - inT16 prev_box_right = MAX_INT16; - inT16 block_id; - inT16 row_id; - inT16 box_count = 0; - inT16 box_failures = 0; - inT16 labels_ok; - inT16 rows_ok; - inT16 bad_blobs; - inT16 *tgt_char_counts = NULL; // No. of box samples - inT16 i; - inT16 rebalance_count = 0; - UNICHAR_ID min_uch_id = INVALID_UNICHAR_ID; - inT16 min_samples; - inT16 final_labelled_blob_count; - bool low_exposure = false; - - // Clean the unichar set - unicharset_boxes.clear(); - // Space character needed to represent NIL classification - unicharset_boxes.unichar_insert(" "); - - // Figure out whether this image file's exposure is less than 1, in which - // case when learning we will only pay attention to character fragments. 
- const char *ptr = strstr(imagefile.string(), exposure_pattern.string()); - if (ptr != NULL && - strtol(ptr += strlen(exposure_pattern.string()), NULL, 10) < 0) { - low_exposure = true; - } - - FILE* box_file; - STRING filename = fname; - const char *lastdot; //of name - - lastdot = strrchr (filename.string (), '.'); - if (lastdot != NULL) - filename[lastdot - filename.string()] = '\0'; - - filename += ".box"; - if (!(box_file = fopen (filename.string(), "r"))) { - CANTOPENFILE.error ("read_next_box", EXIT, - "Cant open box file %s %d", - filename.string(), errno); - } - tgt_char_counts = new inT16[MAX_NUM_CLASSES]; - for (i = 0; i < MAX_NUM_CLASSES; i++) - tgt_char_counts[i] = 0; - - clear_any_old_text(block_list); - while (read_next_box(applybox_page, box_file, &box, &uch_id)) { - box_count++; - if (!low_exposure || learn_chars_and_char_frags_mode) { - tgt_char_counts[uch_id]++; - } - row = find_row_of_box (block_list, box, block_id, row_id); - if (box.left () < prev_box_right) { - boxfile_lineno++; - boxfile_charno = 1; - } - else - boxfile_charno++; +namespace tesseract { - if (row == NULL) { - box_failures++; - report_failed_box (boxfile_lineno, boxfile_charno, box, - unicharset_boxes.id_to_unichar(uch_id), - "FAILURE! box overlaps no blobs or blobs in multiple rows"); - } - else { - if ((box.left () >= prev_box_right) && (row != prev_row)) - report_failed_box (boxfile_lineno, boxfile_charno, box, - unicharset_boxes.id_to_unichar(uch_id), - "WARNING! 
false row break"); - box_failures += resegment_box (row, box, uch_id, block_id, row_id, - boxfile_lineno, boxfile_charno, tgt_char_counts, low_exposure, true); - prev_row = row; +static void clear_any_old_text(BLOCK_LIST *block_list) { + BLOCK_IT block_it(block_list); + for (block_it.mark_cycle_pt(); + !block_it.cycled_list(); block_it.forward()) { + ROW_IT row_it(block_it.data()->row_list()); + for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) { + WERD_IT word_it(row_it.data()->word_list()); + for (word_it.mark_cycle_pt(); + !word_it.cycled_list(); word_it.forward()) { + word_it.data()->set_text(""); + } } - prev_box_right = box.right (); } - tidy_up(block_list, - labels_ok, - rows_ok, - bad_blobs, - tgt_char_counts, - rebalance_count, - &min_uch_id, - min_samples, - final_labelled_blob_count, - low_exposure, - true); - tprintf ("APPLY_BOXES:\n"); - tprintf (" Boxes read from boxfile: %6d\n", box_count); - tprintf (" Initially labelled blobs: %6d in %d rows\n", - labels_ok, rows_ok); - tprintf (" Box failures detected: %6d\n", box_failures); - tprintf (" Duped blobs for rebalance:%6d\n", rebalance_count); - tprintf (" \"%s\" has fewest samples:%6d\n", - unicharset_boxes.id_to_unichar(min_uch_id), min_samples); - tprintf (" Total unlabelled words: %6d\n", - bad_blobs); - tprintf (" Final labelled words: %6d\n", - final_labelled_blob_count); - - // Clean up. 
- delete[] tgt_char_counts; } -int Tesseract::Boxes2BlockList(int box_cnt, TBOX *boxes, - BLOCK_LIST *block_list, - bool right2left) { - inT16 boxfile_lineno = 0; - inT16 boxfile_charno = 0; - TBOX box; - ROW *row; - ROW *prev_row = NULL; - inT16 prev_box_right = MAX_INT16; - inT16 prev_box_left = 0; - inT16 block_id; - inT16 row_id; - inT16 box_failures = 0; - inT16 labels_ok; - inT16 rows_ok; - inT16 bad_blobs; - inT16 rebalance_count = 0; - UNICHAR_ID min_uch_id; - inT16 min_samples; - inT16 final_labelled_blob_count; - +// Applies the box file based on the image name fname, and resegments +// the words in the block_list (page), with: +// blob-mode: one blob per line in the box file, words as input. +// word/line-mode: one blob per space-delimited unit after the #, and one word +// per line in the box file. (See comment above for box file format.) +// If find_segmentation is true, (word/line mode) then the classifier is used +// to re-segment words/lines to match the space-delimited truth string for +// each box. In this case, the input box may be for a word or even a whole +// text line, and the output words will contain multiple blobs corresponding +// to the space-delimited input string. +// With find_segmentation false, no classifier is needed, but the chopper +// can still be used to correctly segment touching characters with the help +// of the input boxes. +// In the returned PAGE_RES, the WERD_RES are setup as they would be returned +// from normal classification, ie. with a word, chopped_word, rebuild_word, +// seam_array, denorm, box_word, and best_state, but NO best_choice or +// raw_choice, as they would require a UNICHARSET, which we aim to avoid. +// Instead, the correct_text member of WERD_RES is set, and this may be later +// converted to a best_choice using CorrectClassifyWords. CorrectClassifyWords +// is not required before calling ApplyBoxTraining. 
+PAGE_RES* Tesseract::ApplyBoxes(const STRING& fname, + bool find_segmentation, + BLOCK_LIST *block_list) { + // In word mode, we use the boxes to make a word for each box, but + // in blob mode we use the existing words and maximally chop them first. + PAGE_RES* page_res = find_segmentation ? NULL : SetupApplyBoxes(block_list); + int box_count = 0; + int box_failures = 0; + + FILE* box_file = OpenBoxFile(fname); clear_any_old_text(block_list); - for (int box_idx = 0; box_idx < box_cnt; box_idx++) { - box = boxes[box_idx]; - - row = find_row_of_box(block_list, box, block_id, row_id); - // check for a new row - if ((right2left && box.right () > prev_box_left) || - (!right2left && box.left () < prev_box_right)) { - boxfile_lineno++; - boxfile_charno = 1; - } - else { - boxfile_charno++; - } - - if (row == NULL) { - box_failures++; + TBOX prev_box, box, next_box; + bool found_box = false; + char text[kBoxReadBufSize]; + do { + prev_box = box; + box = next_box; + int line_number = 0; // Line number of the box file. + int x_min; + int y_min; + int x_max; + int y_max; + char next_text[kBoxReadBufSize]; + // Keep a look-ahead box, so we can pass the next box into the resegment + // functions. + found_box = read_next_box(applybox_page, &line_number, box_file, next_text, + &x_min, &y_min, &x_max, &y_max); + if (found_box) { + next_box = TBOX(ICOORD(x_min, y_min), ICOORD (x_max, y_max)); + ++box_count; + } else { + next_box = TBOX(); + next_text[0] = '\0'; } - else { - box_failures += resegment_box(row, box, 0, block_id, row_id, - boxfile_lineno, boxfile_charno, - NULL, false, false); - prev_row = row; + if (!box.null_box()) { + bool foundit = false; + if (page_res != NULL) + foundit = ResegmentCharBox(page_res, box, next_box, text); + else + foundit = ResegmentWordBox(block_list, box, next_box, text); + if (!foundit) { + box_failures++; + ReportFailedBox(box_count, box, text, + "FAILURE! 
Couldn't find a matching blob"); + } } - prev_box_right = box.right (); - prev_box_left = box.left (); + strcpy(text, next_text); + } while (found_box); + if (page_res == NULL) { + // In word/line mode, we now maximally chop all the words and resegment + // them with the classifier. + page_res = SetupApplyBoxes(block_list); + ReSegmentByClassification(page_res); } - - tidy_up(block_list, labels_ok, rows_ok, bad_blobs, NULL, - rebalance_count, &min_uch_id, min_samples, final_labelled_blob_count, - false, false); - - return box_failures; + if (applybox_debug > 0) { + tprintf("APPLY_BOXES:\n"); + tprintf(" Boxes read from boxfile: %6d\n", box_count); + tprintf(" Boxes failed resegmentation: %6d\n", box_failures); + } + TidyUp(page_res); + return page_res; } -} // namespace tesseract - - -static -void clear_any_old_text( //remove correct text - BLOCK_LIST *block_list //real blocks - ) { - BLOCK_IT block_it(block_list); - ROW_IT row_it; - WERD_IT word_it; - - for (block_it.mark_cycle_pt (); - !block_it.cycled_list (); block_it.forward ()) { - row_it.set_to_list (block_it.data ()->row_list ()); - for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) { - word_it.set_to_list (row_it.data ()->word_list ()); - for (word_it.mark_cycle_pt (); - !word_it.cycled_list (); word_it.forward ()) { - word_it.data ()->set_text (""); +// Builds a PAGE_RES from the block_list in the way required for ApplyBoxes: +// All fuzzy spaces are removed, and all the words are maximally chopped. +PAGE_RES* Tesseract::SetupApplyBoxes(BLOCK_LIST *block_list) { + // Strip all fuzzy space markers to simplify the PAGE_RES. 
+ BLOCK_IT b_it(block_list); + for (b_it.mark_cycle_pt(); !b_it.cycled_list(); b_it.forward()) { + BLOCK* block = b_it.data(); + ROW_IT r_it(block->row_list()); + for (r_it.mark_cycle_pt(); !r_it.cycled_list(); r_it.forward ()) { + ROW* row = r_it.data(); + WERD_IT w_it(row->word_list()); + for (w_it.mark_cycle_pt(); !w_it.cycled_list(); w_it.forward()) { + WERD* word = w_it.data(); + if (word->cblob_list()->empty()) { + delete w_it.extract(); + } else { + word->set_flag(W_FUZZY_SP, false); + word->set_flag(W_FUZZY_NON, false); + } } } } + PAGE_RES* page_res = new PAGE_RES(block_list, NULL); + PAGE_RES_IT pr_it(page_res); + WERD_RES* word_res; + while ((word_res = pr_it.word()) != NULL) { + MaximallyChopWord(pr_it.block()->block, pr_it.row()->row, word_res); + pr_it.forward(); + } + return page_res; } -static -UNICHAR_ID register_char(const char *uch) { - if (!unicharset_boxes.contains_unichar(uch)) { - unicharset_boxes.unichar_insert(uch); - if (unicharset_boxes.size() > MAX_NUM_CLASSES) { - tprintf("Error: Size of unicharset of boxes is " - "greater than MAX_NUM_CLASSES (%d)\n", MAX_NUM_CLASSES); - exit(1); - } +// Helper to make a WERD_CHOICE from the BLOB_CHOICE_LIST_VECTOR using only +// the top choices. Avoids problems with very long words. 
+static void MakeWordChoice(const BLOB_CHOICE_LIST_VECTOR& char_choices, + const UNICHARSET& unicharset, + WERD_CHOICE* word_choice) { + word_choice->make_bad(); + for (int i = 0; i < char_choices.size(); ++i) { + BLOB_CHOICE_IT it(char_choices[i]); + BLOB_CHOICE* bc = it.data(); + word_choice->append_unichar_id(bc->unichar_id(), 1, + bc->rating(), bc->certainty()); } - return unicharset_boxes.unichar_to_id(uch); + word_choice->populate_unichars(unicharset); } -static -BOOL8 read_next_box(int page, - FILE* box_file, - TBOX *box, - UNICHAR_ID *uch_id) { - int x_min; - int y_min; - int x_max; - int y_max; - char uch[kBoxReadBufSize]; - - if (read_next_box(page, box_file, uch, &x_min, &y_min, &x_max, &y_max)) { - *uch_id = register_char(uch); - *box = TBOX (ICOORD (x_min, y_min), ICOORD (x_max, y_max)); - return TRUE; // read a box ok - } else { - return FALSE; // EOF +// Tests the chopper by exhaustively running chop_one_blob. +// The word_res will contain filled chopped_word, seam_array, denorm, +// box_word and best_state for the maximally chopped word. +void Tesseract::MaximallyChopWord(BLOCK* block, ROW* row, WERD_RES* word_res) { + if (!word_res->SetupForRecognition(unicharset, false, row, block)) + return; + if (chop_debug) { + tprintf("Maximally chopping word at:"); + word_res->word->bounding_box().print(); + } + blob_match_table.init_match_table(); + BLOB_CHOICE_LIST *match_result; + BLOB_CHOICE_LIST_VECTOR *char_choices = new BLOB_CHOICE_LIST_VECTOR(); + set_denorm(&word_res->denorm); + ASSERT_HOST(word_res->chopped_word->blobs != NULL); + float rating = static_cast(MAX_INT8); + for (TBLOB* blob = word_res->chopped_word->blobs; blob != NULL; + blob = blob->next) { + // The rating and certainty are not quite arbitrary. 
Since + // select_blob_to_chop uses the worst certainty to choose, they all have + // to be different, so starting with MAX_INT8, subtract 1/8 for each blob + // in here, and then divide by e each time they are chopped, which + // should guarantee a set of unequal values for the whole tree of blobs + // produced, however much chopping is required. The chops are thus only + // limited by the ability of the chopper to find suitable chop points, + // and not by the value of the certainties. + match_result = fake_classify_blob(0, rating, -rating); + modify_blob_choice(match_result, 0); + ASSERT_HOST(!match_result->empty()); + *char_choices += match_result; + rating -= 0.125f; + } + inT32 blob_number; + int right_chop_index = 0; + while (chop_one_blob(word_res->chopped_word, char_choices, + &blob_number, &word_res->seam_array, &right_chop_index)); + MakeWordChoice(*char_choices, unicharset, word_res->best_choice); + MakeWordChoice(*char_choices, unicharset, word_res->raw_choice); + word_res->CloneChoppedToRebuild(); + blob_match_table.end_match_table(); + if (char_choices != NULL) { + char_choices->delete_data_pointers(); + delete char_choices; } } +// Helper to compute the dispute resolution metric. +// Disputed blob resolution. The aim is to give the blob to the most +// appropriate boxfile box. Most of the time it is obvious, but if +// two boxfile boxes overlap significantly it is not. If a small boxfile +// box takes most of the blob, and a large boxfile box does too, then +// we want the small boxfile box to get it, but if the small box +// is much smaller than the blob, we don't want it to get it. +// Details of the disputed blob resolution: +// Given a box with area A, and a blob with area B, with overlap area C, +// then the miss metric is (A-C)(B-C)/(AB) and the box with minimum +// miss metric gets the blob. 
+static double BoxMissMetric(const TBOX& box1, const TBOX& box2) { + int overlap_area = box1.intersection(box2).area(); + double miss_metric = box1.area()- overlap_area; + miss_metric /= box1.area(); + miss_metric *= box2.area() - overlap_area; + miss_metric /= box2.area(); + return miss_metric; +} -ROW *find_row_of_box( // - BLOCK_LIST *block_list, //real blocks - const TBOX &box, //from boxfile - inT16 &block_id, - inT16 &row_id_to_process) { - BLOCK_IT block_it(block_list); - BLOCK *block; - ROW_IT row_it; - ROW *row; - ROW *row_to_process = NULL; - inT16 row_id; - WERD_IT word_it; - WERD *word; - BOOL8 polyg; - PBLOB_IT blob_it; - PBLOB *blob; - OUTLINE_IT outline_it; - OUTLINE *outline; - - /* - Find row to process - error if box REALLY overlaps more than one row. (I.e - it overlaps blobs in the row - not just overlaps the bounding box of the - whole row.) - */ - - block_id = 0; - for (block_it.mark_cycle_pt (); - !block_it.cycled_list (); block_it.forward ()) { - block_id++; - row_id = 0; - block = block_it.data (); - if (block->bounding_box ().overlap (box)) { - row_it.set_to_list (block->row_list ()); - for (row_it.mark_cycle_pt (); - !row_it.cycled_list (); row_it.forward ()) { - row_id++; - row = row_it.data (); - if (row->bounding_box ().overlap (box)) { - word_it.set_to_list (row->word_list ()); - for (word_it.mark_cycle_pt (); - !word_it.cycled_list (); word_it.forward ()) { - word = word_it.data (); - polyg = word->flag (W_POLYGON); - if (word->bounding_box ().overlap (box)) { - blob_it.set_to_list (word->gblob_list ()); - for (blob_it.mark_cycle_pt (); - !blob_it.cycled_list (); blob_it.forward ()) { - blob = blob_it.data (); - if (gblob_bounding_box (blob, polyg). - overlap (box)) { - outline_it. 
- set_to_list (gblob_out_list - (blob, polyg)); - for (outline_it.mark_cycle_pt (); - !outline_it.cycled_list (); - outline_it.forward ()) { - outline = outline_it.data (); - if (goutline_bounding_box - (outline, polyg).major_overlap (box)) { - if ((row_to_process == NULL) || - (row_to_process == row)) { - row_to_process = row; - row_id_to_process = row_id; - } - else - /* RETURN ERROR Box overlaps blobs in more than one row */ - return NULL; - } - } - } - } - } +// Gather consecutive blobs that match the given box into the best_state +// and corresponding correct_text. +// Fights over which box owns which blobs are settled by pre-chopping and +// applying the blobs to box or next_box with the least non-overlap. +// Returns false if the box was in error, which can only be caused by +// failing to find an appropriate blob for a box. +// This means that occasionally, blobs may be incorrectly segmented if the +// chopper fails to find a suitable chop point. +bool Tesseract::ResegmentCharBox(PAGE_RES* page_res, + const TBOX& box, const TBOX& next_box, + const char* correct_text) { + if (applybox_debug > 1) { + tprintf("\nAPPLY_BOX: in ResegmentCharBox() for %s\n", correct_text); + } + PAGE_RES_IT page_res_it(page_res); + WERD_RES* word_res; + for (word_res = page_res_it.word(); word_res != NULL; + word_res = page_res_it.forward()) { + if (!word_res->box_word->bounding_box().major_overlap(box)) + continue; + if (applybox_debug > 1) { + tprintf("Checking word box:"); + word_res->box_word->bounding_box().print(); + } + int word_len = word_res->box_word->length(); + for (int i = 0; i < word_len; ++i) { + int blob_count = 0; + for (blob_count = 0; i + blob_count < word_len; ++blob_count) { + TBOX blob_box = word_res->box_word->BlobBox(i + blob_count); + if (!blob_box.major_overlap(box)) + break; + if (word_res->correct_text[i + blob_count].length() > 0) + break; // Blob is claimed already. 
+ double current_box_miss_metric = BoxMissMetric(blob_box, box); + double next_box_miss_metric = BoxMissMetric(blob_box, next_box); + if (applybox_debug > 2) { + tprintf("Checking blob:"); + blob_box.print(); + tprintf("Current miss metric = %g, next = %g\n", + current_box_miss_metric, next_box_miss_metric); + } + if (current_box_miss_metric > next_box_miss_metric) + break; // Blob is a better match for next box. + } + if (blob_count > 0) { + // We refine just the box_word, best_state and correct_text here. + // The rebuild_word is made in TidyUp. + // blob_count blobs are put together to match the box. Merge the + // box_word boxes, save the blob_count in the state and the text. + word_res->box_word->MergeBoxes(i, i + blob_count); + word_res->best_state[i] = blob_count; + word_res->correct_text[i] = correct_text; + if (applybox_debug > 2) { + tprintf("%d Blobs match: blob box:", blob_count); + word_res->box_word->BlobBox(i).print(); + tprintf("Matches box:"); + box.print(); + tprintf("With next box:"); + next_box.print(); + } + // Eliminated best_state and correct_text entries for the consumed + // blobs. + for (int j = 1; j < blob_count; ++j) { + word_res->best_state.remove(i + 1); + word_res->correct_text.remove(i + 1); + } + // Assume that no box spans multiple source words, so we are done with + // this box. + if (applybox_debug > 1) { + tprintf("Best state = "); + for (int j = 0; j < word_res->best_state.size(); ++j) { + tprintf("%d ", word_res->best_state[j]); } + tprintf("\n"); } + return true; } } } - return row_to_process; + return false; // Failure. 
} - -inT16 resegment_box( // - ROW *row, - TBOX &box, - UNICHAR_ID uch_id, - inT16 block_id, - inT16 row_id, - inT16 boxfile_lineno, - inT16 boxfile_charno, - inT16 *tgt_char_counts, - bool learn_char_fragments, - bool learning) { - WERD_LIST new_word_list; - WERD_IT word_it; - WERD_IT new_word_it(&new_word_list); - WERD *word = NULL; - WERD *new_word = NULL; - BOOL8 polyg = false; - PBLOB_IT blob_it; - PBLOB_IT new_blob_it; - PBLOB *blob; - PBLOB *new_blob; - OUTLINE_IT outline_it; - OUTLINE_LIST dummy; // Just to initialize new_outline_it. - OUTLINE_IT new_outline_it = &dummy; - OUTLINE *outline; - TBOX new_word_box; - TBOX curr_outline_box; - TBOX prev_outline_box; - float word_x_centre; - float baseline; - inT16 error_count = 0; //number of chars lost - STRING label; - UNICHAR_ID fragment_uch_id; - int fragment_index; - int new_word_it_len; - - if (learning && applybox_debug > 6) { - tprintf("\nAPPLY_BOX: in resegment_box() for %s(%d)\n", - unicharset_boxes.id_to_unichar(uch_id), uch_id); +// Consume all source blobs that strongly overlap the given box, +// putting them into a new word, with the correct_text label. +// Fights over which box owns which blobs are settled by +// applying the blobs to box or next_box with the least non-overlap. +// Returns false if the box was in error, which can only be caused by +// failing to find an overlapping blob for a box. 
+bool Tesseract::ResegmentWordBox(BLOCK_LIST *block_list, + const TBOX& box, const TBOX& next_box, + const char* correct_text) { + if (applybox_debug > 1) { + tprintf("\nAPPLY_BOX: in ResegmentWordBox() for %s\n", correct_text); } - word_it.set_to_list (row->word_list ()); - for (word_it.mark_cycle_pt (); !word_it.cycled_list (); word_it.forward ()) { - word = word_it.data (); - polyg = word->flag (W_POLYGON); - if (word->bounding_box ().overlap (box)) { - blob_it.set_to_list (word->gblob_list ()); - prev_outline_box = TBOX(); // clear prev_outline_box - curr_outline_box = TBOX(); // clear curr_outline_box - for (blob_it.mark_cycle_pt (); - !blob_it.cycled_list (); blob_it.forward ()) { - blob = blob_it.data (); - if (gblob_bounding_box (blob, polyg).overlap (box)) { - outline_it.set_to_list (gblob_out_list (blob, polyg)); - for (outline_it.mark_cycle_pt (); - !outline_it.cycled_list (); outline_it.forward ()) { - outline = outline_it.data (); - prev_outline_box += curr_outline_box; - curr_outline_box = goutline_bounding_box(outline, polyg); - if (curr_outline_box.major_overlap (box)) { - if (strlen (word->text ()) > 0) { - if (error_count == 0) { - error_count = 1; - if (learning && applybox_debug > 4) - report_failed_box (boxfile_lineno, - boxfile_charno, - box, unicharset_boxes.id_to_unichar(uch_id), - "FAILURE! box overlaps blob in labelled word"); - } - if (learning && applybox_debug > 4) - tprintf ("APPLY_BOXES: ALSO ignoring corrupted char" - " blk:%d row:%d \"%s\"\n", - block_id, row_id, word_it.data()->text()); - word_it.data ()->set_text (""); // UN label it - error_count++; - } - // Do not learn from fragments of characters that are broken - // into very small pieces to avoid picking up noise. 
- if ((learn_char_fragments || learn_chars_and_char_frags_mode) && - ((C_OUTLINE *)outline)->area() < kMinFragmentOutlineArea) { - if (applybox_debug > 6) { - tprintf("APPLY_BOX: fragment outline area %d is too small" - " - not recording fragments of this character\n", - ((C_OUTLINE *)outline)->area()); - } - error_count++; - } - - if (error_count == 0) { - if (applybox_debug > 6 ) { - tprintf("APPLY_BOX: Previous "); - prev_outline_box.print(); - tprintf("APPLY_BOX: Current area: %d ", - ((C_OUTLINE *)outline)->area()); - curr_outline_box.print(); - } - // When learning character fragments is enabled, we put - // outlines that do not overlap on x axis in separate WERDs. - bool start_new_word = - (learn_char_fragments || learn_chars_and_char_frags_mode) && - !curr_outline_box.major_x_overlap(prev_outline_box); - if (new_word == NULL || start_new_word) { - if (new_word != NULL) { // add prev new_word to new_word_list - new_word_it.add_to_end(new_word); - } - // Make a new word with a single blob. 
- new_word = word->shallow_copy(); - new_word->set_flag(W_FUZZY_NON, false); - new_word->set_flag(W_FUZZY_SP, false); - if (polyg){ - new_blob = new PBLOB; - } else { - new_blob = (PBLOB *) new C_BLOB; - } - new_blob_it.set_to_list(new_word->gblob_list()); - new_blob_it.add_to_end(new_blob); - new_outline_it.set_to_list( - gblob_out_list(new_blob, polyg)); - } - new_outline_it.add_to_end(outline_it.extract()); // move blob - } - } + WERD* new_word = NULL; + BLOCK_IT b_it(block_list); + for (b_it.mark_cycle_pt(); !b_it.cycled_list(); b_it.forward()) { + BLOCK* block = b_it.data(); + if (!box.major_overlap(block->bounding_box())) + continue; + ROW_IT r_it(block->row_list()); + for (r_it.mark_cycle_pt(); !r_it.cycled_list(); r_it.forward ()) { + ROW* row = r_it.data(); + if (!box.major_overlap(row->bounding_box())) + continue; + WERD_IT w_it(row->word_list()); + for (w_it.mark_cycle_pt(); !w_it.cycled_list(); w_it.forward()) { + WERD* word = w_it.data(); + if (applybox_debug > 2) { + tprintf("Checking word:"); + word->bounding_box().print(); + } + if (word->text() != NULL && word->text()[0] != '\0') + continue; // Ignore words that are already done. + if (!box.major_overlap(word->bounding_box())) + continue; + C_BLOB_IT blob_it(word->cblob_list()); + for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); + blob_it.forward()) { + C_BLOB* blob = blob_it.data(); + TBOX blob_box = blob->bounding_box(); + if (!blob_box.major_overlap(box)) + continue; + double current_box_miss_metric = BoxMissMetric(blob_box, box); + double next_box_miss_metric = BoxMissMetric(blob_box, next_box); + if (applybox_debug > 2) { + tprintf("Checking blob:"); + blob_box.print(); + tprintf("Current miss metric = %g, next = %g\n", + current_box_miss_metric, next_box_miss_metric); } - if (outline_it.empty()) // no outlines in blob - delete blob_it.extract(); // so delete blob + if (current_box_miss_metric > next_box_miss_metric) + continue; // Blob is a better match for next box. 
+ if (applybox_debug > 2) { + tprintf("Blob match: blob:"); + blob_box.print(); + tprintf("Matches box:"); + box.print(); + tprintf("With next box:"); + next_box.print(); + } + if (new_word == NULL) { + // Make a new word with a single blob. + new_word = word->shallow_copy(); + new_word->set_text(correct_text); + w_it.add_to_end(new_word); + } + C_BLOB_IT new_blob_it(new_word->cblob_list()); + new_blob_it.add_to_end(blob_it.extract()); } } - if (blob_it.empty()) // no blobs in word - delete word_it.extract(); // so delete word } } - if (new_word != NULL) { // add prev new_word to new_word_list - new_word_it.add_to_end(new_word); - } - new_word_it_len = new_word_it.length(); + return new_word != NULL; +} - // Check for failures. - if (error_count > 0) - return error_count; - if (learning && new_word_it_len <= 0) { - report_failed_box(boxfile_lineno, boxfile_charno, box, - unicharset_boxes.id_to_unichar(uch_id), - "FAILURE! Couldn't find any blobs"); - return 1; // failure +// Resegments the words by running the classifier in an attempt to find the +// correct segmentation that produces the required string. +void Tesseract::ReSegmentByClassification(PAGE_RES* page_res) { + PAGE_RES_IT pr_it(page_res); + WERD_RES* word_res; + for (; (word_res = pr_it.word()) != NULL; pr_it.forward()) { + WERD* word = word_res->word; + if (word->text() == NULL || word->text()[0] == '\0') + continue; // Ignore words that have no text. 
+ // Convert the correct text to a vector of UNICHAR_ID + GenericVector target_text; + if (!ConvertStringToUnichars(word->text(), &target_text)) { + tprintf("APPLY_BOX: FAILURE: can't find class_id for '%s'\n", + word->text()); + pr_it.DeleteCurrentWord(); + continue; + } + if (!FindSegmentation(target_text, word_res)) { + tprintf("APPLY_BOX: FAILURE: can't find segmentation for '%s'\n", + word->text()); + pr_it.DeleteCurrentWord(); + continue; + } } +} - if (learning && new_word_it_len > CHAR_FRAGMENT::kMaxChunks) { - tprintf("APPLY_BOXES: too many fragments (%d) for char %s\n", - new_word_it_len, unicharset_boxes.id_to_unichar(uch_id)); - return 1; // failure +// Converts the space-delimited string of utf8 text to a vector of UNICHAR_ID. +// Returns false if an invalid UNICHAR_ID is encountered. +bool Tesseract::ConvertStringToUnichars(const char* utf8, + GenericVector* class_ids) { + for (int step = 0; *utf8 != '\0'; utf8 += step) { + const char* next_space = strchr(utf8, ' '); + if (next_space == NULL) + next_space = utf8 + strlen(utf8); + step = next_space - utf8; + UNICHAR_ID class_id = unicharset.unichar_to_id(utf8, step); + if (class_id == INVALID_UNICHAR_ID) { + return false; + } + while (utf8[step] == ' ') + ++step; + class_ids->push_back(class_id); } + return true; +} - // Add labelled character or character fragments to the word list. - fragment_index = 0; - new_word_it.move_to_first(); - for (new_word_it.mark_cycle_pt(); !new_word_it.cycled_list(); - new_word_it.forward()) { - new_word = new_word_it.extract(); - if (new_word_it_len > 1) { // deal with a fragment - if (learning) { - label = CHAR_FRAGMENT::to_string(unicharset_boxes.id_to_unichar(uch_id), - fragment_index, new_word_it_len); - fragment_uch_id = register_char(label.string()); - new_word->set_text(label.string()); - ++fragment_index; - // For now we cheat by setting the expected number of char fragments - // to the number of char fragments actually parsed and labelled. 
- // TODO(daria): find out whether this can be improved. - tgt_char_counts[fragment_uch_id]++; - } else { - // No learning involved, Just stick a place-holder string - new_word->set_text("*"); - } - if (applybox_debug > 5) { - tprintf("APPLY_BOX: adding char fragment %s\n", label.string()); - } - } else { // deal with a regular character - if (learning) { - if (!learn_char_fragments || learn_chars_and_char_frags_mode) { - new_word->set_text(unicharset_boxes.id_to_unichar(uch_id)); - } else { - // not interested in non-fragmented chars if learning fragments, so - // unlabel it. - new_word->set_text(""); - } - } else { - // No learning involved here. Just stick a place holder string - new_word->set_text("*"); +// Resegments the word to achieve the target_text from the classifier. +// Returns false if the re-segmentation fails. +// Uses brute-force combination of up to kMaxGroupSize adjacent blobs, and +// applies a full search on the classifier results to find the best classified +// segmentation. As a compromise to obtain better recall, 1-1 ambigiguity +// substitutions ARE used. +bool Tesseract::FindSegmentation(const GenericVector& target_text, + WERD_RES* word_res) { + blob_match_table.init_match_table(); + // Classify all required combinations of blobs and save results in choices. 
+ int word_length = word_res->box_word->length(); + GenericVector* choices = + new GenericVector[word_length]; + for (int i = 0; i < word_length; ++i) { + for (int j = 1; j <= kMaxGroupSize && i + j <= word_length; ++j) { + BLOB_CHOICE_LIST* match_result = classify_piece( + word_res->chopped_word->blobs, word_res->seam_array, + i, i + j - 1); + if (applybox_debug > 2) { + tprintf("%d+%d:", i, j); + print_ratings_list("Segment:", match_result, unicharset); } + choices[i].push_back(match_result); } - gblob_sort_list(new_word->gblob_list(), polyg); - word_it.add_to_end(new_word); - new_word_box = new_word->bounding_box(); - word_x_centre = (new_word_box.left() + new_word_box.right()) / 2.0f; - baseline = row->base_line(word_x_centre); } - - // All done. Now check if the EOL, BOL flags are set correctly. - word_it.move_to_first(); - for (word_it.mark_cycle_pt(); !word_it.cycled_list(); word_it.forward()) { - word = word_it.data(); - word->set_flag(W_BOL, false); - word->set_flag(W_EOL, false); + // Search the segmentation graph for the target text. Must be an exact + // match. Using wildcards makes it difficult to find the correct + // segmentation even when it is there. 
+ word_res->best_state.clear(); + GenericVector search_segmentation; + float best_rating = 0.0f; + SearchForText(choices, 0, word_length, target_text, 0, 0.0f, + &search_segmentation, &best_rating, &word_res->best_state); + blob_match_table.end_match_table(); + for (int i = 0; i < word_length; ++i) + choices[i].delete_data_pointers(); + delete [] choices; + if (word_res->best_state.empty()) + return false; + word_res->correct_text.clear(); + for (int i = 0; i < target_text.size(); ++i) { + word_res->correct_text.push_back( + STRING(unicharset.id_to_unichar(target_text[i]))); } - word->set_flag(W_EOL, true); - word_it.move_to_first(); - word_it.data()->set_flag(W_BOL, true); - return 0; //success + return true; +} -#if 0 - if (strlen(unicharset_boxes.id_to_unichar(uch_id)) == 1) { - if (STRING (chs_caps_ht).contains (unicharset_boxes.id_to_unichar(uch_id)[0]) && - (new_word_box.top () < - baseline + (1 + applybox_error_band) * row->x_height ())) { - report_failed_box (boxfile_lineno, boxfile_charno, box, - unicharset_boxes.id_to_unichar(uch_id), - "FAILURE! caps-ht char didn't ascend"); - new_word->set_text (""); - return 1; - } - if (STRING (chs_odd_top).contains (unicharset_boxes.id_to_unichar(uch_id)[0]) && - (new_word_box.top () < - baseline + (1 - applybox_error_band) * row->x_height ())) { - report_failed_box (boxfile_lineno, boxfile_charno, box, - unicharset_boxes.id_to_unichar(uch_id), - "FAILURE! Odd top char below xht"); - new_word->set_text (""); - return 1; - } - if (STRING (chs_x_ht).contains (unicharset_boxes.id_to_unichar(uch_id)[0]) && - ((new_word_box.top () > - baseline + (1 + applybox_error_band) * row->x_height ()) || - (new_word_box.top () < - baseline + (1 - applybox_error_band) * row->x_height ()))) { - report_failed_box (boxfile_lineno, boxfile_charno, box, - unicharset_boxes.id_to_unichar(uch_id), - "FAILURE! 
x-ht char didn't have top near xht"); - new_word->set_text (""); - return 1; - } - if (STRING (chs_non_ambig_bl).contains - (unicharset_boxes.id_to_unichar(uch_id)[0]) && - ((new_word_box.bottom () < - baseline - applybox_error_band * row->x_height ()) || - (new_word_box.bottom () > - baseline + applybox_error_band * row->x_height ()))) { - report_failed_box (boxfile_lineno, boxfile_charno, box, - unicharset_boxes.id_to_unichar(uch_id), - "FAILURE! non ambig BL char didnt have bottom near baseline"); - new_word->set_text (""); - return 1; - } - if (STRING (chs_odd_bot).contains (unicharset_boxes.id_to_unichar(uch_id)[0]) && - (new_word_box.bottom () > - baseline + applybox_error_band * row->x_height ())) { - report_failed_box (boxfile_lineno, boxfile_charno, box, - unicharset_boxes.id_to_unichar(uch_id), - "FAILURE! Odd bottom char above baseline"); - new_word->set_text (""); - return 1; +// Recursive helper to find a match to the target_text (from text_index +// position) in the choices (from choices_pos position). +// Choices is an array of GenericVectors, of length choices_length, with each +// element representing a starting position in the word, and the +// GenericVector holding classification results for a sequence of consecutive +// blobs, with index 0 being a single blob, index 1 being 2 blobs etc. +void Tesseract::SearchForText(const GenericVector* choices, + int choices_pos, int choices_length, + const GenericVector& target_text, + int text_index, + float rating, GenericVector* segmentation, + float* best_rating, + GenericVector* best_segmentation) { + const UnicharAmbigsVector& table = getDict().getUnicharAmbigs().dang_ambigs(); + for (int length = 1; length <= choices[choices_pos].size(); ++length) { + // Rating of matching choice or worst choice if no match. + float choice_rating = 0.0f; + // Find the corresponding best BLOB_CHOICE. 
+ BLOB_CHOICE_IT choice_it(choices[choices_pos][length - 1]); + for (choice_it.mark_cycle_pt(); !choice_it.cycled_list(); + choice_it.forward()) { + BLOB_CHOICE* choice = choice_it.data(); + choice_rating = choice->rating(); + UNICHAR_ID class_id = choice->unichar_id(); + if (class_id == target_text[text_index]) { + break; } - if (STRING (chs_desc).contains (unicharset_boxes.id_to_unichar(uch_id)[0]) && - (new_word_box.bottom () > - baseline - applybox_error_band * row->x_height ())) { - report_failed_box (boxfile_lineno, boxfile_charno, box, - unicharset_boxes.id_to_unichar(uch_id), - "FAILURE! Descender doesn't descend"); - new_word->set_text (""); - return 1; + // Search ambigs table. + if (class_id < table.size() && table[class_id] != NULL) { + AmbigSpec_IT spec_it(table[class_id]); + for (spec_it.mark_cycle_pt(); !spec_it.cycled_list(); + spec_it.forward()) { + const AmbigSpec *ambig_spec = spec_it.data(); + // We'll only do 1-1. + if (ambig_spec->wrong_ngram[1] == INVALID_UNICHAR_ID && + ambig_spec->correct_ngram_id == target_text[text_index]) + break; + } + if (!spec_it.cycled_list()) + break; // Found an ambig. } } -#endif -} - - -/************************************************************************* - * tidy_up() - * - report >1 block - * - sort the words in each row. - * - report any rows with no labelled words. 
- * - report any remaining unlabelled words - * - report total labelled words - * - *************************************************************************/ -void tidy_up( // - BLOCK_LIST *block_list, //real blocks - inT16 &ok_char_count, - inT16 &ok_row_count, - inT16 &unlabelled_words, - inT16 *tgt_char_counts, - inT16 &rebalance_count, - UNICHAR_ID *min_uch_id, - inT16 &min_samples, - inT16 &final_labelled_blob_count, - bool learn_character_fragments, - bool learning) { - BLOCK_IT block_it(block_list); - ROW_IT row_it; - ROW *row; - WERD_IT word_it; - WERD *word; - WERD *duplicate_word; - inT16 block_idx = 0; - inT16 row_idx; - inT16 all_row_idx = 0; - BOOL8 row_ok; - BOOL8 rebalance_needed = FALSE; - inT16 *labelled_char_counts = NULL; // num unique labelled samples - inT16 i; - UNICHAR_ID uch_id; - UNICHAR_ID prev_uch_id = -1; - BOOL8 at_dupe_of_prev_word; - ROW *prev_row = NULL; - inT16 left; - inT16 prev_left = -1; - - labelled_char_counts = new inT16[MAX_NUM_CLASSES]; - for (i = 0; i < MAX_NUM_CLASSES; i++) - labelled_char_counts[i] = 0; - - ok_char_count = 0; - ok_row_count = 0; - unlabelled_words = 0; - if (learning && (applybox_debug > 4) && (block_it.length () != 1)) { - if (block_it.length() > 1) { - tprintf("APPLY_BOXES: More than one block??\n"); - } else { - tprintf("APPLY_BOXES: No blocks identified.\n"); - } - } - - for (block_it.mark_cycle_pt (); - !block_it.cycled_list (); block_it.forward ()) { - block_idx++; - row_idx = 0; - row_ok = FALSE; - row_it.set_to_list (block_it.data ()->row_list ()); - for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) { - row_idx++; - all_row_idx++; - row = row_it.data (); - word_it.set_to_list (row->word_list ()); - word_it.sort (word_comparator); - for (word_it.mark_cycle_pt (); - !word_it.cycled_list (); word_it.forward ()) { - word = word_it.data (); - if (strlen (word->text ()) == 0 || - unicharset_boxes.unichar_to_id(word->text()) < 0) { - unlabelled_words++; - if (learning && 
applybox_debug > 4 && !learn_character_fragments) { - tprintf("APPLY_BOXES: Unlabelled word blk:%d row:%d allrows:%d\n", - block_idx, row_idx, all_row_idx); - } - } else { - if (word->gblob_list ()->length () != 1) - tprintf ("APPLY_BOXES: FATALITY - MULTIBLOB Labelled word blk:%d" - " row:%d allrows:%d\n", block_idx, row_idx, all_row_idx); - - ok_char_count++; - ++labelled_char_counts[unicharset_boxes.unichar_to_id(word->text())]; - row_ok = TRUE; - } + if (choice_it.cycled_list()) + continue; // No match. + segmentation->push_back(length); + if (choices_pos + length == choices_length && + text_index + 1 == target_text.size()) { + // This is a complete match. If the rating is good record a new best. + if (applybox_debug > 2) { + tprintf("Complete match, rating = %g, best=%g, seglength=%d, best=%d\n", + rating + choice_rating, *best_rating, segmentation->size(), + best_segmentation->size()); } - if ((applybox_debug > 6) && (!row_ok)) { - tprintf("APPLY_BOXES: Row with no labelled words blk:%d row:%d" - " allrows:%d\n", block_idx, row_idx, all_row_idx); + if (best_segmentation->empty() || rating + choice_rating < *best_rating) { + *best_segmentation = *segmentation; + *best_rating = rating + choice_rating; } - else - ok_row_count++; - } - } - - min_samples = 9999; - for (i = 0; i < unicharset_boxes.size(); i++) { - if (tgt_char_counts[i] > labelled_char_counts[i]) { - if (labelled_char_counts[i] <= 1) { - tprintf("APPLY_BOXES: FATALITY - %d labelled samples of \"%s\" -" - " target is %d:\n", - labelled_char_counts[i], unicharset_boxes.debug_str(i).string(), - tgt_char_counts[i]); + } else if (choices_pos + length < choices_length && + text_index + 1 < target_text.size()) { + if (applybox_debug > 3) { + tprintf("Match found for %d=%s:%s, at %d+%d, recursing...\n", + target_text[text_index], + unicharset.id_to_unichar(target_text[text_index]), + choice_it.data()->unichar_id() == target_text[text_index] + ? 
"Match" : "Ambig", + choices_pos, length); } - else { - rebalance_needed = TRUE; - if (applybox_debug > 0) - tprintf("APPLY_BOXES: REBALANCE REQD \"%s\" - target of" - " %d from %d labelled samples\n", - unicharset_boxes.debug_str(i).string(), tgt_char_counts[i], - labelled_char_counts[i]); + SearchForText(choices, choices_pos + length, choices_length, target_text, + text_index + 1, rating + choice_rating, segmentation, + best_rating, best_segmentation); + if (applybox_debug > 3) { + tprintf("End recursion for %d=%s\n", target_text[text_index], + unicharset.id_to_unichar(target_text[text_index])); } } - if ((min_samples > labelled_char_counts[i]) && (tgt_char_counts[i] > 0)) { - min_samples = labelled_char_counts[i]; - *min_uch_id = i; - } + segmentation->truncate(segmentation->size() - 1); } +} - while (applybox_rebalance && rebalance_needed) { - block_it.set_to_list (block_list); - for (block_it.mark_cycle_pt (); - !block_it.cycled_list (); block_it.forward ()) { - row_it.set_to_list (block_it.data ()->row_list ()); - for (row_it.mark_cycle_pt (); - !row_it.cycled_list (); row_it.forward ()) { - row = row_it.data (); - word_it.set_to_list (row->word_list ()); - for (word_it.mark_cycle_pt (); - !word_it.cycled_list (); word_it.forward ()) { - word = word_it.data (); - left = word->bounding_box ().left (); - if (*word->text () != '\0') - uch_id = unicharset_boxes.unichar_to_id(word->text ()); - else - uch_id = -1; - at_dupe_of_prev_word = ((row == prev_row) && - (left = prev_left) && - (uch_id == prev_uch_id)); - if ((uch_id != -1) && - (labelled_char_counts[uch_id] > 1) && - (tgt_char_counts[uch_id] > labelled_char_counts[uch_id]) && - (!at_dupe_of_prev_word)) { - /* Duplicate the word to rebalance the labelled samples */ - if (applybox_debug > 9) { - tprintf ("Duping \"%s\" from ", unicharset_boxes.id_to_unichar(uch_id)); - word->bounding_box ().print (); - } - duplicate_word = new WERD; - *duplicate_word = *word; - word_it.add_after_then_move (duplicate_word); - 
rebalance_count++; - labelled_char_counts[uch_id]++; - } - prev_row = row; - prev_left = left; - prev_uch_id = uch_id; - } +// Counts up the labelled words and the blobs within. +// Deletes all unused or emptied words, counting the unused ones. +// Resets W_BOL and W_EOL flags correctly. +// Builds the rebuild_word and rebuilds the box_word. +void Tesseract::TidyUp(PAGE_RES* page_res) { + int ok_blob_count = 0; + int bad_blob_count = 0; + int ok_word_count = 0; + int unlabelled_words = 0; + PAGE_RES_IT pr_it(page_res); + WERD_RES* word_res; + for (; (word_res = pr_it.word()) != NULL; pr_it.forward()) { + int ok_in_word = 0; + for (int i = 0; i < word_res->correct_text.size(); ++i) { + if (word_res->correct_text[i].length() > 0) { + ++ok_in_word; } } - rebalance_needed = FALSE; - for (i = 0; i < unicharset_boxes.size(); i++) { - if ((tgt_char_counts[i] > labelled_char_counts[i]) && - (labelled_char_counts[i] > 1)) { - rebalance_needed = TRUE; - break; + if (ok_in_word > 0) { + ok_blob_count += ok_in_word; + bad_blob_count += word_res->correct_text.size() - ok_in_word; + } else { + ++unlabelled_words; + if (applybox_debug > 0) { + tprintf("APPLY_BOXES: Unlabelled word at :"); + word_res->word->bounding_box().print(); } + pr_it.DeleteCurrentWord(); } } - - /* Now final check - count labeled blobs */ - final_labelled_blob_count = 0; - block_it.set_to_list (block_list); - for (block_it.mark_cycle_pt (); - !block_it.cycled_list (); block_it.forward ()) { - row_it.set_to_list (block_it.data ()->row_list ()); - for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) { - row = row_it.data (); - word_it.set_to_list (row->word_list ()); - word_it.sort (word_comparator); - for (word_it.mark_cycle_pt (); - !word_it.cycled_list (); word_it.forward ()) { - word = word_it.data (); - if ((strlen(word->text ()) > 0) && - (word->gblob_list()->length() == 1)) { - final_labelled_blob_count++; - } else { - delete word_it.extract(); - } - } - // delete the row if empty - 
if (row->word_list()->empty()) { - delete row_it.extract(); - } - } + pr_it.restart_page(); + for (; (word_res = pr_it.word()) != NULL; pr_it.forward()) { + // Denormalize back to a BoxWord. + word_res->RebuildBestState(); + word_res->SetupBoxWord(); + word_res->word->set_flag(W_BOL, pr_it.prev_row() != pr_it.row()); + word_res->word->set_flag(W_EOL, pr_it.next_row() != pr_it.row()); + } + if (applybox_debug > 0) { + tprintf(" Found %d good blobs and %d unlabelled blobs in %d words.\n", + ok_blob_count, bad_blob_count, ok_word_count); + tprintf(" %d remaining unlabelled words deleted.\n", unlabelled_words); } - - // Clean up. - delete[] labelled_char_counts; } - -void report_failed_box(inT16 boxfile_lineno, - inT16 boxfile_charno, - TBOX box, - const char *box_ch, - const char *err_msg) { - if (applybox_debug > 4) - tprintf ("APPLY_BOXES: boxfile %1d/%1d/%s ((%1d,%1d),(%1d,%1d)): %s\n", - boxfile_lineno, - boxfile_charno, - box_ch, - box.left (), box.bottom (), box.right (), box.top (), err_msg); +// Logs a bad box by line in the box file and box coords. 
+void Tesseract::ReportFailedBox(int boxfile_lineno, TBOX box, + const char *box_ch, const char *err_msg) { + tprintf("APPLY_BOXES: boxfile line %d/%s ((%d,%d),(%d,%d)): %s\n", + boxfile_lineno, box_ch, + box.left(), box.bottom(), box.right(), box.top(), err_msg); } - -void apply_box_training(const STRING& filename, BLOCK_LIST *block_list) { - BLOCK_IT block_it(block_list); - ROW_IT row_it; - ROW *row; - WERD_IT word_it; - WERD *word; - WERD *bln_word; - WERD copy_outword; // copy to denorm - PBLOB_IT blob_it; - DENORM denorm; - inT16 count = 0; - char unichar[UNICHAR_LEN + 1]; - - unichar[UNICHAR_LEN] = '\0'; - tprintf ("Generating training data\n"); - for (block_it.mark_cycle_pt (); - !block_it.cycled_list (); block_it.forward ()) { - row_it.set_to_list (block_it.data ()->row_list ()); - for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) { - row = row_it.data (); - word_it.set_to_list (row->word_list ()); - for (word_it.mark_cycle_pt (); - !word_it.cycled_list (); word_it.forward ()) { - word = word_it.data (); - if ((strlen (word->text ()) > 0) && - (word->gblob_list ()->length () == 1)) { - // Here is a word with a single unichar label and a single blob so train on it. - bln_word = make_bln_copy(word, row, NULL, row->x_height (), &denorm); - blob_it.set_to_list (bln_word->blob_list ()); - strncpy(unichar, word->text (), UNICHAR_LEN); - tess_training_tester (filename, - blob_it.data (), //single blob - &denorm, TRUE, //correct - unichar, //correct character - strlen(unichar), //character length - NULL); - copy_outword = *(bln_word); - copy_outword.baseline_denormalise (&denorm); - blob_it.set_to_list (copy_outword.blob_list ()); - delete bln_word; - count++; - } - } +// Creates a fake best_choice entry in each WERD_RES with the correct text. 
+void Tesseract::CorrectClassifyWords(PAGE_RES* page_res) { + PAGE_RES_IT pr_it(page_res); + for (WERD_RES *word_res = pr_it.word(); word_res != NULL; + word_res = pr_it.forward()) { + WERD_CHOICE* choice = new WERD_CHOICE(word_res->correct_text.size()); + for (int i = 0; i < word_res->correct_text.size(); ++i) { + UNICHAR_ID char_id = unicharset.unichar_to_id( + word_res->correct_text[i].string()); + choice->append_unichar_id_space_allocated(char_id, 1, 0.0f, 0.0f); } + choice->populate_unichars(unicharset); + if (word_res->best_choice != NULL) + delete word_res->best_choice; + word_res->best_choice = choice; } - tprintf ("Generated training data for %d blobs\n", count); } -namespace tesseract { -void Tesseract::apply_box_testing(BLOCK_LIST *block_list) { - BLOCK_IT block_it(block_list); - ROW_IT row_it; - ROW *row; - inT16 row_count = 0; - WERD_IT word_it; - WERD *word; - WERD *bln_word; - inT16 word_count = 0; - PBLOB_IT blob_it; - DENORM denorm; - inT16 count = 0; - char ch[2]; - WERD *outword; //bln best choice - //segmentation - WERD_CHOICE *best_choice; //tess output - WERD_CHOICE *raw_choice; //top choice permuter - //detailed results - BLOB_CHOICE_LIST_CLIST blob_choices; - inT16 char_count = 0; - inT16 correct_count = 0; - inT16 err_count = 0; - inT16 rej_count = 0; - #ifndef SECURE_NAMES - WERDSTATS wordstats; //As from newdiff - #endif - char tess_rej_str[3]; - char tess_long_str[3]; - - ch[1] = '\0'; - strcpy (tess_rej_str, "|A"); - strcpy (tess_long_str, "|B"); - - for (block_it.mark_cycle_pt (); - !block_it.cycled_list (); block_it.forward ()) { - row_it.set_to_list (block_it.data ()->row_list ()); - for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) { - row = row_it.data (); - row_count++; - word_count = 0; - word_it.set_to_list (row->word_list ()); - for (word_it.mark_cycle_pt (); - !word_it.cycled_list (); word_it.forward ()) { - word = word_it.data (); - word_count++; - if ((strlen (word->text ()) == 1) && - !STRING 
(applybox_test_exclusions).contains (*word->text ()) - && (word->gblob_list ()->length () == 1)) { - // Here is a word with a single char label and a single blob so test it. - bln_word = make_bln_copy(word, row, NULL, row->x_height (), &denorm); - blob_it.set_to_list (bln_word->blob_list ()); - ch[0] = *word->text (); - char_count++; - best_choice = tess_segment_pass1 (bln_word, - &denorm, - &Tesseract::tess_default_matcher, - raw_choice, - &blob_choices, outword); - - /* - Test for TESS screw up on word. Recog_word has already ensured that the - choice list, outword blob lists and best_choice string are the same - length. A TESS screw up is indicated by a blank filled or 0 length string. - */ - if ((best_choice->length() == 0) || - (strspn(best_choice->unichar_string().string(), " ") == - best_choice->unichar_string().length())) { - rej_count++; - tprintf ("%d:%d: \"%s\" -> TESS FAILED\n", - row_count, word_count, ch); - #ifndef SECURE_NAMES - wordstats.word (tess_rej_str, 2, ch, 1); - #endif - } - else { - if ((best_choice->length() != outword->blob_list()->length()) || - (best_choice->length() != blob_choices.length())) { - tprintf - ("ASSERT FAIL String:\"%s\"; Strlen=%d; #Blobs=%d; #Choices=%d\n", - best_choice->unichar_string().string(), - best_choice->length(), - outword->blob_list ()->length(), - blob_choices.length()); - } - ASSERT_HOST(best_choice->length() == - outword->blob_list()->length()); - ASSERT_HOST(best_choice->length() == blob_choices.length()); - fix_quotes (best_choice, - //turn to double - outword, &blob_choices); - if (strcmp (best_choice->unichar_string().string(), ch) != 0) { - err_count++; - tprintf ("%d:%d: \"%s\" -> \"%s\"\n", - row_count, word_count, ch, - best_choice->unichar_string().string()); - } - else - correct_count++; - #ifndef SECURE_NAMES - if (best_choice->unichar_string().length() > 2) - wordstats.word(tess_long_str, 2, ch, 1); - else - wordstats.word(best_choice->unichar_string().string(), - 
best_choice->unichar_string().length(), - ch, 1); - #endif - } - delete bln_word; - delete outword; - delete best_choice; - delete raw_choice; - blob_choices.deep_clear (); - count++; - } - } - } +// Calls LearnWord to extract features for labelled blobs within each word. +// Features are written to the given filename. +void Tesseract::ApplyBoxTraining(const STRING& filename, PAGE_RES* page_res) { + PAGE_RES_IT pr_it(page_res); + int word_count = 0; + for (WERD_RES *word_res = pr_it.word(); word_res != NULL; + word_res = pr_it.forward()) { + LearnWord(filename.string(), NULL, word_res); + ++word_count; } - #ifndef SECURE_NAMES - wordstats.print (1, 100.0); - wordstats.conf_matrix (); - tprintf ("Tested %d chars: %d correct; %d rejected by tess; %d errs\n", - char_count, correct_count, rej_count, err_count); - #endif + tprintf ("Generated training data for %d words\n", word_count); } + } // namespace tesseract diff --git a/ccmain/applybox.h b/ccmain/applybox.h deleted file mode 100644 index 66099cf5c7..0000000000 --- a/ccmain/applybox.h +++ /dev/null @@ -1,84 +0,0 @@ -/********************************************************************** - * File: applybox.h (Formerly applybox.h) - * Description: Re segment rows according to box file data - * Author: Phil Cheatle - * Created: Wed Nov 24 09:11:23 GMT 1993 - * - * (C) Copyright 1993, Hewlett-Packard Ltd. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. 
- * - **********************************************************************/ - -#ifndef APPLYBOX_H -#define APPLYBOX_H - -#include "varable.h" -#include "ocrblock.h" -#include "ocrrow.h" -#include "notdll.h" -#include "unichar.h" - -extern BOOL_VAR_H (applybox_rebalance, TRUE, "Drop dead"); -extern INT_VAR_H (applybox_debug, 0, "Debug level"); -extern INT_VAR_H (applybox_page, 0, "Page number to apply boxes from"); -extern STRING_VAR_H (applybox_test_exclusions, "|", - "Chars ignored for testing"); -extern double_VAR_H (applybox_error_band, 0.15, "Err band as fract of xht"); -extern STRING_VAR_H(exposure_pattern, "exp", - "Exposure value follows this pattern in the image" - " filename. The name of the image files are expected" - " to be in the form [lang].[fontname].exp[num].tif"); - -static const int kMinFragmentOutlineArea = 10; - -void apply_boxes(const STRING& filename, - BLOCK_LIST *block_list //real blocks - ); - -ROW *find_row_of_box( - BLOCK_LIST *block_list, //real blocks - const TBOX &box, //from boxfile - inT16 &block_id, - inT16 &row_id_to_process); - -inT16 resegment_box( - ROW *row, - TBOX &box, - UNICHAR_ID uch_id, - inT16 block_id, - inT16 row_id, - inT16 boxfile_lineno, - inT16 boxfile_charno, - inT16 *tgt_char_counts, - bool learn_char_fragments, - bool learning); - -void tidy_up( - BLOCK_LIST *block_list, //real blocks - inT16 &ok_char_count, - inT16 &ok_row_count, - inT16 &unlabelled_words, - inT16 *tgt_char_counts, - inT16 &rebalance_count, - UNICHAR_ID *min_uch_id, - inT16 &min_samples, - inT16 &final_labelled_blob_count, - bool learn_character_fragments, - bool learning); - -void report_failed_box(inT16 boxfile_lineno, - inT16 boxfile_charno, - TBOX box, - const char *box_ch, - const char *err_msg); - -void apply_box_training(const STRING& filename, BLOCK_LIST *block_list); -#endif diff --git a/ccmain/blobcmp.cpp b/ccmain/blobcmp.cpp deleted file mode 100644 index 8d365358c5..0000000000 --- a/ccmain/blobcmp.cpp +++ /dev/null @@ -1,82 +0,0 @@ 
-/********************************************************************** - * File: blobcmp.c (Formerly blobcmp.c) - * Description: Code to compare blobs using the adaptive matcher. - * Author: Ray Smith - * Created: Wed Apr 21 09:28:51 BST 1993 - * - * (C) Copyright 1993, Hewlett-Packard Ltd. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. - * - **********************************************************************/ - -#include "mfcpch.h" -#include "fxdefs.h" -#include "ocrfeatures.h" -#include "intmatcher.h" -#include "intproto.h" -#include "adaptive.h" -#include "adaptmatch.h" -#include "const.h" -#include "tessvars.h" -#include "tesseractclass.h" - -#define CMP_CLASS 0 - -/********************************************************************** - * compare_tess_blobs - * - * Match 2 blobs using the adaptive classifier. 
- **********************************************************************/ -namespace tesseract { -float Tesseract::compare_tess_blobs(TBLOB *blob1, - TEXTROW *row1, - TBLOB *blob2, - TEXTROW *row2) { - int fcount; /*number of features */ - ADAPT_CLASS adapted_class; - ADAPT_TEMPLATES ad_templates; - LINE_STATS line_stats1, line_stats2; - INT_FEATURE_ARRAY int_features; - FEATURE_SET float_features; - INT_RESULT_STRUCT int_result; /*output */ - - BIT_VECTOR AllProtosOn = NewBitVector (MAX_NUM_PROTOS); - BIT_VECTOR AllConfigsOn = NewBitVector (MAX_NUM_CONFIGS); - set_all_bits (AllProtosOn, WordsInVectorOfSize (MAX_NUM_PROTOS)); - set_all_bits (AllConfigsOn, WordsInVectorOfSize (MAX_NUM_CONFIGS)); - - EnterClassifyMode; - ad_templates = NewAdaptedTemplates (false); - GetLineStatsFromRow(row1, &line_stats1); - /*copy baseline stuff */ - GetLineStatsFromRow(row2, &line_stats2); - adapted_class = NewAdaptedClass (); - AddAdaptedClass (ad_templates, adapted_class, CMP_CLASS); - InitAdaptedClass(blob1, &line_stats1, CMP_CLASS, adapted_class, ad_templates); - fcount = GetAdaptiveFeatures (blob2, &line_stats2, - int_features, &float_features); - if (fcount > 0) { - SetBaseLineMatch(); - IntegerMatcher (ClassForClassId (ad_templates->Templates, CMP_CLASS), - AllProtosOn, AllConfigsOn, fcount, fcount, - int_features, 0, &int_result, testedit_match_debug); - FreeFeatureSet(float_features); - if (int_result.Rating < 0) - int_result.Rating = MAX_FLOAT32; - } - - free_adapted_templates(ad_templates); - FreeBitVector(AllConfigsOn); - FreeBitVector(AllProtosOn); - - return fcount > 0 ? 
int_result.Rating * fcount : MAX_FLOAT32; -} -} // namespace tesseract diff --git a/ccmain/callnet.cpp b/ccmain/callnet.cpp deleted file mode 100644 index 506ed57520..0000000000 --- a/ccmain/callnet.cpp +++ /dev/null @@ -1,93 +0,0 @@ -/********************************************************************** - * File: callnet.cpp (Formerly callnet.c) - * Description: Interface to Neural Net matcher - * Author: Phil Cheatle - * Created: Wed Nov 18 10:35:00 GMT 1992 - * - * (C) Copyright 1992, Hewlett-Packard Ltd. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. 
- * - **********************************************************************/ - -#include "mfcpch.h" -#include "errcode.h" -//#include "nmatch.h" -#include "globals.h" - -#define OUTPUT_NODES 94 - -const ERRCODE NETINIT = "NN init error"; - -//extern "C" -//{ -//extern char* demodir; /* where program lives */ - -void init_net() { /* Initialise net */ -#ifdef ASPIRIN_INCLUDED - char wts_filename[256]; - - if (nmatch_init_network () != 0) { - NETINIT.error ("Init_net", EXIT, "Errcode %s", nmatch_error_string ()); - } - strcpy(wts_filename, demodir); - strcat (wts_filename, "tessdata/netwts"); - - if (nmatch_load_network (wts_filename) != 0) { - NETINIT.error ("Init_net", EXIT, "Weights failed, Errcode %s", - nmatch_error_string ()); - } -#endif -} - - -void callnet( /* Apply image to net */ - float *input_vector, - char *top, - float *top_score, - char *next, - float *next_score) { -#ifdef ASPIRIN_INCLUDED - float *output_vector; - int i; - int max_out_i = 0; - int next_max_out_i = 0; - float max_out = -9; - float next_max_out = -9; - - nmatch_set_input(input_vector); - nmatch_propagate_forward(); - output_vector = nmatch_get_output (); - - /* Now find top two choices */ - - for (i = 0; i < OUTPUT_NODES; i++) { - if (output_vector[i] > max_out) { - next_max_out = max_out; - max_out = output_vector[i]; - next_max_out_i = max_out_i; - max_out_i = i; - } - else { - if (output_vector[i] > next_max_out) { - next_max_out = output_vector[i]; - next_max_out_i = i; - } - } - } - *top = max_out_i + '!'; - *next = next_max_out_i + '!'; - *top_score = max_out; - *next_score = next_max_out; -#endif -} - - -//}; diff --git a/ccmain/charcut.cpp b/ccmain/charcut.cpp index 1e48d7caff..bba7822109 100644 --- a/ccmain/charcut.cpp +++ b/ccmain/charcut.cpp @@ -18,12 +18,12 @@ **********************************************************************/ #include "mfcpch.h" -#include "charcut.h" -#include "imgs.h" -#include "svshowim.h" -//#include "evnts.h" -#include "notdll.h" -#include 
"scrollview.h" +#include "charcut.h" +#include "imgs.h" +#include "scrollview.h" +#include "svshowim.h" +#include "notdll.h" +#include "helpers.h" // Include automatically generated configuration file if running autoconf. #ifdef HAVE_CONFIG_H @@ -35,10 +35,6 @@ #define BUG_OFFSET 1 #define EXTERN -EXTERN INT_VAR (pix_word_margin, 3, "How far outside word BB to grow"); - -extern IMAGE page_image; - ELISTIZE (PIXROW) /************************************************************************* * PIXROW::PIXROW() @@ -58,8 +54,8 @@ PIXROW::PIXROW(inT16 pos, inT16 count, PBLOB *blob) { row_offset = pos; row_count = count; - min = (inT16 *) alloc_mem (count * sizeof (inT16)); - max = (inT16 *) alloc_mem (count * sizeof (inT16)); + min = (inT16 *) alloc_mem(count * sizeof(inT16)); + max = (inT16 *) alloc_mem(count * sizeof(inT16)); outline_list = blob->out_list (); outline_it.set_to_list (outline_list); @@ -67,27 +63,21 @@ PIXROW::PIXROW(inT16 pos, inT16 count, PBLOB *blob) { min[i] = MAX_INT16 - 1; max[i] = -MAX_INT16 + 1; y_coord = row_offset + i + 0.5; - for (outline_it.mark_cycle_pt (); - !outline_it.cycled_list (); outline_it.forward ()) { - pts_list = outline_it.data ()->polypts (); - pts_it.set_to_list (pts_list); - for (pts_it.mark_cycle_pt (); - !pts_it.cycled_list (); pts_it.forward ()) { - pt = pts_it.data ()->pos; - vec = pts_it.data ()->vec; - if ((vec.y () != 0) && - (((pt.y () <= y_coord) && (pt.y () + vec.y () >= y_coord)) - || ((pt.y () >= y_coord) - && (pt.y () + vec.y () <= y_coord)))) { + for (outline_it.mark_cycle_pt(); + !outline_it.cycled_list(); outline_it.forward()) { + pts_list = outline_it.data()->polypts(); + pts_it.set_to_list(pts_list); + for (pts_it.mark_cycle_pt(); !pts_it.cycled_list(); pts_it.forward()) { + pt = pts_it.data()->pos; + vec = pts_it.data()->vec; + if ((vec.y() != 0) && + (((pt.y() <= y_coord) && (pt.y() + vec.y() >= y_coord)) + || ((pt.y() >= y_coord) && (pt.y() + vec.y() <= y_coord)))) { /* The segment crosses y_coord so find 
x-point and check for min/max. */ - x_coord = (inT16) floor ((y_coord - - pt.y ()) * vec.x () / vec.y () + - pt.x () + 0.5); - if (x_coord < min[i]) - min[i] = x_coord; - x_coord--; //to get pix to left of line - if (x_coord > max[i]) - max[i] = x_coord; + x_coord = (inT16) floor((y_coord - pt.y()) * vec.x() / vec.y() + + pt.x() + 0.5); + // x_coord - 1 to get pix to left of line + UpdateRange(x_coord, x_coord - 1, &min[i], &max[i]); } } } @@ -154,20 +144,14 @@ TBOX PIXROW::bounding_box() const { for (i = 0; i < row_count; i++) { y_coord = row_offset + i; if (min[i] <= max[i]) { - if (y_coord < min_y) - min_y = y_coord; - if (y_coord + 1 > max_y) - max_y = y_coord + 1; - if (min[i] < min_x) - min_x = min[i]; - if (max[i] + 1 > max_x) - max_x = max[i] + 1; + UpdateRange(y_coord, y_coord + 1, &min_y, &max_y); + UpdateRange(min[i], max[i] + 1, &min_x, &max_x); } } if (min_x > max_x || min_y > max_y) - return TBOX (); + return TBOX(); else - return TBOX (ICOORD (min_x, min_y), ICOORD (max_x, max_y)); + return TBOX(ICOORD(min_x, min_y), ICOORD(max_x, max_y)); } @@ -479,10 +463,10 @@ void char_clip_word( // /* Define region for max pixrow expansion */ pix_box = word_box; - pix_box.move_bottom_edge (-pix_word_margin); - pix_box.move_top_edge (pix_word_margin); - pix_box.move_left_edge (-pix_word_margin); - pix_box.move_right_edge (pix_word_margin); + pix_box.move_bottom_edge (-kPixWordMargin); + pix_box.move_top_edge (kPixWordMargin); + pix_box.move_left_edge (-kPixWordMargin); + pix_box.move_right_edge (kPixWordMargin); pix_box -= TBOX (ICOORD (0, 0 + BUG_OFFSET), ICOORD (bin_image.get_xsize (), bin_image.get_ysize () - BUG_OFFSET)); diff --git a/ccmain/charcut.h b/ccmain/charcut.h index 0b791bd14a..838ce86920 100644 --- a/ccmain/charcut.h +++ b/ccmain/charcut.h @@ -16,14 +16,6 @@ ** limitations under the License. 
* **********************************************************************/ -/** - * @file charcut.h - * @note Formerly charclip.h - * @brief Code for character clipping - * @author Phil Cheatle - * @date Created Wed Nov 11 08:35:15 GMT 1992 - * - */ #ifndef CHARCUT_H #define CHARCUT_H @@ -44,6 +36,8 @@ class ScrollView; * the row defined by min[0] and max[0] is held in row_offset. */ +const int kPixWordMargin = 3; // how far outside word BB to grow + class PIXROW:public ELIST_LINK { public: @@ -126,11 +120,6 @@ class PIXROW:public ELIST_LINK }; ELISTIZEH (PIXROW) -extern INT_VAR_H (pix_word_margin, 3, "How far outside word BB to grow"); -extern BOOL_VAR_H (show_char_clipping, TRUE, "Show clip image window?"); -extern INT_VAR_H (net_image_width, 40, "NN input image width"); -extern INT_VAR_H (net_image_height, 36, "NN input image height"); -extern INT_VAR_H (net_image_x_height, 22, "NN input image x_height"); void char_clip_word( WERD *word, ///< word to be processed IMAGE &bin_image, ///< whole image diff --git a/ccmain/charsample.cpp b/ccmain/charsample.cpp deleted file mode 100644 index 1afd506128..0000000000 --- a/ccmain/charsample.cpp +++ /dev/null @@ -1,709 +0,0 @@ -/********************************************************************** - * File: charsample.cpp (Formerly charsample.c) - * Description: Class to contain character samples and match scores - * to be used for adaption - * Author: Chris Newton - * Created: Thu Oct 7 13:40:37 BST 1993 - * - * (C) Copyright 1993, Hewlett-Packard Ltd. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- ** See the License for the specific language governing permissions and - ** limitations under the License. - * - **********************************************************************/ - -#include "mfcpch.h" - -#include -#include -#include -#ifdef __UNIX__ -#include -#include -#endif -#include "memry.h" -#include "tessvars.h" -#include "statistc.h" -#include "charsample.h" -#include "paircmp.h" -#include "matmatch.h" -#include "adaptions.h" -#include "secname.h" -#include "notdll.h" -#include "tesseractclass.h" - -// Include automatically generated configuration file if running autoconf. -#ifdef HAVE_CONFIG_H -#include "config_auto.h" -#endif - -extern inT32 demo_word; // Hack for demos - -ELISTIZE (CHAR_SAMPLE) ELISTIZE (CHAR_SAMPLES) CHAR_SAMPLE::CHAR_SAMPLE () { - sample_blob = NULL; - sample_denorm = NULL; - sample_image = NULL; - ch = '\0'; - n_samples_matched = 0; - total_match_scores = 0.0; - sumsq_match_scores = 0.0; -} - - -CHAR_SAMPLE::CHAR_SAMPLE(PBLOB *blob, DENORM *denorm, char c) { - sample_blob = blob; - sample_denorm = denorm; - sample_image = NULL; - ch = c; - n_samples_matched = 0; - total_match_scores = 0.0; - sumsq_match_scores = 0.0; -} - - -CHAR_SAMPLE::CHAR_SAMPLE(IMAGE *image, char c) { - sample_blob = NULL; - sample_denorm = NULL; - sample_image = image; - ch = c; - n_samples_matched = 0; - total_match_scores = 0.0; - sumsq_match_scores = 0.0; -} - - -float CHAR_SAMPLE::match_sample( // Update match scores - CHAR_SAMPLE *test_sample, - BOOL8 updating, - tesseract::Tesseract* tess) { - float score1; - float score2; - IMAGE *image = test_sample->image (); - - if (sample_blob != NULL && test_sample->blob () != NULL) { - PBLOB *blob = test_sample->blob (); - DENORM *denorm = test_sample->denorm (); - - score1 = tess->compare_bln_blobs (sample_blob, sample_denorm, blob, denorm); - score2 = tess->compare_bln_blobs (blob, denorm, sample_blob, sample_denorm); - - score1 = (score1 > score2) ? 
score1 : score2; - } - else if (sample_image != NULL && image != NULL) { - CHAR_PROTO *sample = new CHAR_PROTO (this); - - score1 = matrix_match (sample_image, image); - delete sample; - } - else - return BAD_SCORE; - - if ((tessedit_use_best_sample || tessedit_cluster_debug) && updating) { - n_samples_matched++; - total_match_scores += score1; - sumsq_match_scores += score1 * score1; - } - return score1; -} - - -double CHAR_SAMPLE::mean_score() { - if (n_samples_matched > 0) - return (total_match_scores / n_samples_matched); - else - return BAD_SCORE; -} - - -double CHAR_SAMPLE::variance() { - double mean = mean_score (); - - if (n_samples_matched > 0) { - return (sumsq_match_scores / n_samples_matched) - mean * mean; - } - else - return BAD_SCORE; -} - - -void CHAR_SAMPLE::print(FILE *f) { - if (!tessedit_cluster_debug) - return; - - if (n_samples_matched > 0) - fprintf (f, - "%c - sample matched against " INT32FORMAT - " blobs, mean: %f, var: %f\n", ch, n_samples_matched, - mean_score (), variance ()); - else - fprintf (f, "No matches for this sample (%c)\n", ch); -} - - -void CHAR_SAMPLE::reset_match_statistics() { - n_samples_matched = 0; - total_match_scores = 0.0; - sumsq_match_scores = 0.0; -} - - -CHAR_SAMPLES::CHAR_SAMPLES() { - type = UNKNOWN; - samples.clear (); - ch = '\0'; - best_sample = NULL; - proto = NULL; -} - - -CHAR_SAMPLES::CHAR_SAMPLES(CHAR_SAMPLE *sample) { - CHAR_SAMPLE_IT sample_it = &samples; - - ASSERT_HOST (sample->image () != NULL || sample->blob () != NULL); - - if (sample->image () != NULL) - type = IMAGE_CLUSTER; - else if (sample->blob () != NULL) - type = BLOB_CLUSTER; - - samples.clear (); - sample_it.add_to_end (sample); - if (tessedit_mm_only_match_same_char) - ch = sample->character (); - else - ch = '\0'; - best_sample = NULL; - proto = NULL; -} - - -void CHAR_SAMPLES::add_sample(CHAR_SAMPLE *sample, tesseract::Tesseract* tess) { - CHAR_SAMPLE_IT sample_it = &samples; - - if (tessedit_use_best_sample || 
tessedit_cluster_debug) - for (sample_it.mark_cycle_pt (); - !sample_it.cycled_list (); sample_it.forward ()) { - sample_it.data ()->match_sample (sample, TRUE, tess); - sample->match_sample (sample_it.data (), TRUE, tess); - } - - sample_it.add_to_end (sample); - - if (tessedit_mm_use_prototypes && type == IMAGE_CLUSTER) { - if (samples.length () == tessedit_mm_prototype_min_size) - this->build_prototype (); - else if (samples.length () > tessedit_mm_prototype_min_size) - this->add_sample_to_prototype (sample); - } -} - - -void CHAR_SAMPLES::add_sample_to_prototype(CHAR_SAMPLE *sample) { - BOOL8 rebuild = FALSE; - inT32 new_xsize = proto->x_size (); - inT32 new_ysize = proto->y_size (); - inT32 sample_xsize = sample->image ()->get_xsize (); - inT32 sample_ysize = sample->image ()->get_ysize (); - - if (sample_xsize > new_xsize) { - new_xsize = sample_xsize; - rebuild = TRUE; - } - if (sample_ysize > new_ysize) { - new_ysize = sample_ysize; - rebuild = TRUE; - } - - if (rebuild) - proto->enlarge_prototype (new_xsize, new_ysize); - - proto->add_sample (sample); -} - - -void CHAR_SAMPLES::build_prototype() { - CHAR_SAMPLE_IT sample_it = &samples; - CHAR_SAMPLE *sample; - inT32 proto_xsize = 0; - inT32 proto_ysize = 0; - - if (type != IMAGE_CLUSTER - || samples.length () < tessedit_mm_prototype_min_size) - return; - - for (sample_it.mark_cycle_pt (); - !sample_it.cycled_list (); sample_it.forward ()) { - sample = sample_it.data (); - if (sample->image ()->get_xsize () > proto_xsize) - proto_xsize = sample->image ()->get_xsize (); - if (sample->image ()->get_ysize () > proto_ysize) - proto_ysize = sample->image ()->get_ysize (); - } - - proto = new CHAR_PROTO (proto_xsize, proto_ysize, 0, 0, '\0'); - - for (sample_it.mark_cycle_pt (); - !sample_it.cycled_list (); sample_it.forward ()) - this->add_sample_to_prototype (sample_it.data ()); - -} - - -void CHAR_SAMPLES::find_best_sample() { - CHAR_SAMPLE_IT sample_it = &samples; - double score; - double best_score = 
MAX_INT32; - - if (ch == '\0' || samples.length () < tessedit_mm_prototype_min_size) - return; - - for (sample_it.mark_cycle_pt (); - !sample_it.cycled_list (); sample_it.forward ()) { - score = sample_it.data ()->mean_score (); - if (score < best_score) { - best_score = score; - best_sample = sample_it.data (); - } - } - #ifndef SECURE_NAMES - if (tessedit_cluster_debug) { - tprintf ("Best sample for this %c cluster:\n", ch); - best_sample->print (debug_fp); - } - #endif -} - - -float CHAR_SAMPLES::match_score(CHAR_SAMPLE *sample, - tesseract::Tesseract* tess) { - if (tessedit_mm_only_match_same_char && sample->character () != ch) - return BAD_SCORE; - - if (tessedit_use_best_sample && best_sample != NULL) - return best_sample->match_sample (sample, FALSE, tess); - else if ((tessedit_mm_use_prototypes - || tessedit_mm_adapt_using_prototypes) && proto != NULL) - return proto->match_sample (sample); - else - return this->nn_match_score (sample, tess); -} - - -float CHAR_SAMPLES::nn_match_score(CHAR_SAMPLE *sample, - tesseract::Tesseract* tess) { - CHAR_SAMPLE_IT sample_it = &samples; - float score; - float min_score = MAX_INT32; - - for (sample_it.mark_cycle_pt (); - !sample_it.cycled_list (); sample_it.forward ()) { - score = sample_it.data ()->match_sample (sample, FALSE, tess); - if (score < min_score) - min_score = score; - } - - return min_score; -} - - -void CHAR_SAMPLES::assign_to_char() { - STATS char_frequency(FIRST_CHAR, LAST_CHAR); - CHAR_SAMPLE_IT sample_it = &samples; - inT32 i; - inT32 max_index = 0; - inT32 max_freq = 0; - - if (samples.length () == 0 || tessedit_mm_only_match_same_char) - return; - - for (sample_it.mark_cycle_pt (); - !sample_it.cycled_list (); sample_it.forward ()) - char_frequency.add ((inT32) sample_it.data ()->character (), 1); - - for (i = FIRST_CHAR; i <= LAST_CHAR; i++) - if (char_frequency.pile_count (i) > max_freq) { - max_index = i; - max_freq = char_frequency.pile_count (i); - } - - if (samples.length () >= 
tessedit_cluster_min_size - && max_freq > samples.length () * tessedit_cluster_accept_fraction) - ch = (char) max_index; -} - - -void CHAR_SAMPLES::print(FILE *f) { - CHAR_SAMPLE_IT sample_it = &samples; - - fprintf (f, "Collected " INT32FORMAT " samples\n", samples.length ()); - - #ifndef SECURE_NAMES - if (tessedit_cluster_debug) - for (sample_it.mark_cycle_pt (); - !sample_it.cycled_list (); sample_it.forward ()) - sample_it.data ()->print (f); - - if (ch == '\0') - fprintf (f, "\nCluster not used for adaption\n"); - else - fprintf (f, "\nCluster used to adapt to '%c's\n", ch); - #endif -} - - -CHAR_PROTO::CHAR_PROTO() { - xsize = 0; - ysize = 0; - ch = '\0'; - nsamples = 0; - proto_data = NULL; - proto = NULL; -} - - -CHAR_PROTO::CHAR_PROTO(inT32 x_size, - inT32 y_size, - inT32 n_samples, - float initial_value, - char c) { - inT32 x; - inT32 y; - - xsize = x_size; - ysize = y_size; - ch = c; - nsamples = n_samples; - - ALLOC_2D_ARRAY(xsize, ysize, proto_data, proto, float); - - for (y = 0; y < ysize; y++) - for (x = 0; x < xsize; x++) - proto[x][y] = initial_value; -} - - -CHAR_PROTO::CHAR_PROTO(CHAR_SAMPLE *sample) { - inT32 x; - inT32 y; - IMAGELINE imline_s; - - if (sample->image () == NULL) { - xsize = 0; - ysize = 0; - ch = '\0'; - nsamples = 0; - proto_data = NULL; - proto = NULL; - } - else { - ch = sample->character (); - xsize = sample->image ()->get_xsize (); - ysize = sample->image ()->get_ysize (); - nsamples = 1; - - ALLOC_2D_ARRAY(xsize, ysize, proto_data, proto, float); - - for (y = 0; y < ysize; y++) { - sample->image ()->fast_get_line (0, y, xsize, &imline_s); - for (x = 0; x < xsize; x++) - if (imline_s.pixels[x] == BINIM_WHITE) - proto[x][y] = 1.0; - else - proto[x][y] = -1.0; - } - } -} - - -CHAR_PROTO::~CHAR_PROTO () { - if (proto_data != NULL) - FREE_2D_ARRAY(proto_data, proto); -} - - -float CHAR_PROTO::match_sample(CHAR_SAMPLE *test_sample) { - CHAR_PROTO *test_proto; - float score; - - if (test_sample->image () != NULL) { - test_proto = 
new CHAR_PROTO (test_sample); - if (xsize > test_proto->x_size ()) - score = this->match (test_proto); - else { - demo_word = -demo_word; // Flag different call - score = test_proto->match (this); - } - } - else - return BAD_SCORE; - - delete test_proto; - - return score; -} - - -float CHAR_PROTO::match(CHAR_PROTO *test_proto) { - inT32 xsize2 = test_proto->x_size (); - inT32 y_size; - inT32 y_size2; - inT32 x_offset; - inT32 y_offset; - inT32 x; - inT32 y; - CHAR_PROTO *match_proto; - float score; - float sum = 0.0; - - ASSERT_HOST (xsize >= xsize2); - - x_offset = (xsize - xsize2) / 2; - - if (ysize < test_proto->y_size ()) { - y_size = test_proto->y_size (); - y_size2 = ysize; - y_offset = (y_size - y_size2) / 2; - - match_proto = new CHAR_PROTO (xsize, - y_size, - nsamples * test_proto->n_samples (), - 0, '\0'); - - for (y = 0; y < y_offset; y++) { - for (x = 0; x < xsize2; x++) { - match_proto->data ()[x + x_offset][y] = - test_proto->data ()[x][y] * nsamples; - sum += match_proto->data ()[x + x_offset][y]; - } - } - - for (y = y_offset + y_size2; y < y_size; y++) { - for (x = 0; x < xsize2; x++) { - match_proto->data ()[x + x_offset][y] = - test_proto->data ()[x][y] * nsamples; - sum += match_proto->data ()[x + x_offset][y]; - } - } - - for (y = y_offset; y < y_offset + y_size2; y++) { - for (x = 0; x < x_offset; x++) { - match_proto->data ()[x][y] = proto[x][y - y_offset] * - test_proto->n_samples (); - sum += match_proto->data ()[x][y]; - } - - for (x = x_offset + xsize2; x < xsize; x++) { - match_proto->data ()[x][y] = proto[x][y - y_offset] * - test_proto->n_samples (); - sum += match_proto->data ()[x][y]; - } - - for (x = x_offset; x < x_offset + xsize2; x++) { - match_proto->data ()[x][y] = - proto[x][y - y_offset] * test_proto->data ()[x - x_offset][y]; - sum += match_proto->data ()[x][y]; - } - } - } - else { - y_size = ysize; - y_size2 = test_proto->y_size (); - y_offset = (y_size - y_size2) / 2; - - match_proto = new CHAR_PROTO (xsize, - y_size, - 
nsamples * test_proto->n_samples (), - 0, '\0'); - - for (y = 0; y < y_offset; y++) - for (x = 0; x < xsize; x++) { - match_proto->data ()[x][y] = - proto[x][y] * test_proto->n_samples (); - sum += match_proto->data ()[x][y]; - } - - for (y = y_offset + y_size2; y < y_size; y++) - for (x = 0; x < xsize; x++) { - match_proto->data ()[x][y] = - proto[x][y] * test_proto->n_samples (); - sum += match_proto->data ()[x][y]; - } - - for (y = y_offset; y < y_offset + y_size2; y++) { - for (x = 0; x < x_offset; x++) { - match_proto->data ()[x][y] = - proto[x][y] * test_proto->n_samples (); - sum += match_proto->data ()[x][y]; - } - - for (x = x_offset + xsize2; x < xsize; x++) { - match_proto->data ()[x][y] = - proto[x][y] * test_proto->n_samples (); - sum += match_proto->data ()[x][y]; - } - - for (x = x_offset; x < x_offset + xsize2; x++) { - match_proto->data ()[x][y] = proto[x][y] * - test_proto->data ()[x - x_offset][y - y_offset]; - sum += match_proto->data ()[x][y]; - } - } - } - - score = (1.0 - sum / - (xsize * y_size * nsamples * test_proto->n_samples ())); - - if (tessedit_mm_debug) { - if (score < 0) { - tprintf ("Match score %f\n", score); - tprintf ("x: %d, y: %d, ns: %d, nt: %d, dx %d, dy: %d\n", - xsize, y_size, nsamples, test_proto->n_samples (), - x_offset, y_offset); - for (y = 0; y < y_size; y++) { - tprintf ("\n%d", y); - for (x = 0; x < xsize; x++) - tprintf ("\t%d", match_proto->data ()[x][y]); - - } - tprintf ("\n"); - fflush(debug_fp); - } - } - -#ifndef GRAPHICS_DISABLED - if (tessedit_display_mm) { - tprintf ("Match score %f\n", score); - display_images (this->make_image (), - test_proto->make_image (), match_proto->make_image ()); - } - else if (demo_word != 0) { - if (demo_word > 0) - display_image (test_proto->make_image (), "Test sample", - 300, 400, FALSE); - else - display_image (this->make_image (), "Test sample", 300, 400, FALSE); - - display_image (match_proto->make_image (), "Best match", - 700, 400, TRUE); - } -#endif - - delete 
match_proto; - - return score; -} - - -void CHAR_PROTO::enlarge_prototype(inT32 new_xsize, inT32 new_ysize) { - float *old_proto_data = proto_data; - float **old_proto = proto; - inT32 old_xsize = xsize; - inT32 old_ysize = ysize; - inT32 x_offset; - inT32 y_offset; - inT32 x; - inT32 y; - - ASSERT_HOST (new_xsize >= xsize && new_ysize >= ysize); - - xsize = new_xsize; - ysize = new_ysize; - ALLOC_2D_ARRAY(xsize, ysize, proto_data, proto, float); - x_offset = (xsize - old_xsize) / 2; - y_offset = (ysize - old_ysize) / 2; - - for (y = 0; y < y_offset; y++) - for (x = 0; x < xsize; x++) - proto[x][y] = nsamples; - - for (y = y_offset + old_ysize; y < ysize; y++) - for (x = 0; x < xsize; x++) - proto[x][y] = nsamples; - - for (y = y_offset; y < y_offset + old_ysize; y++) { - for (x = 0; x < x_offset; x++) - proto[x][y] = nsamples; - - for (x = x_offset + old_xsize; x < xsize; x++) - proto[x][y] = nsamples; - - for (x = x_offset; x < x_offset + old_xsize; x++) - proto[x][y] = old_proto[x - x_offset][y - y_offset]; - } - - FREE_2D_ARRAY(old_proto_data, old_proto); -} - - -void CHAR_PROTO::add_sample(CHAR_SAMPLE *sample) { - inT32 x_offset; - inT32 y_offset; - inT32 x; - inT32 y; - IMAGELINE imline_s; - inT32 sample_xsize = sample->image ()->get_xsize (); - inT32 sample_ysize = sample->image ()->get_ysize (); - - x_offset = (xsize - sample_xsize) / 2; - y_offset = (ysize - sample_ysize) / 2; - - ASSERT_HOST (x_offset >= 0 && y_offset >= 0); - - for (y = 0; y < y_offset; y++) - for (x = 0; x < xsize; x++) - proto[x][y]++; // Treat pixels outside the - // range as white - for (y = y_offset + sample_ysize; y < ysize; y++) - for (x = 0; x < xsize; x++) - proto[x][y]++; - - for (y = y_offset; y < y_offset + sample_ysize; y++) { - sample->image ()->fast_get_line (0, - y - y_offset, sample_xsize, &imline_s); - for (x = x_offset; x < x_offset + sample_xsize; x++) { - if (imline_s.pixels[x - x_offset] == BINIM_WHITE) - proto[x][y]++; - else - proto[x][y]--; - } - - for (x = 0; x 
< x_offset; x++) - proto[x][y]++; - - for (x = x_offset + sample_xsize; x < xsize; x++) - proto[x][y]++; - } - - nsamples++; -} - - -IMAGE *CHAR_PROTO::make_image() { - IMAGE *image; - IMAGELINE imline_p; - inT32 x; - inT32 y; - - ASSERT_HOST (nsamples != 0); - - image = new (IMAGE); - image->create (xsize, ysize, 8); - - for (y = 0; y < ysize; y++) { - image->fast_get_line (0, y, xsize, &imline_p); - - for (x = 0; x < xsize; x++) { - imline_p.pixels[x] = 128 + - (uinT8) ((proto[x][y] * 128.0) / (0.00001 + nsamples)); - } - - image->fast_put_line (0, y, xsize, &imline_p); - } - return image; -} diff --git a/ccmain/charsample.h b/ccmain/charsample.h deleted file mode 100644 index 5e53cc1827..0000000000 --- a/ccmain/charsample.h +++ /dev/null @@ -1,214 +0,0 @@ -/********************************************************************** - * File: charsample.h (Formerly charsample.h) - * Description: Class to contain character samples and match scores - * to be used for adaption - * Author: Chris Newton - * Created: Thu Oct 7 13:40:37 BST 1993 - * - * (C) Copyright 1993, Hewlett-Packard Ltd. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. - * - **********************************************************************/ - -#ifndef CHARSAMPLE_H -#define CHARSAMPLE_H - -#include "elst.h" -#include "pageres.h" -#include "memry.h" -#include "notdll.h" - -#define BAD_SCORE MAX_INT32 -#define FIRST_CHAR '!' 
-#define LAST_CHAR '~' - -namespace tesseract { - class Tesseract; // Fwd decl. -} - -enum ClusterType -{ UNKNOWN, BLOB_CLUSTER, IMAGE_CLUSTER }; - -class CHAR_SAMPLE; //forward decl - -ELISTIZEH (CHAR_SAMPLE) -class CHAR_SAMPLES; //forward decl - -ELISTIZEH (CHAR_SAMPLES) -class CHAR_PROTO; //forward decl - -class CHAR_SAMPLE:public ELIST_LINK -{ - public: - CHAR_SAMPLE(); // empty constructor - - CHAR_SAMPLE( // simple constructor - PBLOB *blob, - DENORM *denorm, - char c - ); - - CHAR_SAMPLE( // simple constructor - IMAGE *image, - char c - ); - - ~CHAR_SAMPLE () { - // We own the image, so it has to be deleted. - if (sample_image != NULL) - delete sample_image; - } - - float match_sample(CHAR_SAMPLE *test_sample, BOOL8 updating, - tesseract::Tesseract* tess); - - inT32 n_matches() { - return n_samples_matched; - } - - IMAGE *image() { - return sample_image; - } - - PBLOB *blob() { - return sample_blob; - } - - DENORM *denorm() { - return sample_denorm; - } - - double mean_score(); - - double variance(); - - char character() { - return ch; - } - - void print(FILE *f); - - void reset_match_statistics(); - - NEWDELETE2 (CHAR_SAMPLE) private: - IMAGE * sample_image; - PBLOB *sample_blob; - DENORM *sample_denorm; - inT32 n_samples_matched; - double total_match_scores; - double sumsq_match_scores; - char ch; -}; - -class CHAR_SAMPLES:public ELIST_LINK -{ - public: - CHAR_SAMPLES(); //empty constructor - - CHAR_SAMPLES(CHAR_SAMPLE *sample); - - ~CHAR_SAMPLES () { //destructor - } - - inT32 n_samples() { - return samples.length (); - } - - void add_sample(CHAR_SAMPLE *sample, tesseract::Tesseract*); - - void build_prototype(); - - void rebuild_prototype(inT32 new_xsize, inT32 new_ysize); - - void add_sample_to_prototype(CHAR_SAMPLE *sample); - - CHAR_PROTO *prototype() { - return proto; - } - - void find_best_sample(); - - float match_score(CHAR_SAMPLE *sample, tesseract::Tesseract* tess); - - float nn_match_score(CHAR_SAMPLE *sample, tesseract::Tesseract* tess); - - 
char character() { - return ch; - } - - void assign_to_char(); - - void print(FILE *f); - - NEWDELETE2 (CHAR_SAMPLES) private: - ClusterType type; - char ch; - CHAR_PROTO *proto; - CHAR_SAMPLE *best_sample; - CHAR_SAMPLE_LIST samples; -}; - -class CHAR_PROTO -{ - public: - CHAR_PROTO(); // empty constructor - - CHAR_PROTO(inT32 x_size, - inT32 y_size, - inT32 n_samples, - float initial_value, - char c); - - CHAR_PROTO( // simple constructor - CHAR_SAMPLE *sample); - - ~CHAR_PROTO (); - - float match_sample(CHAR_SAMPLE *test_sample); - - float match(CHAR_PROTO *test_proto); - - inT32 n_samples() { - return nsamples; - } - - inT32 x_size() { - return xsize; - } - - inT32 y_size() { - return ysize; - } - - float **data() { - return proto; - } - char character() { - return ch; - } - - void enlarge_prototype(inT32 new_xsize, inT32 new_ysize); - - void add_sample(CHAR_SAMPLE *sample); - - IMAGE *make_image(); - - void print(FILE *f); - - NEWDELETE2 (CHAR_PROTO) private: - inT32 xsize; - inT32 ysize; - float *proto_data; - float **proto; - inT32 nsamples; - char ch; -}; -#endif diff --git a/ccmain/control.cpp b/ccmain/control.cpp index 8ca9b4f6e4..f35235b92c 100644 --- a/ccmain/control.cpp +++ b/ccmain/control.cpp @@ -18,40 +18,35 @@ * **********************************************************************/ -#include "mfcpch.h" +#include "mfcpch.h" -#include "mainblk.h" -#include -#include +#include +#include #ifdef __UNIX__ -#include -#include -#include +#include +#include +#include #endif -#include -#include "ocrclass.h" -#include "werdit.h" -#include "drawfx.h" -#include "tfacep.h" -#include "tessbox.h" -#include "tessvars.h" -//#include "fxtop.h" -#include "pgedit.h" -#include "reject.h" -#include "adaptions.h" -#include "charcut.h" -#include "fixxht.h" -#include "fixspace.h" -#include "genblob.h" -#include "docqual.h" -#include "control.h" -#include "secname.h" -#include "output.h" -#include "callcpp.h" -#include "notdll.h" -#include "tordvars.h" -#include 
"adaptmatch.h" +#include +#include "ocrclass.h" +#include "werdit.h" +#include "drawfx.h" +#include "tfacep.h" +#include "tessbox.h" +#include "tessvars.h" +#include "pgedit.h" +#include "reject.h" +#include "charcut.h" +#include "fixspace.h" +#include "genblob.h" +#include "docqual.h" +#include "control.h" +#include "secname.h" +#include "output.h" +#include "callcpp.h" +#include "notdll.h" #include "globals.h" +#include "sorthelper.h" #include "tesseractclass.h" // Include automatically generated configuration file if running autoconf. @@ -62,105 +57,9 @@ #define MIN_FONT_ROW_COUNT 8 #define MAX_XHEIGHT_DIFF 3 -#define EXTERN -//extern "C" { -//EXTERN BOOL_VAR(tessedit_small_match,FALSE,"Use small matrix matcher"); - -//extern FILE* matcher_fp; -//extern FILE* correct_fp; -//}; -BOOL_VAR (tessedit_small_match, FALSE, "Use small matrix matcher"); -EXTERN BOOL_VAR (tessedit_print_text, FALSE, "Write text to stdout"); -EXTERN BOOL_VAR (tessedit_draw_words, FALSE, "Draw source words"); -EXTERN BOOL_VAR (tessedit_draw_outwords, FALSE, "Draw output words"); -EXTERN BOOL_VAR (tessedit_training_wiseowl, FALSE, "Call WO to learn blobs"); -EXTERN BOOL_VAR (tessedit_training_tess, FALSE, "Call Tess to learn blobs"); -EXTERN BOOL_VAR (tessedit_matcher_is_wiseowl, FALSE, "Call WO to classify"); -EXTERN BOOL_VAR (tessedit_dump_choices, FALSE, "Dump char choices"); -EXTERN BOOL_VAR (tessedit_fix_fuzzy_spaces, TRUE, -"Try to improve fuzzy spaces"); -EXTERN BOOL_VAR (tessedit_unrej_any_wd, FALSE, -"Dont bother with word plausibility"); -EXTERN BOOL_VAR (tessedit_fix_hyphens, TRUE, "Crunch double hyphens?"); - -EXTERN BOOL_VAR (tessedit_reject_fullstops, FALSE, "Reject all fullstops"); -EXTERN BOOL_VAR (tessedit_reject_suspect_fullstops, FALSE, -"Reject suspect fullstops"); -EXTERN BOOL_VAR (tessedit_redo_xheight, TRUE, "Check/Correct x-height"); -EXTERN BOOL_VAR (tessedit_cluster_adaption_on, TRUE, -"Do our own adaption - ems only"); -EXTERN BOOL_VAR (tessedit_enable_doc_dict, 
TRUE, -"Add words to the document dictionary"); -EXTERN BOOL_VAR (word_occ_first, FALSE, "Do word occ before re-est xht"); -EXTERN BOOL_VAR (tessedit_debug_fonts, FALSE, "Output font info per char"); -EXTERN BOOL_VAR (tessedit_xht_fiddles_on_done_wds, TRUE, -"Apply xht fix up even if done"); -EXTERN BOOL_VAR (tessedit_xht_fiddles_on_no_rej_wds, TRUE, -"Apply xht fix up even in no rejects"); -EXTERN INT_VAR (x_ht_check_word_occ, 2, "Check Char Block occupancy"); -EXTERN INT_VAR (x_ht_stringency, 1, "How many confirmed a/n to accept?"); -EXTERN BOOL_VAR (x_ht_quality_check, TRUE, "Dont allow worse quality"); -EXTERN BOOL_VAR (tessedit_debug_block_rejection, FALSE, -"Block and Row stats"); -EXTERN INT_VAR (debug_x_ht_level, 0, "Reestimate debug"); -EXTERN BOOL_VAR (rej_use_xht, TRUE, "Individual rejection control"); -EXTERN BOOL_VAR (debug_acceptable_wds, FALSE, "Dump word pass/fail chk"); - -EXTERN STRING_VAR (chs_leading_punct, "('`\"", "Leading punctuation"); -EXTERN -STRING_VAR (chs_trailing_punct1, ").,;:?!", "1st Trailing punctuation"); -EXTERN STRING_VAR (chs_trailing_punct2, ")'`\"", -"2nd Trailing punctuation"); - -EXTERN double_VAR (quality_rej_pc, 0.08, -"good_quality_doc lte rejection limit"); -EXTERN double_VAR (quality_blob_pc, 0.0, -"good_quality_doc gte good blobs limit"); -EXTERN double_VAR (quality_outline_pc, 1.0, -"good_quality_doc lte outline error limit"); -EXTERN double_VAR (quality_char_pc, 0.95, -"good_quality_doc gte good char limit"); -EXTERN INT_VAR (quality_min_initial_alphas_reqd, 2, -"alphas in a good word"); - -EXTERN BOOL_VAR (tessedit_tess_adapt_to_rejmap, FALSE, -"Use reject map to control Tesseract adaption"); -EXTERN INT_VAR (tessedit_tess_adaption_mode, 0x27, -"Adaptation decision algorithm for tess"); -EXTERN INT_VAR (tessedit_em_adaption_mode, 0, -"Adaptation decision algorithm for ems matrix matcher"); -EXTERN BOOL_VAR (tessedit_cluster_adapt_after_pass1, FALSE, -"Adapt using clusterer after pass 1"); -EXTERN BOOL_VAR 
(tessedit_cluster_adapt_after_pass2, FALSE, -"Adapt using clusterer after pass 1"); -EXTERN BOOL_VAR (tessedit_cluster_adapt_after_pass3, FALSE, -"Adapt using clusterer after pass 1"); -EXTERN BOOL_VAR (tessedit_cluster_adapt_before_pass1, FALSE, -"Adapt using clusterer before Tess adaping during pass 1"); -EXTERN INT_VAR (tessedit_cluster_adaption_mode, 0, -"Adaptation decision algorithm for matrix matcher"); -EXTERN BOOL_VAR (tessedit_adaption_debug, FALSE, -"Generate and print debug information for adaption"); -EXTERN BOOL_VAR (tessedit_minimal_rej_pass1, FALSE, -"Do minimal rejection on pass 1 output"); -EXTERN BOOL_VAR (tessedit_test_adaption, FALSE, -"Test adaption criteria"); -EXTERN BOOL_VAR (tessedit_global_adaption, FALSE, -"Adapt to all docs over time"); -EXTERN BOOL_VAR (tessedit_matcher_log, FALSE, "Log matcher activity"); -EXTERN INT_VAR (tessedit_test_adaption_mode, 3, -"Adaptation decision algorithm for tess"); -EXTERN BOOL_VAR(save_best_choices, FALSE, - "Save the results of the recognition step" - " (blob_choices) within the corresponding WERD_CHOICE"); - -EXTERN BOOL_VAR (test_pt, FALSE, "Test for point"); -EXTERN double_VAR (test_pt_x, 99999.99, "xcoord"); -EXTERN double_VAR (test_pt_y, 99999.99, "ycoord"); - -extern int display_ratings; -extern int number_debug; -FILE *choice_file = NULL; // Choice file ptr +const char* const kBackUpConfigFile = "tempconfigdata.config"; +// Multiple of x-height to make a repeated word have spaces in it. +const double kRepcharGapThreshold = 0.5; CLISTIZEH (PBLOB) CLISTIZE (PBLOB) /* DEBUGGING */ @@ -174,20 +73,21 @@ inT16 blob_count(WERD *w) { * * Make a word from the selected blobs and run Tess on them. 
* - * @param block_list recognise blobs + * @param page_res recognise blobs * @param selection_box within this box */ namespace tesseract { -void Tesseract::recog_pseudo_word(BLOCK_LIST *block_list, +void Tesseract::recog_pseudo_word(PAGE_RES* page_res, TBOX &selection_box) { WERD *word; ROW *pseudo_row; // row of word BLOCK *pseudo_block; // block of word - word = make_pseudo_word (block_list, selection_box, - pseudo_block, pseudo_row); + word = make_pseudo_word(page_res, selection_box, + pseudo_block, pseudo_row); if (word != NULL) { - recog_interactive(pseudo_block, pseudo_row, word); + WERD_RES word_res(word); + recog_interactive(pseudo_block, pseudo_row, &word_res); delete word; } } @@ -202,32 +102,70 @@ void Tesseract::recog_pseudo_word(BLOCK_LIST *block_list, * @param row row of word * @param word word to recognise */ -BOOL8 Tesseract::recog_interactive(BLOCK *block, - ROW *row, - WERD *word) { - WERD_RES word_res(word); +BOOL8 Tesseract::recog_interactive(BLOCK* block, ROW* row, WERD_RES* word_res) { inT16 char_qual; inT16 good_char_qual; - classify_word_pass2(&word_res, block, row); - #ifndef SECURE_NAMES + classify_word_pass2(word_res, block, row); if (tessedit_debug_quality_metrics) { - word_char_quality(&word_res, row, &char_qual, &good_char_qual); + word_char_quality(word_res, row, &char_qual, &good_char_qual); tprintf ("\n%d chars; word_blob_quality: %d; outline_errs: %d; char_quality: %d; good_char_quality: %d\n", - word_res.reject_map.length (), word_blob_quality (&word_res, row), - word_outline_errs (&word_res), char_qual, good_char_qual); + word_res->reject_map.length(), word_blob_quality(word_res, row), + word_outline_errs(word_res), char_qual, good_char_qual); } - #endif return TRUE; } +// Helper function to check for a target word and handle it appropriately. +// Inspired by Jetsoft's requirement to process only single words on pass2 +// and beyond. 
+// If word_config is not null: +// If the word_box and target_word_box overlap, read the word_config file +// else reset to previous config data. +// return true. +// else +// If the word_box and target_word_box overlap or pass <= 1, return true. +// Note that this function uses a fixed temporary file for storing the previous +// configs, so it is neither thread-safe, nor process-safe, but the assumption +// is that it will only be used for one debug window at a time. +bool Tesseract::ProcessTargetWord(const TBOX& word_box, + const TBOX& target_word_box, + const char* word_config, + int pass) { + if (word_config != NULL) { + if (word_box.major_overlap(target_word_box)) { + if (backup_config_file_ == NULL) { + backup_config_file_ = kBackUpConfigFile; + FILE* config_fp = fopen(backup_config_file_, "wb"); + ParamUtils::PrintParams(config_fp, params()); + fclose(config_fp); + ParamUtils::ReadParamsFile(word_config, false, params()); + } + } else { + if (backup_config_file_ != NULL) { + ParamUtils::ReadParamsFile(backup_config_file_, false, params()); + backup_config_file_ = NULL; + } + } + } else if (pass > 1 && !word_box.major_overlap(target_word_box)) { + return false; + } + return true; +} /** * recog_all_words() * - * Walk the current block list applying the specified word processor function - * to all words + * Walk the page_res, recognizing all the words. + * If monitor is not null, it is used as a progress monitor/timeout/cancel. + * If dopasses is 0, all recognition passes are run, + * 1 just pass 1, 2 passes2 and higher. + * If target_word_box is not null, special things are done to words that + * overlap the target_word_box: + * if word_config is not null, the word config file is read for just the + * target word(s), otherwise, on pass 2 and beyond ONLY the target words + * are processed (Jetsoft modification.) 
* * @param page_res page structure * @param monitor progress monitor @@ -235,350 +173,276 @@ BOOL8 Tesseract::recog_interactive(BLOCK *block, * @param dopasses 0 - all, 1 just pass 1, 2 passes 2 and higher */ -void Tesseract::recog_all_words(PAGE_RES *page_res, - volatile ETEXT_DESC *monitor, - TBOX *target_word_box, - inT16 dopasses) { - // reset page iterator - static PAGE_RES_IT page_res_it; +void Tesseract::recog_all_words(PAGE_RES* page_res, + ETEXT_DESC* monitor, + const TBOX* target_word_box, + const char* word_config, + int dopasses) { + // reset page iterator + // If we only intend to run cube - run it and return. + if (tessedit_ocr_engine_mode == OEM_CUBE_ONLY) { + run_cube(page_res); + return; + } + // Return if we do not want to run Tesseract. + if (tessedit_ocr_engine_mode != OEM_TESSERACT_ONLY && + tessedit_ocr_engine_mode != OEM_TESSERACT_CUBE_COMBINED) return; + + PAGE_RES_IT page_res_it; inT16 chars_in_word; inT16 rejects_in_word; - static CHAR_SAMPLES_LIST em_clusters; - static CHAR_SAMPLE_LIST ems_waiting; - static CHAR_SAMPLES_LIST char_clusters; - static CHAR_SAMPLE_LIST chars_waiting; inT16 blob_quality = 0; inT16 outline_errs = 0; - static inT16 doc_blob_quality = 0; - static inT16 doc_outline_errs = 0; - static inT16 doc_char_quality = 0; inT16 all_char_quality; inT16 accepted_all_char_quality; - static inT16 good_char_count = 0; - static inT16 doc_good_char_quality = 0; - int i; - - - inT32 tess_adapt_mode = 0; - static inT32 word_count; // count of words in doc inT32 word_index; // current word - static int dict_words; + int i; if (tessedit_minimal_rej_pass1) { tessedit_test_adaption.set_value (TRUE); tessedit_minimal_rejection.set_value (TRUE); } - if (tessedit_cluster_adapt_before_pass1) { - tess_adapt_mode = tessedit_tess_adaption_mode; - tessedit_tess_adaption_mode.set_value (0); - tessedit_tess_adapt_to_rejmap.set_value (TRUE); - } - - -if (dopasses==0 || dopasses==1) -{ - page_res_it.page_res=page_res; - page_res_it.restart_page(); - 
- /* Pass 1 */ - word_count = 0; - if (monitor != NULL) { - monitor->ocr_alive = TRUE; - while (page_res_it.word () != NULL) { - word_count++; - page_res_it.forward (); - } - page_res_it.restart_page (); - } - else - word_count = 1; + if (dopasses==0 || dopasses==1) { + page_res_it.page_res=page_res; + page_res_it.restart_page(); - word_index = 0; + // ****************** Pass 1 ******************* - em_clusters.clear(); - ems_waiting.clear(); - char_clusters.clear(); - chars_waiting.clear(); - dict_words = 0; - doc_blob_quality = 0; - doc_outline_errs = 0; - doc_char_quality = 0; - good_char_count = 0; - doc_good_char_quality = 0; + // Clear adaptive classifier at the beginning of the page if it is full. + // This is done only at the beginning of the page to ensure that the + // classifier is not reset at an arbitraty point while processing the page, + // which would cripple Passes 2+ if the reset happens towards the end of + // Pass 1 on a page with very difficul text. + // TODO(daria): preemptively clear the classifier if it is almost full. 
+ if (AdaptiveClassifierIsFull()) ResetAdaptiveClassifier(); - while (page_res_it.word () != NULL) { - set_global_loc_code(LOC_PASS1); - word_index++; + stats_.word_count = 0; if (monitor != NULL) { monitor->ocr_alive = TRUE; - monitor->progress = 30 + 50 * word_index / word_count; - if ((monitor->end_time != 0 && clock() > monitor->end_time) || - (monitor->cancel != NULL && (*monitor->cancel)(monitor->cancel_this, - dict_words))) - return; - } - classify_word_pass1(page_res_it.word(), page_res_it.row()->row, - page_res_it.block()->block, FALSE, NULL, NULL); - if (tessedit_dump_choices) { - word_dumper(NULL, page_res_it.row()->row, page_res_it.word()->word); - tprintf("Pass1: %s [%s]\n", - page_res_it.word()->best_choice->unichar_string().string(), - page_res_it.word()->best_choice-> - debug_string(unicharset).string()); + while (page_res_it.word() != NULL) { + stats_.word_count++; + page_res_it.forward(); + } + page_res_it.restart_page(); + } else { + stats_.word_count = 1; } - if (tessedit_test_adaption && !tessedit_minimal_rejection) { - if (!word_adaptable (page_res_it.word (), - tessedit_test_adaption_mode)) { - page_res_it.word ()->reject_map.rej_word_tess_failure(); - // FAKE PERM REJ - } else { - // Override rejection mechanisms for this word. 
- UNICHAR_ID space = unicharset.unichar_to_id(" "); - for (i = 0; i < page_res_it.word()->best_choice->length(); i++) { - if ((page_res_it.word()->best_choice->unichar_id(i) != space) && - page_res_it.word()->reject_map[i].rejected()) - page_res_it.word ()->reject_map[i].setrej_minimal_rej_accept(); + word_index = 0; + + stats_.dict_words = 0; + stats_.doc_blob_quality = 0; + stats_.doc_outline_errs = 0; + stats_.doc_char_quality = 0; + stats_.good_char_count = 0; + stats_.doc_good_char_quality = 0; + + while (page_res_it.word() != NULL) { + set_global_loc_code(LOC_PASS1); + word_index++; + if (monitor != NULL) { + monitor->ocr_alive = TRUE; + monitor->progress = 30 + 50 * word_index / stats_.word_count; + if (monitor->deadline_exceeded() || + (monitor->cancel != NULL && (*monitor->cancel)(monitor->cancel_this, + stats_.dict_words))) + return; + } + if (target_word_box && + !ProcessTargetWord(page_res_it.word()->word->bounding_box(), + *target_word_box, word_config, 1)) { + page_res_it.forward(); + continue; + } + classify_word_pass1(page_res_it.word(), page_res_it.row()->row, + page_res_it.block()->block); + if (page_res_it.word()->word->flag(W_REP_CHAR)) { + fix_rep_char(&page_res_it); + page_res_it.forward(); + continue; + } + if (tessedit_dump_choices) { + word_dumper(NULL, page_res_it.row()->row, page_res_it.word()); + tprintf("Pass1: %s [%s]\n", + page_res_it.word()->best_choice->unichar_string().string(), + page_res_it.word()->best_choice-> + debug_string(unicharset).string()); + } + + // tessedit_test_adaption enables testing of the accuracy of the + // input to the adaptive classifier. + if (tessedit_test_adaption && !tessedit_minimal_rejection) { + if (!word_adaptable (page_res_it.word(), + tessedit_test_adaption_mode)) { + page_res_it.word()->reject_map.rej_word_tess_failure(); + // FAKE PERM REJ + } else { + // Override rejection mechanisms for this word. 
+ UNICHAR_ID space = unicharset.unichar_to_id(" "); + for (i = 0; i < page_res_it.word()->best_choice->length(); i++) { + if ((page_res_it.word()->best_choice->unichar_id(i) != space) && + page_res_it.word()->reject_map[i].rejected()) + page_res_it.word()->reject_map[i].setrej_minimal_rej_accept(); + } } } - } - if ((tessedit_cluster_adapt_after_pass1 - || tessedit_cluster_adapt_after_pass3 - || tessedit_cluster_adapt_before_pass1) - && tessedit_cluster_adaption_mode != 0) { - collect_characters_for_adaption (page_res_it.word (), - &char_clusters, &chars_waiting); + // Count dict words. + if (page_res_it.word()->best_choice->permuter() == USER_DAWG_PERM) + ++(stats_.dict_words); + page_res_it.forward(); } - // Count dict words. - if (page_res_it.word()->best_choice->permuter() == USER_DAWG_PERM) - ++dict_words; - page_res_it.forward (); } - if (tessedit_cluster_adapt_before_pass1) - tessedit_tess_adaption_mode.set_value (tess_adapt_mode); - - page_res_it.restart_page (); - while ((tessedit_cluster_adapt_after_pass1 - || tessedit_cluster_adapt_before_pass1) - && page_res_it.word () != NULL) { - if (monitor != NULL) - monitor->ocr_alive = TRUE; - if (tessedit_cluster_adapt_after_pass1) - adapt_to_good_samples (page_res_it.word (), - &char_clusters, &chars_waiting); - else - classify_word_pass1(page_res_it.word(), - page_res_it.row()->row, - page_res_it.block()->block, - TRUE, &char_clusters, &chars_waiting); - - page_res_it.forward (); - } - - // - - - } + if (dopasses == 1) return; -if (dopasses==1) return; - - /* Pass 2 */ - page_res_it.restart_page (); + // ****************** Pass 2 ******************* + page_res_it.restart_page(); word_index = 0; - while (!tessedit_test_adaption && page_res_it.word () != NULL) { + while (!tessedit_test_adaption && page_res_it.word() != NULL) { set_global_loc_code(LOC_PASS2); word_index++; if (monitor != NULL) { monitor->ocr_alive = TRUE; - monitor->progress = 80 + 10 * word_index / word_count; - if ((monitor->end_time != 0 && 
clock() > monitor->end_time) || + monitor->progress = 80 + 10 * word_index / stats_.word_count; + if (monitor->deadline_exceeded() || (monitor->cancel != NULL && (*monitor->cancel)(monitor->cancel_this, - dict_words))) + stats_.dict_words))) return; } -//changed by jetsoft -//specific to its needs to extract one word when need - - if (target_word_box) - { - - TBOX current_word_box=page_res_it.word ()->word->bounding_box(); - FCOORD center_pt((current_word_box.right()+current_word_box.left())/2,(current_word_box.bottom()+current_word_box.top())/2); - if (!target_word_box->contains(center_pt)) - { - page_res_it.forward (); - continue; - } + // changed by jetsoft + // specific to its needs to extract one word when need + if (target_word_box && + !ProcessTargetWord(page_res_it.word()->word->bounding_box(), + *target_word_box, word_config, 2)) { + page_res_it.forward(); + continue; } -//end jetsoft + // end jetsoft classify_word_pass2(page_res_it.word(), page_res_it.block()->block, page_res_it.row()->row); + if (page_res_it.word()->word->flag(W_REP_CHAR) && + !page_res_it.word()->done) { + fix_rep_char(&page_res_it); + page_res_it.forward(); + continue; + } if (tessedit_dump_choices) { - word_dumper(NULL, page_res_it.row()->row, page_res_it.word()->word); + word_dumper(NULL, page_res_it.row()->row, page_res_it.word()); tprintf("Pass2: %s [%s]\n", page_res_it.word()->best_choice->unichar_string().string(), page_res_it.word()->best_choice-> debug_string(unicharset).string()); } - - if (tessedit_em_adaption_mode > 0) - collect_ems_for_adaption (page_res_it.word (), - &em_clusters, &ems_waiting); - - if (tessedit_cluster_adapt_after_pass2 - && tessedit_cluster_adaption_mode != 0) - collect_characters_for_adaption (page_res_it.word (), - &char_clusters, &chars_waiting); - page_res_it.forward (); + page_res_it.forward(); } - /* Another pass */ + // ****************** Pass 3 ******************* + // Fix fuzzy spaces. 
set_global_loc_code(LOC_FUZZY_SPACE); if (!tessedit_test_adaption && tessedit_fix_fuzzy_spaces - && !tessedit_word_for_word) - fix_fuzzy_spaces(monitor, word_count, page_res); - - if (!tessedit_test_adaption && tessedit_em_adaption_mode != 0) - // Initially ems only - print_em_stats(&em_clusters, &ems_waiting); + && !tessedit_word_for_word) + fix_fuzzy_spaces(monitor, stats_.word_count, page_res); - /* Pass 3 - used for checking confusion sets */ - page_res_it.restart_page (); + // ****************** Pass 4 ******************* + // Gather statistics on rejects. + page_res_it.restart_page(); word_index = 0; - while (!tessedit_test_adaption && page_res_it.word () != NULL) { + while (!tessedit_test_adaption && page_res_it.word() != NULL) { set_global_loc_code(LOC_MM_ADAPT); word_index++; if (monitor != NULL) { monitor->ocr_alive = TRUE; - monitor->progress = 95 + 5 * word_index / word_count; - } - check_debug_pt (page_res_it.word (), 70); - /* Use good matches to sort out confusions */ - - -//changed by jetsoft -//specific to its needs to extract one word when need - - if (target_word_box) - { - TBOX current_word_box=page_res_it.word ()->word->bounding_box(); - FCOORD center_pt((current_word_box.right()+current_word_box.left())/2,(current_word_box.bottom()+current_word_box.top())/2); - if (!target_word_box->contains(center_pt)) - { - page_res_it.forward (); - continue; - } + monitor->progress = 95 + 5 * word_index / stats_.word_count; } -// end jetsoft - - if (tessedit_em_adaption_mode != 0) - adapt_to_good_ems (page_res_it.word (), &em_clusters, &ems_waiting); - - if (tessedit_cluster_adapt_after_pass2 - && tessedit_cluster_adaption_mode != 0) - adapt_to_good_samples (page_res_it.word (), - &char_clusters, &chars_waiting); - - UNICHAR_ID dot = unicharset.unichar_to_id("."); - if (tessedit_reject_fullstops && - page_res_it.word()->best_choice->contains_unichar_id(dot)) { - reject_all_fullstops (page_res_it.word ()); - } else if (tessedit_reject_suspect_fullstops && - 
page_res_it.word()->best_choice->contains_unichar_id(dot)) { - reject_suspect_fullstops (page_res_it.word ()); + check_debug_pt(page_res_it.word(), 70); + + // changed by jetsoft + // specific to its needs to extract one word when need + if (target_word_box && + !ProcessTargetWord(page_res_it.word()->word->bounding_box(), + *target_word_box, word_config, 4)) { + page_res_it.forward(); + continue; } - - page_res_it.rej_stat_word (); - chars_in_word = page_res_it.word ()->reject_map.length (); - rejects_in_word = page_res_it.word ()->reject_map.reject_count (); - - blob_quality = word_blob_quality (page_res_it.word (), - page_res_it.row ()->row); - doc_blob_quality += blob_quality; - outline_errs = word_outline_errs (page_res_it.word ()); - doc_outline_errs += outline_errs; - word_char_quality (page_res_it.word (), - page_res_it.row ()->row, - &all_char_quality, &accepted_all_char_quality); - doc_char_quality += all_char_quality; - uinT8 permuter_type = page_res_it.word ()->best_choice->permuter (); + // end jetsoft + + page_res_it.rej_stat_word(); + chars_in_word = page_res_it.word()->reject_map.length(); + rejects_in_word = page_res_it.word()->reject_map.reject_count(); + + blob_quality = word_blob_quality(page_res_it.word(), + page_res_it.row()->row); + stats_.doc_blob_quality += blob_quality; + outline_errs = word_outline_errs(page_res_it.word()); + stats_.doc_outline_errs += outline_errs; + word_char_quality(page_res_it.word(), + page_res_it.row()->row, + &all_char_quality, &accepted_all_char_quality); + stats_.doc_char_quality += all_char_quality; + uinT8 permuter_type = page_res_it.word()->best_choice->permuter(); if ((permuter_type == SYSTEM_DAWG_PERM) || - (permuter_type == FREQ_DAWG_PERM) || - (permuter_type == USER_DAWG_PERM)) { - good_char_count += chars_in_word - rejects_in_word; - doc_good_char_quality += accepted_all_char_quality; + (permuter_type == FREQ_DAWG_PERM) || + (permuter_type == USER_DAWG_PERM)) { + stats_.good_char_count += chars_in_word - 
rejects_in_word; + stats_.doc_good_char_quality += accepted_all_char_quality; } - check_debug_pt (page_res_it.word (), 80); + check_debug_pt(page_res_it.word(), 80); if (tessedit_reject_bad_qual_wds && - (blob_quality == 0) && (outline_errs >= chars_in_word)) - page_res_it.word ()->reject_map.rej_word_bad_quality (); - check_debug_pt (page_res_it.word (), 90); - page_res_it.forward (); + (blob_quality == 0) && (outline_errs >= chars_in_word)) + page_res_it.word()->reject_map.rej_word_bad_quality(); + check_debug_pt(page_res_it.word(), 90); + page_res_it.forward(); } - page_res_it.restart_page (); - while (!tessedit_test_adaption - && tessedit_cluster_adapt_after_pass3 && page_res_it.word () != NULL) { - if (monitor != NULL) - monitor->ocr_alive = TRUE; - -//changed by jetsoft -//specific to its needs to extract one word when need - - if (target_word_box) - { - TBOX current_word_box=page_res_it.word ()->word->bounding_box(); - FCOORD center_pt((current_word_box.right()+current_word_box.left())/2,(current_word_box.bottom()+current_word_box.top())/2); - if (!target_word_box->contains(center_pt)) - { - page_res_it.forward (); - continue; - } - } - -//end jetsoft - if (tessedit_cluster_adaption_mode != 0) - adapt_to_good_samples (page_res_it.word (), - &char_clusters, &chars_waiting); - page_res_it.forward (); + // ****************** Pass 5 ******************* + // If cube is loaded and its combiner is present, run it. 
+ if (tessedit_ocr_engine_mode == OEM_TESSERACT_CUBE_COMBINED) { + run_cube(page_res); } - #ifndef SECURE_NAMES if (tessedit_debug_quality_metrics) { tprintf - ("QUALITY: num_chs= %d num_rejs= %d %5.3f blob_qual= %d %5.3f outline_errs= %d %5.3f char_qual= %d %5.3f good_ch_qual= %d %5.3f\n", + ("QUALITY: num_chs= %d num_rejs= %d %5.3f blob_qual= %d %5.3f" + " outline_errs= %d %5.3f char_qual= %d %5.3f good_ch_qual= %d %5.3f\n", page_res->char_count, page_res->rej_count, - page_res->rej_count / (float) page_res->char_count, doc_blob_quality, - doc_blob_quality / (float) page_res->char_count, doc_outline_errs, - doc_outline_errs / (float) page_res->char_count, doc_char_quality, - doc_char_quality / (float) page_res->char_count, - doc_good_char_quality, - good_char_count > - 0 ? doc_good_char_quality / (float) good_char_count : 0.0); + page_res->rej_count / static_cast(page_res->char_count), + stats_.doc_blob_quality, + stats_.doc_blob_quality / static_cast(page_res->char_count), + stats_.doc_outline_errs, + stats_.doc_outline_errs / static_cast(page_res->char_count), + stats_.doc_char_quality, + stats_.doc_char_quality / static_cast(page_res->char_count), + stats_.doc_good_char_quality, + (stats_.good_char_count > 0) ? 
+ (stats_.doc_good_char_quality / + static_cast(stats_.good_char_count)) : 0.0); } - #endif BOOL8 good_quality_doc = - (page_res->rej_count / (float) page_res->char_count <= quality_rej_pc) - && - (doc_blob_quality / (float) page_res->char_count >= quality_blob_pc) && - (doc_outline_errs / (float) page_res->char_count <= quality_outline_pc) && - (doc_char_quality / (float) page_res->char_count >= quality_char_pc); - - /* Do whole document or whole block rejection pass*/ - + ((page_res->rej_count / static_cast(page_res->char_count)) <= + quality_rej_pc) && + (stats_.doc_blob_quality / static_cast(page_res->char_count) >= + quality_blob_pc) && + (stats_.doc_outline_errs / static_cast(page_res->char_count) <= + quality_outline_pc) && + (stats_.doc_char_quality / static_cast(page_res->char_count) >= + quality_char_pc); + + // ****************** Pass 6 ******************* + // Do whole document or whole block rejection pass if (!tessedit_test_adaption) { set_global_loc_code(LOC_DOC_BLK_REJ); quality_based_rejection(page_res_it, good_quality_doc); } + + // ****************** Pass 7 ******************* font_recognition_pass(page_res_it); - /* Write results pass */ + // Write results pass. set_global_loc_code(LOC_WRITE_RESULTS); // This is now redundant, but retained commented so show how to obtain // bounding boxes and style information. @@ -586,8 +450,11 @@ if (dopasses==1) return; // changed by jetsoft // needed for dll to output memory structure if ((dopasses == 0 || dopasses == 2) && (monitor || tessedit_write_unlv)) - output_pass(page_res_it, ocr_char_space() > 0, target_word_box); + output_pass(page_res_it, target_word_box); // end jetsoft + PageSegMode pageseg_mode = static_cast( + static_cast(tessedit_pageseg_mode)); + textord_.CleanupSingleRowResult(pageseg_mode, page_res); } @@ -597,114 +464,49 @@ if (dopasses==1) return; * Baseline normalize the word and pass it to Tess. 
*/ -void Tesseract::classify_word_pass1( //recog one word - WERD_RES *word, //word to do +void Tesseract::classify_word_pass1(WERD_RES *word, // word to do ROW *row, - BLOCK* block, - BOOL8 cluster_adapt, - CHAR_SAMPLES_LIST *char_clusters, - CHAR_SAMPLE_LIST *chars_waiting) { - WERD *bln_word; //baseline norm copy - //detailed results - BLOB_CHOICE_LIST_CLIST local_blob_choices; - BLOB_CHOICE_LIST_CLIST *blob_choices; + BLOCK* block) { + BLOB_CHOICE_LIST_CLIST *blob_choices = new BLOB_CHOICE_LIST_CLIST(); BOOL8 adapt_ok; const char *rejmap; inT16 index; STRING mapstr = ""; - char *match_string; - char word_string[1024]; - - if (save_best_choices) - blob_choices = new BLOB_CHOICE_LIST_CLIST(); - else - blob_choices = &local_blob_choices; - - if (matcher_fp != NULL) { - fgets (word_string, 1023, correct_fp); - if ((match_string = strchr (word_string, '\r')) != NULL) - *match_string = '\0'; - if ((match_string = strchr (word_string, '\n')) != NULL) - *match_string = '\0'; - if (word_string[0] != '\0') { - word->word->set_text (word_string); - word_answer = (char *) word->word->text (); - } - else - word_answer = NULL; - } - - check_debug_pt (word, 0); - bln_word = make_bln_copy(word->word, row, block, word->x_height, - &word->denorm); - - word->best_choice = tess_segment_pass1 (bln_word, &word->denorm, - &Tesseract::tess_default_matcher, - word->raw_choice, blob_choices, - word->outword); - /* - Test for TESS screw up on word. Recog_word has already ensured that the - choice list, outword blob lists and best_choice string are the same - length. A TESS screw up is indicated by a blank filled or 0 length string. - */ - if ((word->best_choice->length() == 0) || - (strspn (word->best_choice->unichar_string().string(), " ") == - word->best_choice->length())) { - word->done = FALSE; // Try again on pass2 - adaption may help. 
- word->tess_failed = TRUE; - word->reject_map.initialise(word->best_choice->length()); - word->reject_map.rej_word_tess_failure (); - } else { - word->tess_failed = FALSE; - if ((word->best_choice->length() != - word->outword->blob_list()->length()) || - (word->best_choice->length() != blob_choices->length())) { - tprintf - ("ASSERT FAIL String:\"%s\"; Strlen=%d; #Blobs=%d; #Choices=%d\n", - word->best_choice->debug_string(unicharset).string(), - word->best_choice->length(), - word->outword->blob_list()->length(), - blob_choices->length()); - } - ASSERT_HOST(word->best_choice->length() == - word->outword->blob_list()->length()); - ASSERT_HOST(word->best_choice->length() == blob_choices->length()); + check_debug_pt(word, 0); + if (word->SetupForRecognition(unicharset, classify_bln_numeric_mode, + row, block)) + tess_segment_pass1(word, blob_choices); + if (!word->tess_failed) { /* The adaption step used to be here. It has been moved to after make_reject_map so that we know whether the word will be accepted in the first pass or not. This move will PREVENT adaption to words containing double quotes because the word will not be identical to what tess thinks its best choice is. (See CurrentBestChoiceIs in - danj/microfeatures/stopper.c which is used by AdaptableWord in - danj/microfeatures/adaptmatch.c) + stopper.cpp which is used by AdaptableWord in + adaptmatch.cpp) */ - if (word->word->flag(W_REP_CHAR)) { - fix_rep_char(word); - } else { + if (!word->word->flag(W_REP_CHAR)) { // TODO(daria) delete these hacks when replaced by more generic code. // Convert '' (double single) to " (single double). - fix_quotes(word->best_choice, word->outword, blob_choices); + fix_quotes(word, blob_choices); if (tessedit_fix_hyphens) // turn -- to - - fix_hyphens(word->best_choice, word->outword, blob_choices); - record_certainty(word->best_choice->certainty(), 1); - // accounting. 
+ fix_hyphens(word, blob_choices); word->tess_accepted = tess_acceptable_word(word->best_choice, word->raw_choice); - word->tess_would_adapt = tess_adaptable_word(word->outword, - word->best_choice, - word->raw_choice); + word->tess_would_adapt = word->best_choice && word->raw_choice && + AdaptableWord(word->rebuild_word, + *word->best_choice, + *word->raw_choice); // Also sets word->done flag make_reject_map(word, blob_choices, row, 1); adapt_ok = word_adaptable(word, tessedit_tess_adaption_mode); - if (cluster_adapt) - adapt_to_good_samples(word, char_clusters, chars_waiting); - if (adapt_ok || tessedit_tess_adapt_to_rejmap) { if (!tessedit_tess_adapt_to_rejmap) { rejmap = NULL; @@ -720,32 +522,115 @@ void Tesseract::classify_word_pass1( //recog one word } rejmap = mapstr.string(); } - - // adapt to it. - tess_adapter(word->outword, &word->denorm, - *word->best_choice, - *word->raw_choice, rejmap); + // Send word to adaptive classifier for training. + word->BestChoiceToCorrectText(unicharset); + LearnWord(NULL, rejmap, word); } if (tessedit_enable_doc_dict) tess_add_doc_word(word->best_choice); - set_word_fonts(word, blob_choices); } } -#if 0 - if (tessedit_print_text) { - write_cooked_text(bln_word, word->best_choice->string(), - word->done, FALSE, stdout); - } -#endif - delete bln_word; // Save best choices in the WERD_CHOICE if needed - if (blob_choices != &local_blob_choices) { - word->best_choice->set_blob_choices(blob_choices); + word->best_choice->set_blob_choices(blob_choices); +} + +// Helper to switch between the original and new xht word or to discard +// the new xht word, according to accept_new_word. +static void SwitchWordOrDiscard(bool accept_new_word, WERD_RES* word, + WERD_RES* new_word) { + if (accept_new_word) { + // The new_word is deemed superior so put the final results in the real + // word and destroy the old results. 
+ word->denorm = new_word->denorm; + delete word->chopped_word; + word->chopped_word = new_word->chopped_word; + new_word->chopped_word = NULL; + delete word->rebuild_word; + word->rebuild_word = new_word->rebuild_word; + new_word->rebuild_word = NULL; + delete word->box_word; + word->box_word = new_word->box_word; + new_word->box_word = NULL; + free_seam_list(word->seam_array); + word->seam_array = new_word->seam_array; + new_word->seam_array = NULL; + word->best_state.move(&new_word->best_state); + word->correct_text.move(&new_word->correct_text); + delete word->best_choice; + word->best_choice = new_word->best_choice; + new_word->best_choice = NULL; + delete word->raw_choice; + word->raw_choice = new_word->raw_choice; + new_word->raw_choice = NULL; + word->reject_map = new_word->reject_map; + word->done = new_word->done; } else { - blob_choices->deep_clear(); + // The new_word is no better, so destroy it and cleanup. + new_word->ClearResults(); + } +} + +// Helper to report the result of the xheight fix. +void Tesseract::ReportXhtFixResult(bool accept_new_word, float new_x_ht, + WERD_RES* word, WERD_RES* new_word) { + tprintf("New XHT Match:%s = %s ", + word->best_choice->unichar_string().string(), + word->best_choice->debug_string(unicharset).string()); + word->reject_map.print(debug_fp); + tprintf(" -> %s = %s ", + new_word->best_choice->unichar_string().string(), + new_word->best_choice->debug_string(unicharset).string()); + new_word->reject_map.print(debug_fp); + tprintf(" %s->%s %s %s\n", + word->guessed_x_ht ? "GUESS" : "CERT", + new_word->guessed_x_ht ? "GUESS" : "CERT", + new_x_ht > 0.1 ? "STILL DOUBT" : "OK", + accept_new_word ? "ACCEPTED" : ""); +} + +// Run the x-height fix-up, based on min/max top/bottom information in +// unicharset. +// Returns true if the word was changed. +// See the comment in fixxht.cpp for a description of the overall process. 
+bool Tesseract::TrainedXheightFix(WERD_RES *word, BLOCK* block, ROW *row) { + bool accept_new_x_ht = false; + int original_misfits = CountMisfitTops(word); + if (original_misfits == 0) + return false; + float new_x_ht = ComputeCompatibleXheight(word); + if (new_x_ht > 0.0f) { + WERD_RES new_x_ht_word(word->word); + new_x_ht_word.x_height = new_x_ht; + new_x_ht_word.caps_height = 0.0; + match_word_pass2(&new_x_ht_word, row, block); + if (!new_x_ht_word.tess_failed) { + int new_misfits = CountMisfitTops(&new_x_ht_word); + if (debug_x_ht_level >= 1) { + tprintf("Old misfits=%d with x-height %f, new=%d with x-height %f\n", + original_misfits, word->x_height, + new_misfits, new_x_ht); + tprintf("Old rating= %f, certainty=%f, new=%f, %f\n", + word->best_choice->rating(), word->best_choice->certainty(), + new_x_ht_word.best_choice->rating(), + new_x_ht_word.best_choice->certainty()); + } + // The misfits must improve and either the rating or certainty. + accept_new_x_ht = new_misfits < original_misfits && + (new_x_ht_word.best_choice->certainty() > + word->best_choice->certainty() || + new_x_ht_word.best_choice->rating() < + word->best_choice->rating()); + if (debug_x_ht_level >= 1) { + ReportXhtFixResult(accept_new_x_ht, new_x_ht, word, &new_x_ht_word); + } + } + SwitchWordOrDiscard(accept_new_x_ht, word, &new_x_ht_word); + if (accept_new_x_ht) + return true; } + return false; } /** @@ -755,189 +640,44 @@ void Tesseract::classify_word_pass1( //recog one word */ void Tesseract::classify_word_pass2(WERD_RES *word, BLOCK* block, ROW *row) { - BOOL8 done_this_pass = FALSE; - WERD_RES new_x_ht_word(word->word); - float new_x_ht = 0.0; - inT16 old_xht_reject_count; - inT16 new_xht_reject_count; - inT16 old_xht_accept_count; - inT16 new_xht_accept_count; - BOOL8 accept_new_x_ht = FALSE; - inT16 old_chs_in_wd; - inT16 new_chs_in_wd; - inT16 old_word_quality; - inT16 new_word_quality; - inT16 dummy; - + bool done_this_pass = false; set_global_subloc_code(SUBLOC_NORM); 
check_debug_pt(word, 30); - if (!word->done || - tessedit_training_tess || - tessedit_training_wiseowl) { + if (!word->done || tessedit_training_tess) { word->caps_height = 0.0; if (word->x_height == 0.0f) word->x_height = row->x_height(); - if (word->outword != NULL) { - delete word->outword; // get rid of junk - delete word->best_choice; - delete word->raw_choice; - } - match_word_pass2 (word, row, block, word->x_height); + match_word_pass2(word, row, block); done_this_pass = TRUE; - check_debug_pt (word, 40); + check_debug_pt(word, 40); } - if (!word->tess_failed && !word->word->flag (W_REP_CHAR)) { - set_global_subloc_code(SUBLOC_FIX_XHT); - if ((tessedit_xht_fiddles_on_done_wds || !word->done) && - (tessedit_xht_fiddles_on_no_rej_wds || - (word->reject_map.reject_count () > 0))) { - if ((x_ht_check_word_occ >= 2) && word_occ_first) - check_block_occ(word); - - if (tessedit_redo_xheight) - re_estimate_x_ht(word, &new_x_ht); - - if (((x_ht_check_word_occ >= 2) && !word_occ_first) || - ((x_ht_check_word_occ >= 1) && (new_x_ht > 0))) - check_block_occ(word); - } - if (new_x_ht > 0) { - old_chs_in_wd = word->reject_map.length (); - - /* Re-estimated x_ht error suggests a rematch is worthwhile. 
*/ - new_x_ht_word.x_height = new_x_ht; - new_x_ht_word.caps_height = 0.0; - match_word_pass2(&new_x_ht_word, row, block, new_x_ht_word.x_height); - if (!new_x_ht_word.tess_failed) { - if ((x_ht_check_word_occ >= 1) && word_occ_first) - check_block_occ(&new_x_ht_word); - - re_estimate_x_ht(&new_x_ht_word, &new_x_ht); - - if ((x_ht_check_word_occ >= 1) && !word_occ_first) - check_block_occ(&new_x_ht_word); - - old_xht_reject_count = word->reject_map.reject_count (); - old_xht_accept_count = old_chs_in_wd - old_xht_reject_count; - new_xht_reject_count = new_x_ht_word.reject_map.reject_count (); - new_chs_in_wd = new_x_ht_word.reject_map.length (); - new_xht_accept_count = new_chs_in_wd - new_xht_reject_count; - accept_new_x_ht = - ((new_xht_accept_count > old_xht_accept_count) || - ((new_xht_accept_count == old_xht_accept_count) && - (new_xht_accept_count > 0))) && - (!new_x_ht_word.guessed_x_ht || - !new_x_ht_word.guessed_caps_ht); - - if (accept_new_x_ht && x_ht_quality_check) { - word_char_quality(word, row, &old_word_quality, &dummy); - word_char_quality(&new_x_ht_word, row, &new_word_quality, &dummy); - if (old_word_quality > new_word_quality) - accept_new_x_ht = FALSE; - } - - if (accept_new_x_ht && (x_ht_stringency > 0)) { - accept_new_x_ht = - (count_alphanums (&new_x_ht_word) > x_ht_stringency); - if (!accept_new_x_ht && rej_use_xht) { - if (debug_x_ht_level >= 1) - tprintf - ("Failed stringency test so reject original word\n"); - word->reject_map.rej_word_xht_fixup (); - } - } - - #ifndef SECURE_NAMES - if (debug_x_ht_level >= 1) { - tprintf ("New XHT Match:: %s ", - word->best_choice->debug_string(unicharset).string()); - word->reject_map.print (debug_fp); - tprintf (" -> %s ", - new_x_ht_word.best_choice->debug_string( - unicharset).string()); - new_x_ht_word.reject_map.print (debug_fp); - tprintf (" %s->%s %s %s\n", - word->guessed_x_ht ? "GUESS" : "CERT", - new_x_ht_word.guessed_x_ht ? "GUESS" : "CERT", - new_x_ht > 0.1 ? 
"STILL DOUBT" : "OK", - accept_new_x_ht ? "ACCEPTED" : ""); - } - #endif - } - if (accept_new_x_ht) { - /* - The new x_ht is deemed superior so put the final results in the real - word and destroy the old results - */ - delete word->outword; //get rid of junk - word->outword = new_x_ht_word.outword; - word->denorm = new_x_ht_word.denorm; - delete word->best_choice; - word->best_choice = new_x_ht_word.best_choice; - delete word->raw_choice; - word->raw_choice = new_x_ht_word.raw_choice; - word->reject_map = new_x_ht_word.reject_map; - word->done = new_x_ht_word.done; - done_this_pass = TRUE; - } - else { - /* - The new x_ht is no better, so destroy the copy word and put any - uncertain x or cap ht estimate back to default. (I.e. dont blame - me if its bad!) Conditionally, use any ammended block occ chars. - */ - //get rid of junk - delete new_x_ht_word.outword; - delete new_x_ht_word.best_choice; - delete new_x_ht_word.raw_choice; - } - //to keep new destructor happy - new_x_ht_word.outword = NULL; - //to keep new destructor happy - new_x_ht_word.best_choice = NULL; - //to keep new destructor happy - new_x_ht_word.raw_choice = NULL; - - if (rej_mostly_reject_mode == 2) { - reject_mostly_rejects(word); - tprintf("Rejecting mostly rejects on %s ", - word->best_choice->debug_string(unicharset).string()); - } + if (!word->tess_failed && !word->word->flag(W_REP_CHAR)) { + bool accept_new_xht = false; + if (unicharset.top_bottom_useful() && unicharset.script_has_xheight()) { + // Use the tops and bottoms since they are available. 
+ accept_new_xht = TrainedXheightFix(word, block, row); } + if (accept_new_xht) + done_this_pass = true; set_global_subloc_code(SUBLOC_NORM); - - if (done_this_pass && !word->done && tessedit_save_stats) { - STRING word_str; - word->best_choice->string_and_lengths(unicharset, &word_str, NULL); - SaveBadWord(word_str.string(), word->best_choice->certainty()); - } - record_certainty (word->best_choice->certainty(), 2); - //accounting } #ifndef GRAPHICS_DISABLED if (tessedit_draw_outwords) { if (fx_win == NULL) create_fx_win(); clear_fx_win(); - word->outword->plot (fx_win); - TBOX wbox = word->outword->bounding_box(); + word->rebuild_word->plot(fx_win); + TBOX wbox = word->rebuild_word->bounding_box(); fx_win->ZoomToRectangle(wbox.left(), wbox.top(), wbox.right(), wbox.bottom()); - //make_picture_current(fx_win); ScrollView::Update(); } #endif set_global_subloc_code(SUBLOC_NORM); -#if 0 - if (tessedit_print_text) { - write_cooked_text (word->outword, word->best_choice->string (), - word->done, done_this_pass, stdout); - } -#endif - check_debug_pt (word, 50); + check_debug_pt(word, 50); } @@ -947,97 +687,30 @@ void Tesseract::classify_word_pass2(WERD_RES *word, BLOCK* block, ROW *row) { * Baseline normalize the word and pass it to Tess. 
*/ -void Tesseract::match_word_pass2( //recog one word - WERD_RES *word, //word to do +void Tesseract::match_word_pass2(WERD_RES *word, //word to do ROW *row, - BLOCK* block, - float x_height) { - WERD *bln_word; //baseline norm copy - //detailed results - BLOB_CHOICE_LIST_CLIST local_blob_choices; - BLOB_CHOICE_LIST_CLIST *blob_choices; - - if (save_best_choices) - blob_choices = new BLOB_CHOICE_LIST_CLIST(); - else - blob_choices = &local_blob_choices; + BLOCK* block) { + BLOB_CHOICE_LIST_CLIST *blob_choices = new BLOB_CHOICE_LIST_CLIST(); - set_global_subsubloc_code(SUBSUBLOC_OTHER); - if (matcher_fp != NULL) { - word_answer = (char *) word->word->text (); - if (word_answer != NULL && word_answer[0] == '\0') - word_answer = NULL; - } - bln_word = make_bln_copy (word->word, row, block, x_height, &word->denorm); - set_global_subsubloc_code(SUBSUBLOC_TESS); - if (tessedit_training_tess) - word->best_choice = correct_segment_pass2 (bln_word, - &word->denorm, - &Tesseract::tess_default_matcher, - tess_training_tester, - word->raw_choice, - blob_choices, word->outword); - else { - word->best_choice = tess_segment_pass2 (bln_word, &word->denorm, - &Tesseract::tess_default_matcher, - word->raw_choice, blob_choices, - word->outword); - } - set_global_subsubloc_code(SUBSUBLOC_OTHER); - /* - Test for TESS screw up on word. Recog_word has already ensured that the - choice list, outword blob lists and best_choice string are the same - length. A TESS screw up is indicated by a blank filled or 0 length string. 
- */ - if ((word->best_choice->length() == 0) || - (strspn (word->best_choice->unichar_string().string (), " ") == - word->best_choice->length())) { - word->tess_failed = TRUE; - word->reject_map.initialise (word->best_choice->length()); - word->reject_map.rej_word_tess_failure (); - // tprintf("Empty word produced\n"); - } - else { - if ((word->best_choice->length() != - word->outword->blob_list()->length ()) || - (word->best_choice->length() != blob_choices->length())) { - tprintf - ("ASSERT FAIL String:\"%s\"; Strlen=%d; #Blobs=%d; #Choices=%d\n", - word->best_choice->debug_string(unicharset).string(), - word->best_choice->length(), - word->outword->blob_list()->length(), blob_choices->length()); - } - ASSERT_HOST (word->best_choice->length() == - word->outword->blob_list()->length()); - ASSERT_HOST (word->best_choice->length() == blob_choices->length()); + if (word->SetupForRecognition(unicharset, classify_bln_numeric_mode, + row, block)) + tess_segment_pass2(word, blob_choices); - word->tess_failed = FALSE; - if (word->word->flag (W_REP_CHAR)) { - fix_rep_char(word); - } - else { - fix_quotes (word->best_choice, - word->outword, blob_choices); + if (!word->tess_failed) { + if (!word->word->flag (W_REP_CHAR)) { + fix_quotes(word, blob_choices); if (tessedit_fix_hyphens) - fix_hyphens (word->best_choice, - word->outword, blob_choices); + fix_hyphens(word, blob_choices); /* Dont trust fix_quotes! 
- though I think I've fixed the bug */ - if ((word->best_choice->length() != - word->outword->blob_list()->length()) || - (word->best_choice->length() != blob_choices->length())) { - #ifndef SECURE_NAMES - tprintf - ("POST FIX_QUOTES FAIL String:\"%s\"; Strlen=%d; #Blobs=%d; #Choices=%d\n", - word->best_choice->debug_string(unicharset).string(), - word->best_choice->length(), - word->outword->blob_list()->length(), blob_choices->length()); - #endif + if (word->best_choice->length() != word->box_word->length() || + word->best_choice->length() != blob_choices->length()) { + tprintf("POST FIX_QUOTES FAIL String:\"%s\"; Strlen=%d;" + " #Blobs=%d; #Choices=%d\n", + word->best_choice->debug_string(unicharset).string(), + word->best_choice->length(), + word->box_word->length(), blob_choices->length()); } - ASSERT_HOST (word->best_choice->length() == - word->outword->blob_list()->length()); - ASSERT_HOST (word->best_choice->length() == blob_choices->length()); - word->tess_accepted = tess_acceptable_word(word->best_choice, word->raw_choice); @@ -1046,69 +719,148 @@ void Tesseract::match_word_pass2( //recog one word } // Save best choices in the WERD_CHOICE if needed - if (blob_choices != &local_blob_choices) - word->best_choice->set_blob_choices(blob_choices); - else - blob_choices->deep_clear(); + word->best_choice->set_blob_choices(blob_choices); - delete bln_word; assert (word->raw_choice != NULL); } -} // namespace tesseract +// Helper to find the BLOB_CHOICE in the bc_list that matches the given +// unichar_id, or NULL if there is no match. +static BLOB_CHOICE* FindMatchingChoice(UNICHAR_ID char_id, + BLOB_CHOICE_LIST* bc_list) { + // Find the corresponding best BLOB_CHOICE. 
+ BLOB_CHOICE_IT choice_it(bc_list); + for (choice_it.mark_cycle_pt(); !choice_it.cycled_list(); + choice_it.forward()) { + BLOB_CHOICE* choice = choice_it.data(); + if (choice->unichar_id() == char_id) { + return choice; + } + } + return NULL; +} + +// Helper to return the best rated BLOB_CHOICE in the whole word that matches +// the given char_id, or NULL if none can be found. +static BLOB_CHOICE* FindBestMatchingChoice(UNICHAR_ID char_id, + WERD_RES* word_res) { + // Find the corresponding best BLOB_CHOICE from any position in the word_res. + BLOB_CHOICE* best_choice = NULL; + BLOB_CHOICE_LIST_C_IT bc_it(word_res->best_choice->blob_choices()); + for (bc_it.mark_cycle_pt(); !bc_it.cycled_list(); bc_it.forward()) { + BLOB_CHOICE* choice = FindMatchingChoice(char_id, bc_it.data()); + if (choice != NULL) { + if (best_choice == NULL || choice->rating() < best_choice->rating()) + best_choice = choice; + } + } + return best_choice; +} + +// Helper to insert blob_choice in each location in the leader word if there is +// no matching BLOB_CHOICE there already, and correct any incorrect results +// in the best_choice. +static void CorrectRepcharChoices(BLOB_CHOICE* blob_choice, + WERD_RES* word_res) { + WERD_CHOICE* word = word_res->best_choice; + BLOB_CHOICE_LIST_C_IT bc_it(word->blob_choices()); + for (bc_it.mark_cycle_pt(); !bc_it.cycled_list(); bc_it.forward()) { + BLOB_CHOICE* choice = FindMatchingChoice(blob_choice->unichar_id(), + bc_it.data()); + if (choice == NULL) { + BLOB_CHOICE_IT choice_it(bc_it.data()); + choice_it.add_before_stay_put(new BLOB_CHOICE(*blob_choice)); + } + } + // Correct any incorrect results in word. + for (int i = 0; i < word->length(); ++i) { + if (word->unichar_id(i) != blob_choice->unichar_id()) + word->set_unichar_id(blob_choice->unichar_id(), i); + } +} -namespace tesseract { /** * fix_rep_char() - * The word is a repeated char. Find the repeated char character. Make a reject - * string which rejects any char other than the voted char. 
Set the word to done - * to stop rematching it. - * + * The word is a repeated char. (Leader.) Find the repeated char character. + * Create the appropriate single-word or multi-word sequence according to + * the size of spaces in between blobs, and correct the classifications + * where some of the characters disagree with the majority. */ -void Tesseract::fix_rep_char(WERD_RES *word_res) { - struct REP_CH { - UNICHAR_ID unichar_id; - int count; - }; +void Tesseract::fix_rep_char(PAGE_RES_IT* page_res_it) { + WERD_RES *word_res = page_res_it->word(); const WERD_CHOICE &word = *(word_res->best_choice); - REP_CH *rep_ch; // array of char counts - int rep_ch_count = 0; // how many unique chs - int i, j; - int total = 0; - int max = 0; - UNICHAR_ID maxch_id = INVALID_UNICHAR_ID; // most common char + + // Find the frequency of each unique character in the word. UNICHAR_ID space = unicharset.unichar_to_id(" "); + SortHelper rep_ch(word.length()); + for (int i = 0; i < word.length(); ++i) { + if (word.unichar_id(i) != space) + rep_ch.Add(word.unichar_id(i), 1); + } - rep_ch = new REP_CH[word.length()]; - for (i = 0; i < word.length(); ++i) { - for (j = 0; j < rep_ch_count && - rep_ch[j].unichar_id != word.unichar_id(i); ++j); - if (j < rep_ch_count) { - rep_ch[j].count++; - } else { - rep_ch[rep_ch_count].unichar_id = word.unichar_id(i); - rep_ch[rep_ch_count].count = 1; - rep_ch_count++; - } + // Find the most frequent result. + UNICHAR_ID maxch_id = INVALID_UNICHAR_ID; // most common char + int max_count = rep_ch.MaxCount(&maxch_id); + // Find the best exemplar of a classifier result for maxch_id. 
+ BLOB_CHOICE* best_choice = FindBestMatchingChoice(maxch_id, word_res); + if (best_choice == NULL) { + tprintf("Failed to find a choice for %s, occurring %d times\n", + unicharset.debug_str(maxch_id).string(), max_count); + return; } + word_res->done = TRUE; - for (j = 0; j < rep_ch_count; j++) { - total += rep_ch[j].count; - if ((rep_ch[j].count > max) && (rep_ch[j].unichar_id != space)) { - max = rep_ch[j].count; - maxch_id = rep_ch[j].unichar_id; - } + // Measure the mean space. + int total_gap = 0; + int gap_count = 0; + WERD* werd = word_res->word; + C_BLOB_IT blob_it(werd->cblob_list()); + C_BLOB* prev_blob = blob_it.data(); + for (blob_it.forward(); !blob_it.at_first(); blob_it.forward()) { + C_BLOB* blob = blob_it.data(); + int gap = blob->bounding_box().left(); + gap -= prev_blob->bounding_box().right(); + total_gap += gap; + ++gap_count; + prev_blob = blob; } - // tprintf( "REPEATED CHAR %s len=%d total=%d choice=%c\n", - // word_str, word_len, total, maxch ); - delete[] rep_ch; - - word_res->reject_map.initialise(word.length()); - for (i = 0; i < word.length(); ++i) { - if (word.unichar_id(i) != maxch_id) - word_res->reject_map[i].setrej_bad_repetition(); // rej unrecognised blobs + if (total_gap > word_res->x_height * gap_count * kRepcharGapThreshold) { + // Needs spaces between. + ExplodeRepeatedWord(best_choice, page_res_it); + } else { + // Just correct existing classification. + CorrectRepcharChoices(best_choice, word_res); + word_res->best_choice->populate_unichars(unicharset); + word_res->reject_map.initialise(word.length()); } - word_res->done = TRUE; +} + +// Explode the word at the given iterator location into individual words +// of a single given unichar_id defined by best_choice. +// The original word is deleted, and the replacements copy most of their +// fields from the original. 
+void Tesseract::ExplodeRepeatedWord(BLOB_CHOICE* best_choice, + PAGE_RES_IT* page_res_it) { + WERD_RES *word_res = page_res_it->word(); + ASSERT_HOST(best_choice != NULL); + + // Make a new word for each blob in the original. + WERD* werd = word_res->word; + C_BLOB_IT blob_it(werd->cblob_list()); + for (; !blob_it.empty(); blob_it.forward()) { + bool first_blob = blob_it.at_first(); + bool last_blob = blob_it.at_last(); + WERD* blob_word = werd->ConstructFromSingleBlob(first_blob, last_blob, + blob_it.extract()); + WERD_RES* rep_word = page_res_it->InsertCloneWord(*word_res, blob_word); + // Setup the single char WERD_RES + rep_word->SetupForRecognition(unicharset, false, page_res_it->row()->row, + page_res_it->block()->block); + rep_word->CloneChoppedToRebuild(); + BLOB_CHOICE* blob_choice = new BLOB_CHOICE(*best_choice); + rep_word->FakeClassifyWord(unicharset, 1, &blob_choice); + } + page_res_it->DeleteCurrentWord(); } // TODO(tkielbus) Decide between keeping this behavior here or modifying the @@ -1131,54 +883,51 @@ static int is_simple_quote(const char* signed_str, int length) { *(str + 2) == 0x99))); } +// Callback helper for fix_quotes returns a double quote if both +// arguments are quote, otherwise INVALID_UNICHAR_ID. +UNICHAR_ID Tesseract::BothQuotes(UNICHAR_ID id1, UNICHAR_ID id2) { + const char *ch = unicharset.id_to_unichar(id1); + const char *next_ch = unicharset.id_to_unichar(id2); + if (is_simple_quote(ch, strlen(ch)) && + is_simple_quote(next_ch, strlen(next_ch))) + return unicharset.unichar_to_id("\""); + return INVALID_UNICHAR_ID; +} + /** * fix_quotes * * Change pairs of quotes to double quotes. 
*/ -void Tesseract::fix_quotes(WERD_CHOICE *choice, //choice to fix - WERD *word, //word to do //char choices - BLOB_CHOICE_LIST_CLIST *blob_choices) { +void Tesseract::fix_quotes(WERD_RES* word_res, + BLOB_CHOICE_LIST_CLIST* blob_choices) { if (!unicharset.contains_unichar("\"") || !unicharset.get_enabled(unicharset.unichar_to_id("\""))) return; // Don't create it if it is disallowed. - PBLOB_IT blob_it = word->blob_list(); // blobs - BLOB_CHOICE_LIST_C_IT blob_choices_it = blob_choices; // choices - BLOB_CHOICE_IT it1; // first choices - BLOB_CHOICE_IT it2; // second choices + word_res->ConditionalBlobMerge( + unicharset, + NewPermanentTessCallback(this, &Tesseract::BothQuotes), + NULL, + blob_choices); +} - int i; - int modified = false; - for (i = 0; i < choice->length()-1; - ++i, blob_it.forward(), blob_choices_it.forward()) { - const char *ch = unicharset.id_to_unichar(choice->unichar_id(i)); - const char *next_ch = unicharset.id_to_unichar(choice->unichar_id(i+1)); - if (is_simple_quote(ch, strlen(ch)) && - is_simple_quote(next_ch, strlen(next_ch))) { - choice->set_unichar_id(unicharset.unichar_to_id("\""), i); - choice->remove_unichar_id(i+1); - modified = true; - merge_blobs(blob_it.data(), blob_it.data_relative(1)); - blob_it.forward(); - delete blob_it.extract(); // get rid of spare - - it1.set_to_list(blob_choices_it.data()); - it2.set_to_list(blob_choices_it.data_relative(1)); - if (it1.data()->certainty() < it2.data()->certainty()) { - blob_choices_it.forward(); - delete blob_choices_it.extract(); // get rid of spare - } else { - delete blob_choices_it.extract(); // get rid of spare - blob_choices_it.forward(); - } - } - } - if (modified) { - choice->populate_unichars(unicharset); - } +// Callback helper for fix_hyphens returns UNICHAR_ID of - if both +// arguments are hyphen, otherwise INVALID_UNICHAR_ID. 
+UNICHAR_ID Tesseract::BothHyphens(UNICHAR_ID id1, UNICHAR_ID id2) { + const char *ch = unicharset.id_to_unichar(id1); + const char *next_ch = unicharset.id_to_unichar(id2); + if (strlen(ch) == 1 && strlen(next_ch) == 1 && + (*ch == '-' || *ch == '~') && (*next_ch == '-' || *next_ch == '~')) + return unicharset.unichar_to_id("-"); + return INVALID_UNICHAR_ID; } +// Callback helper for fix_hyphens returns true if box1 and box2 overlap +// (assuming both on the same textline, are in order and a chopped em dash.) +bool Tesseract::HyphenBoxesOverlap(const TBOX& box1, const TBOX& box2) { + return box1.right() >= box2.left(); +} /** * fix_hyphens @@ -1186,50 +935,17 @@ void Tesseract::fix_quotes(WERD_CHOICE *choice, //choice to fix * Change pairs of hyphens to a single hyphen if the bounding boxes touch * Typically a long dash which has been segmented. */ -void Tesseract::fix_hyphens( //crunch double hyphens - WERD_CHOICE *choice, //choice to fix - WERD *word, //word to do //char choices +void Tesseract::fix_hyphens(WERD_RES *word_res, BLOB_CHOICE_LIST_CLIST *blob_choices) { if (!unicharset.contains_unichar("-") || !unicharset.get_enabled(unicharset.unichar_to_id("-"))) return; // Don't create it if it is disallowed. 
- PBLOB_IT blob_it = word->blob_list(); - BLOB_CHOICE_LIST_C_IT blob_choices_it = blob_choices; - BLOB_CHOICE_IT it1; // first choices - BLOB_CHOICE_IT it2; // second choices - - bool modified = false; - for (int i = 0; i+1 < choice->length(); - ++i, blob_it.forward (), blob_choices_it.forward ()) { - const char *ch = unicharset.id_to_unichar(choice->unichar_id(i)); - const char *next_ch = unicharset.id_to_unichar(choice->unichar_id(i+1)); - if (strlen(ch) != 1 || strlen(next_ch) != 1) continue; - if ((*ch == '-' || *ch == '~') && - (*next_ch == '-' || *next_ch == '~') && - (blob_it.data()->bounding_box().right() >= - blob_it.data_relative(1)->bounding_box().left ())) { - choice->set_unichar_id(unicharset.unichar_to_id("-"), i); - choice->remove_unichar_id(i+1); - modified = true; - merge_blobs(blob_it.data(), blob_it.data_relative(1)); - blob_it.forward(); - delete blob_it.extract(); // get rid of spare - - it1.set_to_list(blob_choices_it.data()); - it2.set_to_list(blob_choices_it.data_relative(1)); - if (it1.data()->certainty() < it2.data()->certainty()) { - blob_choices_it.forward(); - delete blob_choices_it.extract(); // get rid of spare - } else { - delete blob_choices_it.extract(); // get rid of spare - blob_choices_it.forward(); - } - } - } - if (modified) { - choice->populate_unichars(unicharset); - } + word_res->ConditionalBlobMerge( + unicharset, + NewPermanentTessCallback(this, &Tesseract::BothHyphens), + NewPermanentTessCallback(this, &Tesseract::HyphenBoxesOverlap), + blob_choices); } } // namespace tesseract @@ -1252,85 +968,8 @@ void merge_blobs( //combine 2 blobs outline_it.add_list_after (blob2->out_list ()); } - -/********************************************************************** - * choice_dump_tester - * - * Matcher tester function which generates .chc file entries. - * Called via test_segment_pass2 for every blob tested by tess in a word. - * (But only for words for which a correct segmentation could be found.) 
- **********************************************************************/ -/* DEADCODE -void choice_dump_tester( //dump chars in word - PBLOB *, //blob - DENORM *, //de-normaliser - BOOL8 correct, //ly segmented - char *text, //correct text - inT32 count, //chars in text - BLOB_CHOICE_LIST *ratings //list of results - ) { - STRING choice_file_name; - BLOB_CHOICE *blob_choice; - BLOB_CHOICE_IT it; - char source_chars[20]; - char correct_char[3]; - - if (choice_file == NULL) { - choice_file_name = imagebasename + ".chc"; - if (!(choice_file = fopen (choice_file_name.string (), "w"))) { - CANTOPENFILE.error ("choice_dump_tester", EXIT, "%s %d", - choice_file_name.string (), errno); - } - } - - if ((count == 0) || (text == NULL) || (text[0] == '\0')) { - strcpy (source_chars, "$$"); - strcpy (correct_char, "$$"); - } - else { - strncpy(source_chars, text, count); - source_chars[count] = '\0'; - if (correct) { - correct_char[0] = text[0]; - correct_char[1] = '\0'; - } - else { - strcpy (correct_char, "$$"); - } - } - fprintf (choice_file, "%s\t%s", source_chars, correct_char); - - it.set_to_list (ratings); - for (it.mark_cycle_pt (); !it.cycled_list (); it.forward ()) { - blob_choice = it.data (); - fprintf (choice_file, "\t%s\t%f\t%f", - blob_choice->unichar (), - blob_choice->rating (), blob_choice->certainty ()); - } - fprintf (choice_file, "\n"); -} -*/ - -/** - * make_bln_copy() - * - * Generate a baseline normalised copy of the source word. The copy is done so - * that whatever format the original word is in, a polygonal bln version is - * generated as output. 
- */ - -WERD *make_bln_copy(WERD *src_word, ROW *row, BLOCK* block, - float x_height, DENORM *denorm) { - WERD *result = src_word->poly_copy(row->x_height()); - - result->baseline_normalise_x(row, x_height, denorm); - if (block != NULL) - denorm->set_block(block); - return result; -} - - namespace tesseract { + ACCEPTABLE_WERD_TYPE Tesseract::acceptable_word_string(const char *s, const char *lengths) { int i = 0; @@ -1438,11 +1077,7 @@ ACCEPTABLE_WERD_TYPE Tesseract::acceptable_word_string(const char *s, return word_type; } -} // namespace tesseract - -/* DEBUGGING ROUTINE */ - -BOOL8 check_debug_pt(WERD_RES *word, int location) { +BOOL8 Tesseract::check_debug_pt(WERD_RES *word, int location) { BOOL8 show_map_detail = FALSE; inT16 i; @@ -1452,23 +1087,17 @@ BOOL8 check_debug_pt(WERD_RES *word, int location) { tessedit_rejection_debug.set_value (FALSE); debug_x_ht_level.set_value (0); - tessedit_cluster_debug.set_value (FALSE); - nn_debug.set_value (FALSE); - nn_reject_debug.set_value (FALSE); if (word->word->bounding_box ().contains (FCOORD (test_pt_x, test_pt_y))) { if (location < 0) - return TRUE; //For breakpoint use + return TRUE; // For breakpoint use tessedit_rejection_debug.set_value (TRUE); debug_x_ht_level.set_value (20); - tessedit_cluster_debug.set_value (TRUE); - nn_debug.set_value (TRUE); - nn_reject_debug.set_value (TRUE); tprintf ("\n\nTESTWD::"); switch (location) { case 0: tprintf ("classify_word_pass1 start\n"); - word->word->print (debug_fp); + word->word->print(); break; case 10: tprintf ("make_reject_map: initial map"); @@ -1520,7 +1149,6 @@ BOOL8 check_debug_pt(WERD_RES *word, int location) { word->reject_map[i].full_print(debug_fp); } } - tprintf ("Tess Accepted: %s\n", word->tess_accepted ? "TRUE" : "FALSE"); tprintf ("Done flag: %s\n\n", word->done ? 
"TRUE" : "FALSE"); return TRUE; @@ -1530,47 +1158,77 @@ BOOL8 check_debug_pt(WERD_RES *word, int location) { return FALSE; } +/** + * find_modal_font + * + * Find the modal font and remove from the stats. + */ +static void find_modal_font( //good chars in word + STATS *fonts, //font stats + inT8 *font_out, //output font + inT8 *font_count //output count + ) { + inT8 font; //font index + inT32 count; //pile count + + if (fonts->get_total () > 0) { + font = (inT8) fonts->mode (); + *font_out = font; + count = fonts->pile_count (font); + *font_count = count < MAX_INT8 ? count : MAX_INT8; + fonts->add (font, -*font_count); + } + else { + *font_out = -1; + *font_count = 0; + } +} /** * set_word_fonts * * Get the fonts for the word. */ -namespace tesseract { -void Tesseract::set_word_fonts( - WERD_RES *word, // word to adapt to - BLOB_CHOICE_LIST_CLIST *blob_choices // detailed results - ) { +void Tesseract::set_word_fonts(WERD_RES *word, + BLOB_CHOICE_LIST_CLIST *blob_choices) { + if (blob_choices == NULL) return; + // Don't try to set the word fonts for a cube word, as the configs + // will be meaningless. 
+ if (word->chopped_word == NULL) return; + inT32 index; // char id index - UNICHAR_ID choice_char_id; // char id from word - inT8 config; // font of char // character iterator BLOB_CHOICE_LIST_C_IT char_it = blob_choices; BLOB_CHOICE_IT choice_it; // choice iterator int fontinfo_size = get_fontinfo_table().size(); int fontset_size = get_fontset_table().size(); - if (fontinfo_size == 0 || fontset_size == 0) - return; + if (fontinfo_size == 0 || fontset_size == 0) return; STATS fonts(0, fontinfo_size); // font counters word->italic = 0; word->bold = 0; + if (!word->best_choice_fontinfo_ids.empty()) { + word->best_choice_fontinfo_ids.clear(); + } + // Compute the modal font for the word for (char_it.mark_cycle_pt(), index = 0; !char_it.cycled_list(); ++index, char_it.forward()) { - choice_char_id = word->best_choice->unichar_id(index); + UNICHAR_ID word_ch_id = word->best_choice->unichar_id(index); + if (word_ch_id >= PreTrainedTemplates->NumClasses) + return; // This must be a cube word. choice_it.set_to_list(char_it.data()); for (choice_it.mark_cycle_pt(); !choice_it.cycled_list(); choice_it.forward()) { - if (choice_it.data()->unichar_id() == choice_char_id) { - config = choice_it.data()->config(); - int class_id = choice_it.data()->unichar_id(); - int font_set_id = PreTrainedTemplates->Class[class_id]->font_set_id; + UNICHAR_ID blob_ch_id = choice_it.data()->unichar_id(); + if (blob_ch_id == word_ch_id) { + int config = choice_it.data()->config(); + int config2 = choice_it.data()->config2(); + int font_set_id = PreTrainedTemplates->Class[blob_ch_id]->font_set_id; if (font_set_id >= 0 && config >= 0 && font_set_id < fontset_size) { FontSet font_set = get_fontset_table().get(font_set_id); if (tessedit_debug_fonts) { - tprintf("%s(%d=%d%c%c)", unicharset.id_to_unichar(choice_char_id), - config, (config & 31) >> 2, - config & 2 ? 'N' : 'B', config & 1 ? 
'N' : 'I'); + tprintf("%s(%d/%d)", unicharset.id_to_unichar(blob_ch_id), + config, config2); const char* fontname; if (config >= font_set.size) { fontname = "Unknown"; @@ -1582,12 +1240,16 @@ void Tesseract::set_word_fonts( unicharset.id_to_unichar(choice_it.data()->unichar_id()), font_set_id, config, fontname); } + // 1st choice config gets 2 pts, 2nd choice 1 pt. if (config < font_set.size) { int fontinfo_id = font_set.configs[config]; if (fontinfo_id < fontinfo_size) { - FontInfo fi = get_fontinfo_table().get(fontinfo_id); - word->italic += fi.is_italic(); - word->bold += fi.is_bold(); + fonts.add(fontinfo_id, 2); + } + } + if (config2 >= 0 && config2 < font_set.size) { + int fontinfo_id = font_set.configs[config2]; + if (fontinfo_id < fontinfo_size) { fonts.add(fontinfo_id, 1); } } @@ -1598,33 +1260,25 @@ void Tesseract::set_word_fonts( } find_modal_font(&fonts, &word->font1, &word->font1_count); find_modal_font(&fonts, &word->font2, &word->font2_count); - if (tessedit_debug_fonts) - tprintf("\n"); + // All the blobs get the word's best choice font. + for (int i = 0; i < word->best_choice->length(); ++i) { + word->best_choice_fontinfo_ids.push_back(word->font1); + } if (word->font1_count > 0) { - word->italic = word->bold = 0; - for (char_it.mark_cycle_pt(), index = 0; - !char_it.cycled_list(); char_it.forward(), ++index) { - choice_char_id = word->best_choice->unichar_id(index); - choice_it.set_to_list(char_it.data()); - for (choice_it.mark_cycle_pt(); !choice_it.cycled_list(); - choice_it.forward()) { - if (choice_it.data()->unichar_id() == choice_char_id) { - config = choice_it.data()->config(); - int class_id = choice_it.data()->unichar_id(); - int font_set_id = PreTrainedTemplates->Class[class_id]->font_set_id; - if (font_set_id >= 0 && config >= 0 && font_set_id < fontset_size) { - int fontinfo_id = get_fontset_table().get(font_set_id). 
- configs[config]; - if (fontinfo_id == word->font1 && fontinfo_id < fontinfo_size) { - FontInfo fi = fontinfo_table_.get(fontinfo_id); - word->italic += fi.is_italic(); - word->bold += fi.is_bold(); - } - } - break; - } + FontInfo fi = fontinfo_table_.get(word->font1); + if (tessedit_debug_fonts) { + if (word->font2_count > 0) { + tprintf("Word modal font=%s, score=%d, 2nd choice %s/%d\n", + fi.name, word->font1_count, + fontinfo_table_.get(word->font2).name, word->font2_count); + } else { + tprintf("Word modal font=%s, score=%d. No 2nd choice\n", + fi.name, word->font1_count); } } + // 1st choices got 2 pts, so we need to halve the score for the mode. + word->italic = (fi.is_italic() ? 1 : -1) * (word->font1_count + 1) / 2; + word->bold = (fi.is_bold() ? 1 : -1) * (word->font1_count + 1) / 2; } } @@ -1641,173 +1295,42 @@ void Tesseract::font_recognition_pass( //good chars in word inT32 count; //of a feature inT8 doc_font; //modal font inT8 doc_font_count; //modal font - inT32 doc_italic; //total italics - inT32 doc_bold; //total bolds - ROW_RES *row = NULL; //current row WERD_RES *word; //current word - STATS fonts (0, get_fontinfo_table().size() ? - get_fontinfo_table().size() : 32); // font counters STATS doc_fonts (0, get_fontinfo_table().size() ? 
get_fontinfo_table().size() : 32); // font counters - doc_italic = 0; - doc_bold = 0; - page_res_it.restart_page (); - while (page_res_it.word () != NULL) { - if (row != page_res_it.row ()) { - if (row != NULL) { - find_modal_font (&fonts, &row->font1, &row->font1_count); - find_modal_font (&fonts, &row->font2, &row->font2_count); - } - row = page_res_it.row (); //current row - fonts.clear (); //clear counters - row->italic = 0; - row->bold = 0; + page_res_it.restart_page(); + while (page_res_it.word() != NULL) { + word = page_res_it.word(); + set_word_fonts(word, word->best_choice->blob_choices()); + if (!save_best_choices) { // set_blob_choices() does a deep clear + word->best_choice->set_blob_choices(NULL); } - word = page_res_it.word (); - row->italic += word->italic; - row->bold += word->bold; - fonts.add (word->font1, word->font1_count); - fonts.add (word->font2, word->font2_count); - doc_italic += word->italic; - doc_bold += word->bold; - doc_fonts.add (word->font1, word->font1_count); - doc_fonts.add (word->font2, word->font2_count); - page_res_it.forward (); - } - if (row != NULL) { - find_modal_font (&fonts, &row->font1, &row->font1_count); - find_modal_font (&fonts, &row->font2, &row->font2_count); + doc_fonts.add(word->font1, word->font1_count); + doc_fonts.add(word->font2, word->font2_count); + page_res_it.forward(); } find_modal_font(&doc_fonts, &doc_font, &doc_font_count); - /* - row=NULL; - page_res_it.restart_page(); - while (page_res_it.word() != NULL) - { - if (row!=page_res_it.row()) - { - row2=row; - row=page_res_it.row(); - if (row->font1_countrow->x_height()-row2->row->x_height(); - if (hdiff<0) - hdiff=-hdiff; - if (hdiffrow->x_height()-row2->row->x_height(); - if (hdiff<0) - hdiff=-hdiff; - if (hdiffitalic=italic; - row->bold=bold; - find_modal_font(&fonts,&row->font1,&row->font1_count); - find_modal_font(&fonts,&row->font2,&row->font2_count); - } - else - page_res_it.forward(); - } - else - page_res_it.forward(); - }*/ + if (doc_font_count 
== 0) + return; + FontInfo fi = fontinfo_table_.get(doc_font); page_res_it.restart_page (); while (page_res_it.word () != NULL) { - row = page_res_it.row (); //current row word = page_res_it.word (); length = word->best_choice->length(); - count = word->italic; - if (count < 0) - count = -count; - if (!(count == length || (length > 3 && count >= length * 3 / 4))) - word->italic = doc_italic > 0 ? 1 : -1; - - count = word->bold; - if (count < 0) - count = -count; - if (!(count == length || (length > 3 && count >= length * 3 / 4))) - word->bold = doc_bold > 0 ? 1 : -1; - - count = word->font1_count; + // 1st choices got 2 pts, so we need to halve the score for the mode. + count = (word->font1_count + 1) / 2; if (!(count == length || (length > 3 && count >= length * 3 / 4))) { word->font1 = doc_font; - word->font1_count = doc_font_count; + // Counts only get 1 as it came from the doc. + word->font1_count = 1; + word->italic = fi.is_italic() ? 1 : -1; + word->bold = fi.is_bold() ? 1 : -1; } - - page_res_it.forward (); + page_res_it.forward(); } } -} // namespace tesseract - - -/** - * add_in_one_row - * - * Add into the stats for one row. - */ -//dead code? -void add_in_one_row( //good chars in word - ROW_RES *row, //current row - STATS *fonts, //font stats - inT8 *italic, //output count - inT8 *bold //output count - ) { - WERD_RES *word; //current word - WERD_RES_IT word_it = &row->word_res_list; - - for (word_it.mark_cycle_pt (); !word_it.cycled_list (); word_it.forward ()) { - word = word_it.data (); - *italic += word->italic; - *bold += word->bold; - if (word->font1_count > 0) - fonts->add (word->font1, word->font1_count); - if (word->font2_count > 0) - fonts->add (word->font2, word->font2_count); - } -} - - -/** - * find_modal_font - * - * Find the modal font and remove from the stats. - */ -//make static? 
-void find_modal_font( //good chars in word - STATS *fonts, //font stats - inT8 *font_out, //output font - inT8 *font_count //output count - ) { - inT8 font; //font index - inT32 count; //pile couat - - if (fonts->get_total () > 0) { - font = (inT8) fonts->mode (); - *font_out = font; - count = fonts->pile_count (font); - *font_count = count < MAX_INT8 ? count : MAX_INT8; - fonts->add (font, -*font_count); - } - else { - *font_out = -1; - *font_count = 0; - } -} +} // namespace tesseract diff --git a/ccmain/control.h b/ccmain/control.h index 61e8e1d188..5a16154366 100644 --- a/ccmain/control.h +++ b/ccmain/control.h @@ -25,16 +25,11 @@ #ifndef CONTROL_H #define CONTROL_H -#include "varable.h" +#include "params.h" #include "ocrblock.h" -//#include "epapdest.h" #include "ratngs.h" #include "statistc.h" -//#include "epapconv.h" -#include "ocrshell.h" #include "pageres.h" -//TODO (wanke) why does the app. path have to be so weird here? -#include "charsample.h" #include "notdll.h" enum ACCEPTABLE_WERD_TYPE @@ -49,129 +44,12 @@ enum ACCEPTABLE_WERD_TYPE typedef BOOL8 (*BLOB_REJECTOR) (PBLOB *, BLOB_CHOICE_IT *, void *); -extern INT_VAR_H (tessedit_single_match, FALSE, "Top choice only from CP"); -//extern BOOL_VAR_H(tessedit_small_match,FALSE,"Use small matrix matcher"); -extern BOOL_VAR_H (tessedit_print_text, FALSE, "Write text to stdout"); -extern BOOL_VAR_H (tessedit_draw_words, FALSE, "Draw source words"); -extern BOOL_VAR_H (tessedit_draw_outwords, FALSE, "Draw output words"); -extern BOOL_VAR_H (tessedit_training_wiseowl, FALSE, -"Call WO to learn blobs"); -extern BOOL_VAR_H (tessedit_training_tess, FALSE, "Call Tess to learn blobs"); -extern BOOL_VAR_H (tessedit_matcher_is_wiseowl, FALSE, "Call WO to classify"); -extern BOOL_VAR_H (tessedit_dump_choices, FALSE, "Dump char choices"); -extern BOOL_VAR_H (tessedit_fix_fuzzy_spaces, TRUE, -"Try to improve fuzzy spaces"); -extern BOOL_VAR_H (tessedit_unrej_any_wd, FALSE, -"Dont bother with word plausibility"); -extern 
BOOL_VAR_H (tessedit_fix_hyphens, TRUE, "Crunch double hyphens?"); -extern BOOL_VAR_H (tessedit_reject_fullstops, FALSE, "Reject all fullstops"); -extern BOOL_VAR_H (tessedit_reject_suspect_fullstops, FALSE, -"Reject suspect fullstops"); -extern BOOL_VAR_H (tessedit_redo_xheight, TRUE, "Check/Correct x-height"); -extern BOOL_VAR_H (tessedit_cluster_adaption_on, TRUE, -"Do our own adaption - ems only"); -extern BOOL_VAR_H (tessedit_enable_doc_dict, TRUE, -"Add words to the document dictionary"); -extern BOOL_VAR_H (word_occ_first, FALSE, "Do word occ before re-est xht"); -extern BOOL_VAR_H (tessedit_xht_fiddles_on_done_wds, TRUE, -"Apply xht fix up even if done"); -extern BOOL_VAR_H (tessedit_xht_fiddles_on_no_rej_wds, TRUE, -"Apply xht fix up even in no rejects"); -extern INT_VAR_H (x_ht_check_word_occ, 2, "Check Char Block occupancy"); -extern INT_VAR_H (x_ht_stringency, 1, "How many confirmed a/n to accept?"); -extern BOOL_VAR_H (x_ht_quality_check, TRUE, "Dont allow worse quality"); -extern BOOL_VAR_H (tessedit_debug_block_rejection, FALSE, -"Block and Row stats"); -extern INT_VAR_H (debug_x_ht_level, 0, "Reestimate debug"); -extern BOOL_VAR_H (rej_use_xht, TRUE, "Individual rejection control"); -extern BOOL_VAR_H (debug_acceptable_wds, FALSE, "Dump word pass/fail chk"); -extern STRING_VAR_H (chs_leading_punct, "('`\"", "Leading punctuation"); -extern -STRING_VAR_H (chs_trailing_punct1, ").,;:?!", "1st Trailing punctuation"); -extern STRING_VAR_H (chs_trailing_punct2, ")'`\"", -"2nd Trailing punctuation"); -extern double_VAR_H (quality_rej_pc, 0.08, -"good_quality_doc lte rejection limit"); -extern double_VAR_H (quality_blob_pc, 0.0, -"good_quality_doc gte good blobs limit"); -extern double_VAR_H (quality_outline_pc, 1.0, -"good_quality_doc lte outline error limit"); -extern double_VAR_H (quality_char_pc, 0.95, -"good_quality_doc gte good char limit"); -extern INT_VAR_H (quality_min_initial_alphas_reqd, 2, -"alphas in a good word"); -extern BOOL_VAR_H 
(tessedit_tess_adapt_to_rejmap, FALSE, -"Use reject map to control Tesseract adaption"); -extern INT_VAR_H (tessedit_tess_adaption_mode, 3, -"Adaptation decision algorithm for tess"); -extern INT_VAR_H (tessedit_em_adaption_mode, 62, -"Adaptation decision algorithm for ems matrix matcher"); -extern BOOL_VAR_H (tessedit_cluster_adapt_after_pass1, FALSE, -"Adapt using clusterer after pass 1"); -extern BOOL_VAR_H (tessedit_cluster_adapt_after_pass2, FALSE, -"Adapt using clusterer after pass 1"); -extern BOOL_VAR_H (tessedit_cluster_adapt_after_pass3, FALSE, -"Adapt using clusterer after pass 1"); -extern BOOL_VAR_H (tessedit_cluster_adapt_before_pass1, FALSE, -"Adapt using clusterer before Tess adaping during pass 1"); -extern INT_VAR_H (tessedit_cluster_adaption_mode, 0, -"Adaptation decision algorithm for matrix matcher"); -extern BOOL_VAR_H (tessedit_adaption_debug, FALSE, -"Generate and print debug information for adaption"); -extern BOOL_VAR_H (tessedit_minimal_rej_pass1, FALSE, -"Do minimal rejection on pass 1 output"); -extern BOOL_VAR_H (tessedit_test_adaption, FALSE, -"Test adaption criteria"); -extern BOOL_VAR_H (tessedit_global_adaption, FALSE, -"Adapt to all docs over time"); -extern BOOL_VAR_H (tessedit_matcher_log, FALSE, "Log matcher activity"); -extern INT_VAR_H (tessedit_test_adaption_mode, 3, -"Adaptation decision algorithm for tess"); -extern BOOL_VAR_H (test_pt, FALSE, "Test for point"); -extern double_VAR_H (test_pt_x, 99999.99, "xcoord"); -extern double_VAR_H (test_pt_y, 99999.99, "ycoord"); -extern BOOL_VAR_H(save_best_choices, FALSE, - "Save the results of the recognition step" - " (blob_choices) within the corresponding WERD_CHOICE"); - -/* -void classify_word_pass1( //recog one word - WERD_RES *word, //word to do - ROW *row, - BOOL8 cluster_adapt, - CHAR_SAMPLES_LIST *char_clusters, - CHAR_SAMPLE_LIST *chars_waiting); -*/ - //word to do -void classify_word_pass2(WERD_RES *word, ROW *row); -/** - * recognize one word - * @param word word to do 
- */ -void match_word_pass2( - WERD_RES *word, - ROW *row, - float x_height); -/** - * crunch double hyphens - * @param choice string to fix - * @param word word to do - * @param blob_choices char choices - */ -void fix_hyphens( - WERD_CHOICE *choice, - WERD *word, - BLOB_CHOICE_LIST_CLIST *blob_choices); - /** * combine 2 blobs * @param blob1 dest blob * @param blob2 source blob */ -void merge_blobs( - PBLOB *blob1, - PBLOB *blob2 - ); +void merge_blobs(PBLOB *blob1, PBLOB *blob2); /** dump chars in word */ void choice_dump_tester( PBLOB *, ///< blob @@ -181,20 +59,4 @@ void choice_dump_tester( inT32 count, ///< chars in text BLOB_CHOICE_LIST *ratings ///< list of results ); -WERD *make_bln_copy(WERD *src_word, ROW *row, BLOCK* block, - float x_height, DENORM *denorm); -BOOL8 check_debug_pt(WERD_RES *word, int location); -/** good chars in word */ -void add_in_one_row( - ROW_RES *row, ///< current row - STATS *fonts, ///< font stats - inT8 *italic, ///< output count - inT8 *bold ///< output count - ); -/** good chars in word */ -void find_modal_font( - STATS *fonts, ///< font stats - inT8 *font_out, ///< output font - inT8 *font_count ///< output count - ); #endif diff --git a/ccmain/cube_control.cpp b/ccmain/cube_control.cpp new file mode 100644 index 0000000000..dcaa581a0b --- /dev/null +++ b/ccmain/cube_control.cpp @@ -0,0 +1,465 @@ +/****************************************************************** + * File: cube_control.cpp + * Description: Tesseract class methods for invoking cube convolutional + * neural network word recognizer. + * Author: Raquel Romano + * Created: September 2009 + * + **********************************************************************/ + +// Include automatically generated configuration file if running autoconf. +#ifdef HAVE_CONFIG_H +#include "config_auto.h" +#endif + +#ifdef HAVE_LIBLEPT +// Include leptonica library only if autoconf (or makefile etc) tell us to. 
+#include "allheaders.h" +#endif + +#include "cube_object.h" +#include "cube_reco_context.h" +#include "tesseractclass.h" +#include "tesseract_cube_combiner.h" + +namespace tesseract { + +/********************************************************************** + * convert_prob_to_tess_certainty + * + * Normalize a probability in the range [0.0, 1.0] to a tesseract + * certainty in the range [-20.0, 0.0] + **********************************************************************/ +static float convert_prob_to_tess_certainty(float prob) { + return (prob - 1.0) * 20.0; +} + +/********************************************************************** + * char_box_to_tbox + * + * Create a TBOX from a character bounding box. If nonzero, the + * x_offset accounts for any additional padding of the word box that + * should be taken into account. + * + **********************************************************************/ +TBOX char_box_to_tbox(Box* char_box, TBOX word_box, int x_offset) { + l_int32 left; + l_int32 top; + l_int32 width; + l_int32 height; + l_int32 right; + l_int32 bottom; + + boxGetGeometry(char_box, &left, &top, &width, &height); + left += word_box.left() - x_offset; + right = left + width; + top = word_box.bottom() + word_box.height() - top; + bottom = top - height; + return TBOX(left, bottom, right, top); +} + +/********************************************************************** + * extract_cube_state + * + * Extract CharSamp objects and character bounding boxes from the + * CubeObject's state. The caller should free both structures. 
+ * +**********************************************************************/ +bool Tesseract::extract_cube_state(CubeObject* cube_obj, + int* num_chars, + Boxa** char_boxes, + CharSamp*** char_samples) { + if (!cube_obj) { + if (cube_debug_level > 0) { + tprintf("Cube WARNING (extract_cube_state): Invalid cube object " + "passed to extract_cube_state\n"); + } + return false; + } + + // Note that the CubeObject accessors return either the deslanted or + // regular objects search object or beam search object, whichever + // was used in the last call to Recognize() + CubeSearchObject* cube_search_obj = cube_obj->SrchObj(); + if (!cube_search_obj) { + if (cube_debug_level > 0) { + tprintf("Cube WARNING (Extract_cube_state): Could not retrieve " + "cube's search object in extract_cube_state.\n"); + } + return false; + } + BeamSearch *beam_search_obj = cube_obj->BeamObj(); + if (!beam_search_obj) { + if (cube_debug_level > 0) { + tprintf("Cube WARNING (Extract_cube_state): Could not retrieve " + "cube's beam search object in extract_cube_state.\n"); + } + return false; + } + + // Get the character samples and bounding boxes by backtracking + // through the beam search path + int best_node_index = beam_search_obj->BestPresortedNodeIndex(); + *char_samples = beam_search_obj->BackTrack( + cube_search_obj, best_node_index, num_chars, NULL, char_boxes); + if (!*char_samples) + return false; + return true; +} + +/********************************************************************** + * create_cube_box_word + * + * Fill the given BoxWord with boxes from character bounding + * boxes. The char_boxes have local coordinates w.r.t. the + * word bounding box, i.e., the left-most character bbox of each word + * has (0,0) left-top coord, but the BoxWord must be defined in page + * coordinates. 
+ **********************************************************************/ +bool Tesseract::create_cube_box_word(Boxa *char_boxes, + int num_chars, + TBOX word_box, + BoxWord* box_word) { + if (!box_word) { + if (cube_debug_level > 0) { + tprintf("Cube WARNING (create_cube_box_word): Invalid box_word.\n"); + } + return false; + } + + // Find the x-coordinate of left-most char_box, which could be + // nonzero if the word image was padded before recognition took place. + int x_offset = -1; + for (int i = 0; i < num_chars; ++i) { + Box* char_box = boxaGetBox(char_boxes, i, L_CLONE); + if (x_offset < 0 || char_box->x < x_offset) { + x_offset = char_box->x; + } + boxDestroy(&char_box); + } + + for (int i = 0; i < num_chars; ++i) { + Box* char_box = boxaGetBox(char_boxes, i, L_CLONE); + TBOX tbox = char_box_to_tbox(char_box, word_box, x_offset); + boxDestroy(&char_box); + box_word->InsertBox(i, tbox); + } + return true; +} + +/********************************************************************** + * create_werd_choice + * + **********************************************************************/ +static WERD_CHOICE *create_werd_choice( + CharSamp** char_samples, + int num_chars, + const char* str, + float certainty, + const UNICHARSET &unicharset, + CharSet* cube_char_set + ) { + // Insert unichar ids into WERD_CHOICE + WERD_CHOICE *werd_choice = new WERD_CHOICE(num_chars); + ASSERT_HOST(werd_choice != NULL); + UNICHAR_ID uch_id; + for (int i = 0; i < num_chars; ++i) { + uch_id = cube_char_set->UnicharID(char_samples[i]->StrLabel()); + if (uch_id != INVALID_UNICHAR_ID) + werd_choice->append_unichar_id_space_allocated(uch_id, 1, 0.0, certainty); + } + + BLOB_CHOICE *blob_choice; + BLOB_CHOICE_LIST *choices_list; + BLOB_CHOICE_IT choices_list_it; + BLOB_CHOICE_LIST_CLIST *blob_choices = new BLOB_CHOICE_LIST_CLIST(); + BLOB_CHOICE_LIST_C_IT blob_choices_it; + blob_choices_it.set_to_list(blob_choices); + + for (int i = 0; i < werd_choice->length(); ++i) { + // Create new 
BLOB_CHOICE_LIST for this unichar + choices_list = new BLOB_CHOICE_LIST(); + choices_list_it.set_to_list(choices_list); + // Add a single BLOB_CHOICE to the list + blob_choice = new BLOB_CHOICE(werd_choice->unichar_id(i), + 0.0, certainty, -1, -1, 0); + choices_list_it.add_after_then_move(blob_choice); + // Add list to the clist + blob_choices_it.add_to_end(choices_list); + } + werd_choice->populate_unichars(unicharset); + werd_choice->set_certainty(certainty); + werd_choice->set_blob_choices(blob_choices); + return werd_choice; +} + +/********************************************************************** + * init_cube_objects + * + * Instantiates Tesseract object's CubeRecoContext and TesseractCubeCombiner. + * Returns false if cube context could not be created or if load_combiner is + * true, but the combiner could not be loaded. + **********************************************************************/ +bool Tesseract::init_cube_objects(bool load_combiner, + TessdataManager *tessdata_manager) { + ASSERT_HOST(cube_cntxt_ == NULL); + ASSERT_HOST(tess_cube_combiner_ == NULL); + + // Create the cube context object + cube_cntxt_ = CubeRecoContext::Create(this, tessdata_manager, &unicharset); + if (cube_cntxt_ == NULL) { + if (cube_debug_level > 0) { + tprintf("Cube WARNING (Tesseract::init_cube_objects()): Failed to " + "instantiate CubeRecoContext\n"); + } + return false; + } + + // Create the combiner object and load the combiner net for target languages. 
+ if (load_combiner) { + tess_cube_combiner_ = new tesseract::TesseractCubeCombiner(cube_cntxt_); + if (!tess_cube_combiner_ || !tess_cube_combiner_->LoadCombinerNet()) { + delete cube_cntxt_; + cube_cntxt_ = NULL; + if (tess_cube_combiner_ != NULL) { + delete tess_cube_combiner_; + tess_cube_combiner_ = NULL; + } + if (cube_debug_level > 0) + tprintf("Cube ERROR (Failed to instantiate TesseractCubeCombiner\n"); + return false; + } + } + return true; +} + +/********************************************************************** + * run_cube + * + * Iterate through tesseract's results and call cube on each word. + * If the combiner is present, optionally run the tesseract-cube + * combiner on each word. + **********************************************************************/ +void Tesseract::run_cube( + PAGE_RES *page_res // page structure + ) { + ASSERT_HOST(cube_cntxt_ != NULL); + if (!pix_binary_) { + if (cube_debug_level > 0) + tprintf("Tesseract::run_cube(): NULL binary image.\n"); + return; + } + if (!page_res) + return; + PAGE_RES_IT page_res_it(page_res); + page_res_it.restart_page(); + + // Iterate through the word results and call cube on each word. + CubeObject *cube_obj; + for (page_res_it.restart_page(); page_res_it.word () != NULL; + page_res_it.forward()) { + WERD_RES* word = page_res_it.word(); + TBOX word_box = word->word->bounding_box(); + const BLOCK* block = word->denorm.block(); + if (block != NULL && (block->re_rotation().x() != 1.0f || + block->re_rotation().y() != 0.0f)) { + // TODO(rays) We have to rotate the bounding box to get the true coords. + // This will be achieved in the future via DENORM. + // In the mean time, cube can't process this word. + if (cube_debug_level > 0) { + tprintf("Cube can't process rotated word at:"); + word_box.print(); + } + if (word->best_choice == NULL) + page_res_it.DeleteCurrentWord(); // Nobody has an answer. 
+ continue; + } + cube_obj = new tesseract::CubeObject(cube_cntxt_, pix_binary_, + word_box.left(), + pix_binary_->h - word_box.top(), + word_box.width(), word_box.height()); + cube_recognize(cube_obj, &page_res_it); + delete cube_obj; + } +} + +/********************************************************************** + * cube_recognize + * + * Call cube on the current word, optionally run the tess-cube combiner, and + * modify the tesseract result if cube wins. If cube fails to run, or + * if tesseract wins, leave the tesseract result unchanged. If the + * combiner is not instantiated, always use cube's result. + * + **********************************************************************/ +void Tesseract::cube_recognize( + CubeObject *cube_obj, + PAGE_RES_IT *page_res_it + ) { + // Retrieve tesseract's data structure for the current word. + WERD_RES *tess_werd_res = page_res_it->word(); + if (!tess_werd_res->best_choice && tess_cube_combiner_ != NULL) { + if (cube_debug_level > 0) + tprintf("Cube WARNING (Tesseract::cube_recognize): Cannot run combiner " + "without a tess result.\n"); + return; + } + + // Skip cube entirely if combiner is present but tesseract's + // certainty is greater than threshold. + int combiner_run_thresh = convert_prob_to_tess_certainty( + cube_cntxt_->Params()->CombinerRunThresh()); + if (tess_cube_combiner_ != NULL && + (tess_werd_res->best_choice->certainty() >= combiner_run_thresh)) { + return; + } + + // Run cube + WordAltList *cube_alt_list = cube_obj->RecognizeWord(); + if (!cube_alt_list || cube_alt_list->AltCount() <= 0) { + if (cube_debug_level > 0) { + tprintf("Cube returned nothing for word at:"); + tess_werd_res->word->bounding_box().print(); + } + if (tess_werd_res->best_choice == NULL) { + // Nobody has recognized it, so pretend it doesn't exist. 
+ if (cube_debug_level > 0) { + tprintf("Deleted word not recognized by cube and/or tesseract at:"); + tess_werd_res->word->bounding_box().print(); + } + page_res_it->DeleteCurrentWord(); + } + return; + } + + // At this point we *could* run the combiner and bail out if + // Tesseract wins, but that would require instantiating a new + // CubeObject to avoid losing the original recognition results + // (e.g., beam search lattice) stored with the CubeObject. Instead, + // we first extract the state we need from the current recognition + // and then reuse the CubeObject so that the combiner does not need + // to recompute the image's connected components, segmentation, etc. + + // Get cube's best result and its probability, mapped to tesseract's + // certainty range + char_32 *cube_best_32 = cube_alt_list->Alt(0); + double cube_prob = CubeUtils::Cost2Prob(cube_alt_list->AltCost(0)); + float cube_certainty = convert_prob_to_tess_certainty(cube_prob); + string cube_best_str; + CubeUtils::UTF32ToUTF8(cube_best_32, &cube_best_str); + + // Retrieve Cube's character bounding boxes and CharSamples, + // corresponding to the most recent call to RecognizeWord(). + Boxa *char_boxes = NULL; + CharSamp **char_samples = NULL;; + int num_chars; + if (!extract_cube_state(cube_obj, &num_chars, &char_boxes, &char_samples) + && cube_debug_level > 0) { + tprintf("Cube WARNING (Tesseract::cube_recognize): Cannot extract " + "cube state.\n"); + return; + } + + // Convert cube's character bounding boxes to a BoxWord. 
+ BoxWord cube_box_word; + TBOX tess_word_box = tess_werd_res->word->bounding_box(); + if (tess_werd_res->denorm.block() != NULL) + tess_word_box.rotate(tess_werd_res->denorm.block()->re_rotation()); + bool box_word_success = create_cube_box_word(char_boxes, num_chars, + tess_word_box, + &cube_box_word); + boxaDestroy(&char_boxes); + if (!box_word_success) { + if (cube_debug_level > 0) { + tprintf("Cube WARNING (Tesseract::cube_recognize): Could not " + "create cube BoxWord\n"); + } + return; + } + + // Create cube's best choice. + WERD_CHOICE* cube_werd_choice = create_werd_choice( + char_samples, num_chars, cube_best_str.c_str(), cube_certainty, + unicharset, cube_cntxt_->CharacterSet()); + delete []char_samples; + + if (!cube_werd_choice) { + if (cube_debug_level > 0) { + tprintf("Cube WARNING (Tesseract::cube_recognize): Could not " + "create cube WERD_CHOICE\n"); + } + return; + } + + // Run combiner if present, now that we're free to reuse the CubeObject. + if (tess_cube_combiner_ != NULL) { + float combiner_prob = tess_cube_combiner_->CombineResults(tess_werd_res, + cube_obj); + // If combiner probability is greater than tess/cube combiner + // classifier threshold, i.e. tesseract wins, then reset the WERD_RES + // certainty to the combiner certainty and return. Note that when + // tesseract and cube agree, the combiner probability is 1.0, so + // the final WERD_RES certainty will be maximized to 0.0. 
+ if (combiner_prob >= + cube_cntxt_->Params()->CombinerClassifierThresh()) { + float combiner_certainty = convert_prob_to_tess_certainty(combiner_prob); + tess_werd_res->best_choice->set_certainty(combiner_certainty); + delete cube_werd_choice; + return; + } + if (cube_debug_level > 5) { + tprintf("Cube INFO: tesseract result replaced by cube: " + "%s -> %s\n", + tess_werd_res->best_choice->unichar_string().string(), + cube_best_str.c_str()); + } + } + + // Fill tesseract result's fields with cube results + fill_werd_res(cube_box_word, cube_werd_choice, cube_best_str.c_str(), + page_res_it); +} + +/********************************************************************** + * fill_werd_res + * + * Fill Tesseract's word result fields with cube's. + * + **********************************************************************/ +void Tesseract::fill_werd_res(const BoxWord& cube_box_word, + WERD_CHOICE* cube_werd_choice, + const char* cube_best_str, + PAGE_RES_IT *page_res_it) { + WERD_RES *tess_werd_res = page_res_it->word(); + + // Replace tesseract results's best choice with cube's + delete tess_werd_res->best_choice; + tess_werd_res->best_choice = cube_werd_choice; + + delete tess_werd_res->box_word; + tess_werd_res->box_word = new BoxWord(cube_box_word); + tess_werd_res->box_word->ClipToOriginalWord(page_res_it->block()->block, + tess_werd_res->word); + // Fill text and remaining fields + tess_werd_res->word->set_text(cube_best_str); + tess_werd_res->tess_failed = FALSE; + tess_werd_res->tess_accepted = + tess_acceptable_word(tess_werd_res->best_choice, + tess_werd_res->raw_choice); + // There is no output word, so we can' call AdaptableWord, but then I don't + // think we need to. Fudge the result with accepted. 
+ tess_werd_res->tess_would_adapt = tess_werd_res->tess_accepted; + + // Initialize the reject_map and set it to done, i.e., ignore all of + // tesseract's tests for rejection + tess_werd_res->reject_map.initialise(cube_werd_choice->length()); + tess_werd_res->done = tess_werd_res->tess_accepted; + + // Some sanity checks + ASSERT_HOST(tess_werd_res->best_choice->length() == + tess_werd_res->best_choice->blob_choices()->length()); + ASSERT_HOST(tess_werd_res->best_choice->length() == + tess_werd_res->reject_map.length()); +} + +} // namespace tesseract diff --git a/ccmain/cube_reco_context.cpp b/ccmain/cube_reco_context.cpp new file mode 100644 index 0000000000..0f2ff63df4 --- /dev/null +++ b/ccmain/cube_reco_context.cpp @@ -0,0 +1,201 @@ +/********************************************************************** + * File: cube_reco_context.cpp + * Description: Implementation of the Cube Recognition Context Class + * Author: Ahmad Abdulkader + * Created: 2007 + * + * (C) Copyright 2008, Google Inc. + ** Licensed under the Apache License, Version 2.0 (the "License"); + ** you may not use this file except in compliance with the License. + ** You may obtain a copy of the License at + ** http://www.apache.org/licenses/LICENSE-2.0 + ** Unless required by applicable law or agreed to in writing, software + ** distributed under the License is distributed on an "AS IS" BASIS, + ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + ** See the License for the specific language governing permissions and + ** limitations under the License. 
+ * + **********************************************************************/ + +#include +#include + +#include "cube_reco_context.h" + +#include "classifier_factory.h" +#include "cube_tuning_params.h" +#include "dict.h" +#include "feature_bmp.h" +#include "tessdatamanager.h" +#include "tesseractclass.h" +#include "tess_lang_model.h" + +namespace tesseract { + +// Instantiate a CubeRecoContext object using a Tesseract object. +// CubeRecoContext will not take ownership of tess_obj, but will +// record the pointer to it and will make use of various Tesseract +// components (language model, flags, etc). Thus the caller should +// keep tess_obj alive so long as the instantiated CubeRecoContext is used. +CubeRecoContext::CubeRecoContext(Tesseract *tess_obj) { + tess_obj_ = tess_obj; + lang_ = ""; + loaded_ = false; + lang_mod_ = NULL; + params_ = NULL; + char_classifier_ = NULL; + char_set_ = NULL; + word_size_model_ = NULL; + char_bigrams_ = NULL; + word_unigrams_ = NULL; + noisy_input_ = false; + size_normalization_ = false; +} + +CubeRecoContext::~CubeRecoContext() { + if (char_classifier_ != NULL) { + delete char_classifier_; + char_classifier_ = NULL; + } + + if (word_size_model_ != NULL) { + delete word_size_model_; + word_size_model_ = NULL; + } + + if (char_set_ != NULL) { + delete char_set_; + char_set_ = NULL; + } + + if (char_bigrams_ != NULL) { + delete char_bigrams_; + char_bigrams_ = NULL; + } + + if (word_unigrams_ != NULL) { + delete word_unigrams_; + word_unigrams_ = NULL; + } + + if (lang_mod_ != NULL) { + delete lang_mod_; + lang_mod_ = NULL; + } + + if (params_ != NULL) { + delete params_; + params_ = NULL; + } +} + +// Returns the path of the data files by looking up the TESSDATA_PREFIX +// environment variable and appending a "tessdata" directory to it +bool CubeRecoContext::GetDataFilePath(string *path) const { + *path = tess_obj_->datadir.string(); + return true; +} + +// The object initialization function that loads all the necessary +// 
components of a RecoContext. TessdataManager is used to load the +// data from [lang].traineddata file. If TESSDATA_CUBE_UNICHARSET +// component is present, Cube will be instantiated with the unicharset +// specified in this component and the corresponding dictionary +// (TESSDATA_CUBE_SYSTEM_DAWG), and will map Cube's unicharset to +// Tesseract's. Otherwise, TessdataManager will assume that Cube will +// be using Tesseract's unicharset and dawgs, and will load the +// unicharset from the TESSDATA_UNICHARSET component and will load the +// dawgs from TESSDATA_*_DAWG components. +bool CubeRecoContext::Load(TessdataManager *tessdata_manager, + UNICHARSET *tess_unicharset) { + ASSERT_HOST(tess_obj_ != NULL); + string data_file_path; + + // Get the data file path. + if (GetDataFilePath(&data_file_path) == false) { + fprintf(stderr, "Unable to get data file path\n"); + return false; + } + + // Get the language from the Tesseract object. + lang_ = tess_obj_->lang.string(); + + // Create the char set. + if ((char_set_ = + CharSet::Create(tessdata_manager, tess_unicharset)) == NULL) { + fprintf(stderr, "Cube ERROR (CubeRecoContext::Load): unable to load " + "CharSet\n"); + return false; + } + // Create the language model. + string lm_file_name = data_file_path + lang_ + ".cube.lm"; + string lm_params; + if (!CubeUtils::ReadFileToString(lm_file_name, &lm_params)) { + fprintf(stderr, "Cube ERROR (CubeRecoContext::Load): unable to read cube " + "language model params from %s\n", lm_file_name.c_str()); + return false; + } + lang_mod_ = new TessLangModel(lm_params, data_file_path, + tess_obj_->getDict().load_system_dawg, + tessdata_manager, this); + if (lang_mod_ == NULL) { + fprintf(stderr, "Cube ERROR (CubeRecoContext::Load): unable to create " + "TessLangModel\n"); + return false; + } + + // Create the optional char bigrams object. + char_bigrams_ = CharBigrams::Create(data_file_path, lang_); + + // Create the optional word unigrams object. 
+ word_unigrams_ = WordUnigrams::Create(data_file_path, lang_); + + // Create the optional size model. + word_size_model_ = WordSizeModel::Create(data_file_path, lang_, + char_set_, Contextual()); + + // Load tuning params. + params_ = CubeTuningParams::Create(data_file_path, lang_); + if (params_ == NULL) { + fprintf(stderr, "Cube ERROR (CubeRecoContext::Load): unable to read " + "CubeTuningParams from %s\n", data_file_path.c_str()); + return false; + } + + // Create the char classifier. + char_classifier_ = CharClassifierFactory::Create(data_file_path, lang_, + lang_mod_, char_set_, + params_); + if (char_classifier_ == NULL) { + fprintf(stderr, "Cube ERROR (CubeRecoContext::Load): unable to load " + "CharClassifierFactory object from %s\n", data_file_path.c_str()); + return false; + } + + loaded_ = true; + + return true; +} + +// Creates a CubeRecoContext object using a tesseract object +CubeRecoContext * CubeRecoContext::Create(Tesseract *tess_obj, + TessdataManager *tessdata_manager, + UNICHARSET *tess_unicharset) { + // create the object + CubeRecoContext *cntxt = new CubeRecoContext(tess_obj); + if (cntxt == NULL) { + fprintf(stderr, "Cube ERROR (CubeRecoContext::Create): unable to create " + "CubeRecoContext object\n"); + return NULL; + } + // load the necessary components + if (cntxt->Load(tessdata_manager, tess_unicharset) == false) { + fprintf(stderr, "Cube ERROR (CubeRecoContext::Create): unable to init " + "CubeRecoContext object\n"); + delete cntxt; + return NULL; + } + // success + return cntxt; +} +} // tesseract} diff --git a/ccmain/cube_reco_context.h b/ccmain/cube_reco_context.h new file mode 100644 index 0000000000..822ef62ce7 --- /dev/null +++ b/ccmain/cube_reco_context.h @@ -0,0 +1,155 @@ +/********************************************************************** + * File: cube_reco_context.h + * Description: Declaration of the Cube Recognition Context Class + * Author: Ahmad Abdulkader + * Created: 2007 + * + * (C) Copyright 2008, Google Inc. 
+ ** Licensed under the Apache License, Version 2.0 (the "License"); + ** you may not use this file except in compliance with the License. + ** You may obtain a copy of the License at + ** http://www.apache.org/licenses/LICENSE-2.0 + ** Unless required by applicable law or agreed to in writing, software + ** distributed under the License is distributed on an "AS IS" BASIS, + ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + ** See the License for the specific language governing permissions and + ** limitations under the License. + * + **********************************************************************/ + +// The CubeRecoContext class abstracts the Cube OCR Engine. Typically a process +// (or a thread) would create one CubeRecoContext object per language. +// The CubeRecoContext object also provides methods to get and set the +// different attribues of the Cube OCR Engine. + +#ifndef CUBE_RECO_CONTEXT_H +#define CUBE_RECO_CONTEXT_H + +#include +#include "neural_net.h" +#include "lang_model.h" +#include "classifier_base.h" +#include "feature_base.h" +#include "char_set.h" +#include "word_size_model.h" +#include "char_bigrams.h" +#include "word_unigrams.h" + +namespace tesseract { + +class Tesseract; +class TessdataManager; + +class CubeRecoContext { + public: + // Reading order enum type + enum ReadOrder { + L2R, + R2L + }; + + // Instantiate using a Tesseract object + CubeRecoContext(Tesseract *tess_obj); + + ~CubeRecoContext(); + + // accessor functions + inline const string & Lang() const { return lang_; } + inline CharSet *CharacterSet() const { return char_set_; } + inline CharClassifier *Classifier() const { return char_classifier_; } + inline WordSizeModel *SizeModel() const { return word_size_model_; } + inline CharBigrams *Bigrams() const { return char_bigrams_; } + inline WordUnigrams *WordUnigramsObj() const { return word_unigrams_; } + inline TuningParams *Params() const { return params_; } + inline LangModel *LangMod() 
const { return lang_mod_; } + + // the reading order of the language + inline ReadOrder ReadingOrder() const { + return ((lang_ == "ara") ? R2L : L2R); + } + + // does the language support case + inline bool HasCase() const { + return (lang_ != "ara" && lang_ != "hin"); + } + + inline bool Cursive() const { + return (lang_ == "ara"); + } + + inline bool HasItalics() const { + return (lang_ != "ara" && lang_ != "hin" && lang_ != "uk"); + } + + inline bool Contextual() const { + return (lang_ == "ara"); + } + + // RecoContext runtime flags accessor functions + inline bool SizeNormalization() const { return size_normalization_; } + inline bool NoisyInput() const { return noisy_input_; } + inline bool OOD() const { return lang_mod_->OOD(); } + inline bool Numeric() const { return lang_mod_->Numeric(); } + inline bool WordList() const { return lang_mod_->WordList(); } + inline bool Punc() const { return lang_mod_->Punc(); } + inline bool CaseSensitive() const { + return char_classifier_->CaseSensitive(); + } + + inline void SetSizeNormalization(bool size_normalization) { + size_normalization_ = size_normalization; + } + inline void SetNoisyInput(bool noisy_input) { + noisy_input_ = noisy_input; + } + inline void SetOOD(bool ood_enabled) { + lang_mod_->SetOOD(ood_enabled); + } + inline void SetNumeric(bool numeric_enabled) { + lang_mod_->SetNumeric(numeric_enabled); + } + inline void SetWordList(bool word_list_enabled) { + lang_mod_->SetWordList(word_list_enabled); + } + inline void SetPunc(bool punc_enabled) { + lang_mod_->SetPunc(punc_enabled); + } + inline void SetCaseSensitive(bool case_sensitive) { + char_classifier_->SetCaseSensitive(case_sensitive); + } + inline tesseract::Tesseract *TesseractObject() const { + return tess_obj_; + } + + // Returns the path of the data files + bool GetDataFilePath(string *path) const; + // Creates a CubeRecoContext object using a tesseract object. 
Data + // files are loaded via the tessdata_manager, and the tesseract + // unicharset is provided in order to map Cube's unicharset to + // Tesseract's in the case where the two unicharsets differ. + static CubeRecoContext *Create(Tesseract *tess_obj, + TessdataManager *tessdata_manager, + UNICHARSET *tess_unicharset); + + private: + bool loaded_; + string lang_; + CharSet *char_set_; + WordSizeModel *word_size_model_; + CharClassifier *char_classifier_; + CharBigrams *char_bigrams_; + WordUnigrams *word_unigrams_; + TuningParams *params_; + LangModel *lang_mod_; + Tesseract *tess_obj_; // CubeRecoContext does not own this pointer + bool size_normalization_; + bool noisy_input_; + + // Loads and initialized all the necessary components of a + // CubeRecoContext. See .cpp for more details. + bool Load(TessdataManager *tessdata_manager, + UNICHARSET *tess_unicharset); +}; +} + +#endif // CUBE_RECO_CONTEXT_H diff --git a/ccmain/docqual.cpp b/ccmain/docqual.cpp index 892afdf0d1..a97157e5ad 100644 --- a/ccmain/docqual.cpp +++ b/ccmain/docqual.cpp @@ -27,397 +27,109 @@ #include "tstruct.h" #include "tfacep.h" #include "reject.h" +#include "tesscallback.h" #include "tessvars.h" #include "genblob.h" #include "secname.h" #include "globals.h" #include "tesseractclass.h" -#define EXTERN - -EXTERN STRING_VAR (outlines_odd, "%| ", "Non standard number of outlines"); -EXTERN STRING_VAR (outlines_2, "ij!?%\":;", -"Non standard number of outlines"); -EXTERN BOOL_VAR (docqual_excuse_outline_errs, FALSE, -"Allow outline errs in unrejection?"); -EXTERN BOOL_VAR (tessedit_good_quality_unrej, TRUE, -"Reduce rejection on good docs"); -EXTERN BOOL_VAR (tessedit_use_reject_spaces, TRUE, "Reject spaces?"); -EXTERN double_VAR (tessedit_reject_doc_percent, 65.00, -"%rej allowed before rej whole doc"); -EXTERN double_VAR (tessedit_reject_block_percent, 45.00, -"%rej allowed before rej whole block"); -EXTERN double_VAR (tessedit_reject_row_percent, 40.00, -"%rej allowed before rej whole 
row"); -EXTERN double_VAR (tessedit_whole_wd_rej_row_percent, 70.00, -"%of row rejects in whole word rejects which prevents whole row rejection"); -EXTERN BOOL_VAR (tessedit_preserve_blk_rej_perfect_wds, TRUE, -"Only rej partially rejected words in block rejection"); -EXTERN BOOL_VAR (tessedit_preserve_row_rej_perfect_wds, TRUE, -"Only rej partially rejected words in row rejection"); -EXTERN BOOL_VAR (tessedit_dont_blkrej_good_wds, FALSE, -"Use word segmentation quality metric"); -EXTERN BOOL_VAR (tessedit_dont_rowrej_good_wds, FALSE, -"Use word segmentation quality metric"); -EXTERN INT_VAR (tessedit_preserve_min_wd_len, 2, -"Only preserve wds longer than this"); -EXTERN BOOL_VAR (tessedit_row_rej_good_docs, TRUE, -"Apply row rejection to good docs"); -EXTERN double_VAR (tessedit_good_doc_still_rowrej_wd, 1.1, -"rej good doc wd if more than this fraction rejected"); -EXTERN BOOL_VAR (tessedit_reject_bad_qual_wds, TRUE, -"Reject all bad quality wds"); -EXTERN BOOL_VAR (tessedit_debug_doc_rejection, FALSE, "Page stats"); -EXTERN BOOL_VAR (tessedit_debug_quality_metrics, FALSE, -"Output data to debug file"); -EXTERN BOOL_VAR (bland_unrej, FALSE, "unrej potential with no checks"); -EXTERN double_VAR (quality_rowrej_pc, 1.1, -"good_quality_doc gte good char limit"); - -EXTERN BOOL_VAR (unlv_tilde_crunching, TRUE, -"Mark v.bad words for tilde crunch"); -EXTERN BOOL_VAR (crunch_early_merge_tess_fails, TRUE, "Before word crunch?"); -EXTERN BOOL_EVAR (crunch_early_convert_bad_unlv_chs, FALSE, -"Take out ~^ early?"); - -EXTERN double_VAR (crunch_terrible_rating, 80.0, "crunch rating lt this"); -EXTERN BOOL_VAR (crunch_terrible_garbage, TRUE, "As it says"); -EXTERN double_VAR (crunch_poor_garbage_cert, -9.0, -"crunch garbage cert lt this"); -EXTERN double_VAR (crunch_poor_garbage_rate, 60, -"crunch garbage rating lt this"); - -EXTERN double_VAR (crunch_pot_poor_rate, 40, -"POTENTIAL crunch rating lt this"); -EXTERN double_VAR (crunch_pot_poor_cert, -8.0, -"POTENTIAL crunch 
cert lt this"); -EXTERN BOOL_VAR (crunch_pot_garbage, TRUE, "POTENTIAL crunch garbage"); - -EXTERN double_VAR (crunch_del_rating, 60, "POTENTIAL crunch rating lt this"); -EXTERN double_VAR (crunch_del_cert, -10.0, "POTENTIAL crunch cert lt this"); -EXTERN double_VAR (crunch_del_min_ht, 0.7, "Del if word ht lt xht x this"); -EXTERN double_VAR (crunch_del_max_ht, 3.0, "Del if word ht gt xht x this"); -EXTERN double_VAR (crunch_del_min_width, 3.0, -"Del if word width lt xht x this"); -EXTERN double_VAR (crunch_del_high_word, 1.5, -"Del if word gt xht x this above bl"); -EXTERN double_VAR (crunch_del_low_word, 0.5, -"Del if word gt xht x this below bl"); -EXTERN double_VAR (crunch_small_outlines_size, 0.6, "Small if lt xht x this"); - -EXTERN INT_VAR (crunch_rating_max, 10, "For adj length in rating per ch"); -EXTERN INT_VAR (crunch_pot_indicators, 1, -"How many potential indicators needed"); - -EXTERN BOOL_VAR (crunch_leave_ok_strings, TRUE, -"Dont touch sensible strings"); -EXTERN BOOL_VAR (crunch_accept_ok, TRUE, "Use acceptability in okstring"); -EXTERN BOOL_VAR (crunch_leave_accept_strings, FALSE, -"Dont pot crunch sensible strings"); -EXTERN BOOL_VAR (crunch_include_numerals, FALSE, "Fiddle alpha figures"); -EXTERN INT_VAR (crunch_leave_lc_strings, 4, -"Dont crunch words with long lower case strings"); -EXTERN INT_VAR (crunch_leave_uc_strings, 4, -"Dont crunch words with long lower case strings"); -EXTERN INT_VAR (crunch_long_repetitions, 3, -"Crunch words with long repetitions"); - -EXTERN INT_VAR (crunch_debug, 0, "As it says"); - -static BOOL8 crude_match_blobs(PBLOB *blob1, PBLOB *blob2); -static void unrej_good_chs(WERD_RES *word, ROW *row); +namespace tesseract{ -/************************************************************************* - * word_blob_quality() - * How many blobs in the outword are identical to those of the inword? - * ASSUME blobs in both initial word and outword are in ascending order of - * left hand blob edge. 
- *************************************************************************/ -inT16 word_blob_quality( //Blob seg changes - WERD_RES *word, - ROW *row) { - WERD *bln_word; //BL norm init word - TWERD *tessword; //tess format - WERD *init_word; //BL norm init word - PBLOB_IT outword_it; - PBLOB_IT initial_it; - inT16 i; - inT16 init_blobs_left; - inT16 match_count = 0; - BOOL8 matched; - TBOX out_box; - PBLOB *test_blob; - DENORM denorm; - float bln_xht; - - if (word->word->gblob_list ()->empty ()) - return 0; - //xht used for blnorm - bln_xht = bln_x_height / word->denorm.scale (); - bln_word = make_bln_copy(word->word, row, NULL, bln_xht, &denorm); - /* - NOTE: Need to convert to tess format and back again to ensure that the - same float -> int rounding of coords is done to source wd as out wd before - comparison - */ - tessword = make_tess_word(bln_word, NULL); // Convert word. - init_word = make_ed_word(tessword, bln_word); - delete bln_word; - delete_word(tessword); - if (init_word == NULL) { - // Conversion failed. - return 0; - } - - initial_it.set_to_list(init_word->blob_list()); - init_blobs_left = initial_it.length(); - outword_it.set_to_list(word->outword->blob_list()); - - for (outword_it.mark_cycle_pt(); - !outword_it.cycled_list(); outword_it.forward()) { - out_box = outword_it.data()->bounding_box(); +// A little class to provide the callbacks as we have no pre-bound args. +struct DocQualCallbacks { + explicit DocQualCallbacks(WERD_RES* word0) + : word(word0), match_count(0), accepted_match_count(0) {} - // Skip any initial blobs LEFT of current outword blob. - while (!initial_it.at_last() && - (initial_it.data()->bounding_box().left() < out_box.left())) { - initial_it.forward(); - init_blobs_left--; - } + void CountMatchingBlobs(int index) { + ++match_count; + } - /* See if current outword blob matches any initial blob with the same left - coord. 
(Normally only one but possibly more - in unknown order) */ + void CountAcceptedBlobs(int index) { + if (word->reject_map[index].accepted()) + ++accepted_match_count; + ++match_count; + } - i = 0; - matched = FALSE; - do { - test_blob = initial_it.data_relative (i++); - matched = crude_match_blobs (test_blob, outword_it.data ()); - if (matched) - match_count++; - } - while (!matched && - (init_blobs_left - i > 0) && - (i < 129) && - !initial_it.at_last() && - test_blob->bounding_box().left() == out_box.left()); + void AcceptIfGoodQuality(int index) { + if (word->reject_map[index].accept_if_good_quality()) + word->reject_map[index].setrej_quality_accept(); } - delete init_word; - return match_count; -} + WERD_RES* word; + inT16 match_count; + inT16 accepted_match_count; +}; /************************************************************************* - * crude_match_blobs() - * Check bounding boxes are the same and the number of outlines are the same. + * word_blob_quality() + * How many blobs in the box_word are identical to those of the inword? + * ASSUME blobs in both initial word and box_word are in ascending order of + * left hand blob edge. 
*************************************************************************/ -static BOOL8 crude_match_blobs(PBLOB *blob1, PBLOB *blob2) { - TBOX box1 = blob1->bounding_box(); - TBOX box2 = blob2->bounding_box(); +inT16 Tesseract::word_blob_quality(WERD_RES *word, ROW *row) { + if (word->bln_boxes == NULL || + word->rebuild_word == NULL || word->rebuild_word->blobs == NULL) + return 0; - if (box1.contains(box2) && - box2.contains(box1) && - (blob1->out_list()->length() == blob1->out_list()->length())) - return TRUE; - else - return FALSE; + DocQualCallbacks cb(word); + word->bln_boxes->ProcessMatchedBlobs( + *word->rebuild_word, + NewPermanentTessCallback(&cb, &DocQualCallbacks::CountMatchingBlobs)); + return cb.match_count; } - -inT16 word_outline_errs(WERD_RES *word) { - PBLOB_IT outword_it; +inT16 Tesseract::word_outline_errs(WERD_RES *word) { inT16 i = 0; inT16 err_count = 0; - outword_it.set_to_list(word->outword->blob_list()); + TBLOB* blob = word->rebuild_word->blobs; - for (outword_it.mark_cycle_pt(); - !outword_it.cycled_list(); outword_it.forward()) { + for (; blob != NULL; blob = blob->next) { err_count += count_outline_errs(word->best_choice->unichar_string()[i], - outword_it.data()->out_list()->length()); + blob->NumOutlines()); i++; } return err_count; } - /************************************************************************* * word_char_quality() * Combination of blob quality and outline quality - how many good chars are * there? - I.e chars which pass the blob AND outline tests. 
*************************************************************************/ -void word_char_quality(WERD_RES *word, - ROW *row, - inT16 *match_count, - inT16 *accepted_match_count) { - WERD *bln_word; // BL norm init word - TWERD *tessword; // tess format - WERD *init_word; // BL norm init word - PBLOB_IT outword_it; - PBLOB_IT initial_it; - inT16 i; - inT16 init_blobs_left; - BOOL8 matched; - TBOX out_box; - PBLOB *test_blob; - DENORM denorm; - float bln_xht; - inT16 j = 0; - - *match_count = 0; - *accepted_match_count = 0; - if (word->word->gblob_list ()->empty ()) - return; - - // xht used for blnorm - bln_xht = bln_x_height / word->denorm.scale(); - bln_word = make_bln_copy(word->word, row, NULL, bln_xht, &denorm); - /* - NOTE: Need to convert to tess format and back again to ensure that the - same float -> int rounding of coords is done to source wd as out wd before - comparison - */ - tessword = make_tess_word(bln_word, NULL); // Convert word. - init_word = make_ed_word(tessword, bln_word); - delete bln_word; - delete_word(tessword); - if (init_word == NULL) +void Tesseract::word_char_quality(WERD_RES *word, + ROW *row, + inT16 *match_count, + inT16 *accepted_match_count) { + if (word->bln_boxes == NULL || + word->rebuild_word == NULL || word->rebuild_word->blobs == NULL) return; - initial_it.set_to_list(init_word->blob_list()); - init_blobs_left = initial_it.length(); - outword_it.set_to_list(word->outword->blob_list()); - - for (outword_it.mark_cycle_pt(); - !outword_it.cycled_list(); outword_it.forward()) { - out_box = outword_it.data()->bounding_box(); - - /* Skip any initial blobs LEFT of current outword blob */ - while (!initial_it.at_last() && - (initial_it.data()->bounding_box().left() < out_box.left())) { - initial_it.forward(); - init_blobs_left--; - } - - /* See if current outword blob matches any initial blob with the same left - coord. 
(Normally only one but possibly more - in unknown order) */ - - i = 0; - matched = FALSE; - do { - test_blob = initial_it.data_relative(i++); - matched = crude_match_blobs(test_blob, outword_it.data()); - if (matched && - (count_outline_errs (word->best_choice->unichar_string()[j], - outword_it.data ()->out_list ()->length ()) - == 0)) { - (*match_count)++; - if (word->reject_map[j].accepted ()) - (*accepted_match_count)++; - } - } - while (!matched && - (init_blobs_left - i > 0) && - (i < 129) && - !initial_it.at_last() && - test_blob->bounding_box().left() == out_box.left()); - j++; - } - delete init_word; + DocQualCallbacks cb(word); + word->bln_boxes->ProcessMatchedBlobs( + *word->rebuild_word, + NewPermanentTessCallback(&cb, &DocQualCallbacks::CountAcceptedBlobs)); + *match_count = cb.match_count; + *accepted_match_count = cb.accepted_match_count; } - /************************************************************************* * unrej_good_chs() * Unreject POTENTIAL rejects if the blob passes the blob and outline checks *************************************************************************/ -static void unrej_good_chs(WERD_RES *word, ROW *row) { - WERD *bln_word; // BL norm init word - TWERD *tessword; // tess format - WERD *init_word; // BL norm init word - PBLOB_IT outword_it; - PBLOB_IT initial_it; - inT16 i; - inT16 init_blobs_left; - BOOL8 matched; - TBOX out_box; - PBLOB *test_blob; - DENORM denorm; - float bln_xht; - inT16 j = 0; - - if (word->word->gblob_list ()->empty ()) +void Tesseract::unrej_good_chs(WERD_RES *word, ROW *row) { + if (word->bln_boxes == NULL || + word->rebuild_word == NULL || word->rebuild_word->blobs == NULL) return; - // xht used for blnorm - bln_xht = bln_x_height / word->denorm.scale (); - bln_word = make_bln_copy(word->word, row, NULL, bln_xht, &denorm); - /* - NOTE: Need to convert to tess format and back again to ensure that the - same float -> int rounding of coords is done to source wd as out wd before - comparison - */ - 
tessword = make_tess_word(bln_word, NULL); // Convert word - init_word = make_ed_word(tessword, bln_word); - delete bln_word; - delete_word(tessword); - if (init_word == NULL) - return; - - initial_it.set_to_list (init_word->blob_list ()); - init_blobs_left = initial_it.length (); - outword_it.set_to_list (word->outword->blob_list ()); - - for (outword_it.mark_cycle_pt (); - !outword_it.cycled_list (); outword_it.forward ()) { - out_box = outword_it.data ()->bounding_box (); - - /* Skip any initial blobs LEFT of current outword blob */ - while (!initial_it.at_last () && - (initial_it.data ()->bounding_box ().left () < out_box.left ())) { - initial_it.forward (); - init_blobs_left--; - } - - /* See if current outword blob matches any initial blob with the same left - coord. (Normally only one but possibly more - in unknown order) */ - - i = 0; - matched = FALSE; - do { - test_blob = initial_it.data_relative (i++); - matched = crude_match_blobs (test_blob, outword_it.data ()); - if (matched && - (word->reject_map[j].accept_if_good_quality ()) && - (docqual_excuse_outline_errs || - (count_outline_errs (word->best_choice->unichar_string()[j], - outword_it.data ()->out_list ()-> - length ()) == 0))) - word->reject_map[j].setrej_quality_accept (); - } - while (!matched && - (init_blobs_left - i > 0) && - (i < 129) && - !initial_it.at_last () && - test_blob->bounding_box ().left () == out_box.left ()); - j++; - } - delete init_word; + DocQualCallbacks cb(word); + word->bln_boxes->ProcessMatchedBlobs( + *word->rebuild_word, + NewPermanentTessCallback(&cb, &DocQualCallbacks::AcceptIfGoodQuality)); } - -void print_boxes(WERD *word) { - PBLOB_IT it; - TBOX box; - - it.set_to_list (word->blob_list ()); - for (it.mark_cycle_pt (); !it.cycled_list (); it.forward ()) { - box = it.data ()->bounding_box (); - box.print (); - } -} - - -inT16 count_outline_errs(char c, inT16 outline_count) { +inT16 Tesseract::count_outline_errs(char c, inT16 outline_count) { int 
expected_outline_count; if (STRING (outlines_odd).contains (c)) @@ -429,20 +141,11 @@ inT16 count_outline_errs(char c, inT16 outline_count) { return abs (outline_count - expected_outline_count); } - -namespace tesseract { void Tesseract::quality_based_rejection(PAGE_RES_IT &page_res_it, BOOL8 good_quality_doc) { if ((tessedit_good_quality_unrej && good_quality_doc)) unrej_good_quality_words(page_res_it); doc_and_block_rejection(page_res_it, good_quality_doc); - - page_res_it.restart_page (); - while (page_res_it.word () != NULL) { - insert_rej_cblobs(page_res_it.word()); - page_res_it.forward(); - } - if (unlv_tilde_crunching) { tilde_crunch(page_res_it); tilde_delete(page_res_it); @@ -542,7 +245,7 @@ void Tesseract::doc_and_block_rejection( //reject big chunks BOOL8 rej_word; BOOL8 prev_word_rejected; - inT16 char_quality; + inT16 char_quality = 0; inT16 accepted_char_quality; if ((page_res_it.page_res->rej_count * 100.0 / @@ -833,10 +536,10 @@ void Tesseract::tilde_crunch(PAGE_RES_IT &page_res_it) { page_res_it.forward (); } } -} // namespace tesseract -BOOL8 terrible_word_crunch(WERD_RES *word, GARBAGE_LEVEL garbage_level) { +BOOL8 Tesseract::terrible_word_crunch(WERD_RES *word, + GARBAGE_LEVEL garbage_level) { float rating_per_ch; int adjusted_len; int crunch_mode = 0; @@ -873,7 +576,6 @@ BOOL8 terrible_word_crunch(WERD_RES *word, GARBAGE_LEVEL garbage_level) { return FALSE; } -namespace tesseract { BOOL8 Tesseract::potential_word_crunch(WERD_RES *word, GARBAGE_LEVEL garbage_level, BOOL8 ok_dict_word) { @@ -1022,36 +724,30 @@ void Tesseract::convert_bad_unlv_chs(WERD_RES *word_res) { } } +// Callback helper for merge_tess_fails returns a space if both +// arguments are space, otherwise INVALID_UNICHAR_ID. 
+UNICHAR_ID Tesseract::BothSpaces(UNICHAR_ID id1, UNICHAR_ID id2) { + if (id1 == id2 && id1 == unicharset.unichar_to_id(" ")) + return id1; + else + return INVALID_UNICHAR_ID; +} + // Change pairs of tess failures to a single one void Tesseract::merge_tess_fails(WERD_RES *word_res) { - PBLOB_IT blob_it; //blobs - int len = word_res->best_choice->length(); - bool modified = false; - - ASSERT_HOST (word_res->reject_map.length () == len); - ASSERT_HOST (word_res->outword->blob_list ()->length () == len); - - UNICHAR_ID unichar_space = unicharset.unichar_to_id(" "); - blob_it = word_res->outword->blob_list (); - int i = 0; - while (i < word_res->best_choice->length()-1) { - if ((word_res->best_choice->unichar_id(i) == unichar_space) && - (word_res->best_choice->unichar_id(i+1) == unichar_space)) { - modified = true; - word_res->best_choice->remove_unichar_id(i); - word_res->reject_map.remove_pos (i); - merge_blobs (blob_it.data_relative (1), blob_it.data ()); - delete blob_it.extract (); //get rid of spare - } else { - i++; - } - blob_it.forward (); - } - len = word_res->best_choice->length(); - ASSERT_HOST (word_res->reject_map.length () == len); - ASSERT_HOST (word_res->outword->blob_list ()->length () == len); - if (modified) { - word_res->best_choice->populate_unichars(unicharset); + if (word_res->ConditionalBlobMerge( + unicharset, + NewPermanentTessCallback(this, &Tesseract::BothSpaces), NULL, + word_res->best_choice->blob_choices())) { + tprintf("Post:bc len=%d, rejmap=%d, boxword=%d, chopword=%d, rebuild=%d\n", + word_res->best_choice->length(), + word_res->reject_map.length(), + word_res->box_word->length(), + word_res->chopped_word->NumBlobs(), + word_res->rebuild_word->NumBlobs()); + int len = word_res->best_choice->length(); + ASSERT_HOST(word_res->reject_map.length() == len); + ASSERT_HOST(word_res->box_word->length() == len); } } @@ -1252,7 +948,6 @@ GARBAGE_LEVEL Tesseract::garbage_word(WERD_RES *word, BOOL8 ok_dict_word) { return G_OK; } } -} // 
namespace tesseract /************************************************************************* @@ -1271,7 +966,7 @@ GARBAGE_LEVEL Tesseract::garbage_word(WERD_RES *word, BOOL8 ok_dict_word) { * >75% of the outline BBs have longest dimension < 0.5xht *************************************************************************/ -CRUNCH_MODE word_deletable(WERD_RES *word, inT16 &delete_mode) { +CRUNCH_MODE Tesseract::word_deletable(WERD_RES *word, inT16 &delete_mode) { int word_len = word->reject_map.length (); float rating_per_ch; TBOX box; //BB of word @@ -1286,13 +981,13 @@ CRUNCH_MODE word_deletable(WERD_RES *word, inT16 &delete_mode) { return CR_DELETE; } - box = word->outword->bounding_box (); - if (box.height () < crunch_del_min_ht * bln_x_height) { + box = word->rebuild_word->bounding_box(); + if (box.height () < crunch_del_min_ht * kBlnXHeight) { delete_mode = 4; return CR_DELETE; } - if (noise_outlines (word->outword)) { + if (noise_outlines(word->rebuild_word)) { delete_mode = 5; return CR_DELETE; } @@ -1314,23 +1009,23 @@ CRUNCH_MODE word_deletable(WERD_RES *word, inT16 &delete_mode) { return CR_LOOSE_SPACE; } - if (box.top () < bln_baseline_offset - crunch_del_low_word * bln_x_height) { + if (box.top () < kBlnBaselineOffset - crunch_del_low_word * kBlnXHeight) { delete_mode = 9; return CR_LOOSE_SPACE; } if (box.bottom () > - bln_baseline_offset + crunch_del_high_word * bln_x_height) { + kBlnBaselineOffset + crunch_del_high_word * kBlnXHeight) { delete_mode = 10; return CR_LOOSE_SPACE; } - if (box.height () > crunch_del_max_ht * bln_x_height) { + if (box.height () > crunch_del_max_ht * kBlnXHeight) { delete_mode = 11; return CR_LOOSE_SPACE; } - if (box.width () < crunch_del_min_width * bln_x_height) { + if (box.width () < crunch_del_min_width * kBlnXHeight) { delete_mode = 3; return CR_LOOSE_SPACE; } @@ -1339,7 +1034,7 @@ CRUNCH_MODE word_deletable(WERD_RES *word, inT16 &delete_mode) { return CR_NONE; } -inT16 failure_count(WERD_RES *word) { +inT16 
Tesseract::failure_count(WERD_RES *word) { const char *str = word->best_choice->unichar_string().string(); int tess_rejs = 0; @@ -1351,134 +1046,25 @@ inT16 failure_count(WERD_RES *word) { } -BOOL8 noise_outlines(WERD *word) { - PBLOB_IT blob_it; - OUTLINE_IT outline_it; - TBOX box; //BB of outline +BOOL8 Tesseract::noise_outlines(TWERD *word) { + TBOX box; // BB of outline inT16 outline_count = 0; inT16 small_outline_count = 0; inT16 max_dimension; - float small_limit = bln_x_height * crunch_small_outlines_size; + float small_limit = kBlnXHeight * crunch_small_outlines_size; - blob_it.set_to_list (word->blob_list ()); - for (blob_it.mark_cycle_pt (); !blob_it.cycled_list (); blob_it.forward ()) { - outline_it.set_to_list (blob_it.data ()->out_list ()); - for (outline_it.mark_cycle_pt (); - !outline_it.cycled_list (); outline_it.forward ()) { + for (TBLOB* blob = word->blobs; blob != NULL; blob = blob->next) { + for (TESSLINE* ol = blob->outlines; ol != NULL; ol = ol->next) { outline_count++; - box = outline_it.data ()->bounding_box (); - if (box.height () > box.width ()) - max_dimension = box.height (); + box = ol->bounding_box(); + if (box.height() > box.width()) + max_dimension = box.height(); else - max_dimension = box.width (); + max_dimension = box.width(); if (max_dimension < small_limit) small_outline_count++; } } return (small_outline_count >= outline_count); } - - -/************************************************************************* - * insert_rej_cblobs() - * Put rejected word blobs back into the outword. - * NOTE!!! AFTER THIS THE CHOICES LIST WILL NOT HAVE THE CORRECT NUMBER - * OF ELEMENTS. 
- *************************************************************************/ -namespace tesseract { -void Tesseract::insert_rej_cblobs(WERD_RES *word) { - PBLOB_IT blob_it; //blob iterator - PBLOB_IT rej_blob_it; - const STRING *word_str; - const STRING *word_lengths; - int old_len; - int rej_len; - char new_str[512 * UNICHAR_LEN]; - char new_lengths[512]; - REJMAP new_map; - int i = 0; //new_str index - int j = 0; //old_str index - int i_offset = 0; //new_str offset - int j_offset = 0; //old_str offset - int new_len; - - gblob_sort_list (word->outword->rej_blob_list (), TRUE); - rej_blob_it.set_to_list (word->outword->rej_blob_list ()); - if (rej_blob_it.empty ()) - return; - rej_len = rej_blob_it.length (); - blob_it.set_to_list (word->outword->blob_list ()); - word_str = &(word->best_choice->unichar_string()); - word_lengths = &(word->best_choice->unichar_lengths()); - old_len = word->best_choice->length(); - ASSERT_HOST (word->reject_map.length () == old_len); - ASSERT_HOST (blob_it.length () == old_len); - if ((old_len + rej_len) > 511) - return; //Word is garbage anyway prevent abort - new_map.initialise (old_len + rej_len); - - while (!rej_blob_it.empty ()) { - if ((j >= old_len) || - (rej_blob_it.data ()->bounding_box ().left () <= - blob_it.data ()->bounding_box ().left ())) { - /* Insert reject blob */ - if (j >= old_len) - blob_it.add_to_end (rej_blob_it.extract ()); - else - blob_it.add_before_stay_put (rej_blob_it.extract ()); - if (!rej_blob_it.empty ()) - rej_blob_it.forward (); - new_str[i_offset] = ' '; - new_lengths[i] = 1; - new_map[i].setrej_rej_cblob (); - i_offset += new_lengths[i++]; - } - else { - strncpy(new_str + i_offset, &(*word_str)[j_offset], - (*word_lengths)[j]); - new_lengths[i] = (*word_lengths)[j]; - new_map[i] = word->reject_map[j]; - i_offset += new_lengths[i++]; - j_offset += (*word_lengths)[j++]; - blob_it.forward (); - } - } - /* Add any extra normal blobs to strings */ - while (j < word_lengths->length ()) { - 
strncpy(new_str + i_offset, &(*word_str)[j_offset], - (*word_lengths)[j]); - new_lengths[i] = (*word_lengths)[j]; - new_map[i] = word->reject_map[j]; - i_offset += new_lengths[i++]; - j_offset += (*word_lengths)[j++]; - } - new_str[i_offset] = '\0'; - new_lengths[i] = 0; - /* - tprintf( - "\nOld len %d; New len %d; New str \"%s\"; New map \"%s\"\n", - old_len, i, new_str, new_map ); - */ - ASSERT_HOST (i == blob_it.length ()); - ASSERT_HOST (i == old_len + rej_len); - word->reject_map = new_map; - - // Update word->best_choice if needed. - if (strcmp(new_str, word->best_choice->unichar_string().string()) != 0 || - strcmp(new_lengths, word->best_choice->unichar_lengths().string()) != 0) { - WERD_CHOICE *new_choice = - new WERD_CHOICE(new_str, new_lengths, - word->best_choice->rating(), - word->best_choice->certainty(), - word->best_choice->permuter(), - getDict().getUnicharset()); - new_choice->populate_unichars(getDict().getUnicharset()); - delete word->best_choice; - word->best_choice = new_choice; - } - new_len = word->best_choice->length(); - ASSERT_HOST (word->reject_map.length () == new_len); - ASSERT_HOST (word->outword->blob_list ()->length () == new_len); - -} } // namespace tesseract diff --git a/ccmain/docqual.h b/ccmain/docqual.h index 402cd51946..61fa6f46ff 100644 --- a/ccmain/docqual.h +++ b/ccmain/docqual.h @@ -31,108 +31,6 @@ enum GARBAGE_LEVEL G_TERRIBLE }; -extern STRING_VAR_H (outlines_odd, "%| ", "Non standard number of outlines"); -extern STRING_VAR_H (outlines_2, "ij!?%\":;", -"Non standard number of outlines"); -extern BOOL_VAR_H (docqual_excuse_outline_errs, FALSE, -"Allow outline errs in unrejection?"); -extern BOOL_VAR_H (tessedit_good_quality_unrej, TRUE, -"Reduce rejection on good docs"); -extern BOOL_VAR_H (tessedit_use_reject_spaces, TRUE, "Reject spaces?"); -extern double_VAR_H (tessedit_reject_doc_percent, 65.00, -"%rej allowed before rej whole doc"); -extern double_VAR_H (tessedit_reject_block_percent, 45.00, -"%rej allowed before 
rej whole block"); -extern double_VAR_H (tessedit_reject_row_percent, 40.00, -"%rej allowed before rej whole row"); -extern double_VAR_H (tessedit_whole_wd_rej_row_percent, 70.00, -"%of row rejects in whole word rejects which prevents whole row rejection"); -extern BOOL_VAR_H (tessedit_preserve_blk_rej_perfect_wds, TRUE, -"Only rej partially rejected words in block rejection"); -extern BOOL_VAR_H (tessedit_preserve_row_rej_perfect_wds, TRUE, -"Only rej partially rejected words in row rejection"); -extern BOOL_VAR_H (tessedit_dont_blkrej_good_wds, FALSE, -"Use word segmentation quality metric"); -extern BOOL_VAR_H (tessedit_dont_rowrej_good_wds, FALSE, -"Use word segmentation quality metric"); -extern INT_VAR_H (tessedit_preserve_min_wd_len, 2, -"Only preserve wds longer than this"); -extern BOOL_VAR_H (tessedit_row_rej_good_docs, TRUE, -"Apply row rejection to good docs"); -extern double_VAR_H (tessedit_good_doc_still_rowrej_wd, 1.1, -"rej good doc wd if more than this fraction rejected"); -extern BOOL_VAR_H (tessedit_reject_bad_qual_wds, TRUE, -"Reject all bad quality wds"); -extern BOOL_VAR_H (tessedit_debug_doc_rejection, FALSE, "Page stats"); -extern BOOL_VAR_H (tessedit_debug_quality_metrics, FALSE, -"Output data to debug file"); -extern BOOL_VAR_H (bland_unrej, FALSE, "unrej potential with no chekcs"); -extern double_VAR_H (quality_rowrej_pc, 1.1, -"good_quality_doc gte good char limit"); -extern BOOL_VAR_H (unlv_tilde_crunching, TRUE, -"Mark v.bad words for tilde crunch"); -extern BOOL_VAR_H (crunch_early_merge_tess_fails, TRUE, -"Before word crunch?"); -extern BOOL_VAR_H (crunch_early_convert_bad_unlv_chs, FALSE, -"Take out ~^ early?"); -extern double_VAR_H (crunch_terrible_rating, 80.0, "crunch rating lt this"); -extern BOOL_VAR_H (crunch_terrible_garbage, TRUE, "As it says"); -extern double_VAR_H (crunch_poor_garbage_cert, -9.0, -"crunch garbage cert lt this"); -extern double_VAR_H (crunch_poor_garbage_rate, 60, -"crunch garbage rating lt this"); -extern 
double_VAR_H (crunch_pot_poor_rate, 40, -"POTENTIAL crunch rating lt this"); -extern double_VAR_H (crunch_pot_poor_cert, -8.0, -"POTENTIAL crunch cert lt this"); -extern BOOL_VAR_H (crunch_pot_garbage, TRUE, "POTENTIAL crunch garbage"); -extern double_VAR_H (crunch_del_rating, 60, -"POTENTIAL crunch rating lt this"); -extern double_VAR_H (crunch_del_cert, -10.0, "POTENTIAL crunch cert lt this"); -extern double_VAR_H (crunch_del_min_ht, 0.7, "Del if word ht lt xht x this"); -extern double_VAR_H (crunch_del_max_ht, 3.0, "Del if word ht gt xht x this"); -extern double_VAR_H (crunch_del_min_width, 3.0, -"Del if word width lt xht x this"); -extern double_VAR_H (crunch_del_high_word, 1.5, -"Del if word gt xht x this above bl"); -extern double_VAR_H (crunch_del_low_word, 0.5, -"Del if word gt xht x this below bl"); -extern double_VAR_H (crunch_small_outlines_size, 0.6, -"Small if lt xht x this"); -extern INT_VAR_H (crunch_rating_max, 10, "For adj length in rating per ch"); -extern INT_VAR_H (crunch_pot_indicators, 1, -"How many potential indicators needed"); -extern BOOL_VAR_H (crunch_leave_ok_strings, TRUE, -"Dont touch sensible strings"); -extern BOOL_VAR_H (crunch_accept_ok, TRUE, "Use acceptability in okstring"); -extern BOOL_VAR_H (crunch_leave_accept_strings, FALSE, -"Dont pot crunch sensible strings"); -extern BOOL_VAR_H (crunch_include_numerals, FALSE, "Fiddle alpha figures"); -extern INT_VAR_H (crunch_leave_lc_strings, 4, -"Dont crunch words with long lower case strings"); -extern INT_VAR_H (crunch_leave_uc_strings, 4, -"Dont crunch words with long lower case strings"); -extern INT_VAR_H (crunch_long_repetitions, 3, -"Crunch words with long repetitions"); -extern INT_VAR_H (crunch_debug, 0, "As it says"); -inT16 word_blob_quality( //Blob seg changes - WERD_RES *word, - ROW *row); -//BOOL8 crude_match_blobs(PBLOB *blob1, PBLOB *blob2); -inT16 word_outline_errs( //Outline count errs - WERD_RES *word); -void word_char_quality( //Blob seg changes - WERD_RES *word, - 
ROW *row, - inT16 *match_count, - inT16 *accepted_match_count); -//void unrej_good_chs(WERD_RES *word, ROW *row); -void print_boxes(WERD *word); -inT16 count_outline_errs(char c, inT16 outline_count); +inT16 word_blob_quality(WERD_RES *word, ROW *row); void reject_whole_page(PAGE_RES_IT &page_res_it); -BOOL8 terrible_word_crunch(WERD_RES *word, GARBAGE_LEVEL garbage_level); - //word to do -CRUNCH_MODE word_deletable(WERD_RES *word, inT16 &delete_mode); -inT16 failure_count(WERD_RES *word); -BOOL8 noise_outlines(WERD *word); #endif diff --git a/ccmain/expandblob.cpp b/ccmain/expandblob.cpp deleted file mode 100644 index f80236a89d..0000000000 --- a/ccmain/expandblob.cpp +++ /dev/null @@ -1,82 +0,0 @@ -/************************************************************************** - * Revision 5.1 89/07/27 11:46:53 11:46:53 ray () - * (C) Copyright 1989, Hewlett-Packard Ltd. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. - * -**************************************************************************/ -#include "mfcpch.h" -#include "expandblob.h" -#include "tessclas.h" -#include "const.h" -#include "structures.h" -#include "freelist.h" - -/*********************************************************************** -free_blob(blob) frees the blob and everything it is connected to, -i.e. 
outlines, nodes, edgepts, bytevecs, ratings etc -*************************************************************************/ -void free_blob( /*blob to free */ - register TBLOB *blob) { - if (blob == NULL) - return; /*duff blob */ - free_tree (blob->outlines); /*do the tree of outlines */ - oldblob(blob); /*free the actual blob */ -} - - -/*************************************************************************** -free_tree(outline) frees the current outline -and then its sub-tree -*****************************************************************************/ -void free_tree( /*outline to draw */ - register TESSLINE *outline) { - if (outline == NULL) - return; /*duff outline */ - if (outline->next != NULL) - free_tree (outline->next); - if (outline->child != NULL) - free_tree (outline->child); /*and sub-tree */ - free_outline(outline); /*free the outline */ -} - - -/******************************************************************************* -free_outline(outline) frees an outline and anything connected to it -*********************************************************************************/ -void free_outline( /*outline to free */ - register TESSLINE *outline) { - if (outline->compactloop != NULL) - /*no compact loop */ - memfree (outline->compactloop); - - if (outline->loop != NULL) - free_loop (outline->loop); - - oldoutline(outline); -} - - -/********************************************************************************* -free_loop(startpt) frees all the elements of the closed loop -starting at startpt -***********************************************************************************/ -void free_loop( /*outline to free */ - register EDGEPT *startpt) { - register EDGEPT *edgept; /*current point */ - - if (startpt == NULL) - return; - edgept = startpt; - do { - edgept = oldedgept (edgept); /*free it and move on */ - } - while (edgept != startpt); -} diff --git a/ccmain/expandblob.h b/ccmain/expandblob.h deleted file mode 100644 index 6d80c288db..0000000000 
--- a/ccmain/expandblob.h +++ /dev/null @@ -1,13 +0,0 @@ -#ifndef EXPANDBLOB_H -#define EXPANDBLOB_H - -#include "tessclas.h" - -void free_blob(register TBLOB *blob); - -void free_tree(register TESSLINE *outline); - -void free_outline(register TESSLINE *outline); - -void free_loop(register EDGEPT *startpt); -#endif diff --git a/ccmain/fixspace.cpp b/ccmain/fixspace.cpp index b2b970cbc1..a7076164f9 100644 --- a/ccmain/fixspace.cpp +++ b/ccmain/fixspace.cpp @@ -1,8 +1,8 @@ /****************************************************************** * File: fixspace.cpp (Formerly fixspace.c) * Description: Implements a pass over the page res, exploring the alternative - * spacing possibilities, trying to use context to improve the - word spacing + * spacing possibilities, trying to use context to improve the + * word spacing * Author: Phil Cheatle * Created: Thu Oct 21 11:38:43 BST 1993 * @@ -32,28 +32,6 @@ #include "globals.h" #include "tesseractclass.h" -#define EXTERN - -EXTERN BOOL_VAR (fixsp_check_for_fp_noise_space, TRUE, -"Try turning noise to space in fixed pitch"); -EXTERN BOOL_VAR (fixsp_fp_eval, TRUE, "Use alternate evaluation for fp"); -EXTERN BOOL_VAR (fixsp_noise_score_fixing, TRUE, "More sophisticated?"); -EXTERN INT_VAR (fixsp_non_noise_limit, 1, -"How many non-noise blbs either side?"); -EXTERN double_VAR (fixsp_small_outlines_size, 0.28, "Small if lt xht x this"); - -EXTERN BOOL_VAR (fixsp_ignore_punct, TRUE, "In uniform spacing calc"); -EXTERN BOOL_VAR (fixsp_numeric_fix, TRUE, "Try to deal with numeric punct"); -EXTERN BOOL_VAR (fixsp_prefer_joined_1s, TRUE, "Arbitrary boost"); -EXTERN BOOL_VAR (tessedit_test_uniform_wd_spacing, FALSE, -"Limit context word spacing"); -EXTERN BOOL_VAR (tessedit_prefer_joined_punct, FALSE, -"Reward punctation joins"); -EXTERN INT_VAR (fixsp_done_mode, 1, "What constitues done for spacing"); -EXTERN INT_VAR (debug_fix_space_level, 0, "Contextual fixspace debug"); -EXTERN STRING_VAR (numeric_punctuation, ".,", -"Punct. 
chs expected WITHIN numbers"); - #define PERFECT_WERDS 999 #define MAXSPACING 128 /*max expected spacing in pix */ @@ -68,67 +46,75 @@ namespace tesseract { * @param word_count count of words in doc * @param[out] page_res */ -void Tesseract::fix_fuzzy_spaces(volatile ETEXT_DESC *monitor, +void Tesseract::fix_fuzzy_spaces(ETEXT_DESC *monitor, inT32 word_count, PAGE_RES *page_res) { - BLOCK_RES_IT block_res_it; //iterators + BLOCK_RES_IT block_res_it; ROW_RES_IT row_res_it; WERD_RES_IT word_res_it_from; WERD_RES_IT word_res_it_to; WERD_RES *word_res; WERD_RES_LIST fuzzy_space_words; inT16 new_length; - BOOL8 prevent_null_wd_fixsp; //DONT process blobless wds - inT32 word_index; //current word + BOOL8 prevent_null_wd_fixsp; // DONT process blobless wds + inT32 word_index; // current word - block_res_it.set_to_list (&page_res->block_res_list); + block_res_it.set_to_list(&page_res->block_res_list); word_index = 0; - for (block_res_it.mark_cycle_pt (); - !block_res_it.cycled_list (); block_res_it.forward ()) { - row_res_it.set_to_list (&block_res_it.data ()->row_res_list); - for (row_res_it.mark_cycle_pt (); - !row_res_it.cycled_list (); row_res_it.forward ()) { - word_res_it_from.set_to_list (&row_res_it.data ()->word_res_list); - while (!word_res_it_from.at_last ()) { - word_res = word_res_it_from.data (); - while (!word_res_it_from.at_last () && + for (block_res_it.mark_cycle_pt(); !block_res_it.cycled_list(); + block_res_it.forward()) { + row_res_it.set_to_list(&block_res_it.data()->row_res_list); + for (row_res_it.mark_cycle_pt(); !row_res_it.cycled_list(); + row_res_it.forward()) { + word_res_it_from.set_to_list(&row_res_it.data()->word_res_list); + while (!word_res_it_from.at_last()) { + word_res = word_res_it_from.data(); + while (!word_res_it_from.at_last() && !(word_res->combination || - word_res_it_from.data_relative (1)->word->flag (W_FUZZY_NON) || - word_res_it_from.data_relative (1)->word->flag (W_FUZZY_SP))) { + 
word_res_it_from.data_relative(1)->word->flag(W_FUZZY_NON) || + word_res_it_from.data_relative(1)->word->flag(W_FUZZY_SP))) { fix_sp_fp_word(word_res_it_from, row_res_it.data()->row, block_res_it.data()->block); - word_res = word_res_it_from.forward (); + word_res = word_res_it_from.forward(); word_index++; if (monitor != NULL) { monitor->ocr_alive = TRUE; monitor->progress = 90 + 5 * word_index / word_count; + if (monitor->deadline_exceeded() || + (monitor->cancel != NULL && + (*monitor->cancel)(monitor->cancel_this, stats_.dict_words))) + return; } } - if (!word_res_it_from.at_last ()) { + if (!word_res_it_from.at_last()) { word_res_it_to = word_res_it_from; prevent_null_wd_fixsp = - word_res->word->gblob_list ()->empty (); - if (check_debug_pt (word_res, 60)) - debug_fix_space_level.set_value (10); - word_res_it_to.forward (); + word_res->word->gblob_list()->empty(); + if (check_debug_pt(word_res, 60)) + debug_fix_space_level.set_value(10); + word_res_it_to.forward(); word_index++; if (monitor != NULL) { monitor->ocr_alive = TRUE; monitor->progress = 90 + 5 * word_index / word_count; + if (monitor->deadline_exceeded() || + (monitor->cancel != NULL && + (*monitor->cancel)(monitor->cancel_this, stats_.dict_words))) + return; } while (!word_res_it_to.at_last () && - (word_res_it_to.data_relative (1)->word->flag (W_FUZZY_NON) || - word_res_it_to.data_relative (1)->word->flag (W_FUZZY_SP))) { - if (check_debug_pt (word_res, 60)) - debug_fix_space_level.set_value (10); - if (word_res->word->gblob_list ()->empty ()) + (word_res_it_to.data_relative(1)->word->flag(W_FUZZY_NON) || + word_res_it_to.data_relative(1)->word->flag(W_FUZZY_SP))) { + if (check_debug_pt(word_res, 60)) + debug_fix_space_level.set_value(10); + if (word_res->word->gblob_list()->empty()) prevent_null_wd_fixsp = TRUE; - word_res = word_res_it_to.forward (); + word_res = word_res_it_to.forward(); } - if (check_debug_pt (word_res, 60)) - debug_fix_space_level.set_value (10); - if 
(word_res->word->gblob_list ()->empty ()) + if (check_debug_pt(word_res, 60)) + debug_fix_space_level.set_value(10); + if (word_res->word->gblob_list()->empty()) prevent_null_wd_fixsp = TRUE; if (prevent_null_wd_fixsp) { word_res_it_from = word_res_it_to; @@ -138,18 +124,20 @@ void Tesseract::fix_fuzzy_spaces(volatile ETEXT_DESC *monitor, fix_fuzzy_space_list(fuzzy_space_words, row_res_it.data()->row, block_res_it.data()->block); - new_length = fuzzy_space_words.length (); - word_res_it_from.add_list_before (&fuzzy_space_words); - for (; (!word_res_it_from.at_last () && (new_length > 0)); new_length--) { - word_res_it_from.forward (); + new_length = fuzzy_space_words.length(); + word_res_it_from.add_list_before(&fuzzy_space_words); + for (; + !word_res_it_from.at_last() && new_length > 0; + new_length--) { + word_res_it_from.forward(); } } if (test_pt) - debug_fix_space_level.set_value (0); + debug_fix_space_level.set_value(0); } - fix_sp_fp_word(word_res_it_from, row_res_it.data ()->row, + fix_sp_fp_word(word_res_it_from, row_res_it.data()->row, block_res_it.data()->block); - //Last word in row + // Last word in row } } } @@ -164,15 +152,15 @@ void Tesseract::fix_fuzzy_space_list(WERD_RES_LIST &best_perm, BOOL8 improved = FALSE; best_score = eval_word_spacing(best_perm); // default score - dump_words (best_perm, best_score, 1, improved); + dump_words(best_perm, best_score, 1, improved); if (best_score != PERFECT_WERDS) initialise_search(best_perm, current_perm); - while ((best_score != PERFECT_WERDS) && !current_perm.empty ()) { + while ((best_score != PERFECT_WERDS) && !current_perm.empty()) { match_current_words(current_perm, row, block); - current_score = eval_word_spacing (current_perm); - dump_words (current_perm, current_score, 2, improved); + current_score = eval_word_spacing(current_perm); + dump_words(current_perm, current_score, 2, improved); if (current_score > best_score) { best_perm.clear(); best_perm.deep_copy(¤t_perm, &WERD_RES::deep_copy); @@ 
-182,7 +170,7 @@ void Tesseract::fix_fuzzy_space_list(WERD_RES_LIST &best_perm, if (current_score < PERFECT_WERDS) transform_to_next_perm(current_perm); } - dump_words (best_perm, best_score, 3, improved); + dump_words(best_perm, best_score, 3, improved); } } // namespace tesseract @@ -193,13 +181,13 @@ void initialise_search(WERD_RES_LIST &src_list, WERD_RES_LIST &new_list) { WERD_RES *src_wd; WERD_RES *new_wd; - for (src_it.mark_cycle_pt (); !src_it.cycled_list (); src_it.forward ()) { - src_wd = src_it.data (); + for (src_it.mark_cycle_pt(); !src_it.cycled_list(); src_it.forward()) { + src_wd = src_it.data(); if (!src_wd->combination) { - new_wd = new WERD_RES (*src_wd); + new_wd = new WERD_RES(*src_wd); new_wd->combination = FALSE; new_wd->part_of_combo = FALSE; - new_it.add_after_then_move (new_wd); + new_it.add_after_then_move(new_wd); } } } @@ -210,11 +198,15 @@ void Tesseract::match_current_words(WERD_RES_LIST &words, ROW *row, BLOCK* block) { WERD_RES_IT word_it(&words); WERD_RES *word; - - for (word_it.mark_cycle_pt (); !word_it.cycled_list (); word_it.forward ()) { - word = word_it.data (); - if ((!word->part_of_combo) && (word->outword == NULL)) + // Since we are not using PAGE_RES to iterate over words, we need to update + // prev_word_best_choice_ before calling classify_word_pass2(). + prev_word_best_choice_ = NULL; + for (word_it.mark_cycle_pt(); !word_it.cycled_list(); word_it.forward()) { + word = word_it.data(); + if ((!word->part_of_combo) && (word->box_word == NULL)) { classify_word_pass2(word, block, row); + } + prev_word_best_choice_ = word->best_choice; } } @@ -252,11 +244,11 @@ inT16 Tesseract::eval_word_spacing(WERD_RES_LIST &word_res_list) { inT16 word_len; inT16 i; inT16 offset; - WERD_RES *word; //current word + WERD_RES *word; // current word inT16 prev_word_score = 0; BOOL8 prev_word_done = FALSE; - BOOL8 prev_char_1 = FALSE; //prev ch a "1/I/l"? 
- BOOL8 prev_char_digit = FALSE; //prev ch 2..9 or 0 + BOOL8 prev_char_1 = FALSE; // prev ch a "1/I/l"? + BOOL8 prev_char_digit = FALSE; // prev ch 2..9 or 0 BOOL8 current_char_1 = FALSE; BOOL8 current_word_ok_so_far; STRING punct_chars = "!\"`',.:;"; @@ -265,8 +257,8 @@ inT16 Tesseract::eval_word_spacing(WERD_RES_LIST &word_res_list) { BOOL8 word_done = FALSE; do { - word = word_res_it.data (); - word_done = fixspace_thinks_word_done (word); + word = word_res_it.data(); + word_done = fixspace_thinks_word_done(word); word_count++; if (word->tess_failed) { total_score += prev_word_score; @@ -276,51 +268,42 @@ inT16 Tesseract::eval_word_spacing(WERD_RES_LIST &word_res_list) { prev_char_1 = FALSE; prev_char_digit = FALSE; prev_word_done = FALSE; - } - else { + } else { /* Can we add the prev word score and potentially count this word? Yes IF it didnt end in a 1 when the first char of this word is a digit AND it didnt end in a digit when the first char of this word is a 1 */ - word_len = word->reject_map.length (); + word_len = word->reject_map.length(); current_word_ok_so_far = FALSE; - if (!((prev_char_1 && - digit_or_numeric_punct (word, 0)) || - (prev_char_digit && - ((word_done && - (word->best_choice->unichar_lengths().string()[0] == 1 && - word->best_choice->unichar_string()[0] == '1')) || - (!word_done && - STRING(conflict_set_I_l_1).contains(word->best_choice->unichar_string ()[0])))))) { + if (!((prev_char_1 && digit_or_numeric_punct(word, 0)) || + (prev_char_digit && ( + (word_done && + word->best_choice->unichar_lengths().string()[0] == 1 && + word->best_choice->unichar_string()[0] == '1') || + (!word_done && STRING(conflict_set_I_l_1).contains( + word->best_choice->unichar_string()[0])))))) { total_score += prev_word_score; if (prev_word_done) done_word_count++; current_word_ok_so_far = word_done; } - if ((current_word_ok_so_far) && - (!tessedit_test_uniform_wd_spacing || - ((word->best_choice->permuter () == NUMBER_PERM) || - uniformly_spaced (word)))) { 
+ if (current_word_ok_so_far) { prev_word_done = TRUE; prev_word_score = word_len; - } - else { + } else { prev_word_done = FALSE; prev_word_score = 0; } - if (fixsp_prefer_joined_1s) { - /* Add 1 to total score for every joined 1 regardless of context and - rejtn */ - - for (i = 0, prev_char_1 = FALSE; i < word_len; i++) { - current_char_1 = word->best_choice->unichar_string()[i] == '1'; - if (prev_char_1 || (current_char_1 && (i > 0))) - total_score++; - prev_char_1 = current_char_1; - } + /* Add 1 to total score for every joined 1 regardless of context and + rejtn */ + for (i = 0, prev_char_1 = FALSE; i < word_len; i++) { + current_char_1 = word->best_choice->unichar_string()[i] == '1'; + if (prev_char_1 || (current_char_1 && (i > 0))) + total_score++; + prev_char_1 = current_char_1; } /* Add 1 to total score for every joined punctuation regardless of context @@ -329,28 +312,25 @@ inT16 Tesseract::eval_word_spacing(WERD_RES_LIST &word_res_list) { for (i = 0, offset = 0, prev_char_punct = FALSE; i < word_len; offset += word->best_choice->unichar_lengths()[i++]) { current_char_punct = - punct_chars.contains (word->best_choice->unichar_string()[offset]); - if (prev_char_punct || (current_char_punct && (i > 0))) + punct_chars.contains(word->best_choice->unichar_string()[offset]); + if (prev_char_punct || (current_char_punct && i > 0)) total_score++; prev_char_punct = current_char_punct; } } - prev_char_digit = digit_or_numeric_punct (word, word_len - 1); + prev_char_digit = digit_or_numeric_punct(word, word_len - 1); for (i = 0, offset = 0; i < word_len - 1; offset += word->best_choice->unichar_lengths()[i++]); prev_char_1 = - ((word_done - && (word->best_choice->unichar_string()[offset] == '1')) - || (!word_done - && STRING(conflict_set_I_l_1).contains( - word->best_choice->unichar_string()[offset]))); + ((word_done && (word->best_choice->unichar_string()[offset] == '1')) + || (!word_done && STRING(conflict_set_I_l_1).contains( + 
word->best_choice->unichar_string()[offset]))); } /* Find next word */ - do - word_res_it.forward (); - while (word_res_it.data ()->part_of_combo); - } - while (!word_res_it.at_first ()); + do { + word_res_it.forward(); + } while (word_res_it.data()->part_of_combo); + } while (!word_res_it.at_first()); total_score += prev_word_score; if (prev_word_done) done_word_count++; @@ -360,20 +340,21 @@ inT16 Tesseract::eval_word_spacing(WERD_RES_LIST &word_res_list) { return total_score; } - BOOL8 Tesseract::digit_or_numeric_punct(WERD_RES *word, int char_position) { int i; int offset; for (i = 0, offset = 0; i < char_position; offset += word->best_choice->unichar_lengths()[i++]); - return (unicharset.get_isdigit(word->best_choice->unichar_string().string() + offset, - word->best_choice->unichar_lengths()[i]) || - (fixsp_numeric_fix && - (word->best_choice->permuter () == NUMBER_PERM) && - STRING (numeric_punctuation).contains - (word->best_choice->unichar_string().string()[offset]))); + return ( + unicharset.get_isdigit( + word->best_choice->unichar_string().string() + offset, + word->best_choice->unichar_lengths()[i]) || + (word->best_choice->permuter() == NUMBER_PERM && + STRING(numeric_punctuation).contains( + word->best_choice->unichar_string().string()[offset]))); } + } // namespace tesseract @@ -395,95 +376,89 @@ void transform_to_next_perm(WERD_RES_LIST &words) { WERD_RES *prev_word; WERD_RES *combo; WERD *copy_word; - inT16 prev_right = -1; + inT16 prev_right = -MAX_INT16; TBOX box; inT16 gap; inT16 min_gap = MAX_INT16; - for (word_it.mark_cycle_pt (); !word_it.cycled_list (); word_it.forward ()) { - word = word_it.data (); + for (word_it.mark_cycle_pt(); !word_it.cycled_list(); word_it.forward()) { + word = word_it.data(); if (!word->part_of_combo) { - box = word->word->bounding_box (); - if (prev_right >= 0) { - gap = box.left () - prev_right; + box = word->word->bounding_box(); + if (prev_right > -MAX_INT16) { + gap = box.left() - prev_right; if (gap < min_gap) 
min_gap = gap; } - prev_right = box.right (); + prev_right = box.right(); } } if (min_gap < MAX_INT16) { - prev_right = -1; //back to start - word_it.set_to_list (&words); - //cant use cycle pt due to inserted combos at start of list - for (; (prev_right < 0) || !word_it.at_first (); word_it.forward ()) { - word = word_it.data (); + prev_right = -MAX_INT16; // back to start + word_it.set_to_list(&words); + // Note: we can't use cycle_pt due to inserted combos at start of list. + for (; (prev_right == -MAX_INT16) || !word_it.at_first(); + word_it.forward()) { + word = word_it.data(); if (!word->part_of_combo) { - box = word->word->bounding_box (); - if (prev_right >= 0) { - gap = box.left () - prev_right; + box = word->word->bounding_box(); + if (prev_right > -MAX_INT16) { + gap = box.left() - prev_right; if (gap <= min_gap) { - prev_word = prev_word_it.data (); - if (prev_word->combination) + prev_word = prev_word_it.data(); + if (prev_word->combination) { combo = prev_word; - else { - /* Make a new combination and insert before the first word being joined */ + } else { + /* Make a new combination and insert before + * the first word being joined. 
*/ copy_word = new WERD; *copy_word = *(prev_word->word); - //deep copy - combo = new WERD_RES (copy_word); + // deep copy + combo = new WERD_RES(copy_word); combo->combination = TRUE; combo->x_height = prev_word->x_height; prev_word->part_of_combo = TRUE; - prev_word_it.add_before_then_move (combo); + prev_word_it.add_before_then_move(combo); } - combo->word->set_flag (W_EOL, word->word->flag (W_EOL)); + combo->word->set_flag(W_EOL, word->word->flag(W_EOL)); if (word->combination) { - combo->word->join_on (word->word); - //Move blbs to combo - //old combo no longer needed - delete word_it.extract (); - } - else { - //Cpy current wd to combo - combo->copy_on (word); + combo->word->join_on(word->word); + // Move blobs to combo + // old combo no longer needed + delete word_it.extract(); + } else { + // Copy current wd to combo + combo->copy_on(word); word->part_of_combo = TRUE; } combo->done = FALSE; - if (combo->outword != NULL) { - delete combo->outword; - delete combo->best_choice; - delete combo->raw_choice; - combo->outword = NULL; - combo->best_choice = NULL; - combo->raw_choice = NULL; - } + combo->ClearResults(); + } else { + prev_word_it = word_it; // catch up } - else - //catch up - prev_word_it = word_it; } - prev_right = box.right (); + prev_right = box.right(); } } + } else { + words.clear(); // signal termination } - else - words.clear (); //signal termination } - -void dump_words(WERD_RES_LIST &perm, inT16 score, inT16 mode, BOOL8 improved) { +namespace tesseract { +void Tesseract::dump_words(WERD_RES_LIST &perm, inT16 score, + inT16 mode, BOOL8 improved) { WERD_RES_IT word_res_it(&perm); - static STRING initial_str; if (debug_fix_space_level > 0) { if (mode == 1) { - initial_str = ""; - for (word_res_it.mark_cycle_pt (); - !word_res_it.cycled_list (); word_res_it.forward ()) { - if (!word_res_it.data ()->part_of_combo) { - initial_str += word_res_it.data()->best_choice->unichar_string(); - initial_str += ' '; + stats_.dump_words_str = ""; + for 
(word_res_it.mark_cycle_pt(); !word_res_it.cycled_list(); + word_res_it.forward()) { + if (!word_res_it.data()->part_of_combo) { + stats_.dump_words_str += + word_res_it.data()->best_choice->unichar_string(); + stats_.dump_words_str += ' '; } } } @@ -492,35 +467,36 @@ void dump_words(WERD_RES_LIST &perm, inT16 score, inT16 mode, BOOL8 improved) { if (debug_fix_space_level > 1) { switch (mode) { case 1: - tprintf ("EXTRACTED (%d): \"", score); + tprintf("EXTRACTED (%d): \"", score); break; case 2: - tprintf ("TESTED (%d): \"", score); + tprintf("TESTED (%d): \"", score); break; case 3: - tprintf ("RETURNED (%d): \"", score); + tprintf("RETURNED (%d): \"", score); break; } - for (word_res_it.mark_cycle_pt (); - !word_res_it.cycled_list (); word_res_it.forward ()) { - if (!word_res_it.data ()->part_of_combo) + for (word_res_it.mark_cycle_pt(); !word_res_it.cycled_list(); + word_res_it.forward()) { + if (!word_res_it.data()->part_of_combo) { tprintf("%s/%1d ", word_res_it.data()->best_choice->unichar_string().string(), (int)word_res_it.data()->best_choice->permuter()); + } } - tprintf ("\"\n"); - } - else if (improved) { - tprintf ("FIX SPACING \"%s\" => \"", initial_str.string ()); - for (word_res_it.mark_cycle_pt (); - !word_res_it.cycled_list (); word_res_it.forward ()) { - if (!word_res_it.data ()->part_of_combo) - tprintf ("%s/%1d ", - word_res_it.data()->best_choice->unichar_string().string(), - (int)word_res_it.data()->best_choice->permuter()); + tprintf("\"\n"); + } else if (improved) { + tprintf("FIX SPACING \"%s\" => \"", stats_.dump_words_str.string()); + for (word_res_it.mark_cycle_pt(); !word_res_it.cycled_list(); + word_res_it.forward()) { + if (!word_res_it.data()->part_of_combo) { + tprintf("%s/%1d ", + word_res_it.data()->best_choice->unichar_string().string(), + (int)word_res_it.data()->best_choice->permuter()); + } } - tprintf ("\"\n"); + tprintf("\"\n"); } #endif } @@ -532,81 +508,81 @@ void dump_words(WERD_RES_LIST &perm, inT16 score, inT16 mode, 
BOOL8 improved) { * Return true if one of the following are true: * - All inter-char gaps are the same width * - The largest gap is no larger than twice the mean/median of the others - * - The largest gap is < 64/5 = 13 and all others are <= 0 + * - The largest gap is < normalised_max_nonspace * **** REMEMBER - WE'RE NOW WORKING WITH A BLN WERD !!! */ -BOOL8 uniformly_spaced(WERD_RES *word) { - PBLOB_IT blob_it; +BOOL8 Tesseract::uniformly_spaced(WERD_RES *word) { TBOX box; inT16 prev_right = -MAX_INT16; inT16 gap; inT16 max_gap = -MAX_INT16; inT16 max_gap_count = 0; - STATS gap_stats (0, MAXSPACING); + STATS gap_stats(0, MAXSPACING); BOOL8 result; - const ROW *row = word->denorm.row (); + const ROW *row = word->denorm.row(); float max_non_space; float normalised_max_nonspace; inT16 i = 0; inT16 offset = 0; STRING punct_chars = "\"`',.:;"; - blob_it.set_to_list (word->outword->blob_list ()); - - for (blob_it.mark_cycle_pt (); !blob_it.cycled_list (); blob_it.forward ()) { - box = blob_it.data ()->bounding_box (); + for (TBLOB* blob = word->rebuild_word->blobs; blob != NULL; + blob = blob->next) { + box = blob->bounding_box(); if ((prev_right > -MAX_INT16) && - (!fixsp_ignore_punct || - (!punct_chars.contains (word->best_choice->unichar_string() - [offset - word->best_choice->unichar_lengths()[i - 1]]) && - !punct_chars.contains (word->best_choice->unichar_string()[offset])))) { - gap = box.left () - prev_right; - if (gap < max_gap) - gap_stats.add (gap, 1); - else if (gap == max_gap) + (!punct_chars.contains( + word->best_choice->unichar_string() + [offset - word->best_choice->unichar_lengths()[i - 1]]) && + !punct_chars.contains( + word->best_choice->unichar_string()[offset]))) { + gap = box.left() - prev_right; + if (gap < max_gap) { + gap_stats.add(gap, 1); + } else if (gap == max_gap) { max_gap_count++; - else { + } else { if (max_gap_count > 0) - gap_stats.add (max_gap, max_gap_count); + gap_stats.add(max_gap, max_gap_count); max_gap = gap; max_gap_count = 1; 
} } - prev_right = box.right (); + prev_right = box.right(); offset += word->best_choice->unichar_lengths()[i++]; } - max_non_space = (row->space () + 3 * row->kern ()) / 4; - normalised_max_nonspace = max_non_space * bln_x_height / row->x_height (); + max_non_space = (row->space() + 3 * row->kern()) / 4; + normalised_max_nonspace = max_non_space * kBlnXHeight / row->x_height(); - result = ((gap_stats.get_total () == 0) || - (max_gap <= normalised_max_nonspace) || - ((gap_stats.get_total () > 2) && - (max_gap <= 2 * gap_stats.median ())) || - ((gap_stats.get_total () <= 2) && - (max_gap <= 2 * gap_stats.mean ()))); + result = ( + gap_stats.get_total() == 0 || + max_gap <= normalised_max_nonspace || + (gap_stats.get_total() > 2 && max_gap <= 2 * gap_stats.median()) || + (gap_stats.get_total() <= 2 && max_gap <= 2 * gap_stats.mean())); #ifndef SECURE_NAMES if ((debug_fix_space_level > 1)) { - if (result) - tprintf - ("ACCEPT SPACING FOR: \"%s\" norm_maxnon = %f max=%d maxcount=%d total=%d mean=%f median=%f\n", - word->best_choice->unichar_string().string (), normalised_max_nonspace, - max_gap, max_gap_count, gap_stats.get_total (), gap_stats.mean (), - gap_stats.median ()); - else - tprintf - ("REJECT SPACING FOR: \"%s\" norm_maxnon = %f max=%d maxcount=%d total=%d mean=%f median=%f\n", - word->best_choice->unichar_string().string (), normalised_max_nonspace, - max_gap, max_gap_count, gap_stats.get_total (), gap_stats.mean (), - gap_stats.median ()); + if (result) { + tprintf( + "ACCEPT SPACING FOR: \"%s\" norm_maxnon = %f max=%d maxcount=%d " + "total=%d mean=%f median=%f\n", + word->best_choice->unichar_string().string(), normalised_max_nonspace, + max_gap, max_gap_count, gap_stats.get_total(), gap_stats.mean(), + gap_stats.median()); + } else { + tprintf( + "REJECT SPACING FOR: \"%s\" norm_maxnon = %f max=%d maxcount=%d " + "total=%d mean=%f median=%f\n", + word->best_choice->unichar_string().string(), normalised_max_nonspace, + max_gap, max_gap_count, 
gap_stats.get_total(), gap_stats.mean(), + gap_stats.median()); + } } #endif return result; } - -BOOL8 fixspace_thinks_word_done(WERD_RES *word) { +BOOL8 Tesseract::fixspace_thinks_word_done(WERD_RES *word) { if (word->done) return TRUE; @@ -615,23 +591,22 @@ BOOL8 fixspace_thinks_word_done(WERD_RES *word) { reject.c BUT DONT REJECT IF THE WERD IS AMBIGUOUS - FOR SPACING WE DONT CARE WHETHER WE HAVE of/at on/an etc. */ - if ((fixsp_done_mode > 0) && - (word->tess_accepted || - ((fixsp_done_mode == 2) && - (word->reject_map.reject_count () == 0)) || - (fixsp_done_mode == 3)) && - (strchr (word->best_choice->unichar_string().string (), ' ') == NULL) && - ((word->best_choice->permuter () == SYSTEM_DAWG_PERM) || - (word->best_choice->permuter () == FREQ_DAWG_PERM) || - (word->best_choice->permuter () == USER_DAWG_PERM) || - (word->best_choice->permuter () == NUMBER_PERM))) + if (fixsp_done_mode > 0 && + (word->tess_accepted || + (fixsp_done_mode == 2 && word->reject_map.reject_count() == 0) || + fixsp_done_mode == 3) && + (strchr(word->best_choice->unichar_string().string(), ' ') == NULL) && + ((word->best_choice->permuter() == SYSTEM_DAWG_PERM) || + (word->best_choice->permuter() == FREQ_DAWG_PERM) || + (word->best_choice->permuter() == USER_DAWG_PERM) || + (word->best_choice->permuter() == NUMBER_PERM))) { return TRUE; - else + } else { return FALSE; + } } -namespace tesseract { /** * @name fix_sp_fp_word() * Test the current word to see if it can be split by deleting noise blobs. 
If @@ -648,30 +623,30 @@ void Tesseract::fix_sp_fp_word(WERD_RES_IT &word_res_it, ROW *row, inT16 new_length; float junk; - word_res = word_res_it.data (); - if (!fixsp_check_for_fp_noise_space || - word_res->word->flag (W_REP_CHAR) || - word_res->combination || - word_res->part_of_combo || !word_res->word->flag (W_DONT_CHOP)) + word_res = word_res_it.data(); + if (word_res->word->flag(W_REP_CHAR) || + word_res->combination || + word_res->part_of_combo || + !word_res->word->flag(W_DONT_CHOP)) return; - blob_index = worst_noise_blob (word_res, &junk); + blob_index = worst_noise_blob(word_res, &junk); if (blob_index < 0) return; #ifndef SECURE_NAMES if (debug_fix_space_level > 1) { - tprintf ("FP fixspace working on \"%s\"\n", - word_res->best_choice->unichar_string().string()); + tprintf("FP fixspace working on \"%s\"\n", + word_res->best_choice->unichar_string().string()); } #endif - gblob_sort_list ((PBLOB_LIST *) word_res->word->rej_cblob_list (), FALSE); - sub_word_list_it.add_after_stay_put (word_res_it.extract ()); + gblob_sort_list((PBLOB_LIST *)word_res->word->rej_cblob_list(), FALSE); + sub_word_list_it.add_after_stay_put(word_res_it.extract()); fix_noisy_space_list(sub_word_list, row, block); - new_length = sub_word_list.length (); - word_res_it.add_list_before (&sub_word_list); - for (; (!word_res_it.at_last () && (new_length > 1)); new_length--) { - word_res_it.forward (); + new_length = sub_word_list.length(); + word_res_it.add_list_before(&sub_word_list); + for (; !word_res_it.at_last() && new_length > 1; new_length--) { + word_res_it.forward(); } } @@ -686,40 +661,36 @@ void Tesseract::fix_noisy_space_list(WERD_RES_LIST &best_perm, ROW *row, inT16 current_score; BOOL8 improved = FALSE; - //default score - best_score = fp_eval_word_spacing (best_perm); + best_score = fp_eval_word_spacing(best_perm); // default score - dump_words (best_perm, best_score, 1, improved); + dump_words(best_perm, best_score, 1, improved); new_word_res = new WERD_RES; - 
old_word_res = best_perm_it.data (); - //Kludge to force deep copy - old_word_res->combination = TRUE; - *new_word_res = *old_word_res; //deep copy - //Undo kludge - old_word_res->combination = FALSE; - //Undo kludge - new_word_res->combination = FALSE; - current_perm_it.add_to_end (new_word_res); + old_word_res = best_perm_it.data(); + old_word_res->combination = TRUE; // Kludge to force deep copy + *new_word_res = *old_word_res; // deep copy + old_word_res->combination = FALSE; // Undo kludge + new_word_res->combination = FALSE; // Undo kludge + current_perm_it.add_to_end(new_word_res); break_noisiest_blob_word(current_perm); - while ((best_score != PERFECT_WERDS) && !current_perm.empty ()) { + while (best_score != PERFECT_WERDS && !current_perm.empty()) { match_current_words(current_perm, row, block); - current_score = fp_eval_word_spacing (current_perm); - dump_words (current_perm, current_score, 2, improved); + current_score = fp_eval_word_spacing(current_perm); + dump_words(current_perm, current_score, 2, improved); if (current_score > best_score) { best_perm.clear(); best_perm.deep_copy(&current_perm, &WERD_RES::deep_copy); best_score = current_score; improved = TRUE; } - if (current_score < PERFECT_WERDS) + if (current_score < PERFECT_WERDS) { break_noisiest_blob_word(current_perm); + } } - dump_words (best_perm, best_score, 3, improved); + dump_words(best_perm, best_score, 3, improved); } -} // namespace tesseract /** @@ -727,13 +698,13 @@ void Tesseract::fix_noisy_space_list(WERD_RES_LIST &best_perm, ROW *row, * Find the word with the blob which looks like the worst noise. * Break the word into two, deleting the noise blob. 
*/ -void break_noisiest_blob_word(WERD_RES_LIST &words) { +void Tesseract::break_noisiest_blob_word(WERD_RES_LIST &words) { WERD_RES_IT word_it(&words); WERD_RES_IT worst_word_it; float worst_noise_score = 9999; - int worst_blob_index = -1; //noisiest blb of noisiest wd - int blob_index; //of wds noisiest blb - float noise_score; //of wds noisiest blb + int worst_blob_index = -1; // Noisiest blob of noisiest wd + int blob_index; // of wds noisiest blob + float noise_score; // of wds noisiest blob WERD_RES *word_res; C_BLOB_IT blob_it; C_BLOB_IT rej_cblob_it; @@ -744,119 +715,113 @@ void break_noisiest_blob_word(WERD_RES_LIST &words) { inT16 start_of_noise_blob; inT16 i; - for (word_it.mark_cycle_pt (); !word_it.cycled_list (); word_it.forward ()) { - blob_index = worst_noise_blob (word_it.data (), &noise_score); - if ((blob_index > -1) && (worst_noise_score > noise_score)) { + for (word_it.mark_cycle_pt(); !word_it.cycled_list(); word_it.forward()) { + blob_index = worst_noise_blob(word_it.data(), &noise_score); + if (blob_index > -1 && worst_noise_score > noise_score) { worst_noise_score = noise_score; worst_blob_index = blob_index; worst_word_it = word_it; } } if (worst_blob_index < 0) { - words.clear (); //signal termination + words.clear(); // signal termination return; } /* Now split the worst_word_it */ - word_res = worst_word_it.data (); + word_res = worst_word_it.data(); /* Move blobs before noise blob to a new bloblist */ - new_blob_it.set_to_list (&new_blob_list); - blob_it.set_to_list (word_res->word->cblob_list ()); - for (i = 0; i < worst_blob_index; i++, blob_it.forward ()) { - new_blob_it.add_after_then_move (blob_it.extract ()); + new_blob_it.set_to_list(&new_blob_list); + blob_it.set_to_list(word_res->word->cblob_list()); + for (i = 0; i < worst_blob_index; i++, blob_it.forward()) { + new_blob_it.add_after_then_move(blob_it.extract()); } - start_of_noise_blob = blob_it.data ()->bounding_box ().left (); - delete blob_it.extract (); //throw out noise 
blb + start_of_noise_blob = blob_it.data()->bounding_box().left(); + delete blob_it.extract(); // throw out noise blob - new_word = new WERD (&new_blob_list, word_res->word); - new_word->set_flag (W_EOL, FALSE); - word_res->word->set_flag (W_BOL, FALSE); - word_res->word->set_blanks (1);//After break + new_word = new WERD(&new_blob_list, word_res->word); + new_word->set_flag(W_EOL, FALSE); + word_res->word->set_flag(W_BOL, FALSE); + word_res->word->set_blanks(1); // After break - new_rej_cblob_it.set_to_list (new_word->rej_cblob_list ()); - rej_cblob_it.set_to_list (word_res->word->rej_cblob_list ()); + new_rej_cblob_it.set_to_list(new_word->rej_cblob_list()); + rej_cblob_it.set_to_list(word_res->word->rej_cblob_list()); for (; - (!rej_cblob_it.empty () && - (rej_cblob_it.data ()->bounding_box ().left () < - start_of_noise_blob)); rej_cblob_it.forward ()) { - new_rej_cblob_it.add_after_then_move (rej_cblob_it.extract ()); + (!rej_cblob_it.empty() && + (rej_cblob_it.data()->bounding_box().left() < start_of_noise_blob)); + rej_cblob_it.forward()) { + new_rej_cblob_it.add_after_then_move(rej_cblob_it.extract()); } - worst_word_it.add_before_then_move (new WERD_RES (new_word)); + worst_word_it.add_before_then_move(new WERD_RES(new_word)); - word_res->done = FALSE; - if (word_res->outword != NULL) { - delete word_res->outword; - delete word_res->best_choice; - delete word_res->raw_choice; - word_res->outword = NULL; - word_res->best_choice = NULL; - word_res->raw_choice = NULL; - } + word_res->ClearResults(); } - -inT16 worst_noise_blob(WERD_RES *word_res, float *worst_noise_score) { - PBLOB_IT blob_it; - inT16 blob_count; +inT16 Tesseract::worst_noise_blob(WERD_RES *word_res, + float *worst_noise_score) { float noise_score[512]; int i; - int min_noise_blob; //1st contender - int max_noise_blob; //last contender + int min_noise_blob; // 1st contender + int max_noise_blob; // last contender int non_noise_count; - int worst_noise_blob; //Worst blob - float small_limit = 
bln_x_height * fixsp_small_outlines_size; - float non_noise_limit = bln_x_height * 0.8; - - blob_it.set_to_list (word_res->outword->blob_list ()); - //normalised - blob_count = blob_it.length (); - ASSERT_HOST (blob_count <= 512); + int worst_noise_blob; // Worst blob + float small_limit = kBlnXHeight * fixsp_small_outlines_size; + float non_noise_limit = kBlnXHeight * 0.8; + + TBLOB* blob = word_res->rebuild_word->blobs; + // Normalised. + int blob_count = word_res->box_word->length(); + ASSERT_HOST(blob_count <= 512); if (blob_count < 5) - return -1; //too short to split + return -1; // too short to split + /* Get the noise scores for all blobs */ #ifndef SECURE_NAMES if (debug_fix_space_level > 5) - tprintf ("FP fixspace Noise metrics for \"%s\": ", - word_res->best_choice->unichar_string().string()); + tprintf("FP fixspace Noise metrics for \"%s\": ", + word_res->best_choice->unichar_string().string()); #endif - for (i = 0; i < blob_count; i++, blob_it.forward ()) { - if (word_res->reject_map[i].accepted ()) + for (i = 0; i < blob_count && blob != NULL; i++, blob = blob->next) { + if (word_res->reject_map[i].accepted()) noise_score[i] = non_noise_limit; else - noise_score[i] = blob_noise_score (blob_it.data ()); + noise_score[i] = blob_noise_score(blob); if (debug_fix_space_level > 5) - tprintf ("%1.1f ", noise_score[i]); + tprintf("%1.1f ", noise_score[i]); } if (debug_fix_space_level > 5) - tprintf ("\n"); + tprintf("\n"); /* Now find the worst one which is far enough away from the end of the word */ non_noise_count = 0; - for (i = 0; - (i < blob_count) && (non_noise_count < fixsp_non_noise_limit); i++) { - if (noise_score[i] >= non_noise_limit) + for (i = 0; i < blob_count && non_noise_count < fixsp_non_noise_limit; i++) { + if (noise_score[i] >= non_noise_limit) { non_noise_count++; + } } if (non_noise_count < fixsp_non_noise_limit) return -1; + min_noise_blob = i; non_noise_count = 0; - for (i = blob_count - 1; - (i >= 0) && (non_noise_count < 
fixsp_non_noise_limit); i--) { - if (noise_score[i] >= non_noise_limit) + for (i = blob_count - 1; i >= 0 && non_noise_count < fixsp_non_noise_limit; + i--) { + if (noise_score[i] >= non_noise_limit) { non_noise_count++; + } } if (non_noise_count < fixsp_non_noise_limit) return -1; + max_noise_blob = i; if (min_noise_blob > max_noise_blob) @@ -873,69 +838,64 @@ inT16 worst_noise_blob(WERD_RES *word_res, float *worst_noise_score) { return worst_noise_blob; } - -float blob_noise_score(PBLOB *blob) { - OUTLINE_IT outline_it; - TBOX box; //BB of outline +float Tesseract::blob_noise_score(TBLOB *blob) { + TBOX box; // BB of outline inT16 outline_count = 0; inT16 max_dimension; inT16 largest_outline_dimension = 0; - outline_it.set_to_list (blob->out_list ()); - for (outline_it.mark_cycle_pt (); - !outline_it.cycled_list (); outline_it.forward ()) { + for (TESSLINE* ol = blob->outlines; ol != NULL; ol= ol->next) { outline_count++; - box = outline_it.data ()->bounding_box (); - if (box.height () > box.width ()) - max_dimension = box.height (); - else - max_dimension = box.width (); + box = ol->bounding_box(); + if (box.height() > box.width()) { + max_dimension = box.height(); + } else { + max_dimension = box.width(); + } if (largest_outline_dimension < max_dimension) largest_outline_dimension = max_dimension; } - if (fixsp_noise_score_fixing) { - if (outline_count > 5) - //penalise LOTS of blobs - largest_outline_dimension *= 2; - - box = blob->bounding_box (); + if (outline_count > 5) { + // penalise LOTS of blobs + largest_outline_dimension *= 2; + } - if ((box.bottom () > bln_baseline_offset * 4) || - (box.top () < bln_baseline_offset / 2)) - //Lax blob is if high or low - largest_outline_dimension /= 2; + box = blob->bounding_box(); + if (box.bottom() > kBlnBaselineOffset * 4 || + box.top() < kBlnBaselineOffset / 2) { + // Lax blob is if high or low + largest_outline_dimension /= 2; } + return largest_outline_dimension; } - +} // namespace tesseract void 
fixspace_dbg(WERD_RES *word) { - TBOX box = word->word->bounding_box (); + TBOX box = word->word->bounding_box(); BOOL8 show_map_detail = FALSE; inT16 i; - box.print (); - #ifndef SECURE_NAMES - tprintf (" \"%s\" ", word->best_choice->unichar_string().string ()); - tprintf ("Blob count: %d (word); %d/%d (outword)\n", - word->word->gblob_list ()->length (), - word->outword->gblob_list ()->length (), - word->outword->rej_blob_list ()->length ()); - word->reject_map.print (debug_fp); - tprintf ("\n"); + box.print(); + tprintf(" \"%s\" ", word->best_choice->unichar_string().string()); + tprintf("Blob count: %d (word); %d/%d (rebuild word)\n", + word->word->gblob_list()->length(), + word->rebuild_word->NumBlobs(), + word->box_word->length()); + word->reject_map.print(debug_fp); + tprintf("\n"); if (show_map_detail) { - tprintf ("\"%s\"\n", word->best_choice->unichar_string().string ()); + tprintf("\"%s\"\n", word->best_choice->unichar_string().string()); for (i = 0; word->best_choice->unichar_string()[i] != '\0'; i++) { - tprintf ("**** \"%c\" ****\n", word->best_choice->unichar_string()[i]); - word->reject_map[i].full_print (debug_fp); + tprintf("**** \"%c\" ****\n", word->best_choice->unichar_string()[i]); + word->reject_map[i].full_print(debug_fp); } } - tprintf ("Tess Accepted: %s\n", word->tess_accepted ? "TRUE" : "FALSE"); - tprintf ("Done flag: %s\n\n", word->done ? "TRUE" : "FALSE"); - #endif + tprintf("Tess Accepted: %s\n", word->tess_accepted ? "TRUE" : "FALSE"); + tprintf("Done flag: %s\n\n", word->done ? 
"TRUE" : "FALSE"); } @@ -955,25 +915,23 @@ inT16 Tesseract::fp_eval_word_spacing(WERD_RES_LIST &word_res_list) { inT16 word_length; inT16 score = 0; inT16 i; - float small_limit = bln_x_height * fixsp_small_outlines_size; - - if (!fixsp_fp_eval) - return (eval_word_spacing (word_res_list)); + float small_limit = kBlnXHeight * fixsp_small_outlines_size; - for (word_it.mark_cycle_pt (); !word_it.cycled_list (); word_it.forward ()) { - word = word_it.data (); + for (word_it.mark_cycle_pt(); !word_it.cycled_list(); word_it.forward()) { + word = word_it.data(); word_length = word->reject_map.length(); - if ((word->done || - word->tess_accepted) || - (word->best_choice->permuter() == SYSTEM_DAWG_PERM) || - (word->best_choice->permuter() == FREQ_DAWG_PERM) || - (word->best_choice->permuter() == USER_DAWG_PERM) || - (safe_dict_word(*(word->best_choice)) > 0)) { - blob_it.set_to_list(word->outword->blob_list()); + if (word->done || + word->tess_accepted || + word->best_choice->permuter() == SYSTEM_DAWG_PERM || + word->best_choice->permuter() == FREQ_DAWG_PERM || + word->best_choice->permuter() == USER_DAWG_PERM || + safe_dict_word(*word->best_choice) > 0) { + TBLOB* blob = word->rebuild_word->blobs; UNICHAR_ID space = getDict().getUnicharset().unichar_to_id(" "); - for (i = 0; i < word->best_choice->length(); ++i, blob_it.forward()) { + for (i = 0; i < word->best_choice->length() && blob != NULL; + ++i, blob = blob->next) { if (word->best_choice->unichar_id(i) == space || - (blob_noise_score(blob_it.data()) < small_limit)) { + blob_noise_score(blob) < small_limit) { score -= 1; // penalise possibly erroneous non-space } else if (word->reject_map[i].accepted()) { score++; @@ -985,4 +943,5 @@ inT16 Tesseract::fp_eval_word_spacing(WERD_RES_LIST &word_res_list) { score = 0; return score; } + } // namespace tesseract diff --git a/ccmain/fixspace.h b/ccmain/fixspace.h index 1d862b4a67..56c3018794 100644 --- a/ccmain/fixspace.h +++ b/ccmain/fixspace.h @@ -23,37 +23,10 @@ #define 
FIXSPACE_H #include "pageres.h" -#include "varable.h" -#include "ocrclass.h" +#include "params.h" #include "notdll.h" -extern BOOL_VAR_H (fixsp_check_for_fp_noise_space, TRUE, -"Try turning noise to space in fixed pitch"); -extern BOOL_VAR_H (fixsp_fp_eval, TRUE, "Use alternate evaluation for fp"); -extern BOOL_VAR_H (fixsp_noise_score_fixing, TRUE, "More sophisticated?"); -extern INT_VAR_H (fixsp_non_noise_limit, 1, -"How many non-noise blbs either side?"); -extern double_VAR_H (fixsp_small_outlines_size, 0.28, -"Small if lt xht x this"); -extern BOOL_VAR_H (fixsp_ignore_punct, TRUE, "In uniform spacing calc"); -extern BOOL_VAR_H (fixsp_numeric_fix, TRUE, "Try to deal with numeric punct"); -extern BOOL_VAR_H (fixsp_prefer_joined_1s, TRUE, "Arbitrary boost"); -extern BOOL_VAR_H (tessedit_test_uniform_wd_spacing, FALSE, -"Limit context word spacing"); -extern BOOL_VAR_H (tessedit_prefer_joined_punct, FALSE, -"Reward punctation joins"); -extern INT_VAR_H (fixsp_done_mode, 1, "What constitutes done for spacing"); -extern INT_VAR_H (debug_fix_space_level, 0, "Contextual fixspace debug"); -extern STRING_VAR_H (numeric_punctuation, ".,", -"Punct. 
chs expected WITHIN numbers"); void initialise_search(WERD_RES_LIST &src_list, WERD_RES_LIST &new_list); void transform_to_next_perm(WERD_RES_LIST &words); -void dump_words(WERD_RES_LIST &perm, inT16 score, inT16 mode, BOOL8 improved); -BOOL8 uniformly_spaced( //sensible word - WERD_RES *word); -BOOL8 fixspace_thinks_word_done(WERD_RES *word); -void break_noisiest_blob_word(WERD_RES_LIST &words); -inT16 worst_noise_blob(WERD_RES *word_res, float *worst_noise_score); -float blob_noise_score(PBLOB *blob); void fixspace_dbg(WERD_RES *word); #endif diff --git a/ccmain/fixxht.cpp b/ccmain/fixxht.cpp index 07e35537ea..ded7dd7e57 100644 --- a/ccmain/fixxht.cpp +++ b/ccmain/fixxht.cpp @@ -17,816 +17,150 @@ * **********************************************************************/ -#ifdef _MSC_VER -#pragma warning(disable:4244) // Conversion warnings -#endif - #include "mfcpch.h" #include #include -#include "varable.h" -#include "tessvars.h" -#include "control.h" -#include "reject.h" -#include "fixxht.h" -#include "secname.h" +#include "params.h" +#include "float2int.h" #include "tesseractclass.h" -#define EXTERN - -EXTERN double_VAR (x_ht_fraction_of_caps_ht, 0.7, -"Fract of cps ht est of xht"); -EXTERN double_VAR (x_ht_variation, 0.35, -"Err band as fract of caps/xht dist"); -EXTERN double_VAR (x_ht_sub_variation, 0.5, -"Err band as fract of caps/xht dist"); -EXTERN BOOL_VAR (rej_trial_ambigs, TRUE, -"reject x-ht ambigs when under trial"); -EXTERN BOOL_VAR (x_ht_conservative_ambigs, FALSE, -"Dont rely on ambigs + maxht"); -EXTERN BOOL_VAR (x_ht_check_est, TRUE, "Cross check estimates"); -EXTERN BOOL_VAR (x_ht_case_flip, FALSE, "Flip or reject suspect case"); -EXTERN BOOL_VAR (x_ht_include_dodgy_blobs, TRUE, -"Include blobs with possible noise?"); -EXTERN BOOL_VAR (x_ht_limit_flip_trials, TRUE, -"Dont do trial flips when ambigs are close to xht?"); -EXTERN BOOL_VAR (rej_use_check_block_occ, TRUE, -"Analyse rejection behaviour"); - -EXTERN STRING_VAR (chs_non_ambig_caps_ht, 
-"!#$%&()/12346789?ABDEFGHIKLNQRT[]\\bdfhkl", -"Reliable ascenders"); -EXTERN STRING_VAR (chs_x_ht, "acegmnopqrsuvwxyz", "X height chars"); -EXTERN STRING_VAR (chs_non_ambig_x_ht, "aenqr", "reliable X height chars"); -EXTERN STRING_VAR (chs_ambig_caps_x, "cCmMoO05sSuUvVwWxXzZ", -"X ht or caps ht chars"); -EXTERN STRING_VAR (chs_bl_ambig_caps_x, "pPyY", " Caps or descender ambigs"); - -/* The following arent used in this module but are used in applybox.c */ -EXTERN STRING_VAR (chs_caps_ht, -"!#$%&()/0123456789?ABCDEFGHIJKLMNOPQRSTUVWXYZ[]\\bdfhkl{|}", -"Ascender chars"); -EXTERN STRING_VAR (chs_desc, "gjpqy", "Descender chars"); -EXTERN STRING_VAR (chs_non_ambig_bl, -"!#$%&01246789?ABCDEFGHIKLMNORSTUVWXYZabcdehiklmnorstuvwxz", -"Reliable baseline chars"); -EXTERN STRING_VAR (chs_odd_top, "ijt", "Chars with funny ascender region"); -EXTERN STRING_VAR (chs_odd_bot, "()35JQ[]\\/{}|", "Chars with funny base"); - -/* The following arent used but are defined for completeness */ -EXTERN STRING_VAR (chs_bl, -"!#$%&()/01246789?ABCDEFGHIJKLMNOPRSTUVWXYZ[]\\abcdefhiklmnorstuvwxz{}", -"Baseline chars"); -EXTERN STRING_VAR (chs_non_ambig_desc, "gq", "Reliable descender chars"); - -/** - * re_estimate_x_ht() - * - * Walk the blobs in the word together with the text string and reject map. - * NOTE: All evaluation is done on the baseline normalised word. This is so that - * the TBOX class can be used (integer). The reasons for this are: - * a) We must use the outword - ie the Tess result - * b) The outword is always converted to integer representation as that is how - * Tess works - * c) We would like to use the TBOX class, cos its there - this is integer - * precision. - * d) If we de-normed the outword we would get rounding errors and would find - * that integers are too imprecise (x-height around 15 pixels instead of a - * scale of 128 in bln form. - * CONVINCED? - * - * A) Try to re-estimatate x-ht and caps ht from confirmed pts in word. 
- * - * @verbatim - FOR each non reject blob - IF char is baseline posn ambiguous - Remove ambiguity by comparing its posn with respect to baseline. - IF char is a confirmed x-ht char - Add x-ht posn to confirmed_x_ht pts for word - IF char is a confirmed caps-ht char - Add blob_ht to caps ht pts for word - - IF Std Dev of caps hts < 2 (AND # samples > 0) - Use mean as caps ht estimate (Dont use median as we can expect a - fair variation between the heights of the NON_AMBIG_CAPS_HT_CHS) - IF Std Dev of caps hts >= 2 (AND # samples > 0) - Suspect small caps font. - Look for 2 clusters, each with Std Dev < 2. - IF 2 clusters found - Pick the smaller median as the caps ht estimate of the smallcaps. - - IF failed to estimate a caps ht - Use the median caps ht if there is one, - ELSE use the caps ht estimate of the previous word. NO!!! - - - IF there are confirmed x-height chars - Estimate confirmed x-height as the median value - ELSE IF there is a confirmed caps ht - Estimate confirmed x-height as a fraction of confirmed caps ht value - ELSE - Use the value for the previous word or the row value if this is the - first word in the block. NO!!! - @endverbatim - * - * B) Add in case ambiguous blobs based on confirmed x-ht/caps ht, changing case - * as necessary. Reestimate caps ht and x-ht as in A, using the extended - * clusters. 
- * - * C) If word contains rejects, and x-ht estimate significantly differs from - * original estimate, return TRUE so that the word can be rematched - */ - -void re_estimate_x_ht( //improve for 1 word - WERD_RES *word_res, //word to do - float *trial_x_ht //new match value - ) { - PBLOB_IT blob_it; - inT16 blob_ht_above_baseline; - - const char *word_str; - inT16 i; - inT16 offset; - - STATS all_blobs_ht (0, 300); //every blob in word - STATS x_ht (0, 300); //confirmed pts in wd - STATS caps_ht (0, 300); //confirmed pts in wd - STATS case_ambig (0, 300); //lower case ambigs - - inT16 rej_blobs_count = 0; - inT16 rej_blobs_max_height = 0; - inT32 rej_blobs_max_area = 0; - float x_ht_ok_variation; - float max_blob_ht; - float marginally_above_x_ht; - - TBOX blob_box; //blob bounding box - float est_x_ht = 0.0; //word estimate - float est_caps_ht = 0.0; //word estimate - //based on hard data? - BOOL8 est_caps_ht_certain = FALSE; - BOOL8 est_x_ht_certain = FALSE;//based on hard data? - BOOL8 trial = FALSE; //Sepeculative values? - BOOL8 no_comment = FALSE; //No change in xht - float ambig_lc_x_est; - float ambig_uc_caps_est; - inT16 x_ht_ambigs = 0; - inT16 caps_ht_ambigs = 0; - - /* Calculate default variation of blob x_ht from bln x_ht for bln word */ - x_ht_ok_variation = - (bln_x_height / x_ht_fraction_of_caps_ht - bln_x_height) * x_ht_variation; - - word_str = word_res->best_choice->unichar_string().string(); - /* - Cycle blobs, allocating to one of the stats sets when possible. 
- */ - blob_it.set_to_list (word_res->outword->blob_list ()); - for (blob_it.mark_cycle_pt (), i = 0, offset = 0; - !blob_it.cycled_list (); blob_it.forward (), - offset += word_res->best_choice->unichar_lengths()[i++]) { - if (!dodgy_blob (blob_it.data ())) { - blob_box = blob_it.data ()->bounding_box (); - blob_ht_above_baseline = blob_box.top () - bln_baseline_offset; - all_blobs_ht.add (blob_ht_above_baseline, 1); - - if (word_res->reject_map[i].rejected ()) { - rej_blobs_count++; - if (blob_box.height () > rej_blobs_max_height) - rej_blobs_max_height = blob_box.height (); - if (blob_box.area () > rej_blobs_max_area) - rej_blobs_max_area = blob_box.area (); - } - else { - if (STRING (chs_non_ambig_x_ht).contains (word_str[offset])) - x_ht.add (blob_ht_above_baseline, 1); - - if (STRING (chs_non_ambig_caps_ht).contains (word_str[offset])) - caps_ht.add (blob_ht_above_baseline, 1); +namespace tesseract { - if (STRING (chs_ambig_caps_x).contains (word_str[offset])) { - case_ambig.add (blob_ht_above_baseline, 1); - if (STRING (chs_x_ht).contains (word_str[offset])) - x_ht_ambigs++; - else - caps_ht_ambigs++; - } +// Fixxht overview. +// Premise: Initial estimate of x-height is adequate most of the time, but +// occasionally it is incorrect. Most notable causes of failure are: +// 1. Small caps, where the top of the caps is the same as the body text +// xheight. For small caps words the xheight needs to be reduced to correctly +// recognize the caps in the small caps word. +// 2. All xheight lines, such as summer. Here the initial estimate will have +// guessed that the blob tops are caps and will have placed the xheight too low. +// 3. Noise/logos beside words, or changes in font size on a line. Such +// things can blow the statistics and cause an incorrect estimate. +// +// Algorithm. +// Compare the vertical position (top only) of alphnumerics in a word with +// the range of positions in training data (in the unicharset). +// See CountMisfitTops. 
If any characters disagree sufficiently with the +// initial xheight estimate, then recalculate the xheight, re-run OCR on +// the word, and if the number of vertical misfits goes down, along with +// either the word rating or certainty, then keep the new xheight. +// The new xheight is calculated as follows:ComputeCompatibleXHeight +// For each alphanumeric character that has a vertically misplaced top +// (a misfit), yet its bottom is within the acceptable range (ie it is not +// likely a sub-or super-script) calculate the range of acceptable xheight +// positions from its range of tops, and give each value in the range a +// number of votes equal to the distance of its top from its acceptance range. +// The x-height position with the median of the votes becomes the new +// x-height. This assumes that most characters will be correctly recognized +// even if the x-height is incorrect. This is not a terrible assumption, but +// it is not great. An improvement would be to use a classifier that does +// not care about vertical position or scaling at all. + +// If the max-min top of a unicharset char is bigger than kMaxCharTopRange +// then the char top cannot be used to judge misfits or suggest a new top. +const int kMaxCharTopRange = 48; + +// Returns the number of misfit blob tops in this word. 
+int Tesseract::CountMisfitTops(WERD_RES *word_res) { + int bad_blobs = 0; + TBLOB* blob = word_res->rebuild_word->blobs; + int blob_id = 0; + for (; blob != NULL; blob = blob->next, ++blob_id) { + UNICHAR_ID class_id = word_res->best_choice->unichar_id(blob_id); + if (unicharset.get_isalpha(class_id) || unicharset.get_isdigit(class_id)) { + int top = blob->bounding_box().top(); + if (top >= INT_FEAT_RANGE) + top = INT_FEAT_RANGE - 1; + int min_bottom, max_bottom, min_top, max_top; + unicharset.get_top_bottom(class_id, &min_bottom, &max_bottom, + &min_top, &max_top); + if (max_top - min_top > kMaxCharTopRange) + continue; + bool bad = top < min_top - x_ht_acceptance_tolerance || + top > max_top + x_ht_acceptance_tolerance; + if (bad) + ++bad_blobs; + if (debug_x_ht_level >= 1) { + tprintf("Class %s is %s with top %d vs limits of %d->%d, +/-%d\n", + unicharset.id_to_unichar(class_id), + bad ? "Misfit" : "OK", top, min_top, max_top, + static_cast(x_ht_acceptance_tolerance)); + } + } + } + return bad_blobs; +} - if (STRING (chs_bl_ambig_caps_x).contains (word_str[offset])) { - if (STRING (chs_x_ht).contains (word_str[offset])) { - /* confirm x_height provided > 15% total height below baseline */ - if ((bln_baseline_offset - blob_box.bottom ()) / - (float) blob_box.height () > 0.15) - x_ht.add (blob_ht_above_baseline, 1); - } - else { - /* confirm caps_height provided < 5% total height below baseline */ - if ((bln_baseline_offset - blob_box.bottom ()) / - (float) blob_box.height () < 0.05) - caps_ht.add (blob_ht_above_baseline, 1); - } +// Returns a new x-height maximally compatible with the result in word_res. +// See comment above for overall algorithm. 
+float Tesseract::ComputeCompatibleXheight(WERD_RES *word_res) { + STATS top_stats(0, MAX_UINT8); + TBLOB* blob = word_res->rebuild_word->blobs; + int blob_id = 0; + for (; blob != NULL; blob = blob->next, ++blob_id) { + UNICHAR_ID class_id = word_res->best_choice->unichar_id(blob_id); + if (unicharset.get_isalpha(class_id) || unicharset.get_isdigit(class_id)) { + int top = blob->bounding_box().top(); + // Clip the top to the limit of normalized feature space. + if (top >= INT_FEAT_RANGE) + top = INT_FEAT_RANGE - 1; + int bottom = blob->bounding_box().bottom(); + int min_bottom, max_bottom, min_top, max_top; + unicharset.get_top_bottom(class_id, &min_bottom, &max_bottom, + &min_top, &max_top); + // Chars with a wild top range would mess up the result so ignore them. + if (max_top - min_top > kMaxCharTopRange) + continue; + int misfit_dist = MAX((min_top - x_ht_acceptance_tolerance) - top, + top - (max_top + x_ht_acceptance_tolerance)); + int height = top - kBlnBaselineOffset; + if (debug_x_ht_level >= 20) { + tprintf("Class %s: height=%d, bottom=%d,%d top=%d,%d, actual=%d,%d : ", + unicharset.id_to_unichar(class_id), + height, min_bottom, max_bottom, min_top, max_top, + bottom, top); + } + // Use only chars that fit in the expected bottom range, and where + // the range of tops is sensibly near the xheight. + if (min_bottom <= bottom + x_ht_acceptance_tolerance && + bottom - x_ht_acceptance_tolerance <= max_bottom && + min_top > kBlnBaselineOffset && + max_top - kBlnBaselineOffset >= kBlnXHeight && + misfit_dist > 0) { + // Compute the x-height position using proportionality between the + // actual height and expected height. 
+ int min_xht = DivRounded(height * kBlnXHeight, + max_top - kBlnBaselineOffset); + int max_xht = DivRounded(height * kBlnXHeight, + min_top - kBlnBaselineOffset); + if (debug_x_ht_level >= 20) { + tprintf(" xht range min=%d, max=%d\n", + min_xht, max_xht); } + // The range of expected heights gets a vote equal to the distance + // of the actual top from the expected top. + for (int y = min_xht; y <= max_xht; ++y) + top_stats.add(y, misfit_dist); + } else if (debug_x_ht_level >= 20) { + tprintf(" already OK\n"); } } } - est_caps_ht = estimate_from_stats (caps_ht); - est_x_ht = estimate_from_stats (x_ht); - est_ambigs(word_res, case_ambig, &ambig_lc_x_est, &ambig_uc_caps_est); - max_blob_ht = all_blobs_ht.ile (0.9999); - - #ifndef SECURE_NAMES + if (top_stats.get_total() == 0) + return 0.0f; + // The new xheight is just the median vote, which is then scaled out + // of BLN space back to pixel space to get the x-height in pixel space. + float new_xht = top_stats.median(); if (debug_x_ht_level >= 20) { - tprintf ("Mode20:A: %s ", word_str); - word_res->reject_map.print (debug_fp); - tprintf (" XHT:%f CAP:%f MAX:%f AMBIG X:%f CAP:%f\n", - est_x_ht, est_caps_ht, max_blob_ht, - ambig_lc_x_est, ambig_uc_caps_est); + tprintf("Median xht=%f\n", new_xht); + tprintf("Mode20:A: New x-height = %f (norm), %f (orig)\n", + new_xht, new_xht / word_res->denorm.scale()); } - #endif - if (!x_ht_conservative_ambigs && - (ambig_lc_x_est > 0) && - (ambig_lc_x_est == ambig_uc_caps_est) && - (max_blob_ht > ambig_lc_x_est + x_ht_ok_variation)) { - //may be zero but believe xht - ambig_uc_caps_est = est_caps_ht; - #ifndef SECURE_NAMES - if (debug_x_ht_level >= 20) - tprintf ("Mode20:B: Fiddle ambig_uc_caps_est to %f\n", - ambig_lc_x_est); - #endif - } - - /* Now make some estimates */ - - if ((est_x_ht > 0) || (est_caps_ht > 0) || - ((ambig_lc_x_est > 0) && (ambig_lc_x_est != ambig_uc_caps_est))) { - /* There is some sensible data to go on so make the most of it. 
*/ - if (debug_x_ht_level >= 20) - tprintf ("Mode20:C: Sensible Data\n", ambig_lc_x_est); - if (est_x_ht > 0) { - est_x_ht_certain = TRUE; - if (est_caps_ht == 0) { - if ((ambig_uc_caps_est > ambig_lc_x_est) && - (ambig_uc_caps_est > est_x_ht + x_ht_ok_variation)) - est_caps_ht = ambig_uc_caps_est; - else - est_caps_ht = est_x_ht / x_ht_fraction_of_caps_ht; - } - if (case_ambig.get_total () > 0) - improve_estimate(word_res, est_x_ht, est_caps_ht, x_ht, caps_ht); - est_caps_ht_certain = caps_ht.get_total () > 0; - #ifndef SECURE_NAMES - if (debug_x_ht_level >= 20) - tprintf ("Mode20:D: Est from xht XHT:%f CAP:%f\n", - est_x_ht, est_caps_ht); - #endif - } - else if (est_caps_ht > 0) { - est_caps_ht_certain = TRUE; - if ((ambig_lc_x_est > 0) && - (ambig_lc_x_est < est_caps_ht - x_ht_ok_variation)) - est_x_ht = ambig_lc_x_est; - else - est_x_ht = est_caps_ht * x_ht_fraction_of_caps_ht; - if (ambig_lc_x_est + ambig_uc_caps_est > 0) - improve_estimate(word_res, est_x_ht, est_caps_ht, x_ht, caps_ht); - est_x_ht_certain = x_ht.get_total () > 0; - #ifndef SECURE_NAMES - if (debug_x_ht_level >= 20) - tprintf ("Mode20:E: Est from caps XHT:%f CAP:%f\n", - est_x_ht, est_caps_ht); - #endif - } - else { - /* Do something based on case ambig chars alone - we have guessed that the - ambigs are lower case. */ - est_x_ht = ambig_lc_x_est; - est_x_ht_certain = TRUE; - if (ambig_uc_caps_est > ambig_lc_x_est) { - est_caps_ht = ambig_uc_caps_est; - est_caps_ht_certain = TRUE; - } - else - est_caps_ht = est_x_ht / x_ht_fraction_of_caps_ht; - - #ifndef SECURE_NAMES - if (debug_x_ht_level >= 20) - tprintf ("Mode20:F: Est from ambigs XHT:%f CAP:%f\n", - est_x_ht, est_caps_ht); - #endif - } - /* Check for sane interpretation of evidence: - Try shifting caps ht if min certain caps ht is not significantly greater - than the estimated x ht or the max certain x ht is not significantly less - than the estimated caps ht. 
*/ - if (x_ht_check_est) { - if ((caps_ht.get_total () > 0) && - (est_x_ht + x_ht_ok_variation >= caps_ht.ile (0.0001))) { - trial = TRUE; - est_caps_ht = est_x_ht; - est_x_ht = x_ht_fraction_of_caps_ht * est_caps_ht; - - #ifndef SECURE_NAMES - if (debug_x_ht_level >= 20) - tprintf ("Mode20:G: Trial XHT:%f CAP:%f\n", - est_x_ht, est_caps_ht); - #endif - } - else if ((x_ht.get_total () > 0) && - (est_caps_ht - x_ht_ok_variation <= x_ht.ile (0.9999))) { - trial = TRUE; - est_x_ht = est_caps_ht; - est_caps_ht = est_x_ht / x_ht_fraction_of_caps_ht; - #ifndef SECURE_NAMES - if (debug_x_ht_level >= 20) - tprintf ("Mode20:H: Trial XHT:%f CAP:%f\n", - est_x_ht, est_caps_ht); - #endif - } - } - } - - else { - /* There is no sensible data so we're in the dark. */ - - marginally_above_x_ht = bln_x_height + - x_ht_ok_variation * x_ht_sub_variation; - /* - If there are no rejects, or the only rejects have a narrow height, or have - a small area compared to a normal char, then estimate the x-height as the - original one. (I.e dont fiddle about if the only rejects look like - punctuation) - we use max height as mean or median will be too low if - there are only two blobs - Eg "F." 
- */ - - if (debug_x_ht_level >= 20) - tprintf ("Mode20:I: In the dark\n"); - - if ((rej_blobs_count == 0) || - (rej_blobs_max_height < 0.3 * max_blob_ht) || - (rej_blobs_max_area < 0.3 * max_blob_ht * max_blob_ht)) { - no_comment = TRUE; - if (debug_x_ht_level >= 20) - tprintf ("Mode20:J: No comment due to no rejects\n"); - } - else if (x_ht_limit_flip_trials && - ((max_blob_ht < marginally_above_x_ht) || - ((ambig_lc_x_est > 0) && - (ambig_lc_x_est == ambig_uc_caps_est) && - (ambig_lc_x_est < marginally_above_x_ht)))) { - no_comment = TRUE; - if (debug_x_ht_level >= 20) - tprintf ("Mode20:K: No comment as close to xht %f < %f\n", - ambig_lc_x_est, marginally_above_x_ht); - } - else if (x_ht_conservative_ambigs && (ambig_uc_caps_est > 0)) { - trial = TRUE; - est_caps_ht = ambig_lc_x_est; - est_x_ht = x_ht_fraction_of_caps_ht * est_caps_ht; - - #ifndef SECURE_NAMES - if (debug_x_ht_level >= 20) - tprintf ("Mode20:L: Trial XHT:%f CAP:%f\n", - est_x_ht, est_caps_ht); - #endif - } - /* - If the top of the word is nowhere near where we expect ascenders to be - (less than half the x_ht -> caps_ht distance) - suspect an all caps word - at the x-ht. Estimate x-ht accordingly - but only as a TRIAL! - NOTE we do NOT check location of baseline. Commas can descend as much as - real descenders so we would need to do something to make sure that any - disqualifying descenders were not at the end. 
- */ - else { - if (max_blob_ht < - (bln_x_height + bln_x_height / x_ht_fraction_of_caps_ht) / 2.0) { - trial = TRUE; - est_x_ht = x_ht_fraction_of_caps_ht * max_blob_ht; - est_caps_ht = max_blob_ht; - - #ifndef SECURE_NAMES - if (debug_x_ht_level >= 20) - tprintf ("Mode20:M: Trial XHT:%f CAP:%f\n", - est_x_ht, est_caps_ht); - #endif - } - else { - no_comment = TRUE; - if (debug_x_ht_level >= 20) - tprintf ("Mode20:N: No comment as nothing else matched\n"); - } - } - } - - /* Sanity check - reject word if fails */ - - if (!no_comment && - ((est_x_ht > 2 * bln_x_height) || - (est_x_ht / word_res->denorm.scale () <= min_sane_x_ht_pixels) || - (est_caps_ht <= est_x_ht) || (est_caps_ht >= 2.5 * est_x_ht))) { - no_comment = TRUE; - if (!trial && rej_use_xht) { - if (debug_x_ht_level >= 2) { - tprintf ("Sanity check rejecting %s ", word_str); - word_res->reject_map.print (debug_fp); - tprintf ("\n"); - } - word_res->reject_map.rej_word_xht_fixup (); - - } - if (debug_x_ht_level >= 20) - tprintf ("Mode20:O: No comment as nothing else matched\n"); - } - - if (no_comment || trial) { - word_res->x_height = bln_x_height / word_res->denorm.scale (); - word_res->guessed_x_ht = TRUE; - word_res->caps_height = (bln_x_height / x_ht_fraction_of_caps_ht) / - word_res->denorm.scale (); - word_res->guessed_caps_ht = TRUE; - /* - Reject ambigs in the current word if we are uncertain and: - there are rejects OR - there is only one char which is an ambig OR - there is conflict between the case of the ambigs even though there is - no height separation Eg "Ms" recognised from "MS" - */ - if (rej_trial_ambigs && - ((word_res->reject_map.reject_count () > 0) || - (word_res->reject_map.length () == 1) || - ((x_ht_ambigs > 0) && (caps_ht_ambigs > 0)))) { - #ifndef SECURE_NAMES - if (debug_x_ht_level >= 2) { - tprintf ("TRIAL Rej Ambigs %s ", word_str); - word_res->reject_map.print (debug_fp); - } - #endif - reject_ambigs(word_res); - if (debug_x_ht_level >= 2) { - tprintf (" "); - 
word_res->reject_map.print (debug_fp); - tprintf ("\n"); - } - } - } - else { - word_res->x_height = est_x_ht / word_res->denorm.scale (); - word_res->guessed_x_ht = !est_x_ht_certain; - word_res->caps_height = est_caps_ht / word_res->denorm.scale (); - word_res->guessed_caps_ht = !est_caps_ht_certain; - } - - if (!no_comment && (fabs (est_x_ht - bln_x_height) > x_ht_ok_variation)) - *trial_x_ht = est_x_ht / word_res->denorm.scale (); + // The xheight must change by at least x_ht_min_change to be used. + if (fabs(new_xht - kBlnXHeight) >= x_ht_min_change) + return new_xht / word_res->denorm.scale(); else - *trial_x_ht = 0.0; - - #ifndef SECURE_NAMES - if (((*trial_x_ht > 0) && (debug_x_ht_level >= 3)) || - (debug_x_ht_level >= 5)) { - tprintf ("%s ", word_str); - word_res->reject_map.print (debug_fp); - tprintf - (" X:%0.2f Cps:%0.2f Mxht:%0.2f RJ MxHt:%d MxAr:%d Rematch:%c\n", - est_x_ht, est_caps_ht, max_blob_ht, rej_blobs_max_height, - rej_blobs_max_area, *trial_x_ht > 0 ? '*' : ' '); - } - #endif - + return 0.0f; } - -namespace tesseract { -/** - * check_block_occ() - * Checks word for coarse block occupancy, rejecting more chars and flipping - * case of case ambiguous chars as required. 
- */ -void Tesseract::check_block_occ(WERD_RES *word_res) { - PBLOB_IT blob_it; - STRING new_string; - STRING new_string_lengths(word_res->best_choice->unichar_lengths()); - REJMAP new_map = word_res->reject_map; - WERD_CHOICE *new_choice; - - const char *word_str = word_res->best_choice->unichar_string().string(); - inT16 i; - inT16 offset; - inT16 reject_count = 0; - char confirmed_char[UNICHAR_LEN + 1]; - char temp_char[UNICHAR_LEN + 1]; - float x_ht; - float caps_ht; - - new_string_lengths[0] = 0; - - if (word_res->x_height > 0) - x_ht = word_res->x_height * word_res->denorm.scale (); - else - x_ht = bln_x_height; - - if (word_res->caps_height > 0) - caps_ht = word_res->caps_height * word_res->denorm.scale (); - else - caps_ht = x_ht / x_ht_fraction_of_caps_ht; - - blob_it.set_to_list (word_res->outword->blob_list ()); - - for (blob_it.mark_cycle_pt (), i = 0, offset = 0; - !blob_it.cycled_list (); blob_it.forward (), - offset += word_res->best_choice->unichar_lengths()[i++]) { - strncpy(temp_char, word_str + offset, - word_res->best_choice->unichar_lengths()[i]); //default copy - temp_char[word_res->best_choice->unichar_lengths()[i]] = '\0'; - if (word_res->reject_map[i].accepted ()) { - check_blob_occ (temp_char, - blob_it.data ()->bounding_box (). 
- top () - bln_baseline_offset, x_ht, - caps_ht, confirmed_char); - - if (strcmp(confirmed_char, "") == 0) { - if (rej_use_check_block_occ) { - new_map[i].setrej_xht_fixup (); - reject_count++; - } - } - else - strcpy(temp_char, confirmed_char); - } - new_string += temp_char; - new_string_lengths[i] = strlen(temp_char); - new_string_lengths[i + 1] = 0; - - } - if ((reject_count > 0) || (new_string != word_str)) { - if (debug_x_ht_level >= 2) { - tprintf ("Shape Verification: %s ", word_str); - word_res->reject_map.print (debug_fp); - tprintf (" -> %s ", new_string.string ()); - new_map.print (debug_fp); - tprintf ("\n"); - } - new_choice = new WERD_CHOICE(new_string.string(), - new_string_lengths.string(), - word_res->best_choice->rating(), - word_res->best_choice->certainty(), - word_res->best_choice->permuter(), - unicharset); - new_choice->populate_unichars(unicharset); - delete word_res->best_choice; - word_res->best_choice = new_choice; - word_res->reject_map = new_map; - } -} } // namespace tesseract - -/** - * check_blob_occ() - * - * Checks blob for position relative to position above baseline - * @return 0 for reject, or (possibly case shifted) confirmed char - */ - -void check_blob_occ(char* proposed_char, - inT16 blob_ht_above_baseline, - float x_ht, - float caps_ht, - char* confirmed_char) { - BOOL8 blob_definite_x_ht; - BOOL8 blob_definite_caps_ht; - float acceptable_variation; - - acceptable_variation = (caps_ht - x_ht) * x_ht_variation; - /* ??? REJECT if expected descender and nothing significantly below BL */ - - /* ??? 
REJECT if expected ascender and nothing significantly above x-ht */ - - /* - IF AMBIG_CAPS_X_CHS - IF blob is definitely an ascender ( > xht + xht err )AND - char is an x-ht char - THEN - flip case - IF blob is defintiely an x-ht ( <= xht + xht err ) AND - char is an ascender char - THEN - flip case - */ - blob_definite_x_ht = blob_ht_above_baseline <= x_ht + acceptable_variation; - blob_definite_caps_ht = blob_ht_above_baseline >= - caps_ht - acceptable_variation; - - if (STRING (chs_ambig_caps_x).contains (*proposed_char)) { - if ((!blob_definite_x_ht && !blob_definite_caps_ht) || - ((strcmp(proposed_char, "0") == 0) && !blob_definite_caps_ht) || - ((strcmp(proposed_char, "o") == 0) && !blob_definite_x_ht)) { - strcpy(confirmed_char, ""); - return; - } - - else if (blob_definite_caps_ht && - STRING (chs_x_ht).contains (*proposed_char)) { - if (x_ht_case_flip) { - //flip to upper case - proposed_char[0] = (char) toupper (*proposed_char); - return; - } else { - strcpy(confirmed_char, ""); - return; - } - } - - else if (blob_definite_x_ht && - !STRING (chs_x_ht).contains (*proposed_char)) { - if (x_ht_case_flip) { - //flip to lower case - proposed_char[0] = (char) tolower (*proposed_char); - } else { - strcpy(confirmed_char, ""); - return; - } - } - } - else - if ((STRING (chs_non_ambig_x_ht).contains (*proposed_char) - && !blob_definite_x_ht) - || (STRING (chs_non_ambig_caps_ht).contains (*proposed_char) - && !blob_definite_caps_ht)) { - strcpy(confirmed_char, ""); - return; - } - strcpy(confirmed_char, proposed_char); - return; -} - - -float estimate_from_stats(STATS &stats) { - if (stats.get_total () <= 0) - return 0.0; - else if (stats.get_total () >= 3) - return stats.ile (0.5); //median - else - return stats.mean (); -} - - -void improve_estimate(WERD_RES *word_res, - float &est_x_ht, - float &est_caps_ht, - STATS &x_ht, - STATS &caps_ht) { - PBLOB_IT blob_it; - inT16 blob_ht_above_baseline; - - const char *word_str; - inT16 i; - inT16 offset; - TBOX blob_box; 
//blob bounding box - char confirmed_char[UNICHAR_LEN + 1]; - char temp_char[UNICHAR_LEN + 1]; - float new_val; - - /* IMPROVE estimates here - if good estimates, and case ambig chars, - rescan blobs to fix case ambig blobs, re-estimate hts ??? maybe always do - it after deciding x-height - */ - - blob_it.set_to_list (word_res->outword->blob_list ()); - word_str = word_res->best_choice->unichar_string().string(); - for (blob_it.mark_cycle_pt (), i = 0, offset = 0; - !blob_it.cycled_list (); blob_it.forward (), - offset += word_res->best_choice->unichar_lengths()[i++]) { - if ((STRING (chs_ambig_caps_x).contains (word_str[offset])) && - (!dodgy_blob (blob_it.data ()))) { - blob_box = blob_it.data ()->bounding_box (); - blob_ht_above_baseline = blob_box.top () - bln_baseline_offset; - strncpy(temp_char, word_str + offset, - word_res->best_choice->unichar_lengths()[i]); - temp_char[word_res->best_choice->unichar_lengths()[i]] = '\0'; - check_blob_occ (temp_char, - blob_ht_above_baseline, - est_x_ht, est_caps_ht, confirmed_char); - if (strcmp(confirmed_char, "") != 0) { - if (STRING (chs_x_ht).contains (*confirmed_char)) - x_ht.add (blob_ht_above_baseline, 1); - else - caps_ht.add (blob_ht_above_baseline, 1); - } - } - } - new_val = estimate_from_stats (x_ht); - if (new_val > 0) - est_x_ht = new_val; - new_val = estimate_from_stats (caps_ht); - if (new_val > 0) - est_caps_ht = new_val; -} - - -void reject_ambigs( //rej any accepted xht ambig chars - WERD_RES *word) { - const char *word_str; - int i = 0; - - word_str = word->best_choice->unichar_string().string(); - while (*word_str != '\0') { - if (STRING (chs_ambig_caps_x).contains (*word_str)) - word->reject_map[i].setrej_xht_fixup (); - word_str += word->best_choice->unichar_lengths()[i++]; - } -} - - -void est_ambigs( //xht ambig ht stats - WERD_RES *word_res, - STATS &stats, - float *ambig_lc_x_est, //xht est - float *ambig_uc_caps_est //caps est - ) { - float x_ht_ok_variation; - STATS short_ambigs (0, 300); - 
STATS tall_ambigs (0, 300); - PBLOB_IT blob_it; - TBOX blob_box; //blob bounding box - inT16 blob_ht_above_baseline; - - const char *word_str; - inT16 i; - inT16 offset; - float min; //min ambig ch ht - float max; //max ambig ch ht - float short_limit; // for lower case - float tall_limit; // for upper case - - x_ht_ok_variation = - (bln_x_height / x_ht_fraction_of_caps_ht - bln_x_height) * x_ht_variation; - - if (stats.get_total () == 0) { - *ambig_lc_x_est = 0; - *ambig_uc_caps_est = 0; - } - else { - min = stats.ile (0.0); - max = stats.ile (0.99999); - if ((max - min) < x_ht_ok_variation) { - *ambig_lc_x_est = *ambig_uc_caps_est = stats.mean (); - //close enough - } - else { - /* Try reclustering into lower and upper case chars */ - short_limit = min + (max - min) * x_ht_variation; - tall_limit = max - (max - min) * x_ht_variation; - word_str = word_res->best_choice->unichar_string().string(); - blob_it.set_to_list (word_res->outword->blob_list ()); - for (blob_it.mark_cycle_pt (), i = 0, offset = 0; - !blob_it.cycled_list (); blob_it.forward (), - offset += word_res->best_choice->unichar_lengths()[i++]) { - if (word_res->reject_map[i].accepted () && - STRING (chs_ambig_caps_x).contains (word_str[offset]) && - (!dodgy_blob (blob_it.data ()))) { - blob_box = blob_it.data ()->bounding_box (); - blob_ht_above_baseline = - blob_box.top () - bln_baseline_offset; - if (blob_ht_above_baseline <= short_limit) - short_ambigs.add (blob_ht_above_baseline, 1); - else if (blob_ht_above_baseline >= tall_limit) - tall_ambigs.add (blob_ht_above_baseline, 1); - } - } - *ambig_lc_x_est = short_ambigs.mean (); - *ambig_uc_caps_est = tall_ambigs.mean (); - /* Cop out if we havent got sensible clusters. */ - if (*ambig_uc_caps_est - *ambig_lc_x_est <= x_ht_ok_variation) - *ambig_lc_x_est = *ambig_uc_caps_est = stats.mean (); - //close enough - } - } -} - - -/** - * dodgy_blob() - * Returns true if the blob has more than one outline, one above the other. 
- * These are dodgy as the top blob could be noise, causing the bounding box xht - * to be misleading - */ - -BOOL8 dodgy_blob(PBLOB *blob) { - OUTLINE_IT outline_it = blob->out_list (); - inT16 highest_bottom = -MAX_INT16; - inT16 lowest_top = MAX_INT16; - TBOX outline_box; - - if (x_ht_include_dodgy_blobs) - return FALSE; //no blob is ever dodgy - for (outline_it.mark_cycle_pt (); - !outline_it.cycled_list (); outline_it.forward ()) { - outline_box = outline_it.data ()->bounding_box (); - if (lowest_top > outline_box.top ()) - lowest_top = outline_box.top (); - if (highest_bottom < outline_box.bottom ()) - highest_bottom = outline_box.bottom (); - } - return highest_bottom >= lowest_top; -} diff --git a/ccmain/fixxht.h b/ccmain/fixxht.h deleted file mode 100644 index 18a62de554..0000000000 --- a/ccmain/fixxht.h +++ /dev/null @@ -1,92 +0,0 @@ -/********************************************************************** - * File: fixxht.h (Formerly fixxht.h) - * Description: Improve x_ht and look out for case inconsistencies - * Author: Phil Cheatle - * Created: Thu Aug 5 14:11:08 BST 1993 - * - * (C) Copyright 1992, Hewlett-Packard Ltd. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. 
- * - **********************************************************************/ - -#ifndef FIXXHT_H -#define FIXXHT_H - -#include "varable.h" -#include "statistc.h" -#include "pageres.h" -#include "notdll.h" - -extern double_VAR_H (x_ht_fraction_of_caps_ht, 0.7, -"Fract of cps ht est of xht"); -extern double_VAR_H (x_ht_variation, 0.35, -"Err band as fract of caps/xht dist"); -extern double_VAR_H (x_ht_sub_variation, 0.5, -"Err band as fract of caps/xht dist"); -extern BOOL_VAR_H (rej_trial_ambigs, TRUE, -"reject x-ht ambigs when under trial"); -extern BOOL_VAR_H (x_ht_conservative_ambigs, FALSE, -"Dont rely on ambigs + maxht"); -extern BOOL_VAR_H (x_ht_check_est, TRUE, "Cross check estimates"); -extern BOOL_VAR_H (x_ht_case_flip, FALSE, "Flip or reject suspect case"); -extern BOOL_VAR_H (x_ht_include_dodgy_blobs, TRUE, -"Include blobs with possible noise?"); -extern BOOL_VAR_H (x_ht_limit_flip_trials, TRUE, -"Dont do trial flips when ambigs are close to xht?"); -extern BOOL_VAR_H (rej_use_check_block_occ, TRUE, -"Analyse rejection behaviour"); -extern STRING_VAR_H (chs_non_ambig_caps_ht, -"!#$%&()/12346789?ABDEFGHIKLNQRT[]\\bdfhkl", -"Reliable ascenders"); -extern STRING_VAR_H (chs_x_ht, "acegmnopqrsuvwxyz", "X height chars"); -extern STRING_VAR_H (chs_non_ambig_x_ht, "aenqr", "reliable X height chars"); -extern STRING_VAR_H (chs_ambig_caps_x, "cCmMoO05sSuUvVwWxXzZ", -"X ht or caps ht chars"); -extern STRING_VAR_H (chs_bl_ambig_caps_x, "pPyY", -" Caps or descender ambigs"); -extern STRING_VAR_H (chs_caps_ht, -"!#$%&()/0123456789?ABCDEFGHIJKLMNOPQRSTUVWXYZ[]\\bdfhkl{|}", -"Ascender chars"); -extern STRING_VAR_H (chs_desc, "gjpqy", "Descender chars"); -extern STRING_VAR_H (chs_non_ambig_bl, -"!#$%&01246789?ABCDEFGHIKLMNORSTUVWXYZabcdehiklmnorstuvwxz", -"Reliable baseline chars"); -extern STRING_VAR_H (chs_odd_top, "ijt", "Chars with funny ascender region"); -extern STRING_VAR_H (chs_odd_bot, "()35JQ[]\\/{}|", "Chars with funny base"); -extern STRING_VAR_H (chs_bl, 
-"!#$%&()/01246789?ABCDEFGHIJKLMNOPRSTUVWXYZ[]\\abcdefhiklmnorstuvwxz{}", -"Baseline chars"); -extern STRING_VAR_H (chs_non_ambig_desc, "gq", "Reliable descender chars"); -void re_estimate_x_ht( //improve for 1 word - WERD_RES *word_res, //word to do - float *trial_x_ht //new match value - ); -void check_blob_occ(char *proposed_char, - inT16 blob_ht_above_baseline, - float x_ht, - float caps_ht, - char *confirmed_char); -float estimate_from_stats(STATS &stats); -void improve_estimate(WERD_RES *word_res, - float &est_x_ht, - float &est_caps_ht, - STATS &x_ht, - STATS &caps_ht); -void reject_ambigs( //rej any accepted xht ambig chars - WERD_RES *word); - //xht ambig ht stats -void est_ambigs(WERD_RES *word_res, - STATS &stats, - float *ambig_lc_x_est, //xht est - float *ambig_uc_caps_est //caps est - ); -BOOL8 dodgy_blob(PBLOB *blob); -#endif diff --git a/ccmain/matmatch.cpp b/ccmain/matmatch.cpp deleted file mode 100644 index 694510911d..0000000000 --- a/ccmain/matmatch.cpp +++ /dev/null @@ -1,396 +0,0 @@ -/********************************************************************** - * File: matmatch.cpp (Formerly matrix_match.c) - * Description: matrix matching routines for Tessedit - * Author: Chris Newton - * Created: Wed Nov 24 15:57:41 GMT 1993 - * - * (C) Copyright 1993, Hewlett-Packard Ltd. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. 
- * - **********************************************************************/ - -#include "mfcpch.h" -#include -#include -#include -#include -#ifdef __UNIX__ -#include -#endif -#include "tessvars.h" -#include "stderr.h" -#include "img.h" -//#include "evnts.h" -//#include "showim.h" -#include "hosthplb.h" -#include "scrollview.h" -//#include "evnts.h" -#include "adaptions.h" -#include "matmatch.h" -#include "secname.h" -#include "svshowim.h" - -// Include automatically generated configuration file if running autoconf. -#ifdef HAVE_CONFIG_H -#include "config_auto.h" -#endif - -#define EXTERN - -EXTERN BOOL_VAR (tessedit_display_mm, FALSE, "Display matrix matches"); -EXTERN BOOL_VAR (tessedit_mm_debug, FALSE, -"Print debug information for matrix matcher"); -EXTERN INT_VAR (tessedit_mm_prototype_min_size, 3, -"Smallest number of samples in a cluster for a prototype to be used"); - -// Colours for displaying the match -#define BB_COLOUR 0 -#define BW_COLOUR 1 -#define WB_COLOUR 3 -#define UB_COLOUR 5 -#define BU_COLOUR 7 -#define UU_COLOUR 9 -#define WU_COLOUR 11 -#define UW_COLOUR 13 -#define WW_COLOUR 15 - -#define BINIM_BLACK 0 -#define BINIM_WHITE 1 - -float matrix_match( // returns match score - IMAGE *image1, - IMAGE *image2) { - ASSERT_HOST (image1->get_bpp () == 1 && image2->get_bpp () == 1); - - if (image1->get_xsize () >= image2->get_xsize ()) - return match1 (image1, image2); - else - return match1 (image2, image1); -} - - -float match1( /* returns match score */ - IMAGE *image_w, - IMAGE *image_n) { - inT32 x_offset; - inT32 y_offset; - inT32 x_size = image_w->get_xsize (); - inT32 y_size; - inT32 x_size2 = image_n->get_xsize (); - inT32 y_size2; - IMAGE match_image; - IMAGELINE imline_w; - IMAGELINE imline_n; - IMAGELINE match_imline; - inT32 x; - inT32 y; - float sum = 0.0; - - x_offset = (image_w->get_xsize () - image_n->get_xsize ()) / 2; - - ASSERT_HOST (x_offset >= 0); - match_imline.init (x_size); - - sum = 0; - - if (image_w->get_ysize () < 
image_n->get_ysize ()) { - y_size = image_n->get_ysize (); - y_size2 = image_w->get_ysize (); - y_offset = (y_size - y_size2) / 2; - - if (tessedit_display_mm && !tessedit_mm_use_prototypes) - tprintf ("I1 (%d, %d), I2 (%d, %d), MI (%d, %d)\n", x_size, - image_w->get_ysize (), x_size2, image_n->get_ysize (), - x_size, y_size); - - match_image.create (x_size, y_size, 4); - - for (y = 0; y < y_offset; y++) { - image_n->fast_get_line (0, y, x_size2, &imline_n); - for (x = 0; x < x_size2; x++) { - if (imline_n.pixels[x] == BINIM_BLACK) { - sum += -1; - match_imline.pixels[x] = UB_COLOUR; - } - else { - match_imline.pixels[x] = UW_COLOUR; - } - } - match_image.fast_put_line (x_offset, y, x_size2, &match_imline); - } - - for (y = y_offset + y_size2; y < y_size; y++) { - image_n->fast_get_line (0, y, x_size2, &imline_n); - for (x = 0; x < x_size2; x++) { - if (imline_n.pixels[x] == BINIM_BLACK) { - sum += -1.0; - match_imline.pixels[x] = UB_COLOUR; - } - else { - match_imline.pixels[x] = UW_COLOUR; - } - } - match_image.fast_put_line (x_offset, y, x_size2, &match_imline); - } - - for (y = y_offset; y < y_offset + y_size2; y++) { - image_w->fast_get_line (0, y - y_offset, x_size, &imline_w); - image_n->fast_get_line (0, y, x_size2, &imline_n); - for (x = 0; x < x_offset; x++) { - if (imline_w.pixels[x] == BINIM_BLACK) { - sum += -1.0; - match_imline.pixels[x] = BU_COLOUR; - } - else { - match_imline.pixels[x] = WU_COLOUR; - } - } - - for (x = x_offset + x_size2; x < x_size; x++) { - if (imline_w.pixels[x] == BINIM_BLACK) { - sum += -1.0; - match_imline.pixels[x] = BU_COLOUR; - } - else { - match_imline.pixels[x] = WU_COLOUR; - } - } - - for (x = x_offset; x < x_offset + x_size2; x++) { - if (imline_n.pixels[x - x_offset] == imline_w.pixels[x]) { - sum += 1.0; - if (imline_w.pixels[x] == BINIM_BLACK) - match_imline.pixels[x] = BB_COLOUR; - else - match_imline.pixels[x] = WW_COLOUR; - } - else { - sum += -1.0; - if (imline_w.pixels[x] == BINIM_BLACK) - match_imline.pixels[x] 
= BW_COLOUR; - else - match_imline.pixels[x] = WB_COLOUR; - } - } - - match_image.fast_put_line (0, y, x_size, &match_imline); - } - } - else { - y_size = image_w->get_ysize (); - y_size2 = image_n->get_ysize (); - y_offset = (y_size - y_size2) / 2; - - if (tessedit_display_mm && !tessedit_mm_use_prototypes) - tprintf ("I1 (%d, %d), I2 (%d, %d), MI (%d, %d)\n", x_size, - image_w->get_ysize (), x_size2, image_n->get_ysize (), - x_size, y_size); - - match_image.create (x_size, y_size, 4); - - for (y = 0; y < y_offset; y++) { - image_w->fast_get_line (0, y, x_size, &imline_w); - for (x = 0; x < x_size; x++) { - if (imline_w.pixels[x] == BINIM_BLACK) { - sum += -1; - match_imline.pixels[x] = BU_COLOUR; - } - else { - match_imline.pixels[x] = WU_COLOUR; - } - } - match_image.fast_put_line (0, y, x_size, &match_imline); - } - - for (y = y_offset + y_size2; y < y_size; y++) { - image_w->fast_get_line (0, y, x_size, &imline_w); - for (x = 0; x < x_size; x++) { - if (imline_w.pixels[x] == BINIM_BLACK) { - sum += -1; - match_imline.pixels[x] = BU_COLOUR; - } - else { - match_imline.pixels[x] = WU_COLOUR; - } - } - match_image.fast_put_line (0, y, x_size, &match_imline); - } - - for (y = y_offset; y < y_offset + y_size2; y++) { - image_w->fast_get_line (0, y, x_size, &imline_w); - image_n->fast_get_line (0, y - y_offset, x_size2, &imline_n); - for (x = 0; x < x_offset; x++) { - if (imline_w.pixels[x] == BINIM_BLACK) { - sum += -1.0; - match_imline.pixels[x] = BU_COLOUR; - } - else { - match_imline.pixels[x] = WU_COLOUR; - } - } - - for (x = x_offset + x_size2; x < x_size; x++) { - if (imline_w.pixels[x] == BINIM_BLACK) { - sum += -1.0; - match_imline.pixels[x] = BU_COLOUR; - } - else { - match_imline.pixels[x] = WU_COLOUR; - } - } - - for (x = x_offset; x < x_offset + x_size2; x++) { - if (imline_n.pixels[x - x_offset] == imline_w.pixels[x]) { - sum += 1.0; - if (imline_w.pixels[x] == BINIM_BLACK) - match_imline.pixels[x] = BB_COLOUR; - else - match_imline.pixels[x] = 
WW_COLOUR; - } - else { - sum += -1.0; - if (imline_w.pixels[x] == BINIM_BLACK) - match_imline.pixels[x] = BW_COLOUR; - else - match_imline.pixels[x] = WB_COLOUR; - } - } - - match_image.fast_put_line (0, y, x_size, &match_imline); - } - } - -#ifndef GRAPHICS_DISABLED - if (tessedit_display_mm && !tessedit_mm_use_prototypes) { - tprintf ("Match score %f\n", 1.0 - sum / (x_size * y_size)); - display_images(image_w, image_n, &match_image); - } -#endif - - if (tessedit_mm_debug) - tprintf ("Match score %f\n", 1.0 - sum / (x_size * y_size)); - - return (1.0 - sum / (x_size * y_size)); -} - - -/************************************************************************* - * display_images() - * - * Show a pair of images, plus the match image - * - *************************************************************************/ - -#ifndef GRAPHICS_DISABLED -void display_images(IMAGE *image_w, IMAGE *image_n, IMAGE *match_image) { - ScrollView* w_im_window; - ScrollView* n_im_window; - ScrollView* match_window; - inT16 i; - - w_im_window = new ScrollView("Image 1", 20, 100, - 10 * image_w->get_xsize (), 10 * image_w->get_ysize (), - image_w->get_xsize (), image_w->get_ysize ()); - - sv_show_sub_image (image_w, - 0, 0, - image_w->get_xsize (), image_w->get_ysize (), - w_im_window, 0, 0); - - w_im_window->Pen(255,0,0); - for (i = 1; i < image_w->get_xsize (); i++) { - w_im_window->Line(i, 0, i, image_w->get_ysize ()); - } - for (i = 1; i < image_w->get_ysize (); i++) { - w_im_window->Line(0, i, image_w->get_xsize (), i); - } - - n_im_window = new ScrollView ("Image 2", 240, 100, - 10 * image_n->get_xsize (), 10 * image_n->get_ysize (), - image_n->get_xsize (), image_n->get_ysize ()); - - sv_show_sub_image (image_n, - 0, 0, - image_n->get_xsize (), image_n->get_ysize (), - n_im_window, 0, 0); - - n_im_window->Pen(255,0,0); - for (i = 1; i < image_n->get_xsize (); i++) { - n_im_window->Line(i, 0, i, image_n->get_ysize ()); - } - for (i = 1; i < image_n->get_ysize (); i++) { - 
n_im_window->Line(0, i, image_n->get_xsize (), i); - } - - match_window = new ScrollView ("Match Result", 460, 100, - 10 * match_image->get_xsize (), 10 * match_image->get_ysize (), - match_image->get_xsize (), match_image->get_ysize ()); - - match_window->Clear(); - sv_show_sub_image (match_image, - 0, 0, - match_image->get_xsize (), match_image->get_ysize (), - match_window, 0, 0); - - match_window->Pen(255,0,0); - for (i = 1; i < match_image->get_xsize (); i++) { - match_window->Line(i, 0, i, match_image->get_ysize ()); - } - for (i = 1; i < match_image->get_ysize (); i++) { - match_window->Line(0, i, match_image->get_xsize (), i); - } - SVEvent* sve = match_window->AwaitEvent(SVET_DESTROY); - delete sve; - - delete w_im_window; - delete n_im_window; - delete match_window; -} - - -/************************************************************************* - * display_image() - * - * Show a single image - * - *************************************************************************/ - -ScrollView* display_image(IMAGE *image, - const char *title, - inT32 x, - inT32 y, - BOOL8 wait) { - ScrollView* im_window; - inT16 i; - - im_window = new ScrollView (title, x, y, - 10 * image->get_xsize (), 10 * image->get_ysize (), - image->get_xsize (), image->get_ysize ()); - - sv_show_sub_image (image, - 0, 0, - image->get_xsize (), image->get_ysize (), im_window, 0, 0); - - im_window->Pen(255,0,0); - for (i = 1; i < image->get_xsize (); i++) { - im_window->SetCursor(i, 0); - im_window->DrawTo(i, image->get_ysize()); - } - for (i = 1; i < image->get_ysize (); i++) { - im_window->SetCursor(0, i); - im_window->DrawTo(image->get_xsize(),i); - } - - if (wait) { delete im_window->AwaitEvent(SVET_CLICK); } - - return im_window; -} -#endif diff --git a/ccmain/matmatch.h b/ccmain/matmatch.h deleted file mode 100644 index a77f13a05a..0000000000 --- a/ccmain/matmatch.h +++ /dev/null @@ -1,48 +0,0 @@ -/********************************************************************** - * File: 
matmatch.h (Formerly matrix_match.h) - * Description: matrix matching routines for Tessedit - * Author: Chris Newton - * Created: Wed Nov 24 15:57:41 GMT 1993 - * - * (C) Copyright 1993, Hewlett-Packard Ltd. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. - * - **********************************************************************/ - -#ifndef MATMATCH_H -#define MATMATCH_H - -#include "img.h" -#include "hosthplb.h" -#include "notdll.h" - -#define BINIM_BLACK 0 -#define BINIM_WHITE 1 -#define BAD_MATCH 9999.0 - -extern BOOL_VAR_H (tessedit_display_mm, FALSE, "Display matrix matches"); -extern BOOL_VAR_H (tessedit_mm_debug, FALSE, -"Print debug information for matrix matcher"); -extern INT_VAR_H (tessedit_mm_prototype_min_size, 3, -"Smallest number of samples in a cluster for a prototype to be used"); -float matrix_match( // returns match score - IMAGE *image1, - IMAGE *image2); -float match1( /* returns match score */ - IMAGE *image_w, - IMAGE *image_n); -void display_images(IMAGE *image_w, IMAGE *image_n, IMAGE *match_image); -ScrollView* display_image(IMAGE *image, - const char *title, - inT32 x, - inT32 y, - BOOL8 wait); -#endif diff --git a/ccmain/osdetect.cpp b/ccmain/osdetect.cpp index be7d5d6c7a..157dff5dc7 100644 --- a/ccmain/osdetect.cpp +++ b/ccmain/osdetect.cpp @@ -2,6 +2,7 @@ // File: osdetect.cpp // Description: Orientation and script detection. // Author: Samuel Charron +// Ranjith Unnikrishnan // // (C) Copyright 2008, Google Inc. 
// Licensed under the Apache License, Version 2.0 (the "License"); @@ -18,24 +19,25 @@ #include "osdetect.h" -#include "strngs.h" #include "blobbox.h" #include "blread.h" -#include "tordmain.h" -#include "ratngs.h" +#include "colfind.h" +#include "imagefind.h" +#include "linefind.h" #include "oldlist.h" -#include "adaptmatch.h" -#include "tstruct.h" -#include "expandblob.h" -#include "tesseractclass.h" #include "qrsequence.h" - -extern IMAGE page_image; +#include "ratngs.h" +#include "strngs.h" +#include "tabvector.h" +#include "tesseractclass.h" +#include "textord.h" +#include "tstruct.h" const int kMinCharactersToTry = 50; const int kMaxCharactersToTry = 5 * kMinCharactersToTry; const float kSizeRatioToReject = 2.0; +const int kMinAcceptableBlobHeight = 10; const float kOrientationAcceptRatio = 1.3; const float kScriptAcceptRatio = 1.3; @@ -43,8 +45,6 @@ const float kScriptAcceptRatio = 1.3; const float kHanRatioInKorean = 0.7; const float kHanRatioInJapanese = 0.3; -const float kLatinRationInFraktur = 0.7; - const float kNonAmbiguousMargin = 1.0; // General scripts @@ -59,45 +59,140 @@ const char* ScriptDetector::korean_script_ = "Korean"; const char* ScriptDetector::japanese_script_ = "Japanese"; const char* ScriptDetector::fraktur_script_ = "Fraktur"; -CLISTIZEH(BLOBNBOX); -CLISTIZE(BLOBNBOX); +// Minimum believable resolution. +const int kMinCredibleResolution = 70; +// Default resolution used if input is not believable. 
+const int kDefaultResolution = 300; + +void OSResults::update_best_orientation() { + float first = orientations[0]; + float second = orientations[1]; + best_result.orientation_id = 0; + if (orientations[0] < orientations[1]) { + first = orientations[1]; + second = orientations[0]; + best_result.orientation_id = 1; + } + for (int i = 2; i < 4; ++i) { + if (orientations[i] > first) { + second = first; + first = orientations[i]; + best_result.orientation_id = i; + } else if (orientations[i] > second) { + second = orientations[i]; + } + } + // Store difference of top two orientation scores. + best_result.oconfidence = first - second; +} + +void OSResults::set_best_orientation(int orientation_id) { + best_result.orientation_id = orientation_id; + best_result.oconfidence = 0; +} + +void OSResults::update_best_script(int orientation) { + // We skip index 0 to ignore the "Common" script. + float first = scripts_na[orientation][1]; + float second = scripts_na[orientation][2]; + best_result.script_id = 1; + if (scripts_na[orientation][1] < scripts_na[orientation][2]) { + first = scripts_na[orientation][2]; + second = scripts_na[orientation][1]; + best_result.script_id = 2; + } + for (int i = 3; i < kMaxNumberOfScripts; ++i) { + if (scripts_na[orientation][i] > first) { + best_result.script_id = i; + second = first; + first = scripts_na[orientation][i]; + } else if (scripts_na[orientation][i] > second) { + second = scripts_na[orientation][i]; + } + } + best_result.sconfidence = + (first / second - 1.0) / (kScriptAcceptRatio - 1.0); +} + +// Detect and erase horizontal/vertical lines and picture regions from the +// image, so that non-text blobs are removed from consideration. 
+void remove_nontext_regions(tesseract::Tesseract *tess, BLOCK_LIST *blocks, + TO_BLOCK_LIST *to_blocks) { + Pix *pix = tess->pix_binary(); + ASSERT_HOST(pix != NULL); + int vertical_x = 0; + int vertical_y = 1; + tesseract::TabVector_LIST v_lines; + tesseract::TabVector_LIST h_lines; + Boxa* boxa = NULL; + Pixa* pixa = NULL; + const int kMinCredibleResolution = 70; + int resolution = (kMinCredibleResolution > pixGetXRes(pix)) ? + kMinCredibleResolution : pixGetXRes(pix); + + tesseract::LineFinder::FindVerticalLines(resolution, pix, &vertical_x, + &vertical_y, &v_lines); + tesseract::LineFinder::FindHorizontalLines(resolution, pix, &h_lines); + tesseract::ImageFinder::FindImages(pix, &boxa, &pixa); + pixaDestroy(&pixa); + boxaDestroy(&boxa); + tess->mutable_textord()->find_components(tess->pix_binary(), + blocks, to_blocks); +} // Find connected components in the page and process a subset until finished or // a stopping criterion is met. -// Returns true if the page was successfully processed. -bool orientation_and_script_detection(STRING& filename, - OSResults* osr, - tesseract::Tesseract* tess) { +// Returns the number of blobs used in making the estimate. 0 implies failure. 
+int orientation_and_script_detection(STRING& filename, + OSResults* osr, + tesseract::Tesseract* tess) { STRING name = filename; //truncated name const char *lastdot; //of name - TO_BLOCK_LIST land_blocks, port_blocks; - BLOCK_LIST blocks; TBOX page_box; lastdot = strrchr (name.string (), '.'); if (lastdot != NULL) name[lastdot-name.string()] = '\0'; - if (!read_unlv_file(name, page_image.get_xsize(), page_image.get_ysize(), - &blocks)) - FullPageBlock(page_image.get_xsize(), page_image.get_ysize(), &blocks); - find_components(&blocks, &land_blocks, &port_blocks, &page_box); + + ASSERT_HOST(tess->pix_binary() != NULL) + int width = pixGetWidth(tess->pix_binary()); + int height = pixGetHeight(tess->pix_binary()); + int resolution = pixGetXRes(tess->pix_binary()); + // Zero resolution messes up the algorithms, so make sure it is credible. + if (resolution < kMinCredibleResolution) + resolution = kDefaultResolution; + + BLOCK_LIST blocks; + if (!read_unlv_file(name, width, height, &blocks)) + FullPageBlock(width, height, &blocks); + + // Try to remove non-text regions from consideration. + TO_BLOCK_LIST land_blocks, port_blocks; + remove_nontext_regions(tess, &blocks, &port_blocks); + + if (port_blocks.empty()) { + // page segmentation did not succeed, so we need to find_components first. + tess->mutable_textord()->find_components(tess->pix_binary(), + &blocks, &port_blocks); + } else { + page_box.set_left(0); + page_box.set_bottom(0); + page_box.set_right(width); + page_box.set_top(height); + // Filter_blobs sets up the TO_BLOCKs the same as find_components does. + tess->mutable_textord()->filter_blobs(page_box.topright(), + &port_blocks, true); + } + return os_detect(&port_blocks, osr, tess); } // Filter and sample the blobs. 
-// Returns true if the page was successfully processed, or false if the page had -// too few characters to be reliable -bool os_detect(TO_BLOCK_LIST* port_blocks, OSResults* osr, - tesseract::Tesseract* tess) { +// Returns a non-zero number of blobs if the page was successfully processed, or +// zero if the page had too few characters to be reliable +int os_detect(TO_BLOCK_LIST* port_blocks, OSResults* osr, + tesseract::Tesseract* tess) { int blobs_total = 0; - OSResults osr_; - if (osr == NULL) - osr = &osr_; - - osr->unicharset = &tess->unicharset; - OrientationDetector o(osr); - ScriptDetector s(osr, tess); - TO_BLOCK_IT block_it; block_it.set_to_list(port_blocks); @@ -106,9 +201,11 @@ bool os_detect(TO_BLOCK_LIST* port_blocks, OSResults* osr, for (block_it.mark_cycle_pt(); !block_it.cycled_list(); block_it.forward ()) { - TO_BLOCK* block = block_it.data(); + TO_BLOCK* to_block = block_it.data(); + if (to_block->block->poly_block() && + !to_block->block->poly_block()->IsText()) continue; BLOBNBOX_IT bbox_it; - bbox_it.set_to_list(&block->blobs); + bbox_it.set_to_list(&to_block->blobs); for (bbox_it.mark_cycle_pt (); !bbox_it.cycled_list (); bbox_it.forward ()) { BLOBNBOX* bbox = bbox_it.data(); @@ -122,22 +219,36 @@ bool os_detect(TO_BLOCK_LIST* port_blocks, OSResults* osr, float ratio = x_y > y_x ? x_y : y_x; // Blob is ambiguous if (ratio > kSizeRatioToReject) continue; - if (box.height() < 10) continue; + if (box.height() < kMinAcceptableBlobHeight) continue; filtered_it.add_to_end(bbox); } } - if (filtered_it.length() > 0) - filtered_it.move_to_first(); + return os_detect_blobs(&filtered_list, osr, tess); +} + +// Detect orientation and script from a list of blobs. 
+// Returns a non-zero number of blobs if the list was successfully processed, or +// zero if the list had too few characters to be reliable +int os_detect_blobs(BLOBNBOX_CLIST* blob_list, OSResults* osr, + tesseract::Tesseract* tess) { + OSResults osr_; + if (osr == NULL) + osr = &osr_; + osr->unicharset = &tess->unicharset; + OrientationDetector o(osr); + ScriptDetector s(osr, tess); + + BLOBNBOX_C_IT filtered_it(blob_list); int real_max = MIN(filtered_it.length(), kMaxCharactersToTry); - printf("Total blobs found = %d\n", blobs_total); - printf("Number of blobs post-filtering = %d\n", filtered_it.length()); - printf("Number of blobs to try = %d\n", real_max); + // printf("Total blobs found = %d\n", blobs_total); + // printf("Number of blobs post-filtering = %d\n", filtered_it.length()); + // printf("Number of blobs to try = %d\n", real_max); // If there are too few characters, skip this page entirely. if (real_max < kMinCharactersToTry / 2) { printf("Too few characters. Skipping this page\n"); - return false; + return 0; } BLOBNBOX** blobs = new BLOBNBOX*[filtered_it.length()]; @@ -147,18 +258,20 @@ bool os_detect(TO_BLOCK_LIST* port_blocks, OSResults* osr, blobs[number_of_blobs++] = (BLOBNBOX*)filtered_it.data(); } QRSequenceGenerator sequence(number_of_blobs); + int num_blobs_evaluated = 0; for (int i = 0; i < real_max; ++i) { if (os_detect_blob(blobs[sequence.GetVal()], &o, &s, osr, tess) && i > kMinCharactersToTry) { break; } + ++num_blobs_evaluated; } delete [] blobs; // Make sure the best_result is up-to-date int orientation = o.get_orientation(); - s.update_best_script(orientation); - return true; + osr->update_best_script(orientation); + return num_blobs_evaluated; } // Processes a single blob to estimate script and orientation. 
@@ -173,39 +286,40 @@ bool os_detect_blob(BLOBNBOX* bbox, OrientationDetector* o, int x_mid = (box.left() + box.right()) / 2.0f; int y_mid = (box.bottom() + box.top()) / 2.0f; - PBLOB pblob(blob, box.height()); + PBLOB pblob(blob); BLOB_CHOICE_LIST ratings[4]; // Test the 4 orientations for (int i = 0; i < 4; ++i) { // normalize the blob + float scaling = static_cast(kBlnXHeight) / box.height(); + DENORM denorm(x_mid, scaling, 0.0, box.bottom(), 0, NULL, false, NULL); pblob.move(FCOORD(-x_mid, -box.bottom())); - pblob.scale(static_cast(bln_x_height) / box.height()); - pblob.move(FCOORD(0.0f, bln_baseline_offset)); + pblob.scale(scaling); + pblob.move(FCOORD(0.0f, kBlnBaselineOffset)); { // List of choices given by the classifier - TBLOB *tessblob; //converted blob - TEXTROW tessrow; //dummy row - - tess_cn_matching.set_value(true); // turn it on - tess_bn_matching.set_value(false); - //convert blob - tessblob = make_tess_blob (&pblob, TRUE); - //make dummy row - make_tess_row(NULL, &tessrow); - //classify - tess->AdaptiveClassifier (tessblob, NULL, &tessrow, ratings + i, NULL); - free_blob(tessblob); + tess->tess_cn_matching.set_value(true); // turn it on + tess->tess_bn_matching.set_value(false); + // Convert blob + TBLOB* tessblob = make_tess_blob(&pblob); + // Classify + tess->set_denorm(&denorm); + tess->AdaptiveClassifier(tessblob, ratings + i, NULL); + delete tessblob; } // undo normalize - pblob.move(FCOORD(0.0f, -bln_baseline_offset)); - pblob.scale(1.0f / (static_cast(bln_x_height) / box.height())); + pblob.move(FCOORD(0.0f, -kBlnBaselineOffset)); + pblob.scale(1.0f / scaling); pblob.move(FCOORD(x_mid, box.bottom())); // center the blob pblob.move(FCOORD(-x_mid, -y_mid)); + // TODO(rays) Although we should now get the correct image coords with + // the DENORM, there is nothing to tell the classifier to rotate the + // image or to actually rotate the image for it. 
// Rotate it pblob.rotate(); @@ -233,14 +347,24 @@ OrientationDetector::OrientationDetector(OSResults* osr) { // Score the given blob and return true if it is now sure of the orientation // after adding this block. bool OrientationDetector::detect_blob(BLOB_CHOICE_LIST* scores) { + float blob_o_score[4] = {0.0, 0.0, 0.0, 0.0}; + float total_blob_o_score = 0.0; + for (int i = 0; i < 4; ++i) { BLOB_CHOICE_IT choice_it; choice_it.set_to_list(scores + i); - if (!choice_it.empty()) { - osr_->orientations[i] += (100 + choice_it.data()->certainty()); + // The certainty score ranges between [-20,0]. This is converted here to + // [0,1], with 1 indicating best match. + blob_o_score[i] = 1 + 0.05 * choice_it.data()->certainty(); + total_blob_o_score += blob_o_score[i]; } } + // Normalize the orientation scores for the blob and use them to + // update the aggregated orientation score. + for (int i = 0; total_blob_o_score != 0 && i < 4; ++i) { + osr_->orientations[i] += log(blob_o_score[i] / total_blob_o_score); + } float first = -1; float second = -1; @@ -259,35 +383,9 @@ bool OrientationDetector::detect_blob(BLOB_CHOICE_LIST* scores) { return first / second > kOrientationAcceptRatio; } -void OrientationDetector::update_best_orientation() { - float first = osr_->orientations[0]; - float second = osr_->orientations[1]; - - if (first < second) { - second = first; - first = osr_->orientations[1]; - } - - osr_->best_result.orientation = 0; - osr_->best_result.oconfidence = 0; - - for (int i = 0; i < 4; ++i) { - if (osr_->orientations[i] > first) { - second = first; - first = osr_->orientations[i]; - osr_->best_result.orientation = i; - } else if (osr_->orientations[i] > second) { - second = osr_->orientations[i]; - } - } - - osr_->best_result.oconfidence = - (first / second - 1.0) / (kOrientationAcceptRatio - 1.0); -} - int OrientationDetector::get_orientation() { - update_best_orientation(); - return osr_->best_result.orientation; + osr_->update_best_orientation(); + return 
osr_->best_result.orientation_id; } @@ -347,7 +445,7 @@ void ScriptDetector::detect_blob(BLOB_CHOICE_LIST* scores) { prev_class_id = choice->unichar_id(); prev_config = choice->config(); } else if (-choice->certainty() < prev_score + kNonAmbiguousMargin) { - script_count++; + ++script_count; next_best_score = -choice->certainty(); next_best_script_id = choice->script_id(); next_best_unichar = tess_->unicharset.id_to_unichar(choice->unichar_id()); @@ -365,7 +463,7 @@ void ScriptDetector::detect_blob(BLOB_CHOICE_LIST* scores) { // Character is non ambiguous if (script_count == 1) { // Update the score of the winning script - osr_->scripts_na[i][prev_id] += 1; + osr_->scripts_na[i][prev_id] += 1.0; // Workaround for Fraktur if (prev_id == latin_id_) { @@ -379,19 +477,19 @@ void ScriptDetector::detect_blob(BLOB_CHOICE_LIST* scores) { // fi.is_serif(), fi.is_fraktur(), // prev_unichar); if (fi.is_fraktur()) { - osr_->scripts_na[i][prev_id] -= 1; - osr_->scripts_na[i][fraktur_id_] += 1; + osr_->scripts_na[i][prev_id] -= 1.0; + osr_->scripts_na[i][fraktur_id_] += 1.0; } } } // Update Japanese / Korean pseudo-scripts if (prev_id == katakana_id_) - osr_->scripts_na[i][japanese_id_] += 1; + osr_->scripts_na[i][japanese_id_] += 1.0; if (prev_id == hiragana_id_) - osr_->scripts_na[i][japanese_id_] += 1; + osr_->scripts_na[i][japanese_id_] += 1.0; if (prev_id == hangul_id_) - osr_->scripts_na[i][korean_id_] += 1; + osr_->scripts_na[i][korean_id_] += 1.0; if (prev_id == han_id_) osr_->scripts_na[i][korean_id_] += kHanRatioInKorean; if (prev_id == han_id_) @@ -401,27 +499,24 @@ void ScriptDetector::detect_blob(BLOB_CHOICE_LIST* scores) { } bool ScriptDetector::must_stop(int orientation) { - update_best_script(orientation); + osr_->update_best_script(orientation); return osr_->best_result.sconfidence > 1; } - -void ScriptDetector::update_best_script(int orientation) { - float first = -1; - float second = -1; - - // i = 1 -> ignore Common scripts - for (int i = 1; i < 
kMaxNumberOfScripts; ++i) { - if (osr_->scripts_na[orientation][i] > first) { - osr_->best_result.script = - tess_->unicharset.get_script_from_script_id(i); - second = first; - first = osr_->scripts_na[orientation][i]; - } else if (osr_->scripts_na[orientation][i] > second) { - second = osr_->scripts_na[orientation][i]; - } +// Helper method to convert an orientation index to its value in degrees. +// The value represents the amount of clockwise rotation in degrees that must be +// applied for the text to be upright (readable). +const int OrientationIdToValue(const int& id) { + switch (id) { + case 0: + return 0; + case 1: + return 270; + case 2: + return 180; + case 3: + return 90; + default: + return -1; } - - osr_->best_result.sconfidence = - (first / second - 1.0) / (kOrientationAcceptRatio - 1.0); } diff --git a/ccmain/osdetect.h b/ccmain/osdetect.h index 364ac00cab..f649b8a6a1 100644 --- a/ccmain/osdetect.h +++ b/ccmain/osdetect.h @@ -2,6 +2,7 @@ // File: osdetect.h // Description: Orientation and script detection. // Author: Samuel Charron +// Ranjith Unnikrishnan // // (C) Copyright 2008, Google Inc. // Licensed under the Apache License, Version 2.0 (the "License"); @@ -25,6 +26,7 @@ class TO_BLOCK_LIST; class BLOBNBOX; class BLOB_CHOICE_LIST; +class BLOBNBOX_CLIST; namespace tesseract { class Tesseract; @@ -34,8 +36,10 @@ class Tesseract; const int kMaxNumberOfScripts = 116 + 1 + 2 + 1; struct OSBestResult { - int orientation; - const char* script; + OSBestResult() : orientation_id(0), script_id(0), sconfidence(0.0), + oconfidence(0.0) {} + int orientation_id; + int script_id; float sconfidence; float oconfidence; }; @@ -48,7 +52,16 @@ struct OSResults { orientations[i] = 0; } } + void update_best_orientation(); + void set_best_orientation(int orientation_id); + void update_best_script(int orientation_id); + + // Array holding scores for each orientation id [0,3]. 
+ // Orientation ids [0..3] map to [0, 270, 180, 90] degree orientations of the + // page respectively, where the values refer to the amount of clockwise + // rotation to be applied to the page for the text to be upright and readable. float orientations[4]; + // Script confidence scores for each of 4 possible orientations. float scripts_na[4][kMaxNumberOfScripts]; UNICHARSET* unicharset; @@ -59,7 +72,6 @@ class OrientationDetector { public: OrientationDetector(OSResults*); bool detect_blob(BLOB_CHOICE_LIST* scores); - void update_best_orientation(); int get_orientation(); private: OSResults* osr_; @@ -69,7 +81,6 @@ class ScriptDetector { public: ScriptDetector(OSResults*, tesseract::Tesseract* tess); void detect_blob(BLOB_CHOICE_LIST* scores); - void update_best_script(int); void get_script() ; bool must_stop(int orientation); private: @@ -88,15 +99,25 @@ class ScriptDetector { tesseract::Tesseract* tess_; }; -bool orientation_and_script_detection(STRING& filename, - OSResults*, - tesseract::Tesseract*); +int orientation_and_script_detection(STRING& filename, + OSResults*, + tesseract::Tesseract*); -bool os_detect(TO_BLOCK_LIST* port_blocks, - OSResults* osr, - tesseract::Tesseract* tess); +int os_detect(TO_BLOCK_LIST* port_blocks, + OSResults* osr, + tesseract::Tesseract* tess); + +int os_detect_blobs(BLOBNBOX_CLIST* blob_list, + OSResults* osr, + tesseract::Tesseract* tess); bool os_detect_blob(BLOBNBOX* bbox, OrientationDetector* o, ScriptDetector* s, OSResults*, tesseract::Tesseract* tess); + +// Helper method to convert an orientation index to its value in degrees. +// The value represents the amount of clockwise rotation in degrees that must be +// applied for the text to be upright (readable). 
+const int OrientationIdToValue(const int& id); + #endif // TESSERACT_CCMAIN_OSDETECT_H__ diff --git a/ccmain/output.cpp b/ccmain/output.cpp index 2cba0445ff..7a361aa5bf 100644 --- a/ccmain/output.cpp +++ b/ccmain/output.cpp @@ -22,28 +22,25 @@ #endif #include "mfcpch.h" -#include "ocrshell.h" -#include -#include +#include +#include #ifdef __UNIX__ #include #include -#include +#include #endif -#include "mainblk.h" -#include "tfacep.h" -#include "tessvars.h" -#include "control.h" -#include "secname.h" -#include "reject.h" -#include "docqual.h" -#include "output.h" +#include "helpers.h" +#include "tfacep.h" +#include "tessvars.h" +#include "control.h" +#include "secname.h" +#include "reject.h" +#include "docqual.h" +#include "output.h" #include "bestfirst.h" #include "globals.h" #include "tesseractclass.h" -#define EXTERN - #define EPAPER_EXT ".ep" #define PAGE_YSIZE 3508 #define CTRL_INSET '\024' //dc4=text inset @@ -54,44 +51,6 @@ #define CTRL_NEWLINE '\012' //newline #define CTRL_HARDLINE '\015' //cr -EXTERN BOOL_EVAR (tessedit_write_block_separators, FALSE, -"Write block separators in output"); -EXTERN BOOL_VAR (tessedit_write_raw_output, FALSE, -"Write raw stuff to name.raw"); -EXTERN BOOL_EVAR (tessedit_write_output, FALSE, "Write text to name.txt"); -EXTERN BOOL_EVAR (tessedit_write_ratings, FALSE, -"Return ratings in IPEOCRAPI data"); -EXTERN BOOL_EVAR (tessedit_write_txt_map, FALSE, -"Write .txt to .etx map file"); -EXTERN BOOL_EVAR (tessedit_write_rep_codes, FALSE, -"Write repetition char code"); -EXTERN BOOL_EVAR (tessedit_write_unlv, FALSE, "Write .unlv output file"); -EXTERN STRING_EVAR (unrecognised_char, "|", -"Output char for unidentified blobs"); -EXTERN INT_EVAR (suspect_level, 99, "Suspect marker level"); -EXTERN INT_VAR (suspect_space_level, 100, -"Min suspect level for rejecting spaces"); -EXTERN INT_VAR (suspect_short_words, 2, -"Dont Suspect dict wds longer than this"); -EXTERN BOOL_VAR (suspect_constrain_1Il, FALSE, -"UNLV keep 1Il chars 
rejected"); -EXTERN double_VAR (suspect_rating_per_ch, 999.9, -"Dont touch bad rating limit"); -EXTERN double_VAR (suspect_accept_rating, -999.9, "Accept good rating limit"); - -EXTERN BOOL_EVAR (tessedit_minimal_rejection, FALSE, -"Only reject tess failures"); -EXTERN BOOL_VAR (tessedit_zero_rejection, FALSE, "Dont reject ANYTHING"); -EXTERN BOOL_VAR (tessedit_word_for_word, FALSE, -"Make output have exactly one word per WERD"); -EXTERN BOOL_VAR (tessedit_zero_kelvin_rejection, FALSE, -"Dont reject ANYTHING AT ALL"); -EXTERN BOOL_VAR (tessedit_consistent_reps, TRUE, -"Force all rep chars the same"); - -FILE *txt_mapfile = NULL; //reject map -FILE *unlv_file = NULL; //reject map - /********************************************************************** * pixels_to_pts * @@ -112,17 +71,13 @@ inT32 pixels_to_pts( //convert coords namespace tesseract { void Tesseract::output_pass( //Tess output pass //send to api PAGE_RES_IT &page_res_it, - BOOL8 write_to_shm, - TBOX *target_word_box) { + const TBOX *target_word_box) { BLOCK_RES *block_of_last_word; inT16 block_id; BOOL8 force_eol; //During output BLOCK *nextblock; //block of next word WERD *nextword; //next word - if (tessedit_write_txt_map) - txt_mapfile = open_outfile (".map"); - page_res_it.restart_page (); block_of_last_word = NULL; while (page_res_it.word () != NULL) { @@ -144,9 +99,6 @@ void Tesseract::output_pass( //Tess output pass //send to api block_of_last_word != page_res_it.block ()) { block_of_last_word = page_res_it.block (); block_id = block_of_last_word->block->index(); - if (!wordrec_no_block) - fprintf (textfile, "|^~tr%d\n", block_id); - fprintf (txt_mapfile, "|^~tr%d\n", block_id); } force_eol = (tessedit_write_block_separators && @@ -162,23 +114,11 @@ void Tesseract::output_pass( //Tess output pass //send to api else nextblock = NULL; //regardless of tilde crunching - write_results (page_res_it, determine_newline_type (page_res_it.word ()->word, page_res_it.block ()->block, nextword, nextblock), 
force_eol, - write_to_shm); - page_res_it.forward (); - } - if (write_to_shm) - ocr_send_text(FALSE); - if (tessedit_write_block_separators) { - if (!wordrec_no_block) - fprintf (textfile, "|^~tr\n"); - fprintf (txt_mapfile, "|^~tr\n"); - } - if (tessedit_write_txt_map) { - fprintf (txt_mapfile, "\n"); //because txt gets one - #ifdef __UNIX__ - fsync (fileno (txt_mapfile)); - #endif - fclose(txt_mapfile); + write_results(page_res_it, + determine_newline_type(page_res_it.word()->word, + page_res_it.block()->block, + nextword, nextblock), force_eol); + page_res_it.forward(); } } @@ -195,18 +135,10 @@ void Tesseract::output_pass( //Tess output pass //send to api * inset list - a list of bounding boxes of reject insets - indexed by the * reject strings in the epchoice text. *************************************************************************/ - -void Tesseract::write_results( //output a word - //full info - PAGE_RES_IT &page_res_it, - char newline_type, //type of newline - //override tilde crunch? - BOOL8 force_eol, - BOOL8 write_to_shm //send to api - ) { - //word to do - WERD_RES *word = page_res_it.word (); -// WERD_CHOICE *ep_choice; //ep format +void Tesseract::write_results(PAGE_RES_IT &page_res_it, + char newline_type, // type of newline + BOOL8 force_eol) { // override tilde crunch? 
+ WERD_RES *word = page_res_it.word(); STRING repetition_code; const STRING *wordstr; STRING wordstr_lengths; @@ -217,49 +149,34 @@ void Tesseract::write_results( //output a word char txt_chs[32]; //Only for unlv_tilde_crunch char map_chs[32]; //Only for unlv_tilde_crunch int txt_index = 0; - static BOOL8 tilde_crunch_written = FALSE; - static BOOL8 last_char_was_newline = TRUE; - static BOOL8 last_char_was_tilde = FALSE; - static BOOL8 empty_block = TRUE; BOOL8 need_reject = FALSE; PBLOB_IT blob_it; //blobs UNICHAR_ID space = unicharset.unichar_to_id(" "); - - /* if (word->best_choice->string().length() == 0) - { - tprintf("No output: to output\n"); - } - else if (word->best_choice->string()[0]==' ') - { - tprintf("spaceword to output\n"); - } - else if (word->best_choice->string()[0]=='\0') - { - tprintf("null to output\n"); - }*/ - if (word->unlv_crunch_mode != CR_NONE - && !tessedit_zero_kelvin_rejection && !tessedit_word_for_word) { + if ((word->unlv_crunch_mode != CR_NONE || + word->best_choice->length() == 0) && + !tessedit_zero_kelvin_rejection && !tessedit_word_for_word) { if ((word->unlv_crunch_mode != CR_DELETE) && - (!tilde_crunch_written || - ((word->unlv_crunch_mode == CR_KEEP_SPACE) && - (word->word->space () > 0) && - !word->word->flag (W_FUZZY_NON) && - !word->word->flag (W_FUZZY_SP)))) { + (!stats_.tilde_crunch_written || + ((word->unlv_crunch_mode == CR_KEEP_SPACE) && + (word->word->space () > 0) && + !word->word->flag (W_FUZZY_NON) && + !word->word->flag (W_FUZZY_SP)))) { if (!word->word->flag (W_BOL) && - (word->word->space () > 0) && - !word->word->flag (W_FUZZY_NON) && - !word->word->flag (W_FUZZY_SP)) { - /* Write a space to separate from preceeding good text */ + (word->word->space () > 0) && + !word->word->flag (W_FUZZY_NON) && + !word->word->flag (W_FUZZY_SP)) { + // Write a space to separate from preceeding good text. 
txt_chs[txt_index] = ' '; map_chs[txt_index++] = '1'; ep_chars[ep_chars_index++] = ' '; - last_char_was_tilde = FALSE; + stats_.last_char_was_tilde = false; } need_reject = TRUE; } - if ((need_reject && !last_char_was_tilde) || (force_eol && empty_block)) { + if ((need_reject && !stats_.last_char_was_tilde) || + (force_eol && stats_.write_results_empty_block)) { /* Write a reject char - mark as rejected unless zero_rejection mode */ - last_char_was_tilde = TRUE; + stats_.last_char_was_tilde = TRUE; txt_chs[txt_index] = unrecognised; if (tessedit_zero_rejection || (suspect_level == 0)) { map_chs[txt_index++] = '1'; @@ -271,8 +188,7 @@ void Tesseract::write_results( //output a word The ep_choice string is a faked reject to allow newdiff to sync the .etx with the .txt and .map files. */ - ep_chars[ep_chars_index++] = CTRL_INSET; - //escape code + ep_chars[ep_chars_index++] = CTRL_INSET; // escape code //dummy reject ep_chars[ep_chars_index++] = 1; //dummy reject @@ -284,12 +200,12 @@ void Tesseract::write_results( //output a word //dummy reject ep_chars[ep_chars_index++] = 1; } - tilde_crunch_written = TRUE; - last_char_was_newline = FALSE; - empty_block = FALSE; + stats_.tilde_crunch_written = true; + stats_.last_char_was_newline = false; + stats_.write_results_empty_block = false; } - if ((word->word->flag (W_EOL) && !last_char_was_newline) || force_eol) { + if ((word->word->flag (W_EOL) && !stats_.last_char_was_newline) || force_eol) { /* Add a new line output */ txt_chs[txt_index] = '\n'; map_chs[txt_index++] = '\n'; @@ -297,70 +213,63 @@ void Tesseract::write_results( //output a word ep_chars[ep_chars_index++] = newline_type; //Cos of the real newline - tilde_crunch_written = FALSE; - last_char_was_newline = TRUE; - last_char_was_tilde = FALSE; + stats_.tilde_crunch_written = false; + stats_.last_char_was_newline = true; + stats_.last_char_was_tilde = false; } txt_chs[txt_index] = '\0'; map_chs[txt_index] = '\0'; - //xiaofan - if (tessedit_write_output && 
!wordrec_no_block) - fprintf (textfile, "%s", txt_chs); - - if (tessedit_write_txt_map) - fprintf (txt_mapfile, "%s", map_chs); - - //terminate string - ep_chars[ep_chars_index] = '\0'; + ep_chars[ep_chars_index] = '\0'; // terminate string word->ep_choice = new WERD_CHOICE(ep_chars, unicharset); if (force_eol) - empty_block = TRUE; + stats_.write_results_empty_block = true; return; } /* NORMAL PROCESSING of non tilde crunched words */ - tilde_crunch_written = FALSE; + stats_.tilde_crunch_written = false; if (newline_type) - last_char_was_newline = TRUE; + stats_.last_char_was_newline = true; else - last_char_was_newline = FALSE; - empty_block = force_eol; //About to write a real word + stats_.last_char_was_newline = false; + stats_.write_results_empty_block = force_eol; // about to write a real word if (unlv_tilde_crunching && - last_char_was_tilde && + stats_.last_char_was_tilde && (word->word->space() == 0) && !(word->word->flag(W_REP_CHAR) && tessedit_write_rep_codes) && (word->best_choice->unichar_id(0) == space)) { /* Prevent adjacent tilde across words - we know that adjacent tildes within words have been removed */ word->best_choice->remove_unichar_id(0); + if (word->best_choice->blob_choices() != NULL) { + BLOB_CHOICE_LIST_C_IT blob_choices_it(word->best_choice->blob_choices()); + if (!blob_choices_it.empty()) delete blob_choices_it.extract(); + } word->best_choice->populate_unichars(getDict().getUnicharset()); word->reject_map.remove_pos (0); - blob_it = word->outword->blob_list (); - delete blob_it.extract (); //get rid of reject blob + delete word->box_word; + word->box_word = new BoxWord; } if (newline_type || (word->word->flag (W_REP_CHAR) && tessedit_write_rep_codes)) - last_char_was_tilde = FALSE; + stats_.last_char_was_tilde = false; else { if (word->reject_map.length () > 0) { if (word->best_choice->unichar_id(word->reject_map.length() - 1) == space) - last_char_was_tilde = TRUE; + stats_.last_char_was_tilde = true; else - last_char_was_tilde = 
FALSE; + stats_.last_char_was_tilde = false; } else if (word->word->space () > 0) - last_char_was_tilde = FALSE; + stats_.last_char_was_tilde = false; /* else it is unchanged as there are no output chars */ } ASSERT_HOST (word->best_choice->length() == word->reject_map.length()); - if (word->word->flag (W_REP_CHAR) && tessedit_consistent_reps) - ensure_rep_chars_are_consistent(word); - set_unlv_suspects(word); check_debug_pt (word, 120); if (tessedit_rejection_debug) { @@ -368,21 +277,13 @@ void Tesseract::write_results( //output a word word->best_choice->debug_string(unicharset).string(), dict_word(*(word->best_choice))); } - -#if 0 - if (tessedit_write_unlv) { - write_unlv_text(word); - } -#endif - if (word->word->flag (W_REP_CHAR) && tessedit_write_rep_codes) { repetition_code = "|^~R"; wordstr_lengths = "\001\001\001\001"; repetition_code += unicharset.id_to_unichar(get_rep_char (word)); wordstr_lengths += strlen(unicharset.id_to_unichar(get_rep_char (word))); wordstr = &repetition_code; - } - else { + } else { if (tessedit_zero_rejection) { /* OVERRIDE ALL REJECTION MECHANISMS - ONLY REJECT TESS FAILURES */ for (i = 0; i < word->best_choice->length(); ++i) { @@ -399,209 +300,9 @@ void Tesseract::write_results( //output a word } } } - - if (write_to_shm) - write_shm_text (word, page_res_it.block ()->block, - page_res_it.row (), *wordstr, wordstr_lengths); - -#if 0 - if (tessedit_write_output) - write_cooked_text (word->word, *wordstr, TRUE, FALSE, textfile); - - if (tessedit_write_raw_output) - write_cooked_text (word->word, word->raw_choice->string (), - TRUE, FALSE, rawfile); - - if (tessedit_write_txt_map) - write_map(txt_mapfile, word); - - ep_choice = make_epaper_choice (word, newline_type); - word->ep_choice = ep_choice; -#endif - - character_count += word->best_choice->length(); - word_count++; } } // namespace tesseract -/********************************************************************** - * make_epaper_choice - * - * Construct the epaper text 
string for a word, using the reject map to - * determine whether each blob should be rejected. - **********************************************************************/ - -#if 0 -WERD_CHOICE *make_epaper_choice( //convert one word - WERD_RES *word, //word to do - char newline_type //type of newline - ) { - inT16 index = 0; //to string - inT16 blobindex; //to word - inT16 prevright = 0; //right of previous blob - inT16 nextleft; //left of next blob - PBLOB *blob; - TBOX inset_box; //bounding box - PBLOB_IT blob_it; //blob iterator - char word_string[MAX_PATH]; //converted string - BOOL8 force_total_reject; - char unrecognised = STRING (unrecognised_char)[0]; - - blob_it.set_to_list (word->outword->blob_list ()); - - ASSERT_HOST (word->reject_map.length () == - word->best_choice->string ().length ()); - /* - tprintf( "\"%s\" -> length: %d; blobcount: %d (%d)\n", - word->best_choice->string().string(), - word->best_choice->string().length(), - blob_it.length(), - blob_count( word->outword ) ); - */ - - if (word->best_choice->string ().length () == 0) - force_total_reject = TRUE; - else { - force_total_reject = FALSE; - ASSERT_HOST (blob_it.length () == - word->best_choice->string ().length ()); - } - if (!blob_it.empty ()) { - for (index = 0; index < word->word->space (); index++) - word_string[index] = ' '; //leading blanks - } - /* Why does this generate leading blanks regardless of whether the - word_choice string is empty, when write_cooked_text ony generates leading - blanks when the string is NOT empty???. 
*/ - - if (word->word->flag (W_REP_CHAR) && tessedit_write_rep_codes) { - strcpy (word_string + index, "|^~R"); - index += 4; - strcpy(word_string + index, unicharset.id_to_unichar(get_rep_char (word))); - index += strlen(unicharset.id_to_unichar(get_rep_char (word))); - } - else { - if (!blob_it.empty ()) - prevright = blob_it.data ()->bounding_box ().left (); - //actually first left - for (blobindex = 0, blob_it.mark_cycle_pt (); - !blob_it.cycled_list (); blobindex++, blob_it.forward ()) { - blob = blob_it.data (); - if (word->reject_map[blobindex].accepted ()) { - if (word->best_choice->string ()[blobindex] == ' ') - //but not rejected!! - word_string[index++] = unrecognised; - else - word_string[index++] = - word->best_choice->string ()[blobindex]; - } - else { // start reject - inset_box = blob->bounding_box (); - /* Extend reject box to include rejected neighbours */ - while (!blob_it.at_last () && - (force_total_reject || - (word->reject_map[blobindex + 1].rejected ()))) { - blobindex++; - blob = blob_it.forward (); - //get total box - inset_box += blob->bounding_box (); - } - if (blob_it.at_last ()) - nextleft = inset_box.right (); - else - nextleft = blob_it.data_relative (1)->bounding_box ().left (); - - // tprintf("Making reject from (%d,%d)->(%d,%d)\n", - // inset_box.left(),inset_box.bottom(), - // inset_box.right(),inset_box.top()); - - index += make_reject (&inset_box, prevright, nextleft, - &word->denorm, &word_string[index]); - } - prevright = blob->bounding_box ().right (); - } - } - if (newline_type) - //end line - word_string[index++] = newline_type; - word_string[index] = '\0'; //terminate string - if (strlen (word_string) != index) { - tprintf ("ASSERT ABOUT TO FAIL: %s, index %d len %d\n", - word_string, index, strlen (word_string)); - } - //don't pass any zeros - ASSERT_HOST (strlen (word_string) == index); - return new WERD_CHOICE (word_string, 0, 0, NO_PERM); -} -#endif - 
-/********************************************************************** - * make_reject - * - * Add the escape code to the string for the reject. - **********************************************************************/ - -inT16 -make_reject ( //make reject code -TBOX * inset_box, //bounding box -inT16 prevright, //previous char -inT16 nextleft, //next char -DENORM * denorm, //de-normalizer -char word_string[] //output string -) { - inT16 index; //to string - inT16 xpos; //start of inset - inT16 ypos; - inT16 width; //size of inset - inT16 height; - inT16 left_offset; //shift form prev char - inT16 right_offset; //shift to next char - inT16 baseline_offset; //shift from baseline - inT16 inset_index = 0; //number of inset - inT16 min_chars; //min width estimate - inT16 max_chars; //max width estimate - float x_centre; //centre of box - - index = 0; - x_centre = (inset_box->left () + inset_box->right ()) / 2.0; - left_offset = - (inT16) (denorm->x (inset_box->left ()) - denorm->x (prevright)); - right_offset = - (inT16) (denorm->x (nextleft) - denorm->x (inset_box->right ())); - xpos = (inT16) floor (denorm->x (inset_box->left ())); - width = (inT16) ceil (denorm->x (inset_box->right ())) - xpos; - ypos = (inT16) floor (denorm->y (inset_box->bottom (), x_centre)); - height = (inT16) ceil (denorm->y (inset_box->top (), x_centre)) - ypos; - baseline_offset = ypos - (inT16) denorm->y (bln_baseline_offset, x_centre); - //escape code - word_string[index++] = CTRL_INSET; - min_chars = (inT16) ceil (0.27 * width / denorm->row ()->x_height ()); - max_chars = (inT16) floor (1.8 * width / denorm->row ()->x_height ()); - /* - Ensure min_chars and max_chars are in the range 0..254. This ensures that - we can add 1 to them to avoid putting \0 in a string, and still not exceed - the max value in a byte. 
- */ - if (min_chars < 0) - min_chars = 0; - if (min_chars > 254) - min_chars = 254; - if (max_chars < min_chars) - max_chars = min_chars; - if (max_chars > 254) - max_chars = 254; - //min chars - word_string[index++] = min_chars + 1; - //max chars - word_string[index++] = max_chars + 1; - word_string[index++] = 2; //type? - //store index - word_string[index++] = inset_index / 255 + 1; - word_string[index++] = inset_index % 255 + 1; - return index; //size of string -} - - /********************************************************************** * determine_newline_type * @@ -641,305 +342,6 @@ char determine_newline_type( //test line ends return end_gap > width ? CTRL_HARDLINE : CTRL_NEWLINE; } -/********************************************************************** - * write_shm_text - * - * Write the cooked text to the shared memory for the api. - **********************************************************************/ - -void write_shm_text( //write output - WERD_RES *word, //word to do - BLOCK *block, //block it is from - ROW_RES *row, //row it is from - const STRING &text, //text to write - const STRING &text_lengths - ) { - inT32 index; //char counter - inT32 index2; //char counter - inT32 length; //chars in word - inT32 ptsize; //font size - inT8 blanks; //blanks in word - uinT8 enhancement; //bold etc - uinT8 font; //font index - char unrecognised = STRING (unrecognised_char)[0]; - PBLOB *blob; - TBOX blob_box; //bounding box - PBLOB_IT blob_it; //blob iterator - WERD copy_outword; // copy to denorm - uinT32 rating; //of char - BOOL8 lineend; //end of line - int offset; - int offset2; - - //point size - ptsize = pixels_to_pts ((inT32) (row->row->x_height () + row->row->ascenders () - row->row->descenders ()), 300); - if (word->word->flag (W_BOL) && ocr_char_space () < 128 - && ocr_send_text (TRUE) != OKAY) - return; //release failed - copy_outword = *(word->outword); - copy_outword.baseline_denormalise (&word->denorm); - blob_it.set_to_list 
(copy_outword.blob_list ()); - length = text_lengths.length (); - - if (length > 0) { - blanks = word->word->space (); - if (blanks == 0 && tessedit_word_for_word && !word->word->flag (W_BOL)) - blanks = 1; - for (index = 0, offset = 0; index < length; - offset += text_lengths[index++], blob_it.forward ()) { - blob = blob_it.data (); - blob_box = blob->bounding_box (); - - enhancement = 0; - if (word->italic > 0 || (word->italic == 0 && row->italic > 0)) - enhancement |= EUC_ITALIC; - if (word->bold > 0 || (word->bold == 0 && row->bold > 0)) - enhancement |= EUC_BOLD; - if (tessedit_write_ratings) - rating = (uinT32) (-word->best_choice->certainty () / 0.035); - else if (tessedit_zero_rejection) - rating = text[offset] == ' ' ? 100 : 0; - else - rating = word->reject_map[index].accepted ()? 0 : 100; - if (rating > 255) - rating = 255; - if (word->font1_count > 2) - font = word->font1; - else if (row->font1_count > 8) - font = row->font1; - else - //font index - font = word->word->flag (W_DONT_CHOP) ? 0 : 1; - - lineend = word->word->flag (W_EOL) && index == length - 1; - if (word->word->flag (W_EOL) && tessedit_zero_rejection - && index < length - 1 && text[index + text_lengths[index]] == ' ') { - for (index2 = index + 1, offset2 = offset + text_lengths[index]; - index2 < length && text[offset2] == ' '; - offset2 += text_lengths[index2++]); - if (index2 == length) - lineend = TRUE; - } - - if (!tessedit_zero_rejection || text[offset] != ' ' - || tessedit_word_for_word) { - //confidence - if (text[offset] == ' ') { - ocr_append_char (unrecognised, - blob_box.left (), blob_box.right (), - page_image.get_ysize () - 1 - blob_box.top (), - page_image.get_ysize () - 1 - blob_box.bottom (), - font, (uinT8) rating, - ptsize, //point size - blanks, enhancement, //enhancement - OCR_CDIR_LEFT_RIGHT, - OCR_LDIR_DOWN_RIGHT, - lineend ? 
OCR_NL_NEWLINE : OCR_NL_NONE); - } else { - for (int suboffset = 0; suboffset < text_lengths[index]; ++suboffset) - ocr_append_char (static_cast(text[offset+suboffset]), - blob_box.left (), blob_box.right (), - page_image.get_ysize () - 1 - blob_box.top (), - page_image.get_ysize () - 1 - blob_box.bottom (), - font, (uinT8) rating, - ptsize, //point size - blanks, enhancement, //enhancement - OCR_CDIR_LEFT_RIGHT, - OCR_LDIR_DOWN_RIGHT, - lineend ? OCR_NL_NEWLINE : OCR_NL_NONE); - } - blanks = 0; - } - - } - } - else if (tessedit_word_for_word) { - blanks = word->word->space (); - if (blanks == 0 && !word->word->flag (W_BOL)) - blanks = 1; - blob_box = word->word->bounding_box (); - - enhancement = 0; - if (word->italic > 0) - enhancement |= EUC_ITALIC; - if (word->bold > 0) - enhancement |= EUC_BOLD; - rating = 100; - if (word->font1_count > 2) - font = word->font1; - else if (row->font1_count > 8) - font = row->font1; - else - //font index - font = word->word->flag (W_DONT_CHOP) ? 0 : 1; - - lineend = word->word->flag (W_EOL); - - //font index - ocr_append_char (unrecognised, - blob_box.left (), blob_box.right (), - page_image.get_ysize () - 1 - blob_box.top (), - page_image.get_ysize () - 1 - blob_box.bottom (), - font, - rating, //confidence - ptsize, //point size - blanks, enhancement, //enhancement - OCR_CDIR_LEFT_RIGHT, - OCR_LDIR_DOWN_RIGHT, - lineend ? OCR_NL_NEWLINE : OCR_NL_NONE); - } -} - - -/********************************************************************** - * write_map - * - * Write a map file of 0's and 1'a which associates characters from the .txt - * file with those in the .etx file. 0 = .txt char was deleted. 1 = .txt char - * is kept. Note that there may be reject regions in the .etx file WITHOUT - * .txt chars being rejected. The map file should be the same length, and - * the same number of lines as the .txt file - * - * The paramaterised input is because I thought I might be able to generate - * multiple map files in a single run. 
However, it didn't work because - * newdiff needs etx files! - **********************************************************************/ - -#if 0 -void write_map( //output a map file - FILE *mapfile, //mapfile to write to - WERD_RES *word) { - inT16 index; - int status; - STRING mapstr = ""; - - if (word->best_choice->string ().length () > 0) { - for (index = 0; index < word->word->space (); index++) { - if (word->reject_spaces && - (suspect_level >= suspect_space_level) && - !tessedit_minimal_rejection && !tessedit_zero_rejection) - /* Write rejected spaces to .map file ONLY. Newdiff converts these back to - accepted spaces AFTER generating basic space stats but BEFORE using .etx */ - status = fprintf (mapfile, "0"); - else - status = fprintf (mapfile, "1"); - if (status < 0) - WRITEFAILED.error ("write_map", EXIT, "Space Errno: %d", errno); - } - - if ((word->word->flag (W_REP_CHAR) && tessedit_write_rep_codes)) { - for (index = 0; index < 5; index++) - mapstr += '1'; - } - else { - ASSERT_HOST (word->reject_map.length () == - word->best_choice->string ().length ()); - - for (index = 0; index < word->reject_map.length (); index++) { - if (word->reject_map[index].accepted ()) - mapstr += '1'; - else - mapstr += '0'; - } - } - status = fprintf (mapfile, "%s", mapstr.string ()); - if (status < 0) - WRITEFAILED.error ("write_map", EXIT, "Map str Errno: %d", errno); - } - if (word->word->flag (W_EOL)) { - status = fprintf (mapfile, "\n"); - if (status < 0) - WRITEFAILED.error ("write_map", EXIT, "Newline Errno: %d", errno); - } - status = fflush (mapfile); - if (status != 0) - WRITEFAILED.error ("write_map", EXIT, "fflush Errno: %d", errno); -} -#endif - - -/************************************************************************* - * open_file() - *************************************************************************/ - -namespace tesseract { -FILE *Tesseract::open_outfile( //open .map & .unlv file - const char *extension) { - STRING file_name; - FILE *outfile; - - 
file_name = imagebasename + extension; - if (!(outfile = fopen (file_name.string (), "w"))) { - CANTOPENFILE.error ("open_outfile", EXIT, "%s %d", - file_name.string (), errno); - } - return outfile; -} -} // namespace tesseract - - -#if 0 -void write_unlv_text(WERD_RES *word) { - const char *wordstr; - - char buff[512]; //string to output - int i = 0; - int j = 0; - char unrecognised = STRING (unrecognised_char)[0]; - int status; - char space_str[3]; - - wordstr = word->best_choice->string ().string (); - - /* DONT need to do anything special for repeated char words - at this stage - the repetition char has been identified and any other chars have been - rejected. - */ - - for (; wordstr[i] != '\0'; i++) { - if ((wordstr[i] == ' ') || - (wordstr[i] == '~') || (wordstr[i] == '^') || (wordstr[i] == '|')) - buff[j++] = unrecognised; - else { - if (word->reject_map[i].rejected ()) - buff[j++] = '^'; //Add suspect marker - buff[j++] = wordstr[i]; - } - } - buff[j] = '\0'; - - if (strlen (wordstr) > 0) { - if (word->reject_spaces && - (suspect_level >= suspect_space_level) && - !tessedit_minimal_rejection && !tessedit_zero_rejection) - strcpy (space_str, "^ "); //Suspect space - else - strcpy (space_str, " "); //Certain space - - for (i = 0; i < word->word->space (); i++) { - status = fprintf (unlv_file, "%s", space_str); - if (status < 0) - WRITEFAILED.error ("write_unlv_text", EXIT, - "Space Errno: %d", errno); - } - - status = fprintf (unlv_file, "%s", buff); - if (status < 0) - WRITEFAILED.error ("write_unlv_text", EXIT, "Word Errno: %d", errno); - } - if (word->word->flag (W_EOL)) { - status = fprintf (unlv_file, "\n"); - if (status < 0) - WRITEFAILED.error ("write_unlv_text", EXIT, - "Newline Errno: %d", errno); - } - status = fflush (unlv_file); - if (status != 0) - WRITEFAILED.error ("write_unlv_text", EXIT, "Fflush Errno: %d", errno); -} -#endif - - /************************************************************************* * get_rep_char() * Return the first 
accepted character from the repetition string. This is the @@ -957,36 +359,6 @@ UNICHAR_ID Tesseract::get_rep_char(WERD_RES *word) { // what char is repeated? return unicharset.unichar_to_id(unrecognised_char.string()); } } -} // namespace tesseract - -void ensure_rep_chars_are_consistent(WERD_RES *word) { -#if 0 - char rep_char = get_rep_char (word); - char *ptr; - - ptr = (char *) word->best_choice->string ().string (); - for (; *ptr != '\0'; ptr++) { - if (*ptr != rep_char) - *ptr = rep_char; - } -#endif - -#if 0 - UNICHAR_ID rep_char = get_rep_char (word); //TODO(tkielbus) Reactivate - int i; - char *ptr; - STRING consistent_string; - STRING consistent_string_lengths; - - ptr = (char *) word->best_choice->string ().string (); - for (i = 0; *ptr != '\0'; ptr += word->best_choice->lengths()[i++]) { - consistent_string += unicharset.id_to_unichar(rep_char); - consistent_string_lengths += strlen(unicharset.id_to_unichar(rep_char)); - } - word->best_choice->string() = consistent_string; - word->best_choice->lengths() = consistent_string_lengths; -#endif -} /************************************************************************* * SUSPECT LEVELS @@ -998,8 +370,6 @@ void ensure_rep_chars_are_consistent(WERD_RES *word) { * NOTE: to reject JUST tess failures in the .map file set suspect_level 3 and * tessedit_minimal_rejection. 
*************************************************************************/ - -namespace tesseract { void Tesseract::set_unlv_suspects(WERD_RES *word_res) { int len = word_res->reject_map.length(); const WERD_CHOICE &word = *(word_res->best_choice); diff --git a/ccmain/output.h b/ccmain/output.h index 4c923020df..9e9bfd514c 100644 --- a/ccmain/output.h +++ b/ccmain/output.h @@ -20,91 +20,15 @@ #ifndef OUTPUT_H #define OUTPUT_H -#include "varable.h" +#include "params.h" //#include "epapconv.h" #include "pageres.h" #include "notdll.h" -extern BOOL_EVAR_H (tessedit_write_block_separators, TRUE, -"Write block separators in output"); -extern BOOL_VAR_H (tessedit_write_raw_output, FALSE, -"Write raw stuff to name.raw"); -extern BOOL_EVAR_H (tessedit_write_output, TRUE, "Write text to name.txt"); -extern BOOL_EVAR_H (tessedit_write_txt_map, TRUE, -"Write .txt to .etx map file"); -extern BOOL_EVAR_H (tessedit_write_rep_codes, TRUE, -"Write repetition char code"); -extern BOOL_EVAR_H (tessedit_write_unlv, FALSE, "Write .unlv output file"); -extern STRING_EVAR_H (unrecognised_char, "|", -"Output char for unidentified blobs"); -extern INT_EVAR_H (suspect_level, 99, "Suspect marker level"); -extern INT_VAR_H (suspect_space_level, 100, -"Min suspect level for rejecting spaces"); -extern INT_VAR_H (suspect_short_words, 2, -"Dont Suspect dict wds longer than this"); -extern BOOL_VAR_H (suspect_constrain_1Il, FALSE, -"UNLV keep 1Il chars rejected"); -extern double_VAR_H (suspect_rating_per_ch, 999.9, -"Dont touch bad rating limit"); -extern double_VAR_H (suspect_accept_rating, -999.9, -"Accept good rating limit"); -extern BOOL_EVAR_H (tessedit_minimal_rejection, FALSE, -"Only reject tess failures"); -extern BOOL_VAR_H (tessedit_zero_rejection, FALSE, "Dont reject ANYTHING"); -extern BOOL_VAR_H (tessedit_word_for_word, FALSE, -"Make output have exactly one word per WERD"); -extern BOOL_VAR_H (tessedit_consistent_reps, TRUE, -"Force all rep chars the same"); - -/** output a word */ 
-void write_results( - PAGE_RES_IT &page_res_it, ///< full info - char newline_type, ///< type of newline - BOOL8 force_eol, ///< override tilde crunch? - BOOL8 write_to_shm ///< send to api - ); - -/** convert one word */ -WERD_CHOICE *make_epaper_choice( - WERD_RES *word, ///< word to do - char newline_type ///< type of newline - ); -/** make reject code */ -inT16 make_reject ( -TBOX * inset_box, ///< bounding box -inT16 prevright, ///< previous char -inT16 nextleft, ///< next char -DENORM * denorm, ///< de-normalizer -char word_string[] ///< output string -); - /** test line ends */ char determine_newline_type(WERD *word, ///< word to do BLOCK *block, ///< current block WERD *next_word, ///< next word BLOCK *next_block ///< block of next word ); -/** write output */ -void write_cooked_text(WERD *word, ///< word to do - const STRING &text, ///< text to write - BOOL8 acceptable, ///< good stuff - BOOL8 pass2, ///< done on pass2 - FILE *fp ///< file to write - ); -/** write output */ -void write_shm_text(WERD_RES *word, ///< word to do - BLOCK *block, ///< block it is from - ROW_RES *row, ///< row it is from - const STRING &text, ///< text to write - const STRING &text_lengths - ); -/** output a map file */ -void write_map( - FILE *mapfile, ///< mapfile to write to - WERD_RES *word ///< word - ); -/*FILE *open_outfile( //open .map & .unlv file - const char *extension);*/ -void write_unlv_text(WERD_RES *word); -void ensure_rep_chars_are_consistent(WERD_RES *word); #endif diff --git a/textord/pagesegmain.cpp b/ccmain/pagesegmain.cpp similarity index 52% rename from textord/pagesegmain.cpp rename to ccmain/pagesegmain.cpp index 4766fc3d7e..e63b30dfb5 100644 --- a/textord/pagesegmain.cpp +++ b/ccmain/pagesegmain.cpp @@ -46,7 +46,8 @@ #include "blread.h" #include "wordseg.h" #include "makerow.h" -#include "baseapi.h" +#include "osdetect.h" +#include "textord.h" #include "tordmain.h" #include "tessvars.h" @@ -56,6 +57,48 @@ namespace tesseract { const int 
kMinCredibleResolution = 70; /// Default resolution used if input in not believable. const int kDefaultResolution = 300; +// Max erosions to perform in removing an enclosing circle. +const int kMaxCircleErosions = 8; + +// Helper to remove an enclosing circle from an image. +// If there isn't one, then the image will most likely get badly mangled. +// The returned pix must be pixDestroyed after use. NULL may be returned +// if the image doesn't meet the trivial conditions that it uses to determine +// success. +static Pix* RemoveEnclosingCircle(Pix* pixs) { + Pix* pixsi = pixInvert(NULL, pixs); + Pix* pixc = pixCreateTemplate(pixs); + pixSetOrClearBorder(pixc, 1, 1, 1, 1, PIX_SET); + pixSeedfillBinary(pixc, pixc, pixsi, 4); + pixInvert(pixc, pixc); + pixDestroy(&pixsi); + Pix* pixt = pixAnd(NULL, pixs, pixc); + l_int32 max_count; + pixCountConnComp(pixt, 8, &max_count); + // The count has to go up before we start looking for the minimum. + l_int32 min_count = MAX_INT32; + Pix* pixout = NULL; + for (int i = 1; i < kMaxCircleErosions; i++) { + pixDestroy(&pixt); + pixErodeBrick(pixc, pixc, 3, 3); + pixt = pixAnd(NULL, pixs, pixc); + l_int32 count; + pixCountConnComp(pixt, 8, &count); + if (i == 1 || count > max_count) { + max_count = count; + min_count = count; + } else if (i > 1 && count < min_count) { + min_count = count; + pixDestroy(&pixout); + pixout = pixCopy(NULL, pixt); // Save the best. + } else if (count >= min_count) { + break; // We have passed by the best. + } + } + pixDestroy(&pixt); + pixDestroy(&pixc); + return pixout; +} /** * Segment the page according to the current value of tessedit_pageseg_mode. @@ -63,18 +106,12 @@ const int kDefaultResolution = 300; * and copied to image, otherwise it just uses image as the input. * On return the blocks list owns all the constructed page layout. 
*/ -int Tesseract::SegmentPage(const STRING* input_file, - IMAGE* image, BLOCK_LIST* blocks) { - int width = image->get_xsize(); - int height = image->get_ysize(); - int resolution = image->get_res(); -#ifdef HAVE_LIBLEPT - if (pix_binary_ != NULL) { - width = pixGetWidth(pix_binary_); - height = pixGetHeight(pix_binary_); - resolution = pixGetXRes(pix_binary_); - } -#endif +int Tesseract::SegmentPage(const STRING* input_file, BLOCK_LIST* blocks, + Tesseract* osd_tess, OSResults* osr) { + ASSERT_HOST(pix_binary_ != NULL); + int width = pixGetWidth(pix_binary_); + int height = pixGetHeight(pix_binary_); + int resolution = pixGetXRes(pix_binary_); // Zero resolution messes up the algorithms, so make sure it is credible. if (resolution < kMinCredibleResolution) resolution = kDefaultResolution; @@ -82,7 +119,7 @@ int Tesseract::SegmentPage(const STRING* input_file, PageSegMode pageseg_mode = static_cast( static_cast(tessedit_pageseg_mode)); // If a UNLV zone file can be found, use that instead of segmentation. - if (pageseg_mode != tesseract::PSM_AUTO && + if (!PSM_COL_FIND_ENABLED(pageseg_mode) && input_file != NULL && input_file->length() > 0) { STRING name = *input_file; const char* lastdot = strrchr(name.string(), '.'); @@ -90,88 +127,85 @@ int Tesseract::SegmentPage(const STRING* input_file, name[lastdot - name.string()] = '\0'; read_unlv_file(name, width, height, blocks); } - bool single_column = pageseg_mode > PSM_AUTO; if (blocks->empty()) { // No UNLV file present. Work according to the PageSegMode. // First make a single block covering the whole image. BLOCK_IT block_it(blocks); BLOCK* block = new BLOCK("", TRUE, 0, 0, 0, 0, width, height); + block->set_right_to_left(right_to_left()); block_it.add_to_end(block); } else { // UNLV file present. Use PSM_SINGLE_COLUMN. 
pageseg_mode = PSM_SINGLE_COLUMN; } + bool single_column = !PSM_COL_FIND_ENABLED(pageseg_mode); + bool osd_enabled = PSM_OSD_ENABLED(pageseg_mode); + bool osd_only = pageseg_mode == PSM_OSD_ONLY; - TO_BLOCK_LIST land_blocks, port_blocks; - TBOX page_box; - if (pageseg_mode <= PSM_SINGLE_COLUMN) { - if (AutoPageSeg(width, height, resolution, single_column, - image, blocks, &port_blocks) < 0) { - return -1; - } + int auto_page_seg_ret_val = 0; + TO_BLOCK_LIST to_blocks; + if (osd_enabled || PSM_BLOCK_FIND_ENABLED(pageseg_mode)) { + auto_page_seg_ret_val = + AutoPageSeg(resolution, single_column, osd_enabled, osd_only, + blocks, &to_blocks, osd_tess, osr); + if (osd_only) + return auto_page_seg_ret_val; // To create blobs from the image region bounds uncomment this line: - // port_blocks.clear(); // Uncomment to go back to the old mode. + // to_blocks.clear(); // Uncomment to go back to the old mode. } else { -#if HAVE_LIBLEPT - image->FromPix(pix_binary_); -#endif deskew_ = FCOORD(1.0f, 0.0f); reskew_ = FCOORD(1.0f, 0.0f); + if (pageseg_mode == PSM_CIRCLE_WORD) { + Pix* pixcleaned = RemoveEnclosingCircle(pix_binary_); + if (pixcleaned != NULL) { + pixDestroy(&pix_binary_); + pix_binary_ = pixcleaned; + } + } } + + if (auto_page_seg_ret_val < 0) { + return -1; + } + if (blocks->empty()) { tprintf("Empty page\n"); return 0; // AutoPageSeg found an empty page. } - if (port_blocks.empty()) { - // AutoPageSeg was not used, so we need to find_components first. - find_components(blocks, &land_blocks, &port_blocks, &page_box); - } else { - // AutoPageSeg does not need to find_components as it did that already. - page_box.set_left(0); - page_box.set_bottom(0); - page_box.set_right(width); - page_box.set_top(height); - // Filter_blobs sets up the TO_BLOCKs the same as find_components does. 
- filter_blobs(page_box.topright(), &port_blocks, true); - } + textord_.TextordPage(pageseg_mode, width, height, pix_binary_, + blocks, &to_blocks); + SetupWordScripts(blocks); + return auto_page_seg_ret_val; +} - TO_BLOCK_IT to_block_it(&port_blocks); - ASSERT_HOST(!port_blocks.empty()); - TO_BLOCK* to_block = to_block_it.data(); - if (pageseg_mode <= PSM_SINGLE_BLOCK || - to_block->line_size < 2) { - // For now, AUTO, SINGLE_COLUMN and SINGLE_BLOCK all map to the old - // textord. The difference is the number of blocks and how the are made. - textord_page(page_box.topright(), blocks, &land_blocks, &port_blocks, - this); - } else { - // SINGLE_LINE, SINGLE_WORD and SINGLE_CHAR all need a single row. - float gradient = make_single_row(page_box.topright(), - to_block, &port_blocks, this); - if (pageseg_mode == PSM_SINGLE_LINE) { - // SINGLE_LINE uses the old word maker on the single line. - make_words(page_box.topright(), gradient, blocks, - &land_blocks, &port_blocks, this); - } else { - // SINGLE_WORD and SINGLE_CHAR cram all the blobs into a - // single word, and in SINGLE_CHAR mode, all the outlines - // go in a single blob. - make_single_word(pageseg_mode == PSM_SINGLE_CHAR, - to_block->get_rows(), to_block->block->row_list()); +// TODO(rays) This is a hack to set all the words with a default script. +// In the future this will be set by a preliminary pass over the document. 
+void Tesseract::SetupWordScripts(BLOCK_LIST* blocks) { + int script = unicharset.default_sid(); + bool has_x_height = unicharset.script_has_xheight(); + bool is_latin = script == unicharset.latin_sid(); + BLOCK_IT b_it(blocks); + for (b_it.mark_cycle_pt(); !b_it.cycled_list(); b_it.forward()) { + ROW_IT r_it(b_it.data()->row_list()); + for (r_it.mark_cycle_pt(); !r_it.cycled_list(); r_it.forward()) { + WERD_IT w_it(r_it.data()->word_list()); + for (w_it.mark_cycle_pt(); !w_it.cycled_list(); w_it.forward()) { + WERD* word = w_it.data(); + word->set_script_id(script); + word->set_flag(W_SCRIPT_HAS_XHEIGHT, has_x_height); + word->set_flag(W_SCRIPT_IS_LATIN, is_latin); + } } } - return 0; } + /** * Auto page segmentation. Divide the page image into blocks of uniform * text linespacing and images. * - * Width, height and resolution are derived from the input image. - * - * If the pix is non-NULL, then it is assumed to be the input, and it is - * copied to the image, otherwise the image is used directly. + * Resolution (in ppi) is derived from the input image. * * The output goes in the blocks list with corresponding TO_BLOCKs in the * to_blocks list. @@ -179,10 +213,17 @@ int Tesseract::SegmentPage(const STRING* input_file, * If single_column is true, then no attempt is made to divide the image * into columns, but multiple blocks are still made if the text is of * non-uniform linespacing. + * + * If osd is true, then orientation and script detection is performed as well. + * If only_osd is true, then only orientation and script detection is + * performed. If osr is desired, the osr_tess must be another Tesseract + * that was initialized especially for osd, and the results will be output + * into osr. 
*/ -int Tesseract::AutoPageSeg(int width, int height, int resolution, - bool single_column, IMAGE* image, - BLOCK_LIST* blocks, TO_BLOCK_LIST* to_blocks) { +int Tesseract::AutoPageSeg(int resolution, bool single_column, + bool osd, bool only_osd, + BLOCK_LIST* blocks, TO_BLOCK_LIST* to_blocks, + Tesseract* osd_tess, OSResults* osr) { int vertical_x = 0; int vertical_y = 1; TabVector_LIST v_lines; @@ -196,7 +237,8 @@ int Tesseract::AutoPageSeg(int width, int height, int resolution, #ifdef HAVE_LIBLEPT if (pix_binary_ != NULL) { if (textord_debug_images) { - Pix* grey_pix = pixCreate(width, height, 8); + Pix* grey_pix = pixCreate(pixGetWidth(pix_binary_), + pixGetHeight(pix_binary_), 8); // Printable images are light grey on white, but for screen display // they are black on dark grey so the other colors show up well. if (textord_debug_printable) { @@ -210,8 +252,9 @@ int Tesseract::AutoPageSeg(int width, int height, int resolution, pixWrite(AlignedBlob::textord_debug_pix().string(), grey_pix, IFF_PNG); pixDestroy(&grey_pix); } - if (tessedit_dump_pageseg_images) + if (tessedit_dump_pageseg_images) { pixWrite("tessinput.png", pix_binary_, IFF_PNG); + } // Leptonica is used to find the lines and image regions in the input. LineFinder::FindVerticalLines(resolution, pix_binary_, &vertical_x, &vertical_y, &v_lines); @@ -221,16 +264,13 @@ int Tesseract::AutoPageSeg(int width, int height, int resolution, ImageFinder::FindImages(pix_binary_, &boxa, &pixa); if (tessedit_dump_pageseg_images) pixWrite("tessnoimages.png", pix_binary_, IFF_PNG); - // Copy the Pix to the IMAGE. - image->FromPix(pix_binary_); if (single_column) v_lines.clear(); } #endif - TO_BLOCK_LIST land_blocks, port_blocks; - TBOX page_box; + TO_BLOCK_LIST port_blocks; // The rest of the algorithm uses the usual connected components. 
- find_components(blocks, &land_blocks, &port_blocks, &page_box); + textord_.find_components(pix_binary_, blocks, &port_blocks); TO_BLOCK_IT to_block_it(&port_blocks); ASSERT_HOST(!to_block_it.empty()); @@ -244,20 +284,50 @@ int Tesseract::AutoPageSeg(int width, int height, int resolution, // that there aren't any interesting line separators or images, since // it means that we have a pre-defined unlv zone file. ColumnFinder finder(static_cast(to_block->line_size), - blkbox.botleft(), blkbox.topright(), + blkbox.botleft(), blkbox.topright(), resolution, &v_lines, &h_lines, vertical_x, vertical_y); - if (finder.FindBlocks(height, resolution, single_column, + BLOBNBOX_CLIST osd_blobs; + int osd_orientation = 0; + bool vertical_text = finder.IsVerticallyAlignedText(to_block, &osd_blobs); + if (osd && osd_tess != NULL && osr != NULL) { + os_detect_blobs(&osd_blobs, osr, osd_tess); + if (only_osd) continue; + osd_orientation = osr->best_result.orientation_id; + double osd_score = osr->orientations[osd_orientation]; + double osd_margin = min_orientation_margin * 2; + // tprintf("Orientation scores:"); + for (int i = 0; i < 4; ++i) { + if (i != osd_orientation && + osd_score - osr->orientations[i] < osd_margin) { + osd_margin = osd_score - osr->orientations[i]; + } + // tprintf(" %d:%f", i, osr->orientations[i]); + } + // tprintf("\n"); + if (osd_margin < min_orientation_margin) { + // Margin insufficient - dream up a suitable default. 
+ if (vertical_text && (osd_orientation & 1)) + osd_orientation = 3; + else + osd_orientation = 0; + tprintf("Score margin insufficient:%.2f, using %d as a default\n", + osd_margin, osd_orientation); + } + } + osd_blobs.shallow_clear(); + finder.CorrectOrientation(to_block, vertical_text, osd_orientation); + if (finder.FindBlocks(single_column, pixGetHeight(pix_binary_), to_block, boxa, pixa, &found_blocks, to_blocks) < 0) return -1; - finder.ComputeDeskewVectors(&deskew_, &reskew_); + finder.GetDeskewVectors(&deskew_, &reskew_); boxa = NULL; pixa = NULL; } } -#ifdef HAVE_LIBLEPT boxaDestroy(&boxa); pixaDestroy(&pixa); -#endif + if (only_osd) return 0; + blocks->clear(); BLOCK_IT block_it(blocks); // Move the found blocks to the input/output blocks. diff --git a/ccmain/pagewalk.cpp b/ccmain/pagewalk.cpp index ff15947444..3b3bcb3da6 100644 --- a/ccmain/pagewalk.cpp +++ b/ccmain/pagewalk.cpp @@ -17,602 +17,31 @@ * **********************************************************************/ -#ifdef _MSC_VER -#pragma warning(disable:4244) // Conversion warnings -#endif - #include "mfcpch.h" -#include "pagewalk.h" +#include "pageres.h" #include "tesseractclass.h" -#define EXTERN - -EXTERN BOOL_VAR (current_word_quit, FALSE, "Stop processing this word"); -DLLSYM BOOL_VAR (selection_quit, FALSE, "Stop processing this selection"); - -/** - * block_list_bounding_box() - * - * Scan block list to find the bounding box of all blocks. - * @param block_list the block list to find the bounding box of - */ - -TBOX block_list_bounding_box(BLOCK_LIST *block_list) -{ - BLOCK_IT block_it(block_list); - TBOX enclosing_box; - - for (block_it.mark_cycle_pt (); !block_it.cycled_list (); - block_it.forward ()) - enclosing_box += block_it.data ()->bounding_box (); - return enclosing_box; -} - - -/** - * block_list_compress() - * - * Pack a block list to occupy a smaller space by compressing each block and - * moving the compressed blocks one above the other. 
- * The compressed block list has the same top left point as the uncompressed - * first. Blocks are reordered so that the source names are in alphabetic - * order. (This gathers together, but does not combine, blocks from the same - * file.) - * - * The enclosing box of the compressed block list is returned. - */ - -const TBOX block_list_compress(BLOCK_LIST *block_list) -{ - BLOCK_IT block_it(block_list); - BLOCK *block; - ICOORD initial_top_left; - ICOORD block_spacing (0, BLOCK_SPACING); - TBOX enclosing_box; //for full display - - initial_top_left = block_it.data()->bounding_box().topleft(); - //group srcfile blks - block_it.sort (block_name_order); - - /* Compress the target block list into an area starting from the top left of - the first block on the list */ - - enclosing_box = TBOX (initial_top_left, initial_top_left); - enclosing_box.move_bottom_edge (BLOCK_SPACING); - - for (block_it.mark_cycle_pt (); - !block_it.cycled_list (); block_it.forward ()) { - block = block_it.data (); - block->compress (enclosing_box.botleft () - block_spacing - - block->bounding_box ().topleft ()); - enclosing_box += block->bounding_box (); - } - return enclosing_box; -} - - -/** - * block_list_move() - * - * Move all the blocks in the list by a vector - * - * @param block_list the block list to move - * @param vec the vector to move it by - */ - -void block_list_move(BLOCK_LIST *block_list, - ICOORD vec) -{ - BLOCK_IT block_it(block_list); - - for (block_it.mark_cycle_pt (); !block_it.cycled_list (); - block_it.forward ()) - block_it.data ()->move (vec); -} - - -/** - * block_name_order() - * - * Block comparator used to sort a block list so that blocks from the same - * filename are located together, and blocks from the same file are ordered - * by vertical position. 
- */ - -int block_name_order(const void *block1p, - const void *block2p) -{ - int result; - BLOCK *block1 = *(BLOCK **) block1p; - BLOCK *block2 = *(BLOCK **) block2p; - - result = strcmp (block1->name (), block2->name ()); - if (result == 0) - result = block2->bounding_box ().top () - block1->bounding_box ().top (); - return result; -} - - -/** - * process_all_blobs() - * - * Walk the current block list applying the specified blob processor function - * to all blobs - * @param block_list the blocks to check - * @param blob_processor function to call - * @param c_blob_processor function to call - */ - -void -process_all_blobs (BLOCK_LIST * block_list, - BOOL8 blob_processor (BLOCK *, ROW *, WERD *, PBLOB *), - BOOL8 c_blob_processor (BLOCK *, ROW *, WERD *, C_BLOB *)) -{ - BLOCK_IT block_it(block_list); - BLOCK *block; - ROW_IT row_it; - ROW *row; - WERD_IT word_it; - WERD *word; - PBLOB_IT blob_it; - PBLOB *blob; - C_BLOB_IT c_blob_it; - C_BLOB *c_blob; - - for (block_it.mark_cycle_pt (); !block_it.cycled_list (); block_it.forward ()) { - block = block_it.data (); - row_it.set_to_list (block->row_list ()); - for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) { - row = row_it.data (); - word_it.set_to_list (row->word_list ()); - for (word_it.mark_cycle_pt (); !word_it.cycled_list (); word_it.forward ()) { - word = word_it.data (); - if (word->flag (W_POLYGON)) { - if (blob_processor != NULL) { - blob_it.set_to_list (word->blob_list ()); - for (blob_it.mark_cycle_pt (); !blob_it.cycled_list (); blob_it.forward ()) { - blob = blob_it.data (); - if (!blob_processor (block, row, word, blob) || selection_quit) - return; - } - } - } - else { - if (c_blob_processor != NULL) { - c_blob_it.set_to_list (word->cblob_list ()); - for (c_blob_it.mark_cycle_pt (); !c_blob_it.cycled_list (); c_blob_it.forward ()) { - c_blob = c_blob_it.data (); - if (!c_blob_processor (block, row, word, c_blob) || selection_quit) - return; - } - } - } - } - } - } -} - - -/** 
- * process_selected_blobs() - * - * Walk the current block list applying the specified blob processor function - * to each selected blob - * @param block_list the blocks to check - * @param selection_box within this box(?) - * @param blob_processor function to call - * @param c_blob_processor function to call - */ - -void -process_selected_blobs (BLOCK_LIST * block_list, - TBOX & selection_box, - BOOL8 blob_processor (BLOCK *, ROW *, WERD *, PBLOB *), - BOOL8 c_blob_processor (BLOCK *, ROW *, WERD *, C_BLOB *)) -{ - BLOCK_IT block_it(block_list); - BLOCK *block; - ROW_IT row_it; - ROW *row; - WERD_IT word_it; - WERD *word; - PBLOB_IT blob_it; - PBLOB *blob; - C_BLOB_IT c_blob_it; - C_BLOB *c_blob; - - for (block_it.mark_cycle_pt (); !block_it.cycled_list (); block_it.forward ()) { - block = block_it.data (); - if (block->bounding_box ().overlap (selection_box)) { - row_it.set_to_list (block->row_list ()); - for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) { - row = row_it.data (); - if (row->bounding_box ().overlap (selection_box)) { - word_it.set_to_list (row->word_list ()); - for (word_it.mark_cycle_pt (); !word_it.cycled_list (); word_it.forward ()) { - word = word_it.data (); - if (word->bounding_box ().overlap (selection_box)) { - if (word->flag (W_POLYGON)) { - if (blob_processor != NULL) { - blob_it.set_to_list (word->blob_list ()); - for (blob_it.mark_cycle_pt (); !blob_it.cycled_list (); blob_it.forward ()) { - blob = blob_it.data (); - if (blob->bounding_box().overlap (selection_box)) { - if (!blob_processor(block, row, word, blob) || selection_quit) - return; - } - } - } - } - else { - if (c_blob_processor != NULL) { - c_blob_it.set_to_list (word->cblob_list ()); - for (c_blob_it.mark_cycle_pt (); !c_blob_it.cycled_list (); c_blob_it.forward ()) { - c_blob = c_blob_it.data (); - if (c_blob->bounding_box ().overlap (selection_box)) { - if (!c_blob_processor(block, row, word, c_blob) || selection_quit) - return; - } - } - } - } - } 
- } - } - } - } - } -} - - -/** - * process_all_words() - * - * Walk the current block list applying the specified word processor function - * to all words - */ -void -process_all_words (BLOCK_LIST * block_list, - BOOL8 word_processor (BLOCK *, ROW *, WERD *)) -{ - BLOCK_IT block_it(block_list); - BLOCK *block; - ROW_IT row_it; - ROW *row; - WERD_IT word_it; - WERD *word; - - for (block_it.mark_cycle_pt (); !block_it.cycled_list (); block_it.forward ()) { - block = block_it.data (); - row_it.set_to_list (block->row_list ()); - for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) { - row = row_it.data (); - word_it.set_to_list (row->word_list ()); - for (word_it.mark_cycle_pt (); !word_it.cycled_list (); word_it.forward ()) { - word = word_it.data (); - if (!word_processor (block, row, word) || selection_quit) - return; - } - } - } -} - - /** * process_selected_words() * * Walk the current block list applying the specified word processor function - * to each word selected. + * to each word that overlaps the selection_box. 
*/ - -void -process_selected_words (BLOCK_LIST * block_list, - TBOX & selection_box, - BOOL8 word_processor (BLOCK *, ROW *, WERD *)) -{ - BLOCK_IT block_it(block_list); - BLOCK *block; - ROW_IT row_it; - ROW *row; - WERD_IT word_it; - WERD *word; - - for (block_it.mark_cycle_pt (); !block_it.cycled_list (); block_it.forward ()) { - block = block_it.data (); - if (block->bounding_box ().overlap (selection_box)) { - row_it.set_to_list (block->row_list ()); - for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) { - row = row_it.data (); - if (row->bounding_box ().overlap (selection_box)) { - word_it.set_to_list (row->word_list ()); - for (word_it.mark_cycle_pt (); !word_it.cycled_list (); word_it.forward ()) { - word = word_it.data (); - if (word->bounding_box ().overlap (selection_box)) { - if (!word_processor (block, row, word) || selection_quit) - return; - } - } - } - } - } - } -} - namespace tesseract { -void -Tesseract::process_selected_words (BLOCK_LIST * block_list, - TBOX & selection_box, - BOOL8 (tesseract::Tesseract::*word_processor) (BLOCK *, ROW *, WERD *)) -{ - BLOCK_IT block_it(block_list); - BLOCK *block; - ROW_IT row_it; - ROW *row; - WERD_IT word_it; - WERD *word; - - for (block_it.mark_cycle_pt (); !block_it.cycled_list (); block_it.forward ()) { - block = block_it.data (); - if (block->bounding_box ().overlap (selection_box)) { - row_it.set_to_list (block->row_list ()); - for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) { - row = row_it.data (); - if (row->bounding_box ().overlap (selection_box)) { - word_it.set_to_list (row->word_list ()); - for (word_it.mark_cycle_pt (); !word_it.cycled_list (); word_it.forward ()) { - word = word_it.data (); - if (word->bounding_box ().overlap (selection_box)) { - if (!((this->*word_processor) (block, row, word)) || selection_quit) - return; - } - } - } - } - } - } -} -} // namespace tesseract - - -/** - * process_all_words_it() PASS ITERATORS - * - * Walk the current 
block list applying the specified word processor function - * to all words - */ - -void -process_all_words_it (BLOCK_LIST * block_list, - BOOL8 word_processor (BLOCK *, ROW *, WERD *, BLOCK_IT &, ROW_IT &, WERD_IT &)) -{ - BLOCK_IT block_it(block_list); - BLOCK *block; - ROW_IT row_it; - ROW *row; - WERD_IT word_it; - WERD *word; - - for (block_it.mark_cycle_pt (); !block_it.cycled_list (); block_it.forward ()) { - block = block_it.data (); - row_it.set_to_list (block->row_list ()); - for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) { - row = row_it.data (); - word_it.set_to_list (row->word_list ()); - for (word_it.mark_cycle_pt (); !word_it.cycled_list (); word_it.forward ()) { - word = word_it.data (); - if (!word_processor (block, row, word, block_it, row_it, word_it) || selection_quit) - return; - } - } - } -} - - -/** - * process_selected_words_it() PASS ITERATORS - * - * Walk the current block list applying the specified word processor function - * to each word selected. 
- */ - -void -process_selected_words_it (BLOCK_LIST * block_list, - TBOX & selection_box, - BOOL8 word_processor (BLOCK *, ROW *, WERD *, BLOCK_IT &, ROW_IT &, WERD_IT &)) -{ - BLOCK_IT block_it(block_list); - BLOCK *block; - ROW_IT row_it; - ROW *row; - WERD_IT word_it; - WERD *word; - - for (block_it.mark_cycle_pt (); !block_it.cycled_list (); block_it.forward ()) { - block = block_it.data (); - if (block->bounding_box ().overlap (selection_box)) { - row_it.set_to_list (block->row_list ()); - for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) { - row = row_it.data (); - if (row->bounding_box ().overlap (selection_box)) { - word_it.set_to_list (row->word_list ()); - for (word_it.mark_cycle_pt (); !word_it.cycled_list (); word_it.forward ()) { - word = word_it.data (); - if (word->bounding_box ().overlap (selection_box)) { - if (!word_processor (block, row, word, block_it, row_it, word_it) || selection_quit) - return; - } - } - } - } - } - } -} - - -/** - * process_all_blocks() - * - * Walk the current block list applying the specified block processor function - * to each block. - */ - -void -process_all_blocks (BLOCK_LIST * block_list, - BOOL8 block_processor (BLOCK *)) -{ - BLOCK_IT block_it(block_list); - BLOCK *block; - - for (block_it.mark_cycle_pt (); !block_it.cycled_list (); block_it.forward ()) { - block = block_it.data (); - if (!block_processor (block) || selection_quit) - return; - } -} - - -/** - * process_selected_blocks() - * - * Walk the current block list applying the specified block processor function - * to each block selected. 
- */ - -void -process_selected_blocks (BLOCK_LIST * block_list, - TBOX & selection_box, - BOOL8 block_processor (BLOCK *)) -{ - BLOCK_IT block_it(block_list); - BLOCK *block; - - for (block_it.mark_cycle_pt (); !block_it.cycled_list (); block_it.forward ()) { - block = block_it.data (); - if (block->bounding_box ().overlap (selection_box)) { - if (!block_processor (block) || selection_quit) +void Tesseract::process_selected_words( + PAGE_RES* page_res, // blocks to check + TBOX & selection_box, + BOOL8(tesseract::Tesseract::*word_processor)( // function to call + BLOCK* block, ROW* row, WERD_RES* word_res)) { + for (PAGE_RES_IT page_res_it(page_res); page_res_it.word() != NULL; + page_res_it.forward()) { + WERD* word = page_res_it.word()->word; + if (word->bounding_box().overlap(selection_box)) { + if (!((this->*word_processor)(page_res_it.block()->block, + page_res_it.row()->row, + page_res_it.word()))) return; } } } - - -/** - * process_all_rows() - * - * Walk the current block list applying the specified row processor function - * to all rows - */ - -void -process_all_rows (BLOCK_LIST * block_list, - BOOL8 row_processor (BLOCK *, ROW *)) -{ - BLOCK_IT block_it(block_list); - BLOCK *block; - ROW_IT row_it; - ROW *row; - - for (block_it.mark_cycle_pt (); !block_it.cycled_list (); block_it.forward ()) { - block = block_it.data (); - row_it.set_to_list (block->row_list ()); - for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) { - row = row_it.data (); - if (!row_processor (block, row) || selection_quit) - return; - } - } -} - - -/** - * process_selected_rows() - * - * Walk the current block list applying the specified row processor function - * to each row selected. 
- */ - -void -process_selected_rows (BLOCK_LIST * block_list, - TBOX & selection_box, - BOOL8 row_processor (BLOCK *, ROW *)) -{ - BLOCK_IT block_it(block_list); - BLOCK *block; - ROW_IT row_it; - ROW *row; - - for (block_it.mark_cycle_pt (); !block_it.cycled_list (); block_it.forward ()) { - block = block_it.data (); - if (block->bounding_box ().overlap (selection_box)) { - row_it.set_to_list (block->row_list ()); - for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) { - row = row_it.data (); - if (row->bounding_box ().overlap (selection_box)) { - if (!row_processor (block, row) || selection_quit) - return; - } - } - } - } -} - - -/** - * process_all_rows_it() PASS ITERATORS - * - * Walk the current block list applying the specified row processor function - * to all rows - */ - -void -process_all_rows_it (BLOCK_LIST * block_list, - BOOL8 row_processor (BLOCK *, ROW *, BLOCK_IT &, ROW_IT &)) -{ - BLOCK_IT block_it(block_list); - BLOCK *block; - ROW_IT row_it; - ROW *row; - - for (block_it.mark_cycle_pt (); !block_it.cycled_list (); block_it.forward ()) { - block = block_it.data (); - row_it.set_to_list (block->row_list ()); - for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) { - row = row_it.data (); - if (!row_processor (block, row, block_it, row_it) || selection_quit) - return; - } - } -} - - -/** - * process_selected_rows_it() PASS ITERATORS - * - * Walk the current block list applying the specified row processor function - * to each row selected. 
- */ - -void -process_selected_rows_it (BLOCK_LIST * block_list, - TBOX & selection_box, - BOOL8 row_processor (BLOCK *, ROW *, BLOCK_IT &, ROW_IT &)) -{ - BLOCK_IT block_it(block_list); - BLOCK *block; - ROW_IT row_it; - ROW *row; - - for (block_it.mark_cycle_pt (); !block_it.cycled_list (); block_it.forward ()) { - block = block_it.data (); - if (block->bounding_box ().overlap (selection_box)) { - row_it.set_to_list (block->row_list ()); - for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) { - row = row_it.data (); - if (row->bounding_box ().overlap (selection_box)) { - if (!row_processor (block, row, block_it, row_it) || selection_quit) - return; - } - } - } - } -} +} // namespace tesseract diff --git a/ccmain/pagewalk.h b/ccmain/pagewalk.h deleted file mode 100644 index 65e0da8457..0000000000 --- a/ccmain/pagewalk.h +++ /dev/null @@ -1,157 +0,0 @@ -/********************************************************************** - * File: pagewalk.h (Formerly walkers.h) - * Description: Structure processors - * Author: Phil Cheatle - * Created: Thu Oct 10 16:25:24 BST 1991 - * - * (C) Copyright 1991, Hewlett-Packard Ltd. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. 
- * - **********************************************************************/ - -#ifndef PAGEWALK_H -#define PAGEWALK_H - -#include "ocrblock.h" -#include "ocrrow.h" -#include "werd.h" -#include "polyblob.h" -#include "stepblob.h" -#include "rect.h" -#include "varable.h" -#include "notdll.h" -#include "tesseractclass.h" - -#define BLOCK_SPACING 20 - -extern BOOL_VAR_H (current_word_quit, FALSE, "Stop processing this word"); -extern DLLSYM BOOL_VAR_H (selection_quit, FALSE, -"Stop processing this selection"); -TBOX block_list_bounding_box( //find bounding box - BLOCK_LIST *block_list //of this block list - ); -const TBOX block_list_compress( //shuffle up blocks - BLOCK_LIST *block_list); -void block_list_move( //move - BLOCK_LIST *block_list, //this list - ICOORD vec //by this vector - ); -int block_name_order( //sort blocks - const void *block1p, //ptr to ptr to block1 - const void *block2p //ptr to ptr to block2 - ); -void process_all_blobs ( //process blobs -BLOCK_LIST * block_list, //blocks to check -BOOL8 blob_processor ( //function to call - //function to call -BLOCK *, ROW *, WERD *, PBLOB *), BOOL8 c_blob_processor ( -BLOCK -*, -ROW -*, -WERD -*, -C_BLOB -*)); -void process_selected_blobs ( //process blobs -BLOCK_LIST * block_list, //blocks to check - //function to call -TBOX & selection_box, BOOL8 blob_processor ( - //function to call -BLOCK *, ROW *, WERD *, PBLOB *), BOOL8 c_blob_processor ( -BLOCK -*, -ROW -*, -WERD -*, -C_BLOB -*)); -void process_all_words ( //process words -BLOCK_LIST * block_list, //blocks to check -BOOL8 word_processor ( //function to call -BLOCK *, ROW *, WERD *)); -void process_selected_words ( //process words -BLOCK_LIST * block_list, //blocks to check - //function to call -TBOX & selection_box, BOOL8 word_processor ( -BLOCK -*, -ROW -*, -WERD -*)); - -void process_all_words_it ( //process words -BLOCK_LIST * block_list, //blocks to check -BOOL8 word_processor ( //function to call -BLOCK *, -ROW *, -WERD *, -BLOCK_IT &, -ROW_IT &, 
WERD_IT &)); -void process_selected_words_it ( //process words -BLOCK_LIST * block_list, //blocks to check - //function to call -TBOX & selection_box, BOOL8 word_processor ( -BLOCK -*, -ROW -*, -WERD -*, -BLOCK_IT -&, -ROW_IT -&, -WERD_IT -&)); -void process_all_blocks ( //process blocks -BLOCK_LIST * block_list, //blocks to check -BOOL8 block_processor ( //function to call -BLOCK *)); -void process_selected_blocks ( //process blocks -BLOCK_LIST * block_list, //blocks to check - //function to call -TBOX & selection_box, BOOL8 block_processor ( -BLOCK -*)); -void process_all_rows ( //process words -BLOCK_LIST * block_list, //blocks to check -BOOL8 row_processor ( //function to call -BLOCK *, ROW *)); -void process_selected_rows ( //process rows -BLOCK_LIST * block_list, //blocks to check - //function to call -TBOX & selection_box, BOOL8 row_processor ( -BLOCK -*, -ROW -*)); -void process_all_rows_it ( //process words -BLOCK_LIST * block_list, //blocks to check -BOOL8 row_processor ( //function to call -BLOCK *, -ROW *, -BLOCK_IT &, ROW_IT &)); -void process_selected_rows_it ( //process rows -BLOCK_LIST * block_list, //blocks to check - //function to call -TBOX & selection_box, BOOL8 row_processor ( -BLOCK -*, -ROW -*, -BLOCK_IT -&, -ROW_IT -&)); -#endif diff --git a/ccmain/paircmp.cpp b/ccmain/paircmp.cpp deleted file mode 100644 index fb25070383..0000000000 --- a/ccmain/paircmp.cpp +++ /dev/null @@ -1,113 +0,0 @@ -/********************************************************************** - * File: paircmp.cpp (Formerly paircmp.c) - * Description: Code to compare two blobs using the adaptive matcher - * Author: Ray Smith - * Created: Wed Apr 21 09:31:02 BST 1993 - * - * (C) Copyright 1993, Hewlett-Packard Ltd. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. 
- ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. - * - **********************************************************************/ - -#ifdef _MSC_VER -#pragma warning(disable:4244) // Conversion warnings -#endif - -#include "mfcpch.h" -#include "blobcmp.h" -#include "tfacep.h" -#include "paircmp.h" -#include "tesseractclass.h" - -#define EXTERN - -/********************************************************************** - * compare_blob_pairs - * - * A blob processor to compare pairs of selected blobs. - **********************************************************************/ - -namespace tesseract { -BOOL8 Tesseract::compare_blob_pairs( //blob processor - BLOCK *, - ROW *row, //row it came from - WERD *, - PBLOB *blob //blob to compare - ) { - static ROW *prev_row = NULL; //other in pair - static PBLOB *prev_blob = NULL; - float rating; //from matcher - - if (prev_row == NULL || prev_blob == NULL) { - prev_row = row; - prev_blob = blob; - } - else { - rating = compare_blobs (prev_blob, prev_row, blob, row); - tprintf ("Rating=%g\n", rating); - prev_row = NULL; - prev_blob = NULL; - } - return TRUE; -} - - -/********************************************************************** - * compare_blobs - * - * Compare 2 blobs and return the rating. 
- **********************************************************************/ - -float Tesseract::compare_blobs( //match 2 blobs - PBLOB *blob1, //first blob - ROW *row1, //row it came from - PBLOB *blob2, //other blob - ROW *row2) { - PBLOB *bn_blob1; //baseline norm - PBLOB *bn_blob2; - DENORM denorm1, denorm2; - float rating; //match result - - bn_blob1 = blob1->baseline_normalise (row1, &denorm1); - bn_blob2 = blob2->baseline_normalise (row2, &denorm2); - rating = compare_bln_blobs (bn_blob1, &denorm1, bn_blob2, &denorm2); - delete bn_blob1; - delete bn_blob2; - return rating; -} - - -/********************************************************************** - * compare_bln_blobs - * - * Compare 2 baseline normalised blobs and return the rating. - **********************************************************************/ -float Tesseract::compare_bln_blobs( //match 2 blobs - PBLOB *blob1, //first blob - DENORM *denorm1, - PBLOB *blob2, //other blob - DENORM *denorm2) { - TBLOB *tblob1; //tessblobs - TBLOB *tblob2; - TEXTROW tessrow1, tessrow2; //tess rows - float rating; //match result - - tblob1 = make_tess_blob (blob1, TRUE); - make_tess_row(denorm1, &tessrow1); - tblob2 = make_tess_blob (blob2, TRUE); - make_tess_row(denorm2, &tessrow2); - rating = compare_tess_blobs (tblob1, &tessrow1, tblob2, &tessrow2); - free_blob(tblob1); - free_blob(tblob2); - - return rating; -} -} // namespace tesseract diff --git a/ccmain/varabled.cpp b/ccmain/paramsd.cpp similarity index 56% rename from ccmain/varabled.cpp rename to ccmain/paramsd.cpp index e059058d25..087a95baca 100644 --- a/ccmain/varabled.cpp +++ b/ccmain/paramsd.cpp @@ -1,6 +1,6 @@ /////////////////////////////////////////////////////////////////////// -// File: varabled.cpp -// Description: Variables Editor +// File: paramsd.cpp +// Description: Tesseract parameter Editor // Author: Joern Wanke // Created: Wed Jul 18 10:05:01 PDT 2007 // @@ -17,7 +17,7 @@ // 
/////////////////////////////////////////////////////////////////////// // -// The variables editor is used to edit all the variables used within +// The parameters editor is used to edit all the parameters used within // tesseract from the ui. #ifdef WIN32 #else @@ -33,69 +33,68 @@ #endif #ifndef GRAPHICS_DISABLED -#include "varabled.h" +#include "paramsd.h" +#include "params.h" #include "scrollview.h" #include "svmnode.h" -#include "varable.h" -#include "mainblk.h" -#define VARDIR "configs/" /*variables files */ +#define VARDIR "configs/" /*parameters files */ #define MAX_ITEMS_IN_SUBMENU 30 -const ERRCODE NO_VARIABLES_TO_EDIT = "No Variables defined to edit"; - +// The following variables should remain static globals, since they +// are used by debug editor, which uses a single Tesseract instance. +// // Contains the mappings from unique VC ids to their actual pointers. -static std::map vcMap; - -static int nrVariables = 0; +static std::map vcMap; +static int nrParams = 0; static int writeCommands[2]; -ELISTIZE(VariableContent) +ELISTIZE(ParamContent) -// Constructors for the various VarTypes. -VariableContent::VariableContent(STRING_VARIABLE* it) { - my_id_ = nrVariables; - nrVariables++; - var_type_ = VT_STRING; +// Constructors for the various ParamTypes. +ParamContent::ParamContent(tesseract::StringParam* it) { + my_id_ = nrParams; + nrParams++; + param_type_ = VT_STRING; sIt = it; vcMap[my_id_] = this; } -// Constructors for the various VarTypes. -VariableContent::VariableContent(INT_VARIABLE* it) { - my_id_ = nrVariables; - nrVariables++; - var_type_ = VT_INTEGER; +// Constructors for the various ParamTypes. +ParamContent::ParamContent(tesseract::IntParam* it) { + my_id_ = nrParams; + nrParams++; + param_type_ = VT_INTEGER; iIt = it; vcMap[my_id_] = this; } -// Constructors for the various VarTypes. 
-VariableContent::VariableContent(BOOL_VARIABLE* it) { - my_id_ = nrVariables; - nrVariables++; - var_type_ = VT_BOOLEAN; +// Constructors for the various ParamTypes. +ParamContent::ParamContent(tesseract::BoolParam* it) { + my_id_ = nrParams; + nrParams++; + param_type_ = VT_BOOLEAN; bIt = it; vcMap[my_id_] = this; } -// Constructors for the various VarTypes. -VariableContent::VariableContent(double_VARIABLE* it) { - my_id_ = nrVariables; - nrVariables++; - var_type_ = VT_DOUBLE; +// Constructors for the various ParamTypes. +ParamContent::ParamContent(tesseract::DoubleParam* it) { + my_id_ = nrParams; + nrParams++; + param_type_ = VT_DOUBLE; dIt = it; vcMap[my_id_] = this; } // Gets a VC object identified by its ID. -VariableContent* VariableContent::GetVariableContentById(int id) { +ParamContent* ParamContent::GetParamContentById(int id) { return vcMap[id]; } // Copy the first N words from the source string to the target string. // Words are delimited by "_". -void VariablesEditor::GetFirstWords( +void ParamsEditor::GetFirstWords( const char *s, // source string int n, // number of words char *t // target string @@ -114,34 +113,34 @@ void VariablesEditor::GetFirstWords( } // Getter for the name. -const char* VariableContent::GetName() const { - if (var_type_ == VT_INTEGER) { return iIt->name_str(); } - else if (var_type_ == VT_BOOLEAN) { return bIt->name_str(); } - else if (var_type_ == VT_DOUBLE) { return dIt->name_str(); } - else if (var_type_ == VT_STRING) { return sIt->name_str(); } +const char* ParamContent::GetName() const { + if (param_type_ == VT_INTEGER) { return iIt->name_str(); } + else if (param_type_ == VT_BOOLEAN) { return bIt->name_str(); } + else if (param_type_ == VT_DOUBLE) { return dIt->name_str(); } + else if (param_type_ == VT_STRING) { return sIt->name_str(); } else - return "ERROR: VariableContent::GetName()"; + return "ERROR: ParamContent::GetName()"; } // Getter for the description. 
-const char* VariableContent::GetDescription() const { - if (var_type_ == VT_INTEGER) { return iIt->info_str(); } - else if (var_type_ == VT_BOOLEAN) { return bIt->info_str(); } - else if (var_type_ == VT_DOUBLE) { return dIt->info_str(); } - else if (var_type_ == VT_STRING) { return sIt->info_str(); } +const char* ParamContent::GetDescription() const { + if (param_type_ == VT_INTEGER) { return iIt->info_str(); } + else if (param_type_ == VT_BOOLEAN) { return bIt->info_str(); } + else if (param_type_ == VT_DOUBLE) { return dIt->info_str(); } + else if (param_type_ == VT_STRING) { return sIt->info_str(); } else return NULL; } // Getter for the value. -const char* VariableContent::GetValue() const { +const char* ParamContent::GetValue() const { char* msg = new char[1024]; - if (var_type_ == VT_INTEGER) { + if (param_type_ == VT_INTEGER) { sprintf(msg, "%d", ((inT32) *(iIt))); - } else if (var_type_ == VT_BOOLEAN) { + } else if (param_type_ == VT_BOOLEAN) { sprintf(msg, "%d", ((BOOL8) * (bIt))); - } else if (var_type_ == VT_DOUBLE) { + } else if (param_type_ == VT_DOUBLE) { sprintf(msg, "%g", ((double) * (dIt))); - } else if (var_type_ == VT_STRING) { + } else if (param_type_ == VT_STRING) { if (((STRING) * (sIt)).string() != NULL) { sprintf(msg, "%s", ((STRING) * (sIt)).string()); } else { @@ -152,26 +151,26 @@ char* msg = new char[1024]; } // Setter for the value. -void VariableContent::SetValue(const char* val) { +void ParamContent::SetValue(const char* val) { // TODO (wanke) Test if the values actually are properly converted. // (Quickly visible impacts?) 
changed_ = TRUE; - if (var_type_ == VT_INTEGER) { + if (param_type_ == VT_INTEGER) { iIt->set_value(atoi(val)); - } else if (var_type_ == VT_BOOLEAN) { + } else if (param_type_ == VT_BOOLEAN) { bIt->set_value(atoi(val)); - } else if (var_type_ == VT_DOUBLE) { + } else if (param_type_ == VT_DOUBLE) { dIt->set_value(strtod(val, NULL)); - } else if (var_type_ == VT_STRING) { + } else if (param_type_ == VT_STRING) { sIt->set_value(val); } } // Gets the up to the first 3 prefixes from s (split by _). // For example, tesseract_foo_bar will be split into tesseract,foo and bar. -void VariablesEditor::GetPrefixes(const char* s, STRING* level_one, - STRING* level_two, - STRING* level_three) { +void ParamsEditor::GetPrefixes(const char* s, STRING* level_one, + STRING* level_two, + STRING* level_three) { char* p = new char[1024]; GetFirstWords(s, 1, p); *level_one = p; @@ -183,50 +182,47 @@ void VariablesEditor::GetPrefixes(const char* s, STRING* level_one, } // Compare two VC objects by their name. -int VariableContent::Compare(const void* v1, const void* v2) { - const VariableContent* one = - *reinterpret_cast(v1); - const VariableContent* two = - *reinterpret_cast(v2); +int ParamContent::Compare(const void* v1, const void* v2) { + const ParamContent* one = + *reinterpret_cast(v1); + const ParamContent* two = + *reinterpret_cast(v2); return strcmp(one->GetName(), two->GetName()); } -// Find all editable variables used within tesseract and create a +// Find all editable parameters used within tesseract and create a // SVMenuNode tree from it. // TODO (wanke): This is actually sort of hackish. -SVMenuNode* VariablesEditor::BuildListOfAllLeaves() { // find all variables. +SVMenuNode* ParamsEditor::BuildListOfAllLeaves(tesseract::Tesseract *tess) { SVMenuNode* mr = new SVMenuNode(); - VariableContent_LIST vclist; - VariableContent_IT vc_it(&vclist); + ParamContent_LIST vclist; + ParamContent_IT vc_it(&vclist); // Amount counts the number of entries for a specific char*. 
// TODO(rays) get rid of the use of std::map. std::map amount; - INT_VARIABLE_C_IT int_it(INT_VARIABLE::get_head()); - BOOL_VARIABLE_C_IT bool_it(BOOL_VARIABLE::get_head()); - STRING_VARIABLE_C_IT str_it(STRING_VARIABLE::get_head()); - double_VARIABLE_C_IT dbl_it(double_VARIABLE::get_head()); - - // Add all variables to a list. - for (int_it.mark_cycle_pt(); !int_it.cycled_list(); int_it.forward()) { - vc_it.add_after_then_move(new VariableContent(int_it.data())); - } - - for (bool_it.mark_cycle_pt(); !bool_it.cycled_list(); bool_it.forward()) { - vc_it.add_after_then_move(new VariableContent(bool_it.data())); - } - - for (str_it.mark_cycle_pt(); !str_it.cycled_list(); str_it.forward()) { - vc_it.add_after_then_move(new VariableContent(str_it.data())); - } - - for (dbl_it.mark_cycle_pt(); !dbl_it.cycled_list(); dbl_it.forward()) { - vc_it.add_after_then_move(new VariableContent(dbl_it.data())); + // Add all parameters to a list. + int v, i; + int num_iterations = (tess->params() == NULL) ? 1 : 2; + for (v = 0; v < num_iterations; ++v) { + tesseract::ParamsVectors *vec = (v == 0) ? GlobalParams() : tess->params(); + for (i = 0; i < vec->int_params.size(); ++i) { + vc_it.add_after_then_move(new ParamContent(vec->int_params[i])); + } + for (i = 0; i < vec->bool_params.size(); ++i) { + vc_it.add_after_then_move(new ParamContent(vec->bool_params[i])); + } + for (i = 0; i < vec->string_params.size(); ++i) { + vc_it.add_after_then_move(new ParamContent(vec->string_params[i])); + } + for (i = 0; i < vec->double_params.size(); ++i) { + vc_it.add_after_then_move(new ParamContent(vec->double_params[i])); + } } // Count the # of entries starting with a specific prefix. for (vc_it.mark_cycle_pt(); !vc_it.cycled_list(); vc_it.forward()) { - VariableContent* vc = vc_it.data(); + ParamContent* vc = vc_it.data(); STRING tag; STRING tag2; STRING tag3; @@ -237,14 +233,14 @@ SVMenuNode* VariablesEditor::BuildListOfAllLeaves() { // find all variables. 
amount[tag3.string()]++; } - vclist.sort(VariableContent::Compare); // Sort the list alphabetically. + vclist.sort(ParamContent::Compare); // Sort the list alphabetically. SVMenuNode* other = mr->AddChild("OTHER"); // go through the list again and this time create the menu structure. vc_it.move_to_first(); for (vc_it.mark_cycle_pt(); !vc_it.cycled_list(); vc_it.forward()) { - VariableContent* vc = vc_it.data(); + ParamContent* vc = vc_it.data(); STRING tag; STRING tag2; STRING tag3; @@ -270,15 +266,15 @@ SVMenuNode* VariablesEditor::BuildListOfAllLeaves() { // find all variables. } // Event listener. Waits for SVET_POPUP events and processes them. -void VariablesEditor::Notify(const SVEvent* sve) { +void ParamsEditor::Notify(const SVEvent* sve) { if (sve->type == SVET_POPUP) { // only catch SVET_POPUP! char* param = sve->parameter; if (sve->command_id == writeCommands[0]) { - WriteVars(param, false); + WriteParams(param, false); } else if (sve->command_id == writeCommands[1]) { - WriteVars(param, true); + WriteParams(param, true); } else { - VariableContent* vc = VariableContent::GetVariableContentById( + ParamContent* vc = ParamContent::GetParamContentById( sve->command_id); vc->SetValue(param); sv_window_->AddMessage("Setting %s to %s", @@ -287,13 +283,13 @@ void VariablesEditor::Notify(const SVEvent* sve) { } } -// Integrate the variables editor as popupmenu into the existing scrollview +// Integrate the parameters editor as popupmenu into the existing scrollview // window (usually the pg editor). If sv == null, create a new empty -// empty window and attach the variables editor to that window (ugly). -VariablesEditor::VariablesEditor(const tesseract::Tesseract* tess, +// empty window and attach the parameters editor to that window (ugly). 
+ParamsEditor::ParamsEditor(tesseract::Tesseract* tess, ScrollView* sv) { if (sv == NULL) { - const char* name = "VarEditorMAIN"; + const char* name = "ParamEditorMAIN"; sv = new ScrollView(name, 1, 1, 200, 200, 300, 200); } @@ -302,31 +298,30 @@ VariablesEditor::VariablesEditor(const tesseract::Tesseract* tess, //Only one event handler per window. //sv->AddEventHandler((SVEventHandler*) this); - SVMenuNode* svMenuRoot = BuildListOfAllLeaves(); + SVMenuNode* svMenuRoot = BuildListOfAllLeaves(tess); - STRING varfile; - varfile = tess->datadir; - varfile += VARDIR; // variables dir - varfile += "edited"; // actual name + STRING paramfile; + paramfile = tess->datadir; + paramfile += VARDIR; // parameters dir + paramfile += "edited"; // actual name SVMenuNode* std_menu = svMenuRoot->AddChild ("Build Config File"); - writeCommands[0] = nrVariables+1; - std_menu->AddChild("All Variables", writeCommands[0], - varfile.string(), "Config file name?"); + writeCommands[0] = nrParams+1; + std_menu->AddChild("All Parameters", writeCommands[0], + paramfile.string(), "Config file name?"); - writeCommands[1] = nrVariables+2; - std_menu->AddChild ("changed_ Variables Only", writeCommands[1], - varfile.string(), "Config file name?"); + writeCommands[1] = nrParams+2; + std_menu->AddChild ("changed_ Parameters Only", writeCommands[1], + paramfile.string(), "Config file name?"); svMenuRoot->BuildMenu(sv, false); } -// Write all (changed_) variables to a config file. -void VariablesEditor::WriteVars(char *filename, // in this file - bool changes_only // changed_ vars only? - ) { +// Write all (changed_) parameters to a config file. 
+void ParamsEditor::WriteParams(char *filename, + bool changes_only) { FILE *fp; // input file char msg_str[255]; // if file exists @@ -344,10 +339,10 @@ void VariablesEditor::WriteVars(char *filename, // in this file return; } - for (std::map::iterator iter = vcMap.begin(); + for (std::map::iterator iter = vcMap.begin(); iter != vcMap.end(); ++iter) { - VariableContent* cur = iter->second; + ParamContent* cur = iter->second; if (!changes_only || cur->HasChanged()) { fprintf (fp, "%-25s %-12s # %s\n", cur->GetName(), cur->GetValue(), cur->GetDescription()); diff --git a/ccmain/paramsd.h b/ccmain/paramsd.h new file mode 100644 index 0000000000..12ddc8ee20 --- /dev/null +++ b/ccmain/paramsd.h @@ -0,0 +1,124 @@ +/////////////////////////////////////////////////////////////////////// +// File: paramsd.cpp +// Description: Tesseract parameter editor +// Author: Joern Wanke +// Created: Wed Jul 18 10:05:01 PDT 2007 +// +// (C) Copyright 2007, Google Inc. +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +/////////////////////////////////////////////////////////////////////// +// +// Tesseract parameter editor is used to edit all the parameters used +// within tesseract from the ui. +#ifndef GRAPHICS_DISABLED +#ifndef VARABLED_H +#define VARABLED_H + +#include "elst.h" +#include "scrollview.h" +#include "params.h" +#include "tesseractclass.h" + +class SVMenuNode; + +// A list of all possible parameter types used. 
+enum ParamType { + VT_INTEGER, + VT_BOOLEAN, + VT_STRING, + VT_DOUBLE +}; + +// A rather hackish helper structure which can take any kind of parameter input +// (defined by ParamType) and do a couple of common operations on them, like +// comparisond or getting its value. It is used in the context of the +// ParamsEditor as a bridge from the internal tesseract parameters to the +// ones displayed by the ScrollView server. +class ParamContent : public ELIST_LINK { + public: + // Compare two VC objects by their name. + static int Compare(const void* v1, const void* v2); + + // Gets a VC object identified by its ID. + static ParamContent* GetParamContentById(int id); + + // Constructors for the various ParamTypes. + ParamContent() { + } + ParamContent(tesseract::StringParam* it); + ParamContent(tesseract::IntParam* it); + ParamContent(tesseract::BoolParam* it); + ParamContent(tesseract::DoubleParam* it); + + + // Getters and Setters. + void SetValue(const char* val); + const char* GetValue() const; + const char* GetName() const; + const char* GetDescription() const; + + int GetId() { return my_id_; } + bool HasChanged() { return changed_; } + + private: + // The unique ID of this VC object. + int my_id_; + // Whether the parameter was changed_ and thus needs to be rewritten. + bool changed_; + // The actual ParamType of this VC object. + ParamType param_type_; + + tesseract::StringParam* sIt; + tesseract::IntParam* iIt; + tesseract::BoolParam* bIt; + tesseract::DoubleParam* dIt; +}; + +ELISTIZEH(ParamContent) + +// The parameters editor enables the user to edit all the parameters used within +// tesseract. It can be invoked on its own, but is supposed to be invoked by +// the program editor. +class ParamsEditor : public SVEventHandler { + public: + // Integrate the parameters editor as popupmenu into the existing scrollview + // window (usually the pg editor). If sv == null, create a new empty + // empty window and attach the parameter editor to that window (ugly). 
+ ParamsEditor(tesseract::Tesseract*, ScrollView* sv = NULL); + + // Event listener. Waits for SVET_POPUP events and processes them. + void Notify(const SVEvent* sve); + + private: + // Gets the up to the first 3 prefixes from s (split by _). + // For example, tesseract_foo_bar will be split into tesseract,foo and bar. + void GetPrefixes(const char* s, STRING* level_one, + STRING* level_two, STRING* level_three); + + // Gets the first n words (split by _) and puts them in t. + // For example, tesseract_foo_bar with N=2 will yield tesseract_foo_. + void GetFirstWords(const char *s, // source string + int n, // number of words + char *t); // target string + + // Find all editable parameters used within tesseract and create a + // SVMenuNode tree from it. + SVMenuNode *BuildListOfAllLeaves(tesseract::Tesseract *tess); + + // Write all (changed_) parameters to a config file. + void WriteParams(char* filename, bool changes_only); + + ScrollView* sv_window_; +}; + +#endif +#endif diff --git a/ccmain/pgedit.cpp b/ccmain/pgedit.cpp index 48d1cfd025..814afb33e9 100755 --- a/ccmain/pgedit.cpp +++ b/ccmain/pgedit.cpp @@ -21,20 +21,22 @@ #pragma warning(disable:4244) // Conversion warnings #endif +// Include automatically generated configuration file if running autoconf. +#ifdef HAVE_CONFIG_H +#include "config_auto.h" +#endif + #include "pgedit.h" #include #include #include "genblob.h" -#include "tessio.h" -#include "tessout.h" #include "tordmain.h" #include "statistc.h" #include "debugwin.h" #include "svshowim.h" -#include "mainblk.h" -#include "varabled.h" +#include "paramsd.h" #include "string.h" #include "scrollview.h" @@ -45,47 +47,31 @@ #include "blread.h" -// Include automatically generated configuration file if running autoconf. 
-#ifdef HAVE_CONFIG_H -#include "config_auto.h" -#endif - #ifndef GRAPHICS_DISABLED -#define ASC_HEIGHT (2 * bln_baseline_offset + bln_x_height) -#define X_HEIGHT (bln_baseline_offset + bln_x_height) -#define BL_HEIGHT bln_baseline_offset +#define ASC_HEIGHT (2 * kBlnBaselineOffset + kBlnXHeight) +#define X_HEIGHT (kBlnBaselineOffset + kBlnXHeight) +#define BL_HEIGHT kBlnBaselineOffset #define DESC_HEIGHT 0 #define MAXSPACING 128 /*max expected spacing in pix */ const ERRCODE EMPTYBLOCKLIST = "No blocks to edit"; -extern IMAGE page_image; enum CMD_EVENTS { NULL_CMD_EVENT, - DELETE_CMD_EVENT, - COPY_CMD_EVENT, CHANGE_DISP_CMD_EVENT, - CHANGE_TEXT_CMD_EVENT, - TOGGLE_SEG_CMD_EVENT, DUMP_WERD_CMD_EVENT, SHOW_POINT_CMD_EVENT, - ROW_SPACE_STAT_CMD_EVENT, - BLOCK_SPACE_STAT_CMD_EVENT, SHOW_BLN_WERD_CMD_EVENT, - SEGMENT_WERD_CMD_EVENT, + DEBUG_WERD_CMD_EVENT, BOUNDING_BOX_CMD_EVENT, CORRECT_TEXT_CMD_EVENT, POLYGONAL_CMD_EVENT, BL_NORM_CMD_EVENT, BITMAP_CMD_EVENT, - TIDY_CMD_EVENT, - VIEW_CMD_EVENT, IMAGE_CMD_EVENT, BLOCKS_CMD_EVENT, BASELINES_CMD_EVENT, - WRITE_CMD_EVENT, - NEW_SOURCE_CMD_EVENT, UNIFORM_DISP_CMD_EVENT, REFRESH_CMD_EVENT, QUIT_CMD_EVENT, @@ -100,196 +86,51 @@ enum CMD_EVENTS */ ScrollView* image_win; -VariablesEditor* ve; +ParamsEditor* pe; bool stillRunning = false; #ifdef __UNIX__ -FILE *debug_window = NULL; // opened on demand +FILE *debug_window = NULL; // opened on demand #endif - // baseline norm words -ScrollView* bln_word_window = NULL; +ScrollView* bln_word_window = NULL; // baseline norm words -CMD_EVENTS mode = CHANGE_DISP_CMD_EVENT; - // Selected words op +CMD_EVENTS mode = CHANGE_DISP_CMD_EVENT; // selected words op +// These variables should remain global, since they are only used for the +// debug mode (in which only a single Tesseract thread/instance will be exist). 
BITS16 word_display_mode; BOOL8 display_image = FALSE; BOOL8 display_blocks = FALSE; BOOL8 display_baselines = FALSE; -BOOL8 viewing_source = TRUE; - -BLOCK_LIST *source_block_list = NULL; // image blocks -BLOCK_LIST target_block_list; // target blocks -BLOCK_LIST *other_block_list = &target_block_list; - -BOOL8 source_changed = FALSE; // Changes not saved -BOOL8 target_changed = FALSE; // Changes not saved -BOOL8 *other_image_changed = &target_changed; - - -/* Public globals */ - -#define EXTERN - -EXTERN BLOCK_LIST *current_block_list = NULL; -EXTERN BOOL8 *current_image_changed = &source_changed; - -/* Variables */ - -EXTERN STRING_VAR(editor_image_win_name, "EditorImage", -"Editor image window name"); -EXTERN INT_VAR(editor_image_xpos, 590, "Editor image X Pos"); -EXTERN INT_VAR(editor_image_ypos, 10, "Editor image Y Pos"); -EXTERN INT_VAR(editor_image_menuheight, 50, "Add to image height for menu bar"); -EXTERN INT_VAR(editor_image_word_bb_color, ScrollView::BLUE, -"Word bounding box colour"); -EXTERN INT_VAR(editor_image_blob_bb_color, ScrollView::YELLOW, -"Blob bounding box colour"); -EXTERN INT_VAR(editor_image_text_color, ScrollView::WHITE, -"Correct text colour"); - -EXTERN STRING_VAR(editor_dbwin_name, "EditorDBWin", -"Editor debug window name"); -EXTERN INT_VAR(editor_dbwin_xpos, 50, "Editor debug window X Pos"); -EXTERN INT_VAR(editor_dbwin_ypos, 500, "Editor debug window Y Pos"); -EXTERN INT_VAR(editor_dbwin_height, 24, "Editor debug window height"); -EXTERN INT_VAR(editor_dbwin_width, 80, "Editor debug window width"); - -EXTERN STRING_VAR(editor_word_name, "BlnWords", "BL normalised word window"); -EXTERN INT_VAR(editor_word_xpos, 60, "Word window X Pos"); -EXTERN INT_VAR(editor_word_ypos, 510, "Word window Y Pos"); -EXTERN INT_VAR(editor_word_height, 240, "Word window height"); -EXTERN INT_VAR(editor_word_width, 655, "Word window width"); - -EXTERN double_VAR(editor_smd_scale_factor, 1.0, "Scaling for smd image"); - -/** - * add_word() - * - * 
Inserts the a word into a specified block list. The list is searched for a - * block and row of the same file as those of the word to be added, which - * contain the bounding box of the word. If such a row is found, the new - * word is inserted into the row in the correct X order. If the - * block is found, but not the row, a copy of the word's old row is added to - * the block in the correct Y order, and the word is put in that row. - * If neither the row nor the block are found, then the word's old block is - * copied with only the word's row. It is added to the block list in the - * correct Y order. - */ - -void add_word( // to block list - WERD *word, //< word to be added - ROW *src_row, //< source row - BLOCK *src_block, //< source block - BLOCK_LIST *dest_block_list //< add to this - ) { - BLOCK_IT block_it(dest_block_list); - BLOCK *block; // current block - BLOCK *dest_block = NULL; // destination block - ROW_IT row_it; - ROW *row; // destination row - ROW *dest_row = NULL; // destination row - WERD_IT word_it; - TBOX word_box = word->bounding_box(); - TBOX insert_point_word_box; - BOOL8 seen_blocks_for_current_file = FALSE; - - block_it.mark_cycle_pt(); - while(!block_it.cycled_list() &&(dest_block == NULL)) { - block = block_it.data(); - if ((block->bounding_box().contains(word_box)) && - (strcmp(block->name(), src_block->name()) == 0)) { - dest_block = block; // found dest block - row_it.set_to_list(block->row_list()); - row_it.mark_cycle_pt(); - while((!row_it.cycled_list()) &&(dest_row == NULL)) { - row = row_it.data(); - if (row->bounding_box().contains(word_box)) - dest_row = row; // found dest row - else - row_it.forward(); - } - } - else - block_it.forward(); - } - - if (dest_block == NULL) { // make a new one - dest_block = new BLOCK; - *dest_block = *src_block; - - block_it.set_to_list(dest_block_list); - for (block_it.mark_cycle_pt(); - !block_it.cycled_list(); block_it.forward()) { - block = block_it.data(); - - if 
(!seen_blocks_for_current_file && - (strcmp(block->name(), dest_block->name()) == 0)) - seen_blocks_for_current_file = TRUE; - - if (seen_blocks_for_current_file && - ((strcmp(block->name(), dest_block->name()) != 0) || - (block->bounding_box().top() < - dest_block->bounding_box().top()))) - break; - } - - if (block_it.cycled_list()) - // didn't find insrt pt - block_it.add_to_end(dest_block); - else - // did find insert pt - block_it.add_before_stay_put(dest_block); - } - - if (dest_row == NULL) { // make a new one - dest_row = new ROW; - *dest_row = *src_row; - - row_it.set_to_list(dest_block->row_list()); - for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) { - if (row_it.data()->bounding_box().top() < - dest_row->bounding_box().top()) - break; - } - - if (row_it.cycled_list()) - // didn't find insrt pt - row_it.add_to_end(dest_row); - else - // did find insert pt - row_it.add_before_stay_put(dest_row); - } - - /* dest_block and dest_row are now found or built and inserted as necessesary - so add the word to dest row */ - - word_it.set_to_list(dest_row->word_list()); - for (word_it.mark_cycle_pt(); !word_it.cycled_list(); word_it.forward()) { - if (word_it.data()->bounding_box().right() >= word_box.left()) - break; - } - - if (word_it.cycled_list()) - word_it.add_to_end(word); // didn't find insrt pt - else { // did find insert pt - insert_point_word_box = word_it.data()->bounding_box(); - if (insert_point_word_box.contains(word_box) || - word_box.contains(insert_point_word_box)) - image_win->AddMessage("Refusing to add words which obliterate," - " or are obliterated by, others"); - else { - if (word_it.data()->bounding_box().left() > - word->bounding_box().left()) - // infront of insert pt - word_it.add_before_stay_put(word); - else - // behind insert pt - word_it.add_after_stay_put(word); - } - } -} +PAGE_RES *current_page_res = NULL; + +STRING_VAR(editor_image_win_name, "EditorImage", + "Editor image window name"); 
+INT_VAR(editor_image_xpos, 590, "Editor image X Pos"); +INT_VAR(editor_image_ypos, 10, "Editor image Y Pos"); +INT_VAR(editor_image_menuheight, 50, "Add to image height for menu bar"); +INT_VAR(editor_image_word_bb_color, ScrollView::BLUE, + "Word bounding box colour"); +INT_VAR(editor_image_blob_bb_color, ScrollView::YELLOW, + "Blob bounding box colour"); +INT_VAR(editor_image_text_color, ScrollView::WHITE, + "Correct text colour"); + +STRING_VAR(editor_dbwin_name, "EditorDBWin", + "Editor debug window name"); +INT_VAR(editor_dbwin_xpos, 50, "Editor debug window X Pos"); +INT_VAR(editor_dbwin_ypos, 500, "Editor debug window Y Pos"); +INT_VAR(editor_dbwin_height, 24, "Editor debug window height"); +INT_VAR(editor_dbwin_width, 80, "Editor debug window width"); + +STRING_VAR(editor_word_name, "BlnWords", "BL normalized word window"); +INT_VAR(editor_word_xpos, 60, "Word window X Pos"); +INT_VAR(editor_word_ypos, 510, "Word window Y Pos"); +INT_VAR(editor_word_height, 240, "Word window height"); +INT_VAR(editor_word_width, 655, "Word window width"); + +STRING_VAR(editor_debug_config_file, "", "Config file to apply to single words"); class BlnEventHandler : public SVEventHandler { public: @@ -297,7 +138,7 @@ class BlnEventHandler : public SVEventHandler { if (sv_event->type == SVET_DESTROY) bln_word_window = NULL; else if (sv_event->type == SVET_CLICK) - show_point(current_block_list, sv_event->x, sv_event->y); + show_point(current_page_res, sv_event->x, sv_event->y); } }; @@ -327,106 +168,21 @@ ScrollView* bln_word_window_handle() { // return handle * new window needs to be. Create it and re-display. 
*/ -void build_image_window(TBOX page_bounding_box) { +void build_image_window(int width, int height) { if (image_win != NULL) { delete image_win; } image_win = new ScrollView(editor_image_win_name.string(), editor_image_xpos, editor_image_ypos, - page_bounding_box.right() + 1, - page_bounding_box.top() + - editor_image_menuheight + 1, - page_bounding_box.right() + 1, - page_bounding_box.top() + 1, + width + 1, + height + editor_image_menuheight + 1, + width + 1, + height + 1, true); } - -/** - * build_menu() - * - * Construct the menu tree used by the command window - */ -namespace tesseract { -SVMenuNode *Tesseract::build_menu_new() { - - SVMenuNode* parent_menu; - SVMenuNode* root_menu_item = new SVMenuNode(); - - SVMenuNode* modes_menu_item = root_menu_item->AddChild("MODES"); - - modes_menu_item->AddChild("Change Display", - CHANGE_DISP_CMD_EVENT); - modes_menu_item->AddChild("Delete", - DELETE_CMD_EVENT); - modes_menu_item->AddChild("Copy to TARGET", - COPY_CMD_EVENT); - modes_menu_item->AddChild("Change Text", - CHANGE_TEXT_CMD_EVENT); - modes_menu_item->AddChild("Toggle Correct Seg Flg", - TOGGLE_SEG_CMD_EVENT); - modes_menu_item->AddChild("Dump Word", - DUMP_WERD_CMD_EVENT); - modes_menu_item->AddChild("Show Point", - SHOW_POINT_CMD_EVENT); - modes_menu_item->AddChild("Row gaps hist", - ROW_SPACE_STAT_CMD_EVENT); - modes_menu_item->AddChild("Block gaps hist", - BLOCK_SPACE_STAT_CMD_EVENT); - modes_menu_item->AddChild("Show BL Norm Word", - SHOW_BLN_WERD_CMD_EVENT); - modes_menu_item->AddChild("Re-Segment Word", - SEGMENT_WERD_CMD_EVENT); - modes_menu_item->AddChild("Recog Words", - RECOG_WERDS); - modes_menu_item->AddChild("Recog Blobs", - RECOG_PSEUDO); - - parent_menu = root_menu_item->AddChild("DISPLAY"); - - parent_menu->AddChild("Bounding Boxes", - BOUNDING_BOX_CMD_EVENT, FALSE); - parent_menu->AddChild("Correct Text", - CORRECT_TEXT_CMD_EVENT, FALSE); - parent_menu->AddChild("Polygonal Approx", - POLYGONAL_CMD_EVENT, FALSE); - 
parent_menu->AddChild("Baseline Normalised", - BL_NORM_CMD_EVENT, FALSE); - parent_menu->AddChild("Edge Steps", - BITMAP_CMD_EVENT, TRUE); - - parent_menu = root_menu_item->AddChild("OTHER"); - - parent_menu->AddChild("Quit", - QUIT_CMD_EVENT); - parent_menu->AddChild("Tidy Target", - TIDY_CMD_EVENT); - - parent_menu->AddChild("View TARGET", - VIEW_CMD_EVENT, FALSE); - parent_menu->AddChild("Show Image", - IMAGE_CMD_EVENT, FALSE); - parent_menu->AddChild("ShowBlock Outlines", - BLOCKS_CMD_EVENT, FALSE); - parent_menu->AddChild("Show Baselines", - BASELINES_CMD_EVENT, FALSE); - parent_menu->AddChild("Write File", - WRITE_CMD_EVENT, imagebasename.string()); - parent_menu->AddChild("New Source File", - NEW_SOURCE_CMD_EVENT, imagebasename.string()); - parent_menu->AddChild("Uniform Display", - UNIFORM_DISP_CMD_EVENT); - parent_menu->AddChild("Refresh Display", - REFRESH_CMD_EVENT); - - return root_menu_item; -} - -} // namespace tesseract - - /** * display_bln_lines() * - * Display normalised baseline, x-height, ascender limit and descender limit + * Display normalized baseline, x-height, ascender limit and descender limit */ void display_bln_lines(ScrollView* window, @@ -446,251 +202,97 @@ void display_bln_lines(ScrollView* window, maxx, y_offset + scale_factor * ASC_HEIGHT); } - /** - * do_new_source() + * notify() * - * Change to another source file. Automatically tidy page first + * Event handler that processes incoming events, either forwarding + * them to process_cmd_win_event or process_image_event. * */ -namespace tesseract { -void Tesseract::do_new_source( // serialise - ) { - FILE *infp; // input file - - char* name = image_win->ShowInputDialog("New Source File name"); - - STRING name_str(name); - delete[] name; - - if (source_changed) { - - int a = image_win->ShowYesNoDialog( - "Source changes will be LOST. 
Continue?(Y/N)"); - if (a != 'y') { image_win->AddMessage("Write cancelled"); return; } +void PGEventHandler::Notify(const SVEvent* event) { + char myval = '0'; + if (event->type == SVET_POPUP) { + pe->Notify(event); + } // These are handled by ParamsEditor + else if (event->type == SVET_EXIT) { stillRunning = false; } + else if (event->type == SVET_MENU) { + if (strcmp(event->parameter, "true") == 0) { myval = 'T'; } + else if (strcmp(event->parameter, "false") == 0) { myval = 'F'; } + tess_->process_cmd_win_event(event->command_id, &myval); } - - // if not file exists - if (!(infp = fopen(name_str.string(), "r"))) { - - image_win->AddMessage("Cant open file " "%s" "", name_str.string()); - return; + else { + tess_->process_image_event(*event); } - - fclose(infp); - - image_win->AddMessage("Reading file " "%s" "...", name_str.string()); - source_block_list->clear(); - // appends to SOURCE - pgeditor_read_file(name_str, source_block_list); - source_changed = FALSE; - - image_win->AddMessage("Doing automatic Tidy Target..."); - viewing_source = FALSE; // Force viewing source - do_tidy_cmd(); - - image_win->AddMessage("Doing automatic Tidy Target...Done"); - } -} // namespace tesseract - /** - * do_re_display() + * build_menu() * - * Redisplay page + * Construct the menu tree used by the command window */ +namespace tesseract { +SVMenuNode *Tesseract::build_menu_new() { + SVMenuNode* parent_menu; + SVMenuNode* root_menu_item = new SVMenuNode(); -void - // function to call -do_re_display(BOOL8 word_painter( -BLOCK *, ROW *, WERD *)) { - BLOCK_IT block_it(current_block_list); - BLOCK *block; - int block_count = 1; - - ROW_IT row_it; - ROW *row; - - WERD_IT word_it; - WERD *word; - - image_win->Clear(); - if (display_image != 0) { - sv_show_sub_image(&page_image, 0, 0, - page_image.get_xsize(), page_image.get_ysize(), - image_win, 0, 0); - } - - for (block_it.mark_cycle_pt(); - !block_it.cycled_list(); block_it.forward()) { - block = block_it.data(); - 
row_it.set_to_list(block->row_list()); - for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) { - row = row_it.data(); - word_it.set_to_list(row->word_list()); - for (word_it.mark_cycle_pt(); - !word_it.cycled_list(); word_it.forward()) { - word = word_it.data(); - word_painter(block, row, word); - } - if (display_baselines) - row->plot_baseline(image_win, ScrollView::GREEN); - } - if (display_blocks) - block->plot(image_win, block_count++, ScrollView::RED); - } - image_win->Update(); -} - + SVMenuNode* modes_menu_item = root_menu_item->AddChild("MODES"); -/** - * do_tidy_cmd() - * - * Tidy TARGET page - */ + modes_menu_item->AddChild("Change Display", CHANGE_DISP_CMD_EVENT); + modes_menu_item->AddChild("Dump Word", DUMP_WERD_CMD_EVENT); + modes_menu_item->AddChild("Show Point", SHOW_POINT_CMD_EVENT); + modes_menu_item->AddChild("Show BL Norm Word", SHOW_BLN_WERD_CMD_EVENT); + modes_menu_item->AddChild("Config Words", DEBUG_WERD_CMD_EVENT); + modes_menu_item->AddChild("Recog Words", RECOG_WERDS); + modes_menu_item->AddChild("Recog Blobs", RECOG_PSEUDO); -const TBOX do_tidy_cmd() { // tidy - ICOORD shift_vector; - TBOX tidy_box; // Just the tidy area - TBOX source_box; // source file area + parent_menu = root_menu_item->AddChild("DISPLAY"); - source_box = block_list_bounding_box(source_block_list); - // find src area + parent_menu->AddChild("Bounding Boxes", BOUNDING_BOX_CMD_EVENT, FALSE); + parent_menu->AddChild("Correct Text", CORRECT_TEXT_CMD_EVENT, FALSE); + parent_menu->AddChild("Polygonal Approx", POLYGONAL_CMD_EVENT, FALSE); + parent_menu->AddChild("Baseline Normalized", BL_NORM_CMD_EVENT, FALSE); + parent_menu->AddChild("Edge Steps", BITMAP_CMD_EVENT, TRUE); - if (!target_block_list.empty()) { - tidy_box = block_list_compress(&target_block_list); + parent_menu = root_menu_item->AddChild("OTHER"); - /* Shift tidied target above the source image area. 
*/ + parent_menu->AddChild("Quit", QUIT_CMD_EVENT); + parent_menu->AddChild("Show Image", IMAGE_CMD_EVENT, FALSE); + parent_menu->AddChild("ShowBlock Outlines", BLOCKS_CMD_EVENT, FALSE); + parent_menu->AddChild("Show Baselines", BASELINES_CMD_EVENT, FALSE); + parent_menu->AddChild("Uniform Display", UNIFORM_DISP_CMD_EVENT); + parent_menu->AddChild("Refresh Display", REFRESH_CMD_EVENT); - shift_vector = ICOORD(0, source_box.top() + BLOCK_SPACING) - - tidy_box.botleft(); - block_list_move(&target_block_list, shift_vector); - tidy_box.move(shift_vector); - } - source_box += tidy_box; - // big enough for both - build_image_window(source_box); - do_view_cmd(); - return tidy_box; + return root_menu_item; } - /** - * do_view_cmd() + * do_re_display() * - * View TARGET/View SOURCE command + * Redisplay page */ +void Tesseract::do_re_display( + BOOL8 (tesseract::Tesseract::*word_painter)(BLOCK* block, + ROW* row, + WERD_RES* word_res)) { + PAGE_RES_IT pr_it(current_page_res); + int block_count = 1; -void do_view_cmd() { - viewing_source = !viewing_source; image_win->Clear(); - if (viewing_source) { - current_block_list = source_block_list; - current_image_changed = &source_changed; - other_block_list = &target_block_list; - other_image_changed = &target_changed; - do_re_display(&word_display); - } - else { - current_block_list = &target_block_list; - current_image_changed = &target_changed; - other_block_list = source_block_list; - other_image_changed = &source_changed; - do_re_display(&word_display); - } -} - - -/** - * do_write_file() - * - * Serialise a block list to file - * - * If writing image, tidy page and move to(0,0) first - */ - -void do_write_file( // serialise - ) { - - char* name = image_win->ShowInputDialog("File Name"); - - FILE *infp; // input file - char msg_str[80]; - - TBOX enclosing_box; - - // if file exists - if ((infp = fopen(name, "r")) != NULL) { - fclose(infp); - sprintf(msg_str, "Overwrite file " "%s" "?(Y/N)", name); - - int a = 
image_win->ShowYesNoDialog(msg_str); - if (a != 'y') { image_win->AddMessage("Write cancelled"); delete[] name; return; } + if (display_image != 0) { + image_win->Image(pix_binary_, 0, 0); } - infp = fopen(name, "w"); // can we write to it? - if (infp == NULL) { - - image_win->AddMessage("Cant write to file " "%s" "", name); - delete[] name; - return; - } - fclose(infp); - - delete [] name; - - if (!viewing_source && !target_block_list.empty()) { - // Tidy & move to(0,0) - image_win->AddMessage("Automatic tidy..."); - viewing_source = TRUE; // Stay viewing target! - enclosing_box = do_tidy_cmd(); - block_list_move(&target_block_list, -enclosing_box.botleft()); - image_win->AddMessage("Writing file..."); - pgeditor_write_file(name, &target_block_list); - // move back - block_list_move(&target_block_list, - enclosing_box.botleft()); - } - else { - image_win->AddMessage("Writing file..."); - pgeditor_write_file(name, current_block_list); + for (WERD_RES* word = pr_it.word(); word != NULL; word = pr_it.forward()) { + (this->*word_painter)(pr_it.block()->block, pr_it.row()->row, word); + if (display_baselines && pr_it.row() != pr_it.prev_row()) + pr_it.row()->row->plot_baseline(image_win, ScrollView::GREEN); + if (display_blocks && pr_it.block() != pr_it.prev_block()) + pr_it.block()->block->plot(image_win, block_count++, ScrollView::RED); } - image_win->AddMessage("Writing file...Done"); - *current_image_changed = FALSE; - -} - -/** - * notify() - * - * Event handler that processes incoming events, either forwarding - * them to process_cmd_win_event or process_image_event. - * - */ - -void PGEventHandler::Notify(const SVEvent* event) { - char myval = '0'; - if (event->type == SVET_POPUP) { -ve->Notify(event); - } // These are handled by Var. 
Editor - else if (event->type == SVET_EXIT) { stillRunning = false; } - else if (event->type == SVET_MENU) { - if (strcmp(event->parameter, "true") == 0) { myval = 'T'; } - else if (strcmp(event->parameter, "false") == 0) { myval = 'F'; } - tess_->process_cmd_win_event(event->command_id, &myval); - } - else { - tess_->process_image_event(*event); - // else pgeditor_show_point(*event); - } - current_word_quit.set_value(FALSE); - selection_quit.set_value(FALSE); - // replot all var wins + image_win->Update(); } - /** * pgeditor_main() * @@ -699,21 +301,19 @@ ve->Notify(event); * */ -namespace tesseract { -void Tesseract::pgeditor_main(BLOCK_LIST *blocks) { +void Tesseract::pgeditor_main(int width, int height, PAGE_RES *page_res) { - source_block_list = blocks; - current_block_list = blocks; - if (current_block_list->empty()) + current_page_res = page_res; + if (current_page_res->block_res_list.empty()) return; stillRunning = true; - build_image_window(block_list_bounding_box(source_block_list)); + build_image_window(width, height); word_display_mode.turn_on_bit(DF_EDGE_STEP); - do_re_display(&word_set_display); + do_re_display(&tesseract::Tesseract::word_set_display); #ifndef GRAPHICS_DISABLED - ve = new VariablesEditor(this, image_win); + pe = new ParamsEditor(this, image_win); #endif PGEventHandler pgEventHandler(this); @@ -742,34 +342,6 @@ void pgeditor_msg( // message display image_win->AddMessage(msg); } - -/** - * pgeditor_read_file() - * - * Deserialise source file - */ - -namespace tesseract { -void Tesseract::pgeditor_read_file( // of serialised file - STRING &filename, - BLOCK_LIST *blocks // block list to add to - ) { - STRING name = filename; //truncated name - const char *lastdot; //of name - TO_BLOCK_LIST land_blocks, port_blocks; - TBOX page_box; - - lastdot = strrchr (name.string (), '.'); - if (lastdot != NULL) - name[lastdot-name.string()] = '\0'; - if (!read_unlv_file(name, page_image.get_xsize(), page_image.get_ysize(), - blocks)) - 
FullPageBlock(page_image.get_xsize(), page_image.get_ysize(), blocks); - find_components(blocks, &land_blocks, &port_blocks, &page_box); - textord_page(page_box.topright(), blocks, &land_blocks, &port_blocks, this); -} -} // namespace tesseract - /** * pgeditor_show_point() * @@ -781,43 +353,6 @@ void pgeditor_show_point( // display coords image_win->AddMessage("Pointing at(%d, %d)", event->x, event->y); } - -/** - * pgeditor_write_file() - * - * Serialise a block list to file - * - */ - -void pgeditor_write_file( // serialise - char *name, // file name - BLOCK_LIST *blocks // block list to write - ) { - FILE *infp; // input file - BLOCK_IT block_it(blocks); // block iterator - BLOCK *block; // current block - ROW_IT row_it; // row iterator - - infp = fopen(name, "w"); // create output file - if (infp == NULL) - CANTCREATEFILE.error("pgeditor_write_file", EXIT, name); - - for (block_it.mark_cycle_pt(); - !block_it.cycled_list(); block_it.forward()) { - block = block_it.extract(); - - row_it.set_to_list(block->row_list()); - for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) - // ensure correct - row_it.data()->recalc_bounding_box(); - - block->serialise(infp); // serialize non-empty - block_it.add_after_then_move(block); - } - fclose(infp); -} - - /** * process_cmd_win_event() * @@ -837,25 +372,17 @@ BOOL8 Tesseract::process_cmd_win_event( // UI command semantics case NULL_CMD_EVENT: break; - case VIEW_CMD_EVENT: - do_view_cmd(); - break; case CHANGE_DISP_CMD_EVENT: - case DELETE_CMD_EVENT: - case CHANGE_TEXT_CMD_EVENT: - case TOGGLE_SEG_CMD_EVENT: case DUMP_WERD_CMD_EVENT: case SHOW_POINT_CMD_EVENT: - case ROW_SPACE_STAT_CMD_EVENT: - case BLOCK_SPACE_STAT_CMD_EVENT: case SHOW_BLN_WERD_CMD_EVENT: - case SEGMENT_WERD_CMD_EVENT: + case RECOG_WERDS: + case RECOG_PSEUDO: mode =(CMD_EVENTS) cmd_event; break; - case COPY_CMD_EVENT: - mode =(CMD_EVENTS) cmd_event; - if (!viewing_source) - image_win->AddMessage("Can't COPY while viewing target!"); + case 
DEBUG_WERD_CMD_EVENT: + mode = DEBUG_WERD_CMD_EVENT; + word_config_ = image_win->ShowInputDialog("Config File Name"); break; case BOUNDING_BOX_CMD_EVENT: if (new_value[0] == 'T') @@ -893,53 +420,27 @@ BOOL8 Tesseract::process_cmd_win_event( // UI command semantics mode = CHANGE_DISP_CMD_EVENT; break; case UNIFORM_DISP_CMD_EVENT: - do_re_display(&word_set_display); - *current_image_changed = TRUE; - break; - case WRITE_CMD_EVENT: - do_write_file(); - break; - case TIDY_CMD_EVENT: - if (!target_block_list.empty()) { - viewing_source = TRUE; // Force viewing target - do_tidy_cmd(); - } - break; - case NEW_SOURCE_CMD_EVENT: - do_new_source(); + do_re_display(&tesseract::Tesseract::word_set_display); break; case IMAGE_CMD_EVENT: display_image =(new_value[0] == 'T'); - do_re_display(&word_display); + do_re_display(&tesseract::Tesseract::word_display); break; case BLOCKS_CMD_EVENT: display_blocks =(new_value[0] == 'T'); - do_re_display(&word_display); + do_re_display(&tesseract::Tesseract::word_display); break; case BASELINES_CMD_EVENT: display_baselines =(new_value[0] == 'T'); - do_re_display(&word_display); + do_re_display(&tesseract::Tesseract::word_display); break; case REFRESH_CMD_EVENT: - do_re_display(&word_display); + do_re_display(&tesseract::Tesseract::word_display); break; case QUIT_CMD_EVENT: - if (source_changed || target_changed) { - int a = image_win->ShowYesNoDialog( - "Changes not saved. 
Exit anyway?(Y/N)"); - if (a == 'y') { exit = TRUE; ScrollView::Exit(); } - } - else { - exit = TRUE; - ScrollView::Exit(); - } + exit = TRUE; + ScrollView::Exit(); break; - case RECOG_WERDS: - mode = RECOG_WERDS; - break; - case RECOG_PSEUDO: - mode = RECOG_PSEUDO; - break; default: sprintf(msg, "Unrecognised event " INT32FORMAT "(%s)", @@ -962,6 +463,8 @@ BOOL8 Tesseract::process_cmd_win_event( // UI command semantics */ void Tesseract::process_image_event( // action in image win const SVEvent &event) { + // The following variable should remain static, since it is used by + // debug editor, which uses a single Tesseract instance. static ICOORD down; ICOORD up; TBOX selection_box; @@ -971,10 +474,10 @@ void Tesseract::process_image_event( // action in image win case SVET_SELECTION: if (event.type == SVET_SELECTION) { - down.set_x(event.x - event.x_size); + down.set_x(event.x + event.x_size); down.set_y(event.y + event.y_size); if (mode == SHOW_POINT_CMD_EVENT) - show_point(current_block_list, event.x, event.y); + show_point(current_page_res, event.x, event.y); } up.set_x(event.x); @@ -984,64 +487,36 @@ void Tesseract::process_image_event( // action in image win switch(mode) { case CHANGE_DISP_CMD_EVENT: - ::process_selected_words(current_block_list, - selection_box, - &word_blank_and_set_display); + process_selected_words( + current_page_res, + selection_box, + &tesseract::Tesseract::word_blank_and_set_display); break; - case COPY_CMD_EVENT: - if (!viewing_source) - image_win->AddMessage("Can't COPY while viewing target!"); - else - ::process_selected_words(current_block_list, - selection_box, - &word_copy); - break; - case DELETE_CMD_EVENT: - ::process_selected_words_it(current_block_list, - selection_box, - &word_delete); - break; - case CHANGE_TEXT_CMD_EVENT: - ::process_selected_words(current_block_list, + case DUMP_WERD_CMD_EVENT: + process_selected_words(current_page_res, selection_box, - &word_change_text); - break; - case TOGGLE_SEG_CMD_EVENT: - 
::process_selected_words(current_block_list, - selection_box, - &word_toggle_seg); - break; - case DUMP_WERD_CMD_EVENT: - ::process_selected_words(current_block_list, - selection_box, - &word_dumper); + &tesseract::Tesseract::word_dumper); break; case SHOW_BLN_WERD_CMD_EVENT: - ::process_selected_words(current_block_list, - selection_box, - &word_bln_display); - break; - case SEGMENT_WERD_CMD_EVENT: - re_segment_word(current_block_list, selection_box); - break; - case ROW_SPACE_STAT_CMD_EVENT: - row_space_stat(current_block_list, selection_box); + process_selected_words(current_page_res, + selection_box, + &tesseract::Tesseract::word_bln_display); break; - case BLOCK_SPACE_STAT_CMD_EVENT: - block_space_stat(current_block_list, selection_box); + case DEBUG_WERD_CMD_EVENT: + debug_word(current_page_res, selection_box); break; case SHOW_POINT_CMD_EVENT: break; // ignore up event case RECOG_WERDS: image_win->AddMessage("Recogging selected words"); - this->process_selected_words(current_block_list, + this->process_selected_words(current_page_res, selection_box, &Tesseract::recog_interactive); break; case RECOG_PSEUDO: image_win->AddMessage("Recogging selected blobs"); - recog_pseudo_word(current_block_list, selection_box); + recog_pseudo_word(current_page_res, selection_box); break; default: @@ -1053,328 +528,17 @@ void Tesseract::process_image_event( // action in image win break; } } -} // namespace tesseract - - -/** - * re_scale_and_move_bln_word() - * - * Scale and move a bln word so that it fits in a specified bounding box. 
- * Scale by width or height to generate the largest image - */ - -float re_scale_and_move_bln_word( // put bln word in box - WERD *norm_word, //< BL normalised word - const TBOX &box //< destination box - ) { - TBOX norm_box = norm_word->bounding_box(); - float width_scale_factor; - float height_scale_factor; - float selected_scale_factor; - - width_scale_factor = box.width() /(float) norm_box.width(); - height_scale_factor = box.height() /(float) ASC_HEIGHT; - - if ((ASC_HEIGHT * width_scale_factor) <= box.height()) - selected_scale_factor = width_scale_factor; - else - selected_scale_factor = height_scale_factor; - - norm_word->scale(selected_scale_factor); - norm_word->move(ICOORD((box.left() + box.width() / 2), box.bottom())); - return selected_scale_factor; -} - /** - * re_segment_word() - * - * If all selected blobs are in the same row, remove them from their current - * word(s) and put them in a new word. Insert the new word in the row at the - * appropriate point. Delete any empty words. + * debug_word * + * Process the whole image, but load word_config_ for the selected word(s). */ - -void re_segment_word( // break/join words - BLOCK_LIST *block_list, // blocks to check - TBOX &selection_box) { - BLOCK_IT block_it(block_list); - BLOCK *block; - BLOCK *block_to_process = NULL; - ROW_IT row_it; - ROW *row; - ROW *row_to_process = NULL; - WERD_IT word_it; - WERD *word; - WERD *new_word = NULL; - BOOL8 polyg = false; - PBLOB_IT blob_it; - PBLOB_LIST dummy; // Just to initialize new_blob_it. 
- PBLOB_IT new_blob_it = &dummy; - PBLOB *blob; - - /* Find row to process - error if selections from more than one row */ - - for (block_it.mark_cycle_pt(); - !block_it.cycled_list(); block_it.forward()) { - block = block_it.data(); - if (block->bounding_box().overlap(selection_box)) { - row_it.set_to_list(block->row_list()); - for (row_it.mark_cycle_pt(); - !row_it.cycled_list(); row_it.forward()) { - row = row_it.data(); - if (row->bounding_box().overlap(selection_box)) { - if (row_to_process == NULL) { - block_to_process = block; - row_to_process = row; - } - else { - image_win->AddMessage("Cant resegment words " - "in more than one row"); - return; - } - } - } - } - } - /* Continue with row_to_process */ - - word_it.set_to_list(row_to_process->word_list()); - for (word_it.mark_cycle_pt(); !word_it.cycled_list(); word_it.forward()) { - word = word_it.data(); - polyg = word->flag(W_POLYGON); - if (word->bounding_box().overlap(selection_box)) { - blob_it.set_to_list(word->gblob_list()); - for (blob_it.mark_cycle_pt(); - !blob_it.cycled_list(); blob_it.forward()) { - blob = blob_it.data(); - if (gblob_bounding_box(blob, polyg).overlap(selection_box)) { - if (new_word == NULL) { - new_word = word->shallow_copy(); - new_blob_it.set_to_list(new_word->gblob_list()); - } - new_blob_it.add_to_end(blob_it.extract()); - // move blob - } - } - if (blob_it.empty()) { // no blobs in word - // so delete word - delete word_it.extract(); - } - } - } - if (new_word != NULL) { - gblob_sort_list(new_word->gblob_list(), polyg); - word_it.add_to_end(new_word); - word_it.sort(word_comparator); - row_to_process->bounding_box().plot(image_win, - ScrollView::BLACK, ScrollView::BLACK); - word_it.set_to_list(row_to_process->word_list()); - for (word_it.mark_cycle_pt(); - !word_it.cycled_list(); word_it.forward()) - word_display(block_to_process, row_to_process, word_it.data()); - *current_image_changed = TRUE; - } -} - -/// show space stats -void block_space_stat(BLOCK_LIST *block_list, 
// blocks to check - TBOX &selection_box) { - BLOCK_IT block_it(block_list); - BLOCK *block; - ROW_IT row_it; - ROW *row; - int block_idx = 0; - STATS all_gap_stats(0, MAXSPACING); - WERD_IT word_it; - WERD *word; - PBLOB_IT blob_it; - PBLOB *blob; - C_BLOB_IT cblob_it; - C_BLOB *cblob; - TBOX box; - inT16 prev_box_right; - inT16 gap_width; - inT16 min_inter_word_gap; - inT16 max_inter_char_gap; - - /* Find blocks to process */ - - for (block_it.mark_cycle_pt(); - !block_it.cycled_list(); block_it.forward()) { - block_idx++; - block = block_it.data(); - if (block->bounding_box().overlap(selection_box)) { - /* Process a block */ - tprintf("\nBlock %d\n", block_idx); - min_inter_word_gap = 3000; - max_inter_char_gap = 0; - all_gap_stats.clear(); - row_it.set_to_list(block->row_list()); - for (row_it.mark_cycle_pt(); - !row_it.cycled_list(); row_it.forward()) { - row = row_it.data(); - prev_box_right = -1; - word_it.set_to_list(row->word_list()); - for (word_it.mark_cycle_pt(); - !word_it.cycled_list(); word_it.forward()) { - word = word_it.data(); - if (word->flag(W_POLYGON)) { - blob_it.set_to_list(word->blob_list()); - for (blob_it.mark_cycle_pt(); - !blob_it.cycled_list(); blob_it.forward()) { - blob = blob_it.data(); - box = blob->bounding_box(); - if (prev_box_right > -1) { - gap_width = box.left() - prev_box_right; - all_gap_stats.add(gap_width, 1); - if (blob_it.at_first()) { - if (gap_width < min_inter_word_gap) - min_inter_word_gap = gap_width; - } - else { - if (gap_width > max_inter_char_gap) - max_inter_char_gap = gap_width; - } - } - prev_box_right = box.right(); - } - } - else { - cblob_it.set_to_list(word->cblob_list()); - for (cblob_it.mark_cycle_pt(); - !cblob_it.cycled_list(); cblob_it.forward()) { - cblob = cblob_it.data(); - box = cblob->bounding_box(); - if (prev_box_right > -1) { - gap_width = box.left() - prev_box_right; - all_gap_stats.add(gap_width, 1); - if (cblob_it.at_first()) { - if (gap_width < min_inter_word_gap) - min_inter_word_gap = 
gap_width; - } - else { - if (gap_width > max_inter_char_gap) - max_inter_char_gap = gap_width; - } - } - prev_box_right = box.right(); - } - } - } - } - tprintf("Max inter char gap = %d.\nMin inter word gap = %d.\n", - max_inter_char_gap, min_inter_word_gap); - all_gap_stats.short_print(NULL, TRUE); - all_gap_stats.smooth(2); - tprintf("SMOOTHED DATA...\n"); - all_gap_stats.short_print(NULL, TRUE); - } - } -} - -/// show space stats -void row_space_stat(BLOCK_LIST *block_list, // blocks to check - TBOX &selection_box) { - BLOCK_IT block_it(block_list); - BLOCK *block; - ROW_IT row_it; - ROW *row; - int block_idx = 0; - int row_idx; - STATS all_gap_stats(0, MAXSPACING); - WERD_IT word_it; - WERD *word; - PBLOB_IT blob_it; - PBLOB *blob; - C_BLOB_IT cblob_it; - C_BLOB *cblob; - TBOX box; - inT16 prev_box_right; - inT16 gap_width; - inT16 min_inter_word_gap; - inT16 max_inter_char_gap; - - /* Find rows to process */ - - for (block_it.mark_cycle_pt(); - !block_it.cycled_list(); block_it.forward()) { - block_idx++; - block = block_it.data(); - if (block->bounding_box().overlap(selection_box)) { - row_it.set_to_list(block->row_list()); - row_idx = 0; - for (row_it.mark_cycle_pt(); - !row_it.cycled_list(); row_it.forward()) { - row_idx++; - row = row_it.data(); - if (row->bounding_box().overlap(selection_box)) { - /* Process a row */ - - tprintf("\nBlock %d Row %d\n", block_idx, row_idx); - min_inter_word_gap = 3000; - max_inter_char_gap = 0; - prev_box_right = -1; - all_gap_stats.clear(); - word_it.set_to_list(row->word_list()); - for (word_it.mark_cycle_pt(); - !word_it.cycled_list(); word_it.forward()) { - word = word_it.data(); - if (word->flag(W_POLYGON)) { - blob_it.set_to_list(word->blob_list()); - for (blob_it.mark_cycle_pt(); - !blob_it.cycled_list(); blob_it.forward()) { - blob = blob_it.data(); - box = blob->bounding_box(); - if (prev_box_right > -1) { - gap_width = box.left() - prev_box_right; - all_gap_stats.add(gap_width, 1); - if (blob_it.at_first()) { - 
if (gap_width < min_inter_word_gap) - min_inter_word_gap = gap_width; - } - else { - if (gap_width > max_inter_char_gap) - max_inter_char_gap = gap_width; - } - } - prev_box_right = box.right(); - } - } - else { - cblob_it.set_to_list(word->cblob_list()); - for (cblob_it.mark_cycle_pt(); - !cblob_it.cycled_list(); cblob_it.forward()) { - cblob = cblob_it.data(); - box = cblob->bounding_box(); - if (prev_box_right > -1) { - gap_width = box.left() - prev_box_right; - all_gap_stats.add(gap_width, 1); - if (cblob_it.at_first()) { - if (gap_width < min_inter_word_gap) - min_inter_word_gap = gap_width; - } - else { - if (gap_width > max_inter_char_gap) - max_inter_char_gap = gap_width; - } - } - prev_box_right = box.right(); - } - } - } - tprintf - ("Max inter char gap = %d.\nMin inter word gap = %d.\n", - max_inter_char_gap, min_inter_word_gap); - all_gap_stats.short_print(NULL, TRUE); - all_gap_stats.smooth(2); - tprintf("SMOOTHED DATA...\n"); - all_gap_stats.short_print(NULL, TRUE); - } - } - } - } +void Tesseract::debug_word(PAGE_RES* page_res, const TBOX &selection_box) { + ResetAdaptiveClassifier(); + recog_all_words(page_res, NULL, &selection_box, word_config_.string(), 0); } +} // namespace tesseract /** @@ -1384,87 +548,37 @@ void row_space_stat(BLOCK_LIST *block_list, // blocks to check * row baseline */ -void show_point( // display posn of bloba word - BLOCK_LIST *block_list, // blocks to check - float x, - float y) { +void show_point(PAGE_RES* page_res, float x, float y) { FCOORD pt(x, y); - TBOX box; - BLOCK_IT block_it(block_list); - BLOCK *block; - ROW_IT row_it; - ROW *row; - WERD_IT word_it; - WERD *word; - PBLOB_IT blob_it; - PBLOB *blob; - C_BLOB_IT cblob_it; - C_BLOB *cblob; + PAGE_RES_IT pr_it(page_res); char msg[160]; char *msg_ptr = msg; msg_ptr += sprintf(msg_ptr, "Pt:(%0.3f, %0.3f) ", x, y); - for (block_it.mark_cycle_pt(); - !block_it.cycled_list(); block_it.forward()) { - block = block_it.data(); - if (block->bounding_box().contains(pt)) { - 
row_it.set_to_list(block->row_list()); - for (row_it.mark_cycle_pt(); - !row_it.cycled_list(); row_it.forward()) { - row = row_it.data(); - if (row->bounding_box().contains(pt)) { - msg_ptr += sprintf(msg_ptr, "BL(x)=%0.3f ", - row->base_line(x)); - - word_it.set_to_list(row->word_list()); - for (word_it.mark_cycle_pt(); - !word_it.cycled_list(); word_it.forward()) { - word = word_it.data(); - box = word->bounding_box(); - if (box.contains(pt)) { - msg_ptr += sprintf(msg_ptr, - "Wd(%d, %d)/(%d, %d) ", - box.left(), box.bottom(), - box.right(), box.top()); - - if (word->flag(W_POLYGON)) { - blob_it.set_to_list(word->blob_list()); - for (blob_it.mark_cycle_pt(); - !blob_it.cycled_list(); - blob_it.forward()) { - blob = blob_it.data(); - box = blob->bounding_box(); - if (box.contains(pt)) { - msg_ptr += sprintf(msg_ptr, - "Blb(%d, %d)/(%d, %d) ", - box.left(), - box.bottom(), - box.right(), - box.top()); - } - } - } - else { - cblob_it.set_to_list(word->cblob_list()); - for (cblob_it.mark_cycle_pt(); - !cblob_it.cycled_list(); - cblob_it.forward()) { - cblob = cblob_it.data(); - box = cblob->bounding_box(); - if (box.contains(pt)) { - msg_ptr += sprintf(msg_ptr, - "CBlb(%d, %d)/(%d, %d) ", - box.left(), - box.bottom(), - box.right(), - box.top()); - } - } - } - } - } + for (WERD_RES* word = pr_it.word(); word != NULL; word = pr_it.forward()) { + if (pr_it.row() != pr_it.prev_row() && + pr_it.row()->row->bounding_box().contains(pt)) { + msg_ptr += sprintf(msg_ptr, "BL(x)=%0.3f ", + pr_it.row()->row->base_line(x)); + } + if (word->word->bounding_box().contains(pt)) { + TBOX box = word->word->bounding_box(); + msg_ptr += sprintf(msg_ptr, "Wd(%d, %d)/(%d, %d) ", + box.left(), box.bottom(), + box.right(), box.top()); + C_BLOB_IT cblob_it(word->word->cblob_list()); + for (cblob_it.mark_cycle_pt(); + !cblob_it.cycled_list(); + cblob_it.forward()) { + C_BLOB* cblob = cblob_it.data(); + box = cblob->bounding_box(); + if (box.contains(pt)) { + msg_ptr += sprintf(msg_ptr, + 
"CBlb(%d, %d)/(%d, %d) ", + box.left(), box.bottom(), + box.right(), box.top()); } } } @@ -1492,144 +606,48 @@ void show_point( // display posn of bloba word * Blank display of word then redisplay word according to current display mode * settings */ - -BOOL8 word_blank_and_set_display( // display a word - BLOCK *block, // block holding word - ROW *row, // row holding word - WERD *word // word to be processed - ) { - word->bounding_box().plot(image_win, ScrollView::BLACK, ScrollView::BLACK); - return word_set_display(block, row, word); +namespace tesseract { +BOOL8 Tesseract:: word_blank_and_set_display(BLOCK* block, ROW* row, + WERD_RES* word_res) { + word_res->word->bounding_box().plot(image_win, ScrollView::BLACK, + ScrollView::BLACK); + return word_set_display(block, row, word_res); } /** * word_bln_display() * - * Normalise word and display in word window + * Normalize word and display in word window */ - -BOOL8 word_bln_display( // bln & display - BLOCK *, // block holding word - ROW *row, // row holding word - WERD *word // word to be processed - ) { - WERD *bln_word; - - bln_word = word->poly_copy(row->x_height()); - bln_word->baseline_normalise(row); +BOOL8 Tesseract::word_bln_display(BLOCK* block, ROW* row, WERD_RES* word_res) { + TWERD *bln_word = word_res->chopped_word; + if (bln_word == NULL) { + word_res->SetupForRecognition(unicharset, false, row, block); + bln_word = word_res->chopped_word; + } bln_word_window_handle()->Clear(); display_bln_lines(bln_word_window_handle(), ScrollView::CYAN, 1.0, 0.0f, -1000.0f, 1000.0f); - bln_word->plot(bln_word_window_handle(), ScrollView::RED); - delete bln_word; - return TRUE; -} - - -/** - * word_change_text() - * - * Change the correct text of a word - */ - -BOOL8 word_change_text( // change correct text - BLOCK *block, // block holding word - ROW *row, // row holding word - WERD *word // word to be processed - ) { - char* cp = image_win->ShowInputDialog( - "Enter/edit the correct text and press <>"); - 
word->set_text(cp); - delete[] cp; - - if (word_display_mode.bit(DF_TEXT) || word->display_flag(DF_TEXT)) { - word_blank_and_set_display(block, row, word); - ScrollView::Update(); - } - - *current_image_changed = TRUE; + bln_word->plot(bln_word_window_handle()); + bln_word_window_handle()->Update(); return TRUE; } -/** - * word_copy() - * - * Copy a word to other display list - */ - -BOOL8 word_copy( // copy a word - BLOCK *block, // block holding word - ROW *row, // row holding word - WERD *word // word to be processed - ) { - WERD *copy_word = new WERD; - - *copy_word = *word; - add_word(copy_word, row, block, other_block_list); - *other_image_changed = TRUE; - return TRUE; -} - - -/** - * word_delete() - * - * Delete a word - */ - -BOOL8 word_delete( // delete a word - BLOCK *block, // block holding word - ROW *row, // row holding word - WERD *word, // word to be processed - BLOCK_IT &block_it, // block list iterator - ROW_IT &row_it, // row list iterator - WERD_IT &word_it // word list iterator - ) { - word_it.extract(); - word->bounding_box().plot(image_win, ScrollView::BLACK, ScrollView::BLACK); - delete(word); - - if (word_it.empty()) { // no words left in row - // so delete row - row_it.extract(); - row->bounding_box().plot(image_win, ScrollView::BLACK, ScrollView::BLACK); - delete(row); - - if (row_it.empty()) { // no rows left in blk - // so delete block - block_it.extract(); - block->bounding_box().plot(image_win, ScrollView::BLACK, ScrollView::BLACK); - delete(block); - } - } - *current_image_changed = TRUE; - return TRUE; -} - /** * word_display() Word Processor * * Display a word according to its display modes */ - -BOOL8 word_display( // display a word - BLOCK *, // block holding word - ROW *row, // row holding word - WERD *word // word to be processed - ) { +BOOL8 Tesseract::word_display(BLOCK* block, ROW* row, WERD_RES* word_res) { + WERD* word = word_res->word; TBOX word_bb; // word bounding box int word_height; // ht of word BB BOOL8 
displayed_something = FALSE; - BOOL8 displayed_rainbow = FALSE; float shift; // from bot left - PBLOB_IT it; // blob iterator C_BLOB_IT c_it; // cblob iterator - WERD *word_ptr; // poly copy - WERD temp_word; - float scale_factor; // for BN_POLYGON /* Note the double coercions of(COLOUR)((inT32)editor_image_word_bb_color) @@ -1647,88 +665,29 @@ BOOL8 word_display( // display a word ScrollView::Color c = (ScrollView::Color) ((inT32) editor_image_blob_bb_color); image_win->Pen(c); - if (word->flag(W_POLYGON)) { - it.set_to_list(word->blob_list()); - for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) - it.data()->bounding_box().plot(image_win); - } - else { - c_it.set_to_list(word->cblob_list()); - for (c_it.mark_cycle_pt(); !c_it.cycled_list(); c_it.forward()) - c_it.data()->bounding_box().plot(image_win); - } + c_it.set_to_list(word->cblob_list()); + for (c_it.mark_cycle_pt(); !c_it.cycled_list(); c_it.forward()) + c_it.data()->bounding_box().plot(image_win); displayed_something = TRUE; } // display edge steps - if (word->display_flag(DF_EDGE_STEP) && - !word->flag(W_POLYGON)) { // edgesteps available + if (word->display_flag(DF_EDGE_STEP)) { // edgesteps available word->plot(image_win); // rainbow colors displayed_something = TRUE; - displayed_rainbow = TRUE; } // display poly approx if (word->display_flag(DF_POLYGONAL)) { // need to convert - if (!word->flag(W_POLYGON)) { - word_ptr = word->poly_copy(row->x_height()); - - /* CALL POLYGONAL APPROXIMATOR WHEN AVAILABLE - on a temp_word */ - - if (displayed_rainbow) - // ensure its visible - word_ptr->plot(image_win, ScrollView::WHITE); - else - // rainbow colors - word_ptr->plot(image_win); - delete word_ptr; - } - else { - if (displayed_rainbow) - // ensure its visible - word->plot(image_win, ScrollView::WHITE); - else - word->plot(image_win); // rainbow colors - } - - displayed_rainbow = TRUE; - displayed_something = TRUE; - } - - // disp BN poly approx - if (word->display_flag(DF_BN_POLYGONAL)) { - // need 
to convert - if (!word->flag(W_POLYGON)) { - word_ptr = word->poly_copy(row->x_height()); - temp_word = *word_ptr; - delete word_ptr; - - /* CALL POLYGONAL APPROXIMATOR WHEN AVAILABLE - on a temp_word */ - - } - else - temp_word = *word; // copy word - word_bb = word->bounding_box(); - if (!temp_word.flag(W_NORMALIZED)) - temp_word.baseline_normalise(row); - - scale_factor = re_scale_and_move_bln_word(&temp_word, word_bb); - display_bln_lines(image_win, ScrollView::CYAN, scale_factor, - word_bb.bottom(), word_bb.left(), word_bb.right()); - - if (displayed_rainbow) - // ensure its visible - temp_word.plot(image_win, ScrollView::WHITE); - else - temp_word.plot(image_win); // rainbow colors - - displayed_rainbow = TRUE; + TWERD* tword = TWERD::PolygonalCopy(word); + tword->plot(image_win); + delete tword; displayed_something = TRUE; } // display correct text - if (word->display_flag(DF_TEXT)) { + if (word->display_flag(DF_TEXT) && word->text() != NULL) { word_bb = word->bounding_box(); ScrollView::Color c =(ScrollView::Color) ((inT32) editor_image_blob_bb_color); @@ -1756,19 +715,12 @@ BOOL8 word_display( // display a word return TRUE; } - /** * word_dumper() * * Dump members to the debug window */ - -BOOL8 word_dumper( // dump word - BLOCK *block, //< block holding word - ROW *row, //< row holding word - WERD *word //< word to be processed - ) { - +BOOL8 Tesseract::word_dumper(BLOCK* block, ROW* row, WERD_RES* word_res) { if (block != NULL) { tprintf("\nBlock data...\n"); block->print(NULL, FALSE); @@ -1776,7 +728,7 @@ BOOL8 word_dumper( // dump word tprintf("\nRow data...\n"); row->print(NULL); tprintf("\nWord data...\n"); - word->print(NULL); + word_res->word->print(); return TRUE; } @@ -1786,46 +738,17 @@ BOOL8 word_dumper( // dump word * * Display word according to current display mode settings */ - -BOOL8 word_set_display( // display a word - BLOCK *block, //< block holding word - ROW *row, //< row holding word - WERD *word //< word to be processed - ) { - TBOX 
word_bb; // word bounding box - +BOOL8 Tesseract::word_set_display(BLOCK* block, ROW* row, WERD_RES* word_res) { + WERD* word = word_res->word; word->set_display_flag(DF_BOX, word_display_mode.bit(DF_BOX)); word->set_display_flag(DF_TEXT, word_display_mode.bit(DF_TEXT)); word->set_display_flag(DF_POLYGONAL, word_display_mode.bit(DF_POLYGONAL)); word->set_display_flag(DF_EDGE_STEP, word_display_mode.bit(DF_EDGE_STEP)); word->set_display_flag(DF_BN_POLYGONAL, word_display_mode.bit(DF_BN_POLYGONAL)); - *current_image_changed = TRUE; - return word_display(block, row, word); + return word_display(block, row, word_res); } +} // namespace tesseract -/** - * word_toggle_seg() - * - * Toggle the correct segmentation flag - */ - -BOOL8 word_toggle_seg( // toggle seg flag - BLOCK *, //< block holding word - ROW *, //< row holding word - WERD *word //< word to be processed - ) { - word->set_flag(W_SEGMENTED, !word->flag(W_SEGMENTED)); - *current_image_changed = TRUE; - return TRUE; -} - #endif // GRAPHICS_DISABLED - -/* DEBUG ONLY */ - -void do_check_mem( // do it - inT32 level) { - check_mem("Doing it", level); -} diff --git a/ccmain/pgedit.h b/ccmain/pgedit.h index 2cfcd1000d..667be1bb46 100755 --- a/ccmain/pgedit.h +++ b/ccmain/pgedit.h @@ -24,8 +24,7 @@ #include "ocrrow.h" #include "werd.h" #include "rect.h" -#include "pagewalk.h" -#include "varable.h" +#include "params.h" #include "notdll.h" #include "tesseractclass.h" @@ -45,7 +44,6 @@ class PGEventHandler : public SVEventHandler { }; extern BLOCK_LIST *current_block_list; -extern BOOL8 *current_image_changed; extern STRING_VAR_H (editor_image_win_name, "EditorImage", "Editor image window name"); extern INT_VAR_H (editor_image_xpos, 590, "Editor image X Pos"); @@ -71,14 +69,8 @@ extern INT_VAR_H (editor_word_height, 240, "Word window height"); extern INT_VAR_H (editor_word_width, 655, "Word window width"); extern double_VAR_H (editor_smd_scale_factor, 1.0, "Scaling for smd image"); -void add_word( //to block list - WERD 
*word, //word to be added - ROW *src_row, //source row - BLOCK *src_block, //source block - BLOCK_LIST *dest_block_list //add to this - ); ScrollView* bln_word_window_handle(); //return handle -void build_image_window(TBOX page_bounding_box); +void build_image_window(int width, int height); void display_bln_lines(ScrollView window, ScrollView::Color colour, float scale_factor, @@ -86,86 +78,11 @@ void display_bln_lines(ScrollView window, float minx, float maxx); //function to call -void do_re_display (BOOL8 word_painter ( -BLOCK *, ROW *, WERD *)); -const TBOX do_tidy_cmd(); //tidy -void do_view_cmd(); -void do_write_file( //serialise - char *name //file name - ); void pgeditor_msg( //message display const char *msg); void pgeditor_show_point( //display coords SVEvent *event); -void pgeditor_write_file( //serialise - char *name, //file name - BLOCK_LIST *blocks //block list to write - ); //put bln word in box -float re_scale_and_move_bln_word(WERD *norm_word, //BL normalised word - const TBOX &box //destination box - ); -void re_segment_word( //break/join words - BLOCK_LIST *block_list, //blocks to check - TBOX &selection_box); -void block_space_stat( //show space stats - BLOCK_LIST *block_list, //blocks to check - TBOX &selection_box); -void row_space_stat( //show space stats - BLOCK_LIST *block_list, //blocks to check - TBOX &selection_box); -void show_point( //display posn of bloba word - BLOCK_LIST *block_list, //blocks to check - float x, - float y); - //display a word -BOOL8 word_blank_and_set_display(BLOCK *block, //block holding word - ROW *row, //row holding word - WERD *word //word to be processed - ); -BOOL8 word_bln_display( //bln & display - BLOCK *, //block holding word - ROW *row, //row holding word - WERD *word //word to be processed - ); -BOOL8 word_change_text( //change correct text - BLOCK *block, //block holding word - ROW *row, //row holding word - WERD *word //word to be processed - ); -BOOL8 word_copy( //copy a word - BLOCK *block, //block 
holding word - ROW *row, //row holding word - WERD *word //word to be processed - ); -BOOL8 word_delete( //delete a word - BLOCK *block, //block holding word - ROW *row, //row holding word - WERD *word, //word to be processed - BLOCK_IT &block_it, //block list iterator - ROW_IT &row_it, //row list iterator - WERD_IT &word_it //word list iterator - ); -BOOL8 word_display( // display a word - BLOCK *, //block holding word - ROW *row, //row holding word - WERD *word //word to be processed - ); -BOOL8 word_dumper( //dump word - BLOCK *block, //block holding word - ROW *row, //row holding word - WERD *word //word to be processed - ); -BOOL8 word_set_display( //display a word - BLOCK *block, //block holding word - ROW *row, //row holding word - WERD *word //word to be processed - ); -BOOL8 word_toggle_seg( //toggle seg flag - BLOCK *, //block holding word - ROW *, //row holding word - WERD *word //word to be processed - ); -void do_check_mem( //do it - inT32 level); +void show_point(PAGE_RES* page_res, float x, float y); + #endif diff --git a/ccmain/recogtraining.cpp b/ccmain/recogtraining.cpp new file mode 100644 index 0000000000..8c4b7e1ce3 --- /dev/null +++ b/ccmain/recogtraining.cpp @@ -0,0 +1,182 @@ +/////////////////////////////////////////////////////////////////////// +// File: recogtraining.cpp +// Description: Functions for ambiguity and parameter training. +// Author: Daria Antonova +// Created: Mon Aug 13 11:26:43 PDT 2009 +// +// (C) Copyright 2009, Google Inc. +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. +// +/////////////////////////////////////////////////////////////////////// + +#include "tesseractclass.h" + +#include "boxread.h" +#include "control.h" +#include "cutil.h" +#include "host.h" +#include "permute.h" +#include "ratngs.h" +#include "reject.h" +#include "stopper.h" + +namespace tesseract { + +const inT16 kMaxBoxEdgeDiff = 2; + +// Sets flags necessary for recognition in the training mode. +// Opens and returns the pointer to the output file. +FILE *Tesseract::init_recog_training(const STRING &fname) { + if (tessedit_ambigs_training) { + tessedit_tess_adaption_mode.set_value(0); // turn off adaption + tessedit_enable_doc_dict.set_value(0); // turn off document dictionary + save_best_choices.set_value(1); // save individual char choices + getDict().save_raw_choices.set_value(1); // save raw choices + getDict().permute_only_top.set_value(true); // use only top choice permuter + tessedit_ok_mode.set_value(0); // turn off context checking + // Explore all segmentations. + getDict().stopper_no_acceptable_choices.set_value(1); + } + + STRING output_fname = fname; + const char *lastdot = strrchr(output_fname.string(), '.'); + if (lastdot != NULL) output_fname[lastdot - output_fname.string()] = '\0'; + output_fname += ".txt"; + FILE *output_file = open_file(output_fname.string(), "a+"); + return output_file; +} + +// Copies the bounding box from page_res_it->word() to the given TBOX. +bool read_t(PAGE_RES_IT *page_res_it, TBOX *tbox) { + if (page_res_it->word() != NULL) { + *tbox = page_res_it->word()->word->bounding_box(); + page_res_it->forward(); + return true; + } else { + return false; + } +} + +// Reads the next box from the given box file into TBOX. 
+bool read_b(int applybox_page, int *line_number, FILE *box_file, + char *label, TBOX *bbox) { + int x_min, y_min, x_max, y_max; + if (read_next_box(applybox_page, line_number, box_file, label, + &x_min, &y_min, &x_max, &y_max)) { + bbox->set_to_given_coords(x_min, y_min, x_max, y_max); + return true; + } else { + return false; + } +} + +// This function takes tif/box pair of files and runs recognition on the image, +// while making sure that the word bounds that tesseract identified roughly +// match to those specified by the input box file. For each word (ngram in a +// single bounding box from the input box file) it outputs the ocred result, +// the correct label, rating and certainty. +void Tesseract::recog_training_segmented(const STRING &fname, + PAGE_RES *page_res, + volatile ETEXT_DESC *monitor, + FILE *output_file) { + STRING box_fname = fname; + const char *lastdot = strrchr(box_fname.string(), '.'); + if (lastdot != NULL) box_fname[lastdot - box_fname.string()] = '\0'; + box_fname += ".box"; + // read_next_box() will close box_file + FILE *box_file = open_file(box_fname.string(), "r"); + + PAGE_RES_IT page_res_it; + page_res_it.page_res = page_res; + page_res_it.restart_page(); + char label[kBoxReadBufSize]; + + // Process all the words on this page. + TBOX tbox; // tesseract-identified box + TBOX bbox; // box from the box file + bool keep_going; + int line_number = 0; + do { + keep_going = read_t(&page_res_it, &tbox); + keep_going &= read_b(applybox_page, &line_number, box_file, label, &bbox); + // Align bottom left points of the TBOXes. + while (keep_going && + !NearlyEqual(tbox.bottom(), bbox.bottom(), kMaxBoxEdgeDiff)) { + keep_going = (bbox.bottom() < tbox.bottom()) ? + read_t(&page_res_it, &tbox) : + read_b(applybox_page, &line_number, box_file, label, &bbox); + } + while (keep_going && + !NearlyEqual(tbox.left(), bbox.left(), kMaxBoxEdgeDiff)) { + keep_going = (bbox.left() > tbox.left()) ? 
read_t(&page_res_it, &tbox) : + read_b(applybox_page, &line_number, box_file, label, &bbox); + } + // OCR the word if top right points of the TBOXes are similar. + if (keep_going && + NearlyEqual(tbox.right(), bbox.right(), kMaxBoxEdgeDiff) && + NearlyEqual(tbox.top(), bbox.top(), kMaxBoxEdgeDiff)) { + ambigs_classify_and_output(page_res_it.prev_word(), + page_res_it.prev_row(), + page_res_it.prev_block(), + label, output_file); + } + } while (keep_going); +} + +// Runs classify_word_pass1() on the current word. Outputs Tesseract's +// raw choice as a result of the classification. For words labeled with a +// single unichar also outputs all alternatives from blob_choices of the +// best choice. +void Tesseract::ambigs_classify_and_output(WERD_RES *werd_res, + ROW_RES *row_res, + BLOCK_RES *block_res, + const char *label, + FILE *output_file) { + int offset; + // Classify word. + classify_word_pass1(werd_res, row_res->row, block_res->block); + WERD_CHOICE *best_choice = werd_res->best_choice; + ASSERT_HOST(best_choice != NULL); + ASSERT_HOST(best_choice->blob_choices() != NULL); + + // Compute the number of unichars in the label. + int label_num_unichars = 0; + int step = 1; // should be non-zero on the first iteration + for (offset = 0; label[offset] != '\0' && step > 0; + step = getDict().getUnicharset().step(label + offset), + offset += step, ++label_num_unichars); + if (step == 0) { + tprintf("Not outputting illegal unichar %s\n", label); + return; + } + + // Output all classifier choices for the unigrams (1->1 classifications). 
+ if (label_num_unichars == 1 && best_choice->blob_choices()->length() == 1) { + BLOB_CHOICE_LIST_C_IT outer_blob_choice_it; + outer_blob_choice_it.set_to_list(best_choice->blob_choices()); + BLOB_CHOICE_IT blob_choice_it; + blob_choice_it.set_to_list(outer_blob_choice_it.data()); + for (blob_choice_it.mark_cycle_pt(); + !blob_choice_it.cycled_list(); + blob_choice_it.forward()) { + BLOB_CHOICE *blob_choice = blob_choice_it.data(); + if (blob_choice->unichar_id() != INVALID_UNICHAR_ID) { + fprintf(output_file, "%s\t%s\t%.4f\t%.4f\n", + unicharset.id_to_unichar(blob_choice->unichar_id()), + label, blob_choice->rating(), blob_choice->certainty()); + } + } + } + // Output raw choices for many->many and 1->many classifications. + getDict().PrintAmbigAlternatives(output_file, label, label_num_unichars); +} + +} // namespace tesseract diff --git a/ccmain/reject.cpp b/ccmain/reject.cpp index b893efd195..69f091e039 100644 --- a/ccmain/reject.cpp +++ b/ccmain/reject.cpp @@ -32,24 +32,19 @@ #include "scanutils.h" #include #include -//#include "tessbox.h" #include "memry.h" #include "reject.h" #include "tfacep.h" -#include "mainblk.h" #include "charcut.h" #include "imgs.h" -#include "scaleimg.h" #include "control.h" #include "docqual.h" #include "secname.h" #include "globals.h" +#include "helpers.h" /* #define SECURE_NAMES done in secnames.h when necessary */ -//extern "C" { -#include "callnet.h" -//} #include "tesseractclass.h" #include "notdll.h" @@ -59,151 +54,6 @@ #endif CLISTIZEH (STRING) CLISTIZE (STRING) -#define EXTERN -EXTERN -INT_VAR (tessedit_reject_mode, 0, "Rejection algorithm"); -EXTERN -INT_VAR (tessedit_ok_mode, 5, "Acceptance decision algorithm"); -EXTERN -BOOL_VAR (tessedit_use_nn, FALSE, ""); -EXTERN -BOOL_VAR (tessedit_rejection_debug, FALSE, "Adaption debug"); -EXTERN -BOOL_VAR (tessedit_rejection_stats, FALSE, "Show NN stats"); -EXTERN -BOOL_VAR (tessedit_flip_0O, TRUE, "Contextual 0O O0 flips"); -EXTERN -double_VAR (tessedit_lower_flip_hyphen, 1.5, 
-"Aspect ratio dot/hyphen test"); -EXTERN -double_VAR (tessedit_upper_flip_hyphen, 1.8, -"Aspect ratio dot/hyphen test"); - -EXTERN -BOOL_VAR (rej_trust_doc_dawg, FALSE, -"Use DOC dawg in 11l conf. detector"); -EXTERN -BOOL_VAR (rej_1Il_use_dict_word, FALSE, "Use dictword test"); -EXTERN -BOOL_VAR (rej_1Il_trust_permuter_type, TRUE, "Dont double check"); - -EXTERN -BOOL_VAR (one_ell_conflict_default, TRUE, "one_ell_conflict default"); -EXTERN -BOOL_VAR (show_char_clipping, FALSE, "Show clip image window?"); -EXTERN -BOOL_VAR (nn_debug, FALSE, "NN DEBUGGING?"); -EXTERN -BOOL_VAR (nn_reject_debug, FALSE, "NN DEBUG each char?"); -EXTERN -BOOL_VAR (nn_lax, FALSE, "Use 2nd rate matches"); -EXTERN -BOOL_VAR (nn_double_check_dict, FALSE, "Double check"); -EXTERN -BOOL_VAR (nn_conf_double_check_dict, TRUE, -"Double check for confusions"); -EXTERN -BOOL_VAR (nn_conf_1Il, TRUE, "NN use 1Il conflicts"); -EXTERN -BOOL_VAR (nn_conf_Ss, TRUE, "NN use Ss conflicts"); -EXTERN -BOOL_VAR (nn_conf_hyphen, TRUE, "NN hyphen conflicts"); -EXTERN -BOOL_VAR (nn_conf_test_good_qual, FALSE, "NN dodgy 1Il cross check"); -EXTERN -BOOL_VAR (nn_conf_test_dict, TRUE, "NN dodgy 1Il cross check"); -EXTERN -BOOL_VAR (nn_conf_test_sensible, TRUE, "NN dodgy 1Il cross check"); -EXTERN -BOOL_VAR (nn_conf_strict_on_dodgy_chs, TRUE, -"Require stronger NN match"); -EXTERN -double_VAR (nn_dodgy_char_threshold, 0.99, "min accept score"); -EXTERN -INT_VAR (nn_conf_accept_level, 4, "NN accept dodgy 1Il matches? 
"); -EXTERN -INT_VAR (nn_conf_initial_i_level, 3, -"NN accept initial Ii match level "); - -EXTERN -BOOL_VAR (no_unrej_dubious_chars, TRUE, "Dubious chars next to reject?"); -EXTERN -BOOL_VAR (no_unrej_no_alphanum_wds, TRUE, "Stop unrej of non A/N wds?"); -EXTERN -BOOL_VAR (no_unrej_1Il, FALSE, "Stop unrej of 1Ilchars?"); -EXTERN -BOOL_VAR (rej_use_tess_accepted, TRUE, "Individual rejection control"); -EXTERN -BOOL_VAR (rej_use_tess_blanks, TRUE, "Individual rejection control"); -EXTERN -BOOL_VAR (rej_use_good_perm, TRUE, "Individual rejection control"); -EXTERN -BOOL_VAR (rej_use_sensible_wd, FALSE, "Extend permuter check"); -EXTERN -BOOL_VAR (rej_alphas_in_number_perm, FALSE, "Extend permuter check"); - -EXTERN -double_VAR (rej_whole_of_mostly_reject_word_fract, 0.85, -"if >this fract"); -EXTERN -INT_VAR (rej_mostly_reject_mode, 1, -"0-never, 1-afterNN, 2-after new xht"); -EXTERN -double_VAR (tessed_fullstop_aspect_ratio, 1.2, -"if >this fract then reject"); - -EXTERN -INT_VAR (net_image_width, 40, "NN input image width"); -EXTERN -INT_VAR (net_image_height, 36, "NN input image height"); -EXTERN -INT_VAR (net_image_x_height, 22, "NN input image x_height"); -EXTERN -INT_VAR (tessedit_image_border, 2, "Rej blbs near image edge limit"); - -/* - Net input is assumed to have (net_image_width * net_image_height) input - units of image pixels, followed by 0, 1, or N units representing the - baseline position. 0 implies no baseline information. 1 implies a floating - point value. N implies a "guage" of N units. For any char an initial set - of these are ON, the remainder OFF to indicate the "level" of the - baseline. - - HOWEVER!!! NOTE THAT EACH NEW INPUT LAYER FORMAT EXPECTS TO BE RUN WITH A - DIFFERENT tessed/netmatch/nmatch.c MODULE. 
- These are classic C modules - generated by aspirin with HARD CODED CONSTANTS -*/ - -EXTERN -INT_VAR (net_bl_nodes, 20, "Number of baseline nodes"); - -EXTERN -double_VAR (nn_reject_threshold, 0.5, "NN min accept score"); -EXTERN -double_VAR (nn_reject_head_and_shoulders, 0.6, "top scores sep factor"); - -/* NOTE - ctoh doesn't handle "=" properly, hence \075 */ -EXTERN -STRING_VAR (ok_single_ch_non_alphanum_wds, "-?\075", -"Allow NN to unrej"); -EXTERN -STRING_VAR (ok_repeated_ch_non_alphanum_wds, "-?*\075", -"Allow NN to unrej"); -EXTERN -STRING_VAR (conflict_set_I_l_1, "Il1[]", "Il1 conflict set"); -EXTERN -STRING_VAR (conflict_set_S_s, "Ss$", "Ss conflict set"); -EXTERN -STRING_VAR (conflict_set_hyphen, "-_~", "hyphen conflict set"); -EXTERN -STRING_VAR (dubious_chars_left_of_reject, "!'+`()-./\\<>;:^_,~\"", -"Unreliable chars"); -EXTERN -STRING_VAR (dubious_chars_right_of_reject, "!'+`()-./\\<>;:^_,~\"", -"Unreliable chars"); - -EXTERN -INT_VAR (min_sane_x_ht_pixels, 8, "Reject any x-ht lt or eq than this"); /************************************************************************* * set_done() @@ -370,7 +220,7 @@ void Tesseract::make_reject_map( //make rej map for wd //detailed results and the whole of any words which are very small */ else if (tessedit_reject_mode == 5) { - if (bln_x_height / word->denorm.scale () <= min_sane_x_ht_pixels) + if (kBlnXHeight / word->denorm.scale () <= min_sane_x_ht_pixels) word->reject_map.rej_word_small_xht (); else { one_ell_conflict(word, TRUE); @@ -437,11 +287,6 @@ void Tesseract::make_reject_map( //make rej map for wd //detailed results tprintf("Dict word: %d\n", dict_word(*(word->best_choice))); } - /* Un-reject any rejected characters if NN permits */ - - if (tessedit_use_nn && (pass == 2) && - word->reject_map.recoverable_rejects ()) - nn_recover_rejects(word, row); flip_hyphens(word); check_debug_pt (word, 20); } @@ -460,8 +305,8 @@ void reject_blanks(WERD_RES *word) { } } - -void reject_I_1_L(WERD_RES *word) { 
+namespace tesseract { +void Tesseract::reject_I_1_L(WERD_RES *word) { inT16 i; inT16 offset; @@ -474,6 +319,7 @@ void reject_I_1_L(WERD_RES *word) { } } } +} // namespace tesseract void reject_poor_matches( //detailed results @@ -493,12 +339,12 @@ void reject_poor_matches( //detailed results ("ASSERT FAIL string:\"%s\"; strlen=%d; choices len=%d; blob len=%d\n", word->best_choice->unichar_string().string(), strlen (word->best_choice->unichar_lengths().string()), list_it.length(), - word->outword->blob_list()->length()); + word->box_word->length()); } #endif ASSERT_HOST (strlen (word->best_choice->unichar_lengths().string ()) == list_it.length ()); - ASSERT_HOST (word->outword->blob_list ()->length () == list_it.length ()); + ASSERT_HOST(word->box_word->length() == list_it.length()); threshold = compute_reject_threshold (blob_choices); for (list_it.mark_cycle_pt (); @@ -583,47 +429,36 @@ float compute_reject_threshold( //compute threshold //detailed results * If the word is perilously close to the edge of the image, reject those blobs * in the word which are too close to the edge as they could be clipped. 
*************************************************************************/ - -void reject_edge_blobs(WERD_RES *word) { - TBOX word_box = word->word->bounding_box (); - TBOX blob_box; - PBLOB_IT blob_it = word->outword->blob_list (); - //blobs - int blobindex = 0; - float centre; - - if ((word_box.left () < tessedit_image_border) || - (word_box.bottom () < tessedit_image_border) || - (word_box.right () + tessedit_image_border > - page_image.get_xsize () - 1) || - (word_box.top () + tessedit_image_border > page_image.get_ysize () - 1)) { - ASSERT_HOST (word->reject_map.length () == blob_it.length ()); - for (blobindex = 0, blob_it.mark_cycle_pt (); - !blob_it.cycled_list (); blobindex++, blob_it.forward ()) { - blob_box = blob_it.data ()->bounding_box (); - centre = (blob_box.left () + blob_box.right ()) / 2.0; - if ((word->denorm.x (blob_box.left ()) < tessedit_image_border) || - (word->denorm.y (blob_box.bottom (), centre) < - tessedit_image_border) || - (word->denorm.x (blob_box.right ()) + tessedit_image_border > - page_image.get_xsize () - 1) || - (word->denorm.y (blob_box.top (), centre) - + tessedit_image_border > page_image.get_ysize () - 1)) { - word->reject_map[blobindex].setrej_edge_char (); - //close to edge +namespace tesseract { +void Tesseract::reject_edge_blobs(WERD_RES *word) { + TBOX word_box = word->word->bounding_box(); + // Use the box_word as it is already denormed back to image coordinates. 
+ int blobcount = word->box_word->length(); + + if (word_box.left() < tessedit_image_border || + word_box.bottom() < tessedit_image_border || + word_box.right() + tessedit_image_border > ImageWidth() - 1 || + word_box.top() + tessedit_image_border > ImageHeight() - 1) { + ASSERT_HOST(word->reject_map.length() == blobcount); + for (int blobindex = 0; blobindex < blobcount; blobindex++) { + TBOX blob_box = word->box_word->BlobBox(blobindex); + if (blob_box.left() < tessedit_image_border || + blob_box.bottom() < tessedit_image_border || + blob_box.right() + tessedit_image_border > ImageWidth() - 1 || + blob_box.top() + tessedit_image_border > ImageHeight() - 1) { + word->reject_map[blobindex].setrej_edge_char(); + // Close to edge } } } } - /********************************************************************** * one_ell_conflict() * * Identify words where there is a potential I/l/1 error. * - A bundle of contextual heuristics! **********************************************************************/ -namespace tesseract { BOOL8 Tesseract::one_ell_conflict(WERD_RES *word_res, BOOL8 update_map) { const char *word; const char *lengths; @@ -868,548 +703,27 @@ BOOL8 Tesseract::test_ambig_word( //test for ambiguity return ambig; } -/************************************************************************* - * char_ambiguities() - * - * Return a pointer to a string containing the full conflict set of characters - * which includes the specified character, if there is one. If the specified - * character is not a member of a conflict set, return NULL. - * (NOTE that a character is assumed to be a member of only ONE conflict set.) 
- *************************************************************************/ -const char *Tesseract::char_ambiguities(char c) { - static STRING_CLIST conflict_sets; - static BOOL8 read_conflict_sets = FALSE; - STRING_C_IT cs_it(&conflict_sets); - const char *cs; - STRING cs_file_name; - FILE *cs_file; - char buff[1024]; - - if (!read_conflict_sets) { - cs_file_name = datadir + "confsets"; - if (!(cs_file = fopen (cs_file_name.string (), "r"))) { - CANTOPENFILE.error ("char_ambiguities", EXIT, "%s %d", - cs_file_name.string (), errno); - } - while (fscanf (cs_file, "%s", buff) == 1) { - cs_it.add_after_then_move (new STRING (buff)); - } - fclose (cs_file); - read_conflict_sets = TRUE; - cs_it.move_to_first (); - if (tessedit_rejection_debug) { - for (cs_it.mark_cycle_pt (); - !cs_it.cycled_list (); cs_it.forward ()) { - tprintf ("\"%s\"\n", cs_it.data ()->string ()); - } - } - } - - cs_it.move_to_first (); - for (cs_it.mark_cycle_pt (); !cs_it.cycled_list (); cs_it.forward ()) { - cs = cs_it.data ()->string (); - if (strchr (cs, c) != NULL) - return cs; - } - return NULL; -} - -/************************************************************************* - * nn_recover_rejects() - * Generate the nn_reject_map - a copy of the current reject map, but dont - * reject previously rejected chars if the NN matcher agrees with the best - * choice. - *************************************************************************/ - -void Tesseract::nn_recover_rejects(WERD_RES *word, ROW *row) { - //copy for debug - REJMAP old_map = word->reject_map; - /* - NOTE THAT THIS IS RELATIVELY INEFFICIENT AS THE WHOLE OF THE WERD IS - MATCHED BY THE NN MATCHER. IF COULD EASILY BE RESTRICTED TO JUST THE - REJECT CHARACTERS (Though initial use is when words are total rejects - anyway). 
- */ - - set_global_subsubloc_code(SUBSUBLOC_NN); - nn_match_word(word, row); - - if (no_unrej_1Il) - dont_allow_1Il(word); - if (no_unrej_dubious_chars) - dont_allow_dubious_chars(word); - - if (rej_mostly_reject_mode == 1) - reject_mostly_rejects(word); - /* - IF there are no unrejected alphanumerics AND - The word is not an acceptable single non alphanum char word AND - The word is not an acceptable repeated non alphanum char word - THEN Reject whole word - */ - if (no_unrej_no_alphanum_wds && - (count_alphanums (word) < 1) && - !((word->best_choice->unichar_lengths().length () == 1) && - STRING(ok_single_ch_non_alphanum_wds).contains( - word->best_choice->unichar_string()[0])) - && !repeated_nonalphanum_wd (word, row)) - - word->reject_map.rej_word_no_alphanums (); - - #ifndef SECURE_NAMES - - if (nn_debug) { - tprintf ("\nTess: \"%s\" MAP ", - word->best_choice->unichar_string().string()); - old_map.print (stdout); - tprintf ("->"); - word->reject_map.print (stdout); - tprintf ("\n"); - } - #endif - set_global_subsubloc_code(SUBSUBLOC_OTHER); -} - -void Tesseract::nn_match_word( //Match a word - WERD_RES *word, - ROW *row) { - PIXROW_LIST *pixrow_list; - PIXROW_IT pixrow_it; - IMAGELINE *imlines; //lines of the image - TBOX pix_box; //box of imlines extent -#ifndef GRAPHICS_DISABLED - ScrollView* win = NULL; -#endif - IMAGE clip_image; - IMAGE scaled_image; - float baseline_pos; - inT16 net_image_size; - inT16 clip_image_size; - WERD copy_outword; // copy to denorm - inT16 i; - - const char *word_string; - const char *word_string_lengths; - BOOL8 word_in_dict; //Tess wd in dict - BOOL8 checked_dict_word; //Tess wd definitely in dict - BOOL8 sensible_word; //OK char string - BOOL8 centre; //Not at word end chs - BOOL8 good_quality_word; - inT16 char_quality; - inT16 accepted_char_quality; - - inT16 conf_level; //0:REJECT - //1:DODGY ACCEPT - //2:DICT ACCEPT - //3:CLEAR ACCEPT - inT16 first_alphanum_index_; - inT16 first_alphanum_offset_; - - word_string = 
word->best_choice->unichar_string().string(); - word_string_lengths = word->best_choice->unichar_lengths().string(); - first_alphanum_index_ = first_alphanum_index (word_string, - word_string_lengths); - first_alphanum_offset_ = first_alphanum_offset (word_string, - word_string_lengths); - word_in_dict = ((word->best_choice->permuter () == SYSTEM_DAWG_PERM) || - (word->best_choice->permuter () == FREQ_DAWG_PERM) || - (word->best_choice->permuter () == USER_DAWG_PERM)); - checked_dict_word = word_in_dict && - (safe_dict_word(*(word->best_choice)) > 0); - sensible_word = acceptable_word_string (word_string, word_string_lengths) != - AC_UNACCEPTABLE; - - word_char_quality(word, row, &char_quality, &accepted_char_quality); - good_quality_word = - word->best_choice->unichar_lengths().length () == char_quality; - - #ifndef SECURE_NAMES - if (nn_reject_debug) { - tprintf ("Dict: %c Checked Dict: %c Sensible: %c Quality: %c\n", - word_in_dict ? 'T' : 'F', - checked_dict_word ? 'T' : 'F', - sensible_word ? 'T' : 'F', good_quality_word ? 'T' : 'F'); - } - #endif - - if (word->best_choice->unichar_lengths().length () != - word->outword->blob_list ()->length ()) { - #ifndef SECURE_NAMES - tprintf ("nn_match_word ASSERT FAIL String:\"%s\"; #Blobs=%d\n", - word->best_choice->unichar_string().string (), - word->outword->blob_list ()->length ()); - #endif - err_exit(); - } - - copy_outword = *(word->outword); - copy_outword.baseline_denormalise (&word->denorm); - /* - For each character, generate and match a new image, containing JUST the - character we have clipped, centered in the image, on a white background. - Note that we MUST have a square image so that we can scale it uniformly in - x and y. We base the size on x_height as this can be found fairly reliably. - */ - net_image_size = (net_image_width > net_image_height) ? 
- net_image_width : net_image_height; - clip_image_size = (inT16) floor (0.5 + - net_image_size * word->x_height / - net_image_x_height); - if ((clip_image_size <= 1) || (net_image_size <= 1)) { - return; - } - - /* - Get the image of the word and the pix positions of each char - */ - char_clip_word(©_outword, page_image, pixrow_list, imlines, pix_box); -#ifndef GRAPHICS_DISABLED - if (show_char_clipping) { - win = display_clip_image (©_outword, page_image, - pixrow_list, pix_box); - } -#endif - pixrow_it.set_to_list (pixrow_list); - pixrow_it.move_to_first (); - for (pixrow_it.mark_cycle_pt (), i = 0; - !pixrow_it.cycled_list (); pixrow_it.forward (), i++) { - if (pixrow_it.data ()-> - bad_box (page_image.get_xsize (), page_image.get_ysize ())) - continue; - clip_image.create (clip_image_size, clip_image_size, 1); - //make bin imge - if (!copy_outword.flag (W_INVERSE)) - invert_image(&clip_image); //white background for black on white - pixrow_it.data ()->char_clip_image (imlines, pix_box, row, - clip_image, baseline_pos); - if (copy_outword.flag (W_INVERSE)) - invert_image(&clip_image); //invert white on black for scaling &NN - scaled_image.create (net_image_size, net_image_size, 1); - scale_image(clip_image, scaled_image); - baseline_pos *= net_image_size / clip_image_size; - //scale with im - centre = !pixrow_it.at_first () && !pixrow_it.at_last (); - - conf_level = nn_match_char (scaled_image, baseline_pos, - word_in_dict, checked_dict_word, - sensible_word, centre, - good_quality_word, word_string[i]); - if (word->reject_map[i].recoverable ()) { - if ((i == first_alphanum_index_) && - word_string_lengths[first_alphanum_index_] == 1 && - ((word_string[first_alphanum_offset_] == 'I') || - (word_string[first_alphanum_offset_] == 'i'))) { - if (conf_level >= nn_conf_initial_i_level) - word->reject_map[i].setrej_nn_accept (); - //un-reject char - } - else if (conf_level > 0) - //un-reject char - word->reject_map[i].setrej_nn_accept (); - } -#ifndef 
GRAPHICS_DISABLED - if (show_char_clipping) - display_images(clip_image, scaled_image); -#endif - clip_image.destroy(); - scaled_image.destroy(); - } - - delete[]imlines; // Free array of imlines - delete pixrow_list; - -#ifndef GRAPHICS_DISABLED - if (show_char_clipping) { -// destroy_window(win); -// win->Destroy(); - delete win; - } -#endif -} -} // namespace tesseract - - -/************************************************************************* - * nn_match_char() - * Call Neural Net matcher to match a single character, given a scaled, - * square image - *************************************************************************/ - -inT16 nn_match_char( //of character - IMAGE &scaled_image, - float baseline_pos, //rel to scaled_image - BOOL8 dict_word, //part of dict wd? - BOOL8 checked_dict_word, //part of dict wd? - BOOL8 sensible_word, //part acceptable str? - BOOL8 centre, //not at word ends? - BOOL8 good_quality_word, //initial segmentation - char tess_ch //confirm this? - ) { - inT16 conf_level; //0..2 - inT32 row; - inT32 col; - inT32 y_size = scaled_image.get_ysize (); - inT32 start_y = y_size - (y_size - net_image_height) / 2 - 1; - inT32 end_y = start_y - net_image_height + 1; - IMAGELINE imline; - float *input_vector; - float *input_vec_ptr; - char top; - float top_score; - char next; - float next_score; - inT16 input_nodes = (net_image_height * net_image_width) + net_bl_nodes; - inT16 j; - - input_vector = (float *) alloc_mem (input_nodes * sizeof (float)); - input_vec_ptr = input_vector; - - invert_image(&scaled_image); //cos nns work better - for (row = start_y; row >= end_y; row--) { - scaled_image.fast_get_line (0, row, net_image_width, &imline); - for (col = 0; col < net_image_width; col++) - *input_vec_ptr++ = imline.pixels[col]; - } - /* - The bit map presented to the net may be shorter than the image, so shift - the coord to be relative to the bitmap portion. 
- */ - baseline_pos -= (y_size - net_image_height) / 2.0; - /* - Baseline pos is 0 if below bitmap, 1 if above and in proportion otherwise. - This is represented to the net as a set of bl_nodes, an initial proportion - of which are set to 1.0, indicating the level of the baseline. The - remainder are 0.0 - */ - - if (baseline_pos < 0) - baseline_pos = 0; - else if (baseline_pos >= net_image_height) - baseline_pos = net_image_height + 1; - else - baseline_pos = baseline_pos + 1; - baseline_pos = baseline_pos / (net_image_height + 1); - - if (net_bl_nodes > 0) { - baseline_pos *= 1.7; //Use a wider range - if (net_bl_nodes > 1) { - /* Multi-node baseline representation */ - for (j = 0; j < net_bl_nodes; j++) { - if (baseline_pos > ((float) j / net_bl_nodes)) - *input_vec_ptr++ = 1.0; - else - *input_vec_ptr++ = 0.0; - } - } - else { - /* Single node baseline */ - *input_vec_ptr++ = baseline_pos; - } - } - - callnet(input_vector, &top, &top_score, &next, &next_score); - conf_level = evaluate_net_match (top, top_score, next, next_score, - tess_ch, dict_word, checked_dict_word, - sensible_word, centre, good_quality_word); - #ifndef SECURE_NAMES - if (nn_reject_debug) { - tprintf ("top:\"%c\" %4.2f next:\"%c\" %4.2f TESS:\"%c\" Conf: %d\n", - top, top_score, next, next_score, tess_ch, conf_level); - } - #endif - free_mem(input_vector); - return conf_level; -} - - -inT16 evaluate_net_match(char top, - float top_score, - char next, - float next_score, - char tess_ch, - BOOL8 dict_word, - BOOL8 checked_dict_word, - BOOL8 sensible_word, - BOOL8 centre, - BOOL8 good_quality_word) { - inT16 accept_level; //0 Very clearly matched - //1 Clearly top - //2 Top but poor match - //3 Next & poor top match - //4 Next but good top match - //5 No chance - BOOL8 good_top_choice; - BOOL8 excellent_top_choice; - BOOL8 confusion_match = FALSE; - BOOL8 dodgy_char = !isalnum (tess_ch); - - good_top_choice = (top_score > nn_reject_threshold) && - (nn_reject_head_and_shoulders * top_score > 
next_score); - - excellent_top_choice = good_top_choice && - (top_score > nn_dodgy_char_threshold); - - if (top == tess_ch) { - if (excellent_top_choice) - accept_level = 0; - else if (good_top_choice) - accept_level = 1; //Top correct and well matched - else - accept_level = 2; //Top correct but poor match - } - else if ((nn_conf_1Il && - STRING (conflict_set_I_l_1).contains (tess_ch) && - STRING (conflict_set_I_l_1).contains (top)) || - (nn_conf_hyphen && - STRING (conflict_set_hyphen).contains (tess_ch) && - STRING (conflict_set_hyphen).contains (top)) || - (nn_conf_Ss && - STRING (conflict_set_S_s).contains (tess_ch) && - STRING (conflict_set_S_s).contains (top))) { - confusion_match = TRUE; - if (good_top_choice) - accept_level = 1; //Good top confusion - else - accept_level = 2; //Poor top confusion - } - else if ((nn_conf_1Il && - STRING (conflict_set_I_l_1).contains (tess_ch) && - STRING (conflict_set_I_l_1).contains (next)) || - (nn_conf_hyphen && - STRING (conflict_set_hyphen).contains (tess_ch) && - STRING (conflict_set_hyphen).contains (next)) || - (nn_conf_Ss && - STRING (conflict_set_S_s).contains (tess_ch) && - STRING (conflict_set_S_s).contains (next))) { - confusion_match = TRUE; - if (!good_top_choice) - accept_level = 3; //Next confusion and top match dodgy - else - accept_level = 4; //Next confusion and good top match - } - else if (next == tess_ch) { - if (!good_top_choice) - accept_level = 3; //Next match and top match dodgy - else - accept_level = 4; //Next match and good top match - } - else - accept_level = 5; - - /* Could allow some match flexibility here sS$ etc */ - - /* Now set confirmation level according to how much we can believe the tess - char. 
*/ - - if ((accept_level == 0) && !confusion_match) - return 3; - - if ((accept_level <= 1) && - (!nn_conf_strict_on_dodgy_chs || !dodgy_char) && !confusion_match) - return 3; - - if ((accept_level == 2) && - !confusion_match && !dodgy_char && - good_quality_word && - dict_word && - (checked_dict_word || !nn_double_check_dict) && sensible_word) - return 2; - - if (confusion_match && - (accept_level <= nn_conf_accept_level) && - (good_quality_word || - (!nn_conf_test_good_qual && - !STRING (conflict_set_I_l_1).contains (tess_ch))) && - (dict_word || !nn_conf_test_dict) && - (checked_dict_word || !nn_conf_double_check_dict) && - (sensible_word || !nn_conf_test_sensible)) - return 1; - - if (!confusion_match && - nn_lax && - (accept_level == 3) && - (good_quality_word || !nn_conf_test_good_qual) && - (dict_word || !nn_conf_test_dict) && - (sensible_word || !nn_conf_test_sensible)) - return 1; - else - return 0; -} - - -/************************************************************************* - * dont_allow_dubious_chars() - * Let Rejects "eat" into adjacent "dubious" chars. I.e those prone to be wrong - * if adjacent to a reject. 
- *************************************************************************/ -void dont_allow_dubious_chars(WERD_RES *word) { - int i = 0; - int offset = 0; - int rej_pos; - int word_len = word->reject_map.length (); - - while (i < word_len) { - /* Find next reject */ - - while ((i < word_len) && (word->reject_map[i].accepted ())) - { - offset += word->best_choice->unichar_lengths()[i]; - i++; - } - - if (i < word_len) { - rej_pos = i; - - /* Reject dubious chars to the left */ - i--; - offset -= word->best_choice->unichar_lengths()[i]; - while ((i >= 0) && - STRING(dubious_chars_left_of_reject).contains( - word->best_choice->unichar_string()[offset])) { - word->reject_map[i--].setrej_dubious (); - offset -= word->best_choice->unichar_lengths()[i]; - } - - /* Skip adjacent rejects */ - - for (i = rej_pos; - (i < word_len) && (word->reject_map[i].rejected ()); - offset += word->best_choice->unichar_lengths()[i++]); - - /* Reject dubious chars to the right */ - - while ((i < word_len) && - STRING(dubious_chars_right_of_reject).contains( - word->best_choice->unichar_string()[offset])) { - offset += word->best_choice->unichar_lengths()[i]; - word->reject_map[i++].setrej_dubious (); - } - } - } -} - /************************************************************************* * dont_allow_1Il() * Dont unreject LONE accepted 1Il conflict set chars *************************************************************************/ -namespace tesseract { void Tesseract::dont_allow_1Il(WERD_RES *word) { int i = 0; int offset; - int word_len = word->reject_map.length (); - const char *s = word->best_choice->unichar_string().string (); - const char *lengths = word->best_choice->unichar_lengths().string (); + int word_len = word->reject_map.length(); + const char *s = word->best_choice->unichar_string().string(); + const char *lengths = word->best_choice->unichar_lengths().string(); BOOL8 accepted_1Il = FALSE; for (i = 0, offset = 0; i < word_len; offset += 
word->best_choice->unichar_lengths()[i++]) { - if (word->reject_map[i].accepted ()) { - if (STRING (conflict_set_I_l_1).contains (s[offset])) + if (word->reject_map[i].accepted()) { + if (STRING(conflict_set_I_l_1).contains(s[offset])) { accepted_1Il = TRUE; - else { - if (unicharset.get_isalpha (s + offset, lengths[i]) || - unicharset.get_isdigit (s + offset, lengths[i])) + } else { + if (unicharset.get_isalpha(s + offset, lengths[i]) || + unicharset.get_isdigit(s + offset, lengths[i])) return; // >=1 non 1Il ch accepted } } @@ -1419,15 +733,14 @@ void Tesseract::dont_allow_1Il(WERD_RES *word) { for (i = 0, offset = 0; i < word_len; offset += word->best_choice->unichar_lengths()[i++]) { - if (STRING (conflict_set_I_l_1).contains (s[offset]) && - word->reject_map[i].accepted ()) - word->reject_map[i].setrej_postNN_1Il (); + if (STRING(conflict_set_I_l_1).contains(s[offset]) && + word->reject_map[i].accepted()) + word->reject_map[i].setrej_postNN_1Il(); } } -inT16 Tesseract::count_alphanums( //how many alphanums - WERD_RES *word_res) { +inT16 Tesseract::count_alphanums(WERD_RES *word_res) { int count = 0; const WERD_CHOICE *best_choice = word_res->best_choice; for (int i = 0; i < word_res->reject_map.length(); ++i) { @@ -1439,34 +752,33 @@ inT16 Tesseract::count_alphanums( //how many alphanums } return count; } -} // namespace tesseract -void reject_mostly_rejects( //rej all if most rejectd - WERD_RES *word) { +// reject all if most rejected. 
+void Tesseract::reject_mostly_rejects(WERD_RES *word) { /* Reject the whole of the word if the fraction of rejects exceeds a limit */ - if ((float) word->reject_map.reject_count () / word->reject_map.length () >= + if ((float) word->reject_map.reject_count() / word->reject_map.length() >= rej_whole_of_mostly_reject_word_fract) - word->reject_map.rej_word_mostly_rej (); + word->reject_map.rej_word_mostly_rej(); } -namespace tesseract { BOOL8 Tesseract::repeated_nonalphanum_wd(WERD_RES *word, ROW *row) { inT16 char_quality; inT16 accepted_char_quality; - if (word->best_choice->unichar_lengths().length () <= 1) + if (word->best_choice->unichar_lengths().length() <= 1) return FALSE; - if (!STRING (ok_repeated_ch_non_alphanum_wds). - contains (word->best_choice->unichar_string()[0])) + if (!STRING(ok_repeated_ch_non_alphanum_wds). + contains(word->best_choice->unichar_string()[0])) return FALSE; - if (!repeated_ch_string (word->best_choice->unichar_string().string (), - word->best_choice->unichar_lengths().string ())) - return FALSE; + UNICHAR_ID uch_id = word->best_choice->unichar_id(0); + for (int i = 1; i < word->best_choice->length(); ++i) { + if (word->best_choice->unichar_id(i) != uch_id) return FALSE; + } word_char_quality(word, row, &char_quality, &accepted_char_quality); @@ -1477,36 +789,18 @@ BOOL8 Tesseract::repeated_nonalphanum_wd(WERD_RES *word, ROW *row) { return FALSE; } -BOOL8 Tesseract::repeated_ch_string(const char *rep_ch_str, - const char *lengths) { - UNICHAR_ID c; - - if ((rep_ch_str == NULL) || (*rep_ch_str == '\0')) { - return FALSE; - } - - c = unicharset.unichar_to_id(rep_ch_str, *lengths); - rep_ch_str += *(lengths++); - while (*rep_ch_str != '\0' && - unicharset.unichar_to_id(rep_ch_str, *lengths) == c) { - rep_ch_str++; - } - if (*rep_ch_str == '\0') - return TRUE; - return FALSE; -} - - inT16 Tesseract::safe_dict_word(const WERD_CHOICE &word) { int dict_word_type = dict_word(word); return dict_word_type == DOC_DAWG_PERM ? 
0 : dict_word_type; } - +// Note: After running this function word_res->best_choice->blob_choices() +// might not contain the right BLOB_CHOICE coresponding to each character +// in word_res->best_choice. However, the length of blob_choices and +// word_res->best_choice will remain the same. void Tesseract::flip_hyphens(WERD_RES *word_res) { WERD_CHOICE *best_choice = word_res->best_choice; int i; - PBLOB_IT outword_it; int prev_right = -9999; int next_left; TBOX out_box; @@ -1515,17 +809,16 @@ void Tesseract::flip_hyphens(WERD_RES *word_res) { if (tessedit_lower_flip_hyphen <= 1) return; - outword_it.set_to_list(word_res->outword->blob_list()); + TBLOB* blob = word_res->rebuild_word->blobs; UNICHAR_ID unichar_dash = unicharset.unichar_to_id("-"); bool modified = false; - for (i = 0, outword_it.mark_cycle_pt(); - i < best_choice->length() && !outword_it.cycled_list(); - ++i, outword_it.forward()) { - out_box = outword_it.data()->bounding_box(); - if (outword_it.at_last()) + for (i = 0; i < best_choice->length() && blob != NULL; ++i, + blob = blob->next) { + out_box = blob->bounding_box(); + if (blob->next == NULL) next_left = 9999; else - next_left = outword_it.data_relative(1)->bounding_box().left(); + next_left = blob->next->bounding_box().left(); // Dont touch small or touching blobs - it is too dangerous. if ((out_box.width() > 8 * word_res->denorm.scale()) && (out_box.left() > prev_right) && (out_box.right() < next_left)) { @@ -1564,25 +857,26 @@ void Tesseract::flip_hyphens(WERD_RES *word_res) { } } +// Note: After running this function word_res->best_choice->blob_choices() +// might not contain the right BLOB_CHOICE coresponding to each character +// in word_res->best_choice. However, the length of blob_choices and +// word_res->best_choice will remain the same. 
void Tesseract::flip_0O(WERD_RES *word_res) { WERD_CHOICE *best_choice = word_res->best_choice; int i; - PBLOB_IT outword_it; TBOX out_box; if (!tessedit_flip_0O) return; - outword_it.set_to_list(word_res->outword->blob_list ()); - - for (i = 0, outword_it.mark_cycle_pt (); - i < best_choice->length() && !outword_it.cycled_list (); - ++i, outword_it.forward ()) { + TBLOB* blob = word_res->rebuild_word->blobs; + for (i = 0; i < best_choice->length() && blob != NULL; ++i, + blob = blob->next) { if (unicharset.get_isupper(best_choice->unichar_id(i)) || unicharset.get_isdigit(best_choice->unichar_id(i))) { - out_box = outword_it.data()->bounding_box (); - if ((out_box.top() < bln_baseline_offset + bln_x_height) || - (out_box.bottom() > bln_baseline_offset + bln_x_height / 4)) + out_box = blob->bounding_box(); + if ((out_box.top() < kBlnBaselineOffset + kBlnXHeight) || + (out_box.bottom() > kBlnBaselineOffset + kBlnXHeight / 4)) return; //Beware words with sub/superscripts } } @@ -1593,7 +887,7 @@ void Tesseract::flip_0O(WERD_RES *word_res) { return; // 0 or O are not present/enabled in unicharset } bool modified = false; - for (i = 1; i < best_choice->length(); ++i, outword_it.forward ()) { + for (i = 1; i < best_choice->length(); ++i) { if (best_choice->unichar_id(i) == unichar_0 || best_choice->unichar_id(i) == unichar_O) { /* A0A */ diff --git a/ccmain/reject.h b/ccmain/reject.h index acf14dab4f..d3aa1ec5fd 100644 --- a/ccmain/reject.h +++ b/ccmain/reject.h @@ -20,121 +20,15 @@ #ifndef REJECT_H #define REJECT_H -#include "varable.h" +#include "params.h" #include "pageres.h" #include "notdll.h" -extern INT_VAR_H (tessedit_reject_mode, 5, "Rejection algorithm"); -extern INT_VAR_H (tessedit_ok_mode, 5, "Acceptance decision algorithm"); -extern BOOL_VAR_H (tessedit_use_nn, TRUE, ""); -extern BOOL_VAR_H (tessedit_rejection_debug, FALSE, "Adaption debug"); -extern BOOL_VAR_H (tessedit_rejection_stats, FALSE, "Show NN stats"); -extern BOOL_VAR_H (tessedit_flip_0O, TRUE, 
"Contextual 0O O0 flips"); -extern double_VAR_H (tessedit_lower_flip_hyphen, 1.5, -"Aspect ratio dot/hyphen test"); -extern double_VAR_H (tessedit_upper_flip_hyphen, 1.8, -"Aspect ratio dot/hyphen test"); -extern BOOL_VAR_H (rej_trust_doc_dawg, FALSE, -"Use DOC dawg in 11l conf. detector"); -extern BOOL_VAR_H (rej_1Il_use_dict_word, FALSE, "Use dictword test"); -extern BOOL_VAR_H (rej_1Il_trust_permuter_type, TRUE, "Dont double check"); -extern BOOL_VAR_H (one_ell_conflict_default, TRUE, -"one_ell_conflict default"); -extern BOOL_VAR_H (show_char_clipping, FALSE, "Show clip image window?"); -extern BOOL_VAR_H (nn_debug, FALSE, "NN DEBUGGING?"); -extern BOOL_VAR_H (nn_reject_debug, FALSE, "NN DEBUG each char?"); -extern BOOL_VAR_H (nn_lax, FALSE, "Use 2nd rate matches"); -extern BOOL_VAR_H (nn_double_check_dict, FALSE, "Double check"); -extern BOOL_VAR_H (nn_conf_double_check_dict, TRUE, -"Double check for confusions"); -extern BOOL_VAR_H (nn_conf_1Il, TRUE, "NN use 1Il conflicts"); -extern BOOL_VAR_H (nn_conf_Ss, TRUE, "NN use Ss conflicts"); -extern BOOL_VAR_H (nn_conf_hyphen, TRUE, "NN hyphen conflicts"); -extern BOOL_VAR_H (nn_conf_test_good_qual, FALSE, "NN dodgy 1Il cross check"); -extern BOOL_VAR_H (nn_conf_test_dict, TRUE, "NN dodgy 1Il cross check"); -extern BOOL_VAR_H (nn_conf_test_sensible, TRUE, "NN dodgy 1Il cross check"); -extern BOOL_VAR_H (nn_conf_strict_on_dodgy_chs, TRUE, -"Require stronger NN match"); -extern double_VAR_H (nn_dodgy_char_threshold, 0.99, "min accept score"); -extern INT_VAR_H (nn_conf_accept_level, 4, "NN accept dodgy 1Il matches? 
"); -extern INT_VAR_H (nn_conf_initial_i_level, 3, -"NN accept initial Ii match level "); -extern BOOL_VAR_H (no_unrej_dubious_chars, TRUE, -"Dubious chars next to reject?"); -extern BOOL_VAR_H (no_unrej_no_alphanum_wds, TRUE, -"Stop unrej of non A/N wds?"); -extern BOOL_VAR_H (no_unrej_1Il, FALSE, "Stop unrej of 1Ilchars?"); -extern BOOL_VAR_H (rej_use_tess_accepted, TRUE, -"Individual rejection control"); -extern BOOL_VAR_H (rej_use_tess_blanks, TRUE, "Individual rejection control"); -extern BOOL_VAR_H (rej_use_good_perm, TRUE, "Individual rejection control"); -extern BOOL_VAR_H (rej_use_sensible_wd, FALSE, "Extend permuter check"); -extern BOOL_VAR_H (rej_alphas_in_number_perm, FALSE, "Extend permuter check"); -extern double_VAR_H (rej_whole_of_mostly_reject_word_fract, 0.85, -"if >this fract"); -extern INT_VAR_H (rej_mostly_reject_mode, 1, -"0-never, 1-afterNN, 2-after new xht"); -extern double_VAR_H (tessed_fullstop_aspect_ratio, 1.2, -"if >this fract then reject"); -extern INT_VAR_H (net_image_width, 40, "NN input image width"); -extern INT_VAR_H (net_image_height, 36, "NN input image height"); -extern INT_VAR_H (net_image_x_height, 22, "NN input image x_height"); -extern INT_VAR_H (tessedit_image_border, 2, "Rej blbs near image edge limit"); -extern INT_VAR_H (net_bl_nodes, 20, "Number of baseline nodes"); -extern double_VAR_H (nn_reject_threshold, 0.5, "NN min accept score"); -extern double_VAR_H (nn_reject_head_and_shoulders, 0.6, -"top scores sep factor"); -extern STRING_VAR_H (ok_single_ch_non_alphanum_wds, "-?\075", -"Allow NN to unrej"); -extern STRING_VAR_H (ok_repeated_ch_non_alphanum_wds, "-?*\075", -"Allow NN to unrej"); -extern STRING_VAR_H (conflict_set_I_l_1, "Il1[]", "Il1 conflict set"); -extern STRING_VAR_H (conflict_set_S_s, "Ss$", "Ss conflict set"); -extern STRING_VAR_H (conflict_set_hyphen, "-_~", "hyphen conflict set"); -extern STRING_VAR_H (dubious_chars_left_of_reject, "!'+`()-./\\<>;:^_,~\"", -"Unreliable chars"); -extern STRING_VAR_H 
(dubious_chars_right_of_reject, "!'+`()-./\\<>;:^_,~\"", -"Unreliable chars"); -extern INT_VAR_H (min_sane_x_ht_pixels, 8, -"Reject any x-ht lt or eq than this"); void reject_blanks(WERD_RES *word); -void reject_I_1_L(WERD_RES *word); - //detailed results void reject_poor_matches(WERD_RES *word, BLOB_CHOICE_LIST_CLIST *blob_choices); -float compute_reject_threshold( //compute threshold //detailed results - BLOB_CHOICE_LIST_CLIST *blob_choices); -int sort_floats( //qsort function - const void *arg1, //ptrs to floats - const void *arg2); -void reject_edge_blobs(WERD_RES *word); -BOOL8 word_contains_non_1_digit(const char *word, - const char *word_lengths); - //of character -inT16 nn_match_char(IMAGE &scaled_image, - float baseline_pos, //rel to scaled_image - BOOL8 dict_word, //part of dict wd? - BOOL8 checked_dict_word, //part of dict wd? - BOOL8 sensible_word, //part acceptable str? - BOOL8 centre, //not at word ends? - BOOL8 good_quality_word, //initial segmentation - char tess_ch //confirm this? - ); -inT16 evaluate_net_match(char top, - float top_score, - char next, - float next_score, - char tess_ch, - BOOL8 dict_word, - BOOL8 checked_dict_word, - BOOL8 sensible_word, - BOOL8 centre, - BOOL8 good_quality_word); -void dont_allow_dubious_chars(WERD_RES *word); - +float compute_reject_threshold(BLOB_CHOICE_LIST_CLIST *blob_choices); +BOOL8 word_contains_non_1_digit(const char *word, const char *word_lengths); void dont_allow_1Il(WERD_RES *word); - -void reject_mostly_rejects( //rej all if most rejectd - WERD_RES *word); void flip_hyphens(WERD_RES *word); void flip_0O(WERD_RES *word); BOOL8 non_0_digit(const char* str, int length); diff --git a/ccmain/tessbox.cpp b/ccmain/tessbox.cpp index b2952f4d9c..9c13da4c80 100644 --- a/ccmain/tessbox.cpp +++ b/ccmain/tessbox.cpp @@ -34,43 +34,31 @@ * @name tess_segment_pass1 * * Segment a word using the pass1 conditions of the tess segmenter. 
- * @param word bln word to do - * @param denorm de-normaliser - * @param matcher matcher function - * @param raw_choice raw result + * @param word word to do * @param blob_choices list of blob lists - * @param outword bln word output */ namespace tesseract { -WERD_CHOICE *Tesseract::tess_segment_pass1(WERD *word, - DENORM *denorm, - POLY_MATCHER matcher, - WERD_CHOICE *&raw_choice, - BLOB_CHOICE_LIST_CLIST *blob_choices, - WERD *&outword) { - WERD_CHOICE *result; //return value +void Tesseract::tess_segment_pass1(WERD_RES *word, + BLOB_CHOICE_LIST_CLIST *blob_choices) { int saved_enable_assoc = 0; int saved_chop_enable = 0; - if (word->flag (W_DONT_CHOP)) { + if (word->word->flag(W_DONT_CHOP)) { saved_enable_assoc = wordrec_enable_assoc; saved_chop_enable = chop_enable; wordrec_enable_assoc.set_value(0); chop_enable.set_value(0); - if (word->flag (W_REP_CHAR)) - permute_only_top = 1; + if (word->word->flag(W_REP_CHAR)) + getDict().permute_only_top.set_value(true); } set_pass1(); - // tprintf("pass1 chop on=%d, seg=%d, onlytop=%d",chop_enable,enable_assoc,permute_only_top); - result = recog_word (word, denorm, matcher, NULL, NULL, FALSE, - raw_choice, blob_choices, outword); - if (word->flag (W_DONT_CHOP)) { + recog_word(word, blob_choices); + if (word->word->flag(W_DONT_CHOP)) { wordrec_enable_assoc.set_value(saved_enable_assoc); chop_enable.set_value(saved_chop_enable); - permute_only_top = 0; + getDict().permute_only_top.set_value(false); } - return result; } @@ -78,101 +66,32 @@ WERD_CHOICE *Tesseract::tess_segment_pass1(WERD *word, * @name tess_segment_pass2 * * Segment a word using the pass2 conditions of the tess segmenter. 
- * @param word bln word to do - * @param denorm de-normaliser - * @param matcher matcher function - * @param raw_choice raw result + * @param word word to do * @param blob_choices list of blob lists - * @param outword bln word output */ -WERD_CHOICE *Tesseract::tess_segment_pass2(WERD *word, - DENORM *denorm, - POLY_MATCHER matcher, - WERD_CHOICE *&raw_choice, - BLOB_CHOICE_LIST_CLIST *blob_choices, - WERD *&outword) { - WERD_CHOICE *result; //return value +void Tesseract::tess_segment_pass2(WERD_RES *word, + BLOB_CHOICE_LIST_CLIST *blob_choices) { int saved_enable_assoc = 0; int saved_chop_enable = 0; - if (word->flag (W_DONT_CHOP)) { + if (word->word->flag(W_DONT_CHOP)) { saved_enable_assoc = wordrec_enable_assoc; saved_chop_enable = chop_enable; wordrec_enable_assoc.set_value(0); chop_enable.set_value(0); - if (word->flag (W_REP_CHAR)) - permute_only_top = 1; + if (word->word->flag(W_REP_CHAR)) + getDict().permute_only_top.set_value(true); } set_pass2(); - result = recog_word (word, denorm, matcher, NULL, NULL, FALSE, - raw_choice, blob_choices, outword); - if (word->flag (W_DONT_CHOP)) { + recog_word(word, blob_choices); + if (word->word->flag(W_DONT_CHOP)) { wordrec_enable_assoc.set_value(saved_enable_assoc); chop_enable.set_value(saved_chop_enable); - permute_only_top = 0; + getDict().permute_only_top.set_value(false); } - return result; } - -/** - * @name correct_segment_pass2 - * - * Segment a word correctly using the pass2 conditions of the tess segmenter. - * Then call the tester with all the correctly segmented blobs. - * If the correct segmentation cannot be found, the tester is called - * with the segmentation found by tess and all the correct flags set to - * false and all strings are NULL. 
- * @param word bln word to do - * @param denorm de-normaliser - * @param matcher matcher function - * @param tester tester function - * @param raw_choice raw result - * @param blob_choices list of blob lists - * @param outword bln word output - */ - -WERD_CHOICE *Tesseract::correct_segment_pass2(WERD *word, - DENORM *denorm, - POLY_MATCHER matcher, - POLY_TESTER tester, - WERD_CHOICE *&raw_choice, - BLOB_CHOICE_LIST_CLIST *blob_choices, - WERD *&outword) { - set_pass2(); - return recog_word (word, denorm, matcher, NULL, tester, TRUE, - raw_choice, blob_choices, outword); -} - - -/** - * @name test_segment_pass2 - * - * Segment a word correctly using the pass2 conditions of the tess segmenter. - * Then call the tester on all words used by tess in its search. - * Do this only on words where the correct segmentation could be found. - * @param word bln word to do - * @param denorm de-normaliser - * @param matcher matcher function - * @param tester tester function - * @param raw_choice raw result - * @param blob_choices list of blob lists - * @param outword bln word output - */ -WERD_CHOICE *Tesseract::test_segment_pass2(WERD *word, - DENORM *denorm, - POLY_MATCHER matcher, - POLY_TESTER tester, - WERD_CHOICE *&raw_choice, - BLOB_CHOICE_LIST_CLIST *blob_choices, - WERD *&outword) { - set_pass2(); - return recog_word (word, denorm, matcher, tester, NULL, TRUE, - raw_choice, blob_choices, outword); -} - - /** * @name tess_acceptable_word * @@ -180,202 +99,10 @@ WERD_CHOICE *Tesseract::test_segment_pass2(WERD *word, * @param word_choice after context * @param raw_choice before context */ -BOOL8 Tesseract::tess_acceptable_word(WERD_CHOICE *word_choice, - WERD_CHOICE *raw_choice) { - return getDict().AcceptableResult(*word_choice, *raw_choice); -} - - -/** - * @name tess_adaptable_word - * - * @return true if the word is regarded as "good enough". 
- * @param word word to test - * @param best_choice after context - * @param raw_choice before context - */ -BOOL8 Tesseract::tess_adaptable_word(WERD *word, - WERD_CHOICE *best_choice, - WERD_CHOICE *raw_choice) { - TWERD *tessword = make_tess_word(word, NULL); - int result = (tessword && best_choice && raw_choice && - AdaptableWord(tessword, *best_choice, *raw_choice)); - delete_word(tessword); - return result != 0; -} - - -/** - * @name tess_cn_matcher - * - * Match a blob using the Tess Char Normalized (non-adaptive) matcher - * only. - * @param pblob previous blob - * @param blob blob to match - * @param nblob next blob - * @param word word it came from - * @param denorm de-normaliser - * @param[out] ratings list of results - * @param[out] cpresults may be null - */ - -void Tesseract::tess_cn_matcher(PBLOB *pblob, - PBLOB *blob, - PBLOB *nblob, - WERD *word, - DENORM *denorm, - BLOB_CHOICE_LIST *ratings, - CLASS_PRUNER_RESULTS cpresults) { - TBLOB *tessblob; //converted blob - TEXTROW tessrow; //dummy row - - tess_cn_matching.set_value(true); //turn it on - tess_bn_matching.set_value(false); - //convert blob - tessblob = make_rotated_tess_blob(denorm, blob, true); - //make dummy row - make_tess_row(denorm, &tessrow); - //classify - AdaptiveClassifier(tessblob, NULL, &tessrow, ratings, cpresults); - free_blob(tessblob); -} - - -/** - * @name tess_bn_matcher - * - * Match a blob using the Tess Baseline Normalized (adaptive) matcher - * only. 
- * @param pblob previous blob - * @param blob blob to match - * @param nblob next blob - * @param word word it came from - * @param denorm de-normaliser - * @param[out] ratings list of results - */ - -void Tesseract::tess_bn_matcher(PBLOB *pblob, - PBLOB *blob, - PBLOB *nblob, - WERD *word, - DENORM *denorm, - BLOB_CHOICE_LIST *ratings) { - TBLOB *tessblob; //converted blob - TEXTROW tessrow; //dummy row - - tess_bn_matching.set_value(true); //turn it on - tess_cn_matching.set_value(false); - //convert blob - tessblob = make_rotated_tess_blob(denorm, blob, true); - //make dummy row - make_tess_row(denorm, &tessrow); - //classify - AdaptiveClassifier(tessblob, NULL, &tessrow, ratings, NULL); - free_blob(tessblob); -} - - -/** - * @name tess_default_matcher - * - * Match a blob using the default functionality of the Tess matcher. - * @param pblob previous blob - * @param blob blob to match - * @param nblob next blob - * @param word word it came from - * @param denorm de-normaliser - * @param[out] ratings list of results - * @param script (unused) - */ - -void Tesseract::tess_default_matcher(PBLOB *pblob, - PBLOB *blob, - PBLOB *nblob, - WERD *word, - DENORM *denorm, - BLOB_CHOICE_LIST *ratings, - const char* script) { - assert(ratings != NULL); - TBLOB *tessblob; //converted blob - TEXTROW tessrow; //dummy row - - tess_bn_matching.set_value(false); //turn it off - tess_cn_matching.set_value(false); - //convert blob - tessblob = make_rotated_tess_blob(denorm, blob, true); - //make dummy row - make_tess_row(denorm, &tessrow); - //classify - AdaptiveClassifier (tessblob, NULL, &tessrow, ratings, NULL); - free_blob(tessblob); -} -} // namespace tesseract - - -/** - * @name tess_training_tester - * - * Matcher tester function which actually trains tess. 
- * @param filename filename to output - * @param blob blob to match - * @param denorm de-normaliser - * @param correct ly segmented - * @param text correct text - * @param count chars in text - * @param[out] ratings list of results - */ - -void tess_training_tester(const STRING& filename, - PBLOB *blob, - DENORM *denorm, - BOOL8 correct, - char *text, - inT32 count, - BLOB_CHOICE_LIST *ratings) { - TBLOB *tessblob; //converted blob - TEXTROW tessrow; //dummy row - - if (correct) { - classify_norm_method.set_value(character); // force char norm spc 30/11/93 - tess_bn_matching.set_value(false); //turn it off - tess_cn_matching.set_value(false); - //convert blob - tessblob = make_tess_blob (blob, TRUE); - //make dummy row - make_tess_row(denorm, &tessrow); - //learn it - LearnBlob(filename, tessblob, &tessrow, text); - free_blob(tessblob); - } -} - - -namespace tesseract { -/** - * @name tess_adapter - * - * Adapt to the word using the Tesseract mechanism. - * @param word bln word - * @param denorm de-normalise - * @param choice string for word - * @param raw_choice before context - * @param rejmap reject map - */ -void Tesseract::tess_adapter(WERD *word, - DENORM *denorm, - const WERD_CHOICE& choice, - const WERD_CHOICE& raw_choice, - const char *rejmap) { - TWERD *tessword; //converted word - static TEXTROW tessrow; //dummy row - - //make dummy row - make_tess_row(denorm, &tessrow); - //make a word - tessword = make_tess_word (word, &tessrow); - AdaptToWord(tessword, &tessrow, choice, raw_choice, rejmap); - //adapt to it - delete_word(tessword); //free it +BOOL8 Tesseract::tess_acceptable_word( + WERD_CHOICE *word_choice, // after context + WERD_CHOICE *raw_choice) { // before context + return getDict().AcceptableResult(*word_choice); } diff --git a/ccmain/tessbox.h b/ccmain/tessbox.h index c755c31c72..b031007d77 100644 --- a/ccmain/tessbox.h +++ b/ccmain/tessbox.h @@ -24,13 +24,6 @@ #include "notdll.h" #include "tesseractclass.h" -void tess_training_tester( - 
const STRING& filename, - PBLOB *blob, - DENORM *denorm, - BOOL8 correct, - char *text, - inT32 count, - BLOB_CHOICE_LIST *ratings - ); +// TODO(ocr-team): Delete this along with other empty header files. + #endif diff --git a/ccmain/tessedit.cpp b/ccmain/tessedit.cpp index 258d3e94db..d2db3a8d2f 100644 --- a/ccmain/tessedit.cpp +++ b/ccmain/tessedit.cpp @@ -34,47 +34,28 @@ #include "reject.h" #include "pageres.h" //#include "gpapdest.h" -#include "mainblk.h" #include "nwmain.h" #include "pgedit.h" -#include "ocrshell.h" #include "tprintf.h" //#include "ipeerr.h" //#include "restart.h" #include "tessedit.h" //#include "fontfind.h" #include "permute.h" -#include "permdawg.h" #include "stopper.h" -#include "adaptmatch.h" #include "intmatcher.h" #include "chop.h" #include "efio.h" #include "danerror.h" #include "globals.h" #include "tesseractclass.h" -#include "varable.h" - -/* -** Include automatically generated configuration file if running autoconf -*/ -#ifdef HAVE_CONFIG_H -#include "config_auto.h" -#endif -// Includes libtiff if HAVE_LIBTIFF is defined -#ifdef HAVE_LIBTIFF -#include "tiffio.h" - -#endif +#include "params.h" #include "notdll.h" //phils nn stuff #define VARDIR "configs/" /*variables files */ //config under api #define API_CONFIG "configs/api_config" -#define EXTERN - -EXTERN BOOL_EVAR (tessedit_write_vars, FALSE, "Write all vars to file"); ETEXT_DESC *global_monitor = NULL; // progress monitor @@ -83,7 +64,7 @@ namespace tesseract { // Read a "config" file containing a set of variable, value pairs. // Searches the standard places: tessdata/configs, tessdata/tessconfigs // and also accepts a relative or absolute path name. 
-void Tesseract::read_config_file(const char *filename, bool global_only) { +void Tesseract::read_config_file(const char *filename, bool init_only) { STRING path = datadir; path += "configs/"; path += filename; @@ -100,33 +81,25 @@ void Tesseract::read_config_file(const char *filename, bool global_only) { path = filename; } } - read_variables_file(path.string(), global_only); + ParamUtils::ReadParamsFile(path.string(), init_only, this->params()); } // Returns false if a unicharset file for the specified language was not found // or was invalid. // This function initializes TessdataManager. After TessdataManager is // no longer needed, TessdataManager::End() should be called. +// +// This function sets tessedit_oem_mode to the given OcrEngineMode oem, unless +// it is OEM_DEFAULT, in which case the value of the variable will be obtained +// from the language-specific config file (stored in [lang].traineddata), from +// the config files specified on the command line or left as the default +// OEM_TESSERACT_ONLY if none of the configs specify this variable. bool Tesseract::init_tesseract_lang_data( const char *arg0, const char *textbase, const char *language, - char **configs, int configs_size, bool configs_global_only) { - FILE *var_file; - static char c_path[MAX_PATH]; //path for c code - + OcrEngineMode oem, char **configs, int configs_size, + bool configs_init_only) { // Set the basename, compute the data directory. main_setup(arg0, textbase); - debug_window_on.set_value (FALSE); - - if (tessedit_write_vars) { - var_file = fopen ("edited.cfg", "w"); - if (var_file != NULL) { - print_variables(var_file); - fclose(var_file); - } - } - strcpy (c_path, datadir.string()); - c_path[strlen (c_path) - strlen (m_data_sub_dir.string ())] = '\0'; - demodir = c_path; // Set the language data path prefix lang = language != NULL ? 
language : "eng"; @@ -134,25 +107,51 @@ bool Tesseract::init_tesseract_lang_data( language_data_path_prefix += lang; language_data_path_prefix += "."; - // Load tesseract variables from config files. - for (int i = 0; i < configs_size; ++i) { - read_config_file(configs[i], configs_global_only); - } - // Initialize TessdataManager. STRING tessdata_path = language_data_path_prefix + kTrainedDataSuffix; - tessdata_manager.Init(tessdata_path.string()); + tessdata_manager.Init(tessdata_path.string(), + tessdata_manager_debug_level); // If a language specific config file (lang.config) exists, load it in. if (tessdata_manager.SeekToStart(TESSDATA_LANG_CONFIG)) { - read_variables_from_fp(tessdata_manager.GetDataFilePtr(), - tessdata_manager.GetEndOffset(TESSDATA_LANG_CONFIG), - false); - if (global_tessdata_manager_debug_level) { + ParamUtils::ReadParamsFromFp( + tessdata_manager.GetDataFilePtr(), + tessdata_manager.GetEndOffset(TESSDATA_LANG_CONFIG), + false, this->params()); + if (tessdata_manager_debug_level) { tprintf("Loaded language config file\n"); } } + // Load tesseract variables from config files. This is done after loading + // language-specific variables from [lang].traineddata file, so that custom + // config files can override values in [lang].traineddata file. + for (int i = 0; i < configs_size; ++i) { + read_config_file(configs[i], configs_init_only); + } + + if (((STRING &)tessedit_write_params_to_file).length() > 0) { + FILE *params_file = fopen(tessedit_write_params_to_file.string(), "w"); + if (params_file != NULL) { + ParamUtils::PrintParams(params_file, this->params()); + fclose(params_file); + if (tessdata_manager_debug_level > 0) { + tprintf("Wrote parameters to %s\n", + tessedit_write_params_to_file.string()); + } + } else { + tprintf("Failed to open %s for writing params.\n", + tessedit_write_params_to_file.string()); + } + } + + // Determine which ocr engine(s) should be loaded and used for recognition. 
+ if (oem != OEM_DEFAULT) tessedit_ocr_engine_mode.set_value(oem); + if (tessdata_manager_debug_level) { + tprintf("Loading Tesseract/Cube with tessedit_ocr_engine_mode %d\n", + static_cast(tessedit_ocr_engine_mode)); + } + // Load the unicharset if (!tessdata_manager.SeekToStart(TESSDATA_UNICHARSET) || !unicharset.load_from_file(tessdata_manager.GetDataFilePtr())) { @@ -162,51 +161,63 @@ bool Tesseract::init_tesseract_lang_data( tprintf("Error: Size of unicharset is greater than MAX_NUM_CLASSES\n"); return false; } - if (global_tessdata_manager_debug_level) tprintf("Loaded unicharset\n"); + right_to_left_ = unicharset.any_right_to_left(); + if (tessdata_manager_debug_level) tprintf("Loaded unicharset\n"); - if (!global_tessedit_ambigs_training && + if (!tessedit_ambigs_training && tessdata_manager.SeekToStart(TESSDATA_AMBIGS)) { unichar_ambigs.LoadUnicharAmbigs( tessdata_manager.GetDataFilePtr(), tessdata_manager.GetEndOffset(TESSDATA_AMBIGS), - &unicharset); - if (global_tessdata_manager_debug_level) tprintf("Loaded ambigs\n"); + ambigs_debug_level, use_ambigs_for_adaption, &unicharset); + if (tessdata_manager_debug_level) tprintf("Loaded ambigs\n"); } + + // Load Cube objects if necessary. 
+ if (tessedit_ocr_engine_mode == OEM_CUBE_ONLY) { + ASSERT_HOST(init_cube_objects(false, &tessdata_manager)); + if (tessdata_manager_debug_level) + tprintf("Loaded Cube w/out combiner\n"); + } else if (tessedit_ocr_engine_mode == OEM_TESSERACT_CUBE_COMBINED) { + ASSERT_HOST(init_cube_objects(true, &tessdata_manager)); + if (tessdata_manager_debug_level) + tprintf("Loaded Cube with combiner\n"); + } + return true; } int Tesseract::init_tesseract( const char *arg0, const char *textbase, const char *language, - char **configs, int configs_size, bool configs_global_only) { - if (!init_tesseract_lang_data(arg0, textbase, language, configs, - configs_size, configs_global_only)) { + OcrEngineMode oem, char **configs, int configs_size, + bool configs_init_only) { + if (!init_tesseract_lang_data(arg0, textbase, language, oem, configs, + configs_size, configs_init_only)) { return -1; } - start_recog(textbase); + // If only Cube will be used, skip loading Tesseract classifier's + // pre-trained templates. + bool init_tesseract_classifier = + (tessedit_ocr_engine_mode == OEM_TESSERACT_ONLY || + tessedit_ocr_engine_mode == OEM_TESSERACT_CUBE_COMBINED); + // If only Cube will be used and if it has its own Unicharset, + // skip initializing permuter and loading Tesseract Dawgs. + bool init_dict = + !(tessedit_ocr_engine_mode == OEM_CUBE_ONLY && + tessdata_manager.SeekToStart(TESSDATA_CUBE_UNICHARSET)); + program_editup(textbase, init_tesseract_classifier, init_dict); tessdata_manager.End(); return 0; //Normal exit } -// Init everything except the language model -int Tesseract::init_tesseract_classifier( - const char *arg0, const char *textbase, const char *language, - char **configs, int configs_size, bool configs_global_only) { - if (!init_tesseract_lang_data (arg0, textbase, language, configs, - configs_size, configs_global_only)) { - return -1; - } - // Dont initialize the permuter. 
- program_editup(textbase, false); - tessdata_manager.End(); - return 0; -} - // init the LM component int Tesseract::init_tesseract_lm(const char *arg0, const char *textbase, const char *language) { - init_tesseract_lang_data(arg0, textbase, language, NULL, 0, false); - getDict().init_permute(); + if (!init_tesseract_lang_data(arg0, textbase, language, + OEM_TESSERACT_ONLY, NULL, 0, false)) + return -1; + getDict().Load(); tessdata_manager.End(); return 0; } @@ -226,42 +237,3 @@ enum CMD_EVENTS }; } // namespace tesseract - -#ifdef _TIFFIO_ -void read_tiff_image(TIFF* tif, IMAGE* image) { - tdata_t buf; - uint32 image_width, image_height; - uint16 photometric; - inT16 bpp; - inT16 samples_per_pixel = 0; - TIFFGetField(tif, TIFFTAG_IMAGEWIDTH, &image_width); - TIFFGetField(tif, TIFFTAG_IMAGELENGTH, &image_height); - if (!TIFFGetField(tif, TIFFTAG_BITSPERSAMPLE, &bpp)) - bpp = 1; // Binary is default if no value provided. - TIFFGetField(tif, TIFFTAG_SAMPLESPERPIXEL, &samples_per_pixel); - TIFFGetField(tif, TIFFTAG_PHOTOMETRIC, &photometric); - if (samples_per_pixel > 1) - bpp *= samples_per_pixel; - // Tesseract's internal representation is 0-is-black, - // so if the photometric is 1 (min is black) then high-valued pixels - // are 1 (white), otherwise they are 0 (black). - uinT8 high_value = photometric == 1; - image->create(image_width, image_height, bpp); - IMAGELINE line; - line.init(image_width); - - buf = _TIFFmalloc(TIFFScanlineSize(tif)); - int bytes_per_line = (image_width*bpp + 7)/8; - uinT8* dest_buf = image->get_buffer(); - // This will go badly wrong with one of the more exotic tiff formats, - // but the majority will work OK. 
- for (int y = 0; y < image_height; ++y) { - TIFFReadScanline(tif, buf, y); - memcpy(dest_buf, buf, bytes_per_line); - dest_buf += bytes_per_line; - } - if (high_value == 0) - invert_image(image); - _TIFFfree(buf); -} -#endif diff --git a/ccmain/tessedit.h b/ccmain/tessedit.h index affbdc159c..e19e5ddee6 100644 --- a/ccmain/tessedit.h +++ b/ccmain/tessedit.h @@ -20,9 +20,8 @@ #ifndef TESSEDIT_H #define TESSEDIT_H -#include "tessclas.h" -#include "ocrclass.h" -#include "pgedit.h" +#include "blobs.h" +#include "pgedit.h" #include "notdll.h" //progress monitor diff --git a/ccmain/tesseract_cube_combiner.cpp b/ccmain/tesseract_cube_combiner.cpp new file mode 100644 index 0000000000..87e68a17a4 --- /dev/null +++ b/ccmain/tesseract_cube_combiner.cpp @@ -0,0 +1,308 @@ +/********************************************************************** + * File: tesseract_cube_combiner.h + * Description: Declaration of the Tesseract & Cube results combiner Class + * Author: Ahmad Abdulkader + * Created: 2008 + * + * (C) Copyright 2008, Google Inc. + ** Licensed under the Apache License, Version 2.0 (the "License"); + ** you may not use this file except in compliance with the License. + ** You may obtain a copy of the License at + ** http://www.apache.org/licenses/LICENSE-2.0 + ** Unless required by applicable law or agreed to in writing, software + ** distributed under the License is distributed on an "AS IS" BASIS, + ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + ** See the License for the specific language governing permissions and + ** limitations under the License. 
+ * + **********************************************************************/ + +// The TesseractCubeCombiner class provides the functionality of combining +// the recognition results of Tesseract and Cube at the word level + +#include +#include +#include +#include + +#include "tesseract_cube_combiner.h" + +#include "cube_object.h" +#include "cube_reco_context.h" +#include "cube_utils.h" +#include "neural_net.h" +#include "tesseractclass.h" +#include "word_altlist.h" + +namespace tesseract { + +TesseractCubeCombiner::TesseractCubeCombiner(CubeRecoContext *cube_cntxt) { + cube_cntxt_ = cube_cntxt; + combiner_net_ = NULL; +} + +TesseractCubeCombiner::~TesseractCubeCombiner() { + if (combiner_net_ != NULL) { + delete combiner_net_; + combiner_net_ = NULL; + } +} + +bool TesseractCubeCombiner::LoadCombinerNet() { + ASSERT_HOST(cube_cntxt_); + // Compute the path of the combiner net + string data_path; + cube_cntxt_->GetDataFilePath(&data_path); + string net_file_name = data_path + cube_cntxt_->Lang() + + ".tesseract_cube.nn"; + + // Return false if file does not exist + FILE *fp = fopen(net_file_name.c_str(), "r"); + if (fp == NULL) + return false; + else + fclose(fp); + + // Load and validate net + combiner_net_ = NeuralNet::FromFile(net_file_name); + if (combiner_net_ == NULL) { + tprintf("Could not read combiner net file %s", net_file_name.c_str()); + return false; + } else if (combiner_net_->out_cnt() != 2) { + tprintf("Invalid combiner net file %s! Output count != 2\n", + net_file_name.c_str()); + delete combiner_net_; + combiner_net_ = NULL; + return false; + } + return true; +} + +// Normalize a UTF-8 string. 
Converts the UTF-8 string to UTF32 and optionally +// strips punc and/or normalizes case and then converts back +string TesseractCubeCombiner::NormalizeString(const string &str, + bool remove_punc, + bool norm_case) { + // convert to UTF32 + string_32 str32; + CubeUtils::UTF8ToUTF32(str.c_str(), &str32); + // strip punc and normalize + string_32 new_str32; + for (int idx = 0; idx < str32.length(); idx++) { + // if no punc removal is required or not a punctuation character + if (!remove_punc || iswpunct(str32[idx]) == 0) { + char_32 norm_char = str32[idx]; + // normalize case if required + if (norm_case && iswalpha(norm_char)) { + norm_char = towlower(norm_char); + } + new_str32.push_back(norm_char); + } + } + // convert back to UTF8 + string new_str; + CubeUtils::UTF32ToUTF8(new_str32.c_str(), &new_str); + return new_str; +} + +// Compares 2 strings optionally ignoring punctuation +int TesseractCubeCombiner::CompareStrings(const string &str1, + const string &str2, + bool ignore_punc, + bool ignore_case) { + if (!ignore_punc && !ignore_case) { + return str1.compare(str2); + } + string norm_str1 = NormalizeString(str1, ignore_punc, ignore_case); + string norm_str2 = NormalizeString(str2, ignore_punc, ignore_case); + return norm_str1.compare(norm_str2); +} + +// Check if a string is a valid Tess dict word or not +bool TesseractCubeCombiner::ValidWord(const string &str) { + return (cube_cntxt_->TesseractObject()->getDict().valid_word(str.c_str()) + > 0); +} + +// Public method for computing the combiner features. The agreement +// output parameter will be true if both answers are identical, +// and false otherwise. 
+bool TesseractCubeCombiner::ComputeCombinerFeatures(const string &tess_str, + int tess_confidence, + CubeObject *cube_obj, + WordAltList *cube_alt_list, + vector *features, + bool *agreement) { + features->clear(); + *agreement = false; + if (cube_alt_list == NULL || cube_alt_list->AltCount() <= 0) + return false; + + // Get Cube's best string; return false if empty + char_32 *cube_best_str32 = cube_alt_list->Alt(0); + if (cube_best_str32 == NULL || CubeUtils::StrLen(cube_best_str32) < 1) + return false; + string cube_best_str; + int cube_best_cost = cube_alt_list->AltCost(0); + int cube_best_bigram_cost = 0; + bool cube_best_bigram_cost_valid = true; + if (cube_cntxt_->Bigrams()) + cube_best_bigram_cost = cube_cntxt_->Bigrams()-> + Cost(cube_best_str32, cube_cntxt_->CharacterSet(), + &cube_cntxt_->TesseractObject()->unicharset); + else + cube_best_bigram_cost_valid = false; + CubeUtils::UTF32ToUTF8(cube_best_str32, &cube_best_str); + + // Get Tesseract's UTF32 string + string_32 tess_str32; + CubeUtils::UTF8ToUTF32(tess_str.c_str(), &tess_str32); + + // Compute agreement flag + *agreement = (tess_str.compare(cube_best_str) == 0); + + // Get Cube's second best string; if empty, return false + char_32 *cube_next_best_str32; + string cube_next_best_str; + int cube_next_best_cost = WORST_COST; + if (cube_alt_list->AltCount() > 1) { + cube_next_best_str32 = cube_alt_list->Alt(1); + if (cube_next_best_str32 == NULL || + CubeUtils::StrLen(cube_next_best_str32) == 0) { + return false; + } + cube_next_best_cost = cube_alt_list->AltCost(1); + CubeUtils::UTF32ToUTF8(cube_next_best_str32, &cube_next_best_str); + } + // Rank of Tesseract's top result in Cube's alternate list + int tess_rank = 0; + for (tess_rank = 0; tess_rank < cube_alt_list->AltCount(); tess_rank++) { + string alt_str; + CubeUtils::UTF32ToUTF8(cube_alt_list->Alt(tess_rank), &alt_str); + if (alt_str == tess_str) + break; + } + + // Cube's cost for tesseract's result. 
Note that this modifies the + // state of cube_obj, including its alternate list by calling RecognizeWord() + int tess_cost = cube_obj->WordCost(tess_str.c_str()); + // Cube's bigram cost of Tesseract's string + int tess_bigram_cost = 0; + int tess_bigram_cost_valid = true; + if (cube_cntxt_->Bigrams()) + tess_bigram_cost = cube_cntxt_->Bigrams()-> + Cost(tess_str32.c_str(), cube_cntxt_->CharacterSet(), + &cube_cntxt_->TesseractObject()->unicharset); + else + tess_bigram_cost_valid = false; + + // Tesseract confidence + features->push_back(tess_confidence); + // Cube cost of Tesseract string + features->push_back(tess_cost); + // Cube Rank of Tesseract string + features->push_back(tess_rank); + // length of Tesseract OCR string + features->push_back(tess_str.length()); + // Tesseract OCR string in dictionary + features->push_back(ValidWord(tess_str)); + if (tess_bigram_cost_valid) { + // bigram cost of Tesseract string + features->push_back(tess_bigram_cost); + } + // Cube tess_cost of Cube best string + features->push_back(cube_best_cost); + // Cube tess_cost of Cube next best string + features->push_back(cube_next_best_cost); + // length of Cube string + features->push_back(cube_best_str.length()); + // Cube string in dictionary + features->push_back(ValidWord(cube_best_str)); + if (cube_best_bigram_cost_valid) { + // bigram cost of Cube string + features->push_back(cube_best_bigram_cost); + } + // case-insensitive string comparison, including punctuation + int compare_nocase_punc = CompareStrings(cube_best_str.c_str(), + tess_str.c_str(), false, true); + features->push_back(compare_nocase_punc == 0); + // case-sensitive string comparison, ignoring punctuation + int compare_case_nopunc = CompareStrings(cube_best_str.c_str(), + tess_str.c_str(), true, false); + features->push_back(compare_case_nopunc == 0); + // case-insensitive string comparison, ignoring punctuation + int compare_nocase_nopunc = CompareStrings(cube_best_str.c_str(), + tess_str.c_str(), true, 
true); + features->push_back(compare_nocase_nopunc == 0); + return true; +} + +// The CubeObject parameter is used for 2 purposes: 1) to retrieve +// cube's alt list, and 2) to compute cube's word cost for the +// tesseract result. The call to CubeObject::WordCost() modifies +// the object's alternate list, so previous state will be lost. +float TesseractCubeCombiner::CombineResults(WERD_RES *tess_res, + CubeObject *cube_obj) { + // If no combiner is loaded or the cube object is undefined, + // tesseract wins with probability 1.0 + if (combiner_net_ == NULL || cube_obj == NULL) { + tprintf("Cube WARNING (TesseractCubeCombiner::CombineResults): " + "Cube objects not initialized; defaulting to Tesseract\n"); + return 1.0; + } + + // Retrieve the alternate list from the CubeObject's current state. + // If the alt list empty, tesseract wins with probability 1.0 + WordAltList *cube_alt_list = cube_obj->AlternateList(); + if (cube_alt_list == NULL) + cube_alt_list = cube_obj->RecognizeWord(); + if (cube_alt_list == NULL || cube_alt_list->AltCount() <= 0) { + tprintf("Cube WARNING (TesseractCubeCombiner::CombineResults): " + "Cube returned no results; defaulting to Tesseract\n"); + return 1.0; + } + return CombineResults(tess_res, cube_obj, cube_alt_list); +} + +// The alt_list parameter is expected to have been extracted from the +// CubeObject that recognized the word to be combined. The cube_obj +// parameter passed may be either same instance or a separate instance to +// be used only by the combiner. In both cases, its alternate +// list will be modified by an internal call to RecognizeWord(). 
+float TesseractCubeCombiner::CombineResults(WERD_RES *tess_res, + CubeObject *cube_obj, + WordAltList *cube_alt_list) { + // If no combiner is loaded or the cube object is undefined, or the + // alt list is empty, tesseract wins with probability 1.0 + if (combiner_net_ == NULL || cube_obj == NULL || + cube_alt_list == NULL || cube_alt_list->AltCount() <= 0) { + tprintf("Cube WARNING (TesseractCubeCombiner::CombineResults): " + "Cube result cannot be retrieved; defaulting to Tesseract\n"); + return 1.0; + } + + // Tesseract result string, tesseract confidence, and cost of + // tesseract result according to cube + string tess_str = tess_res->best_choice->unichar_string().string(); + // Map certainty [-20.0, 0.0] to confidence [0, 100] + int tess_confidence = MIN(100, MAX(1, static_cast( + 100 + (5 * tess_res->best_choice->certainty())))); + + // Compute the combiner features. If feature computation fails or + // answers are identical, tesseract wins with probability 1.0 + vector features; + bool agreement; + bool combiner_success = ComputeCombinerFeatures(tess_str, tess_confidence, + cube_obj, cube_alt_list, + &features, &agreement); + if (!combiner_success || agreement) + return 1.0; + + // Classify combiner feature vector and return output (probability + // of tesseract class). + double net_out[2]; + if (!combiner_net_->FeedForward(&features[0], net_out)) + return 1.0; + return net_out[1]; +} +} diff --git a/ccmain/tesseract_cube_combiner.h b/ccmain/tesseract_cube_combiner.h new file mode 100644 index 0000000000..4773f8293e --- /dev/null +++ b/ccmain/tesseract_cube_combiner.h @@ -0,0 +1,103 @@ +/********************************************************************** + * File: tesseract_cube_combiner.h + * Description: Declaration of the Tesseract & Cube results combiner Class + * Author: Ahmad Abdulkader + * Created: 2008 + * + * (C) Copyright 2008, Google Inc. 
+ ** Licensed under the Apache License, Version 2.0 (the "License"); + ** you may not use this file except in compliance with the License. + ** You may obtain a copy of the License at + ** http://www.apache.org/licenses/LICENSE-2.0 + ** Unless required by applicable law or agreed to in writing, software + ** distributed under the License is distributed on an "AS IS" BASIS, + ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + ** See the License for the specific language governing permissions and + ** limitations under the License. + * + **********************************************************************/ + +// The TesseractCubeCombiner class provides the functionality of combining +// the recognition results of Tesseract and Cube at the word level + +#ifndef TESSERACT_CCMAIN_TESSERACT_CUBE_COMBINER_H +#define TESSERACT_CCMAIN_TESSERACT_CUBE_COMBINER_H + +#include +#include +#include "pageres.h" + +#ifdef __MSW32__ +#include +using namespace std; +#endif + +#ifdef USE_STD_NAMESPACE +using std::string; +using std::vector; +#endif + +namespace tesseract { + +class CubeObject; +class NeuralNet; +class CubeRecoContext; +class WordAltList; + +class TesseractCubeCombiner { + public: + explicit TesseractCubeCombiner(CubeRecoContext *cube_cntxt); + virtual ~TesseractCubeCombiner(); + + // There are 2 public methods for combining the results of tesseract + // and cube. Both return the probability that the Tesseract result is + // correct. The difference between the two interfaces is in how the + // passed-in CubeObject is used. + + // The CubeObject parameter is used for 2 purposes: 1) to retrieve + // cube's alt list, and 2) to compute cube's word cost for the + // tesseract result. Both uses may modify the state of the + // CubeObject (including the BeamSearch state) with a call to + // RecognizeWord(). 
+ float CombineResults(WERD_RES *tess_res, CubeObject *cube_obj); + + // The alt_list parameter is expected to have been extracted from the + // CubeObject that recognized the word to be combined. The cube_obj + // parameter passed in is a separate instance to be used only by + // the combiner. + float CombineResults(WERD_RES *tess_res, CubeObject *cube_obj, + WordAltList *alt_list); + + // Public method for computing the combiner features. The agreement + // output parameter will be true if both answers are identical, + // false otherwise. Modifies the cube_alt_list, so no assumptions + // should be made about its state upon return. + bool ComputeCombinerFeatures(const string &tess_res, + int tess_confidence, + CubeObject *cube_obj, + WordAltList *cube_alt_list, + vector *features, + bool *agreement); + + // Is the word valid according to Tesseract's language model + bool ValidWord(const string &str); + + // Loads the combiner neural network from file, using cube_cntxt_ + // to find path. + bool LoadCombinerNet(); + private: + // Normalize a UTF-8 string. 
Converts the UTF-8 string to UTF32 and optionally + // strips punc and/or normalizes case and then converts back + string NormalizeString(const string &str, bool remove_punc, bool norm_case); + + // Compares 2 strings after optionally normalizing them and or stripping + // punctuation + int CompareStrings(const string &str1, const string &str2, bool ignore_punc, + bool norm_case); + + NeuralNet *combiner_net_; // pointer to the combiner NeuralNet object + CubeRecoContext *cube_cntxt_; // used for language ID and data paths +}; +} + +#endif // TESSERACT_CCMAIN_TESSERACT_CUBE_COMBINER_H diff --git a/ccmain/tesseractclass.cpp b/ccmain/tesseractclass.cpp index 1c70ac4ddc..5a43617879 100644 --- a/ccmain/tesseractclass.cpp +++ b/ccmain/tesseractclass.cpp @@ -19,6 +19,8 @@ /////////////////////////////////////////////////////////////////////// #include "tesseractclass.h" +#include "cube_reco_context.h" +#include "tesseract_cube_combiner.h" #include "globals.h" // Include automatically generated configuration file if running autoconf. @@ -35,44 +37,373 @@ namespace tesseract { Tesseract::Tesseract() : BOOL_MEMBER(tessedit_resegment_from_boxes, false, - "Take segmentation and labeling from box file"), + "Take segmentation and labeling from box file", + this->params()), + BOOL_MEMBER(tessedit_resegment_from_line_boxes, false, + "Conversion of word/line box file to char box file", + this->params()), BOOL_MEMBER(tessedit_train_from_boxes, false, - "Generate training data from boxed chars"), + "Generate training data from boxed chars", this->params()), + BOOL_MEMBER(tessedit_make_boxes_from_boxes, false, + "Generate more boxes from boxed chars", this->params()), BOOL_MEMBER(tessedit_dump_pageseg_images, false, - "Dump itermediate images made during page segmentation"), + "Dump intermediate images made during page segmentation", + this->params()), // The default for pageseg_mode is the old behaviour, so as not to // upset anything that relies on that. 
- INT_MEMBER(tessedit_pageseg_mode, 2, - "Page seg mode: 0=auto, 1=col, 2=block, 3=line, 4=word, 6=char" - " (Values from PageSegMode enum in baseapi.h)"), - INT_MEMBER(tessedit_accuracyvspeed, 0, - "Accuracy V Speed tradeoff: 0 fastest, 100 most accurate" - " (Values from AccuracyVSpeed enum in baseapi.h)"), - BOOL_MEMBER(tessedit_train_from_boxes_word_level, false, - "Generate training data from boxed chars at word level."), + INT_MEMBER(tessedit_pageseg_mode, PSM_SINGLE_BLOCK, + "Page seg mode: 0=osd only, 1=auto+osd, 2=auto, 3=col, 4=block," + " 5=line, 6=word, 7=char" + " (Values from PageSegMode enum in publictypes.h)", + this->params()), + INT_INIT_MEMBER(tessedit_ocr_engine_mode, tesseract::OEM_TESSERACT_ONLY, + "Which OCR engine(s) to run (Tesseract, Cube, both)." + " Defaults to loading and running only Tesseract" + " (no Cube,no combiner)." + " Values from OcrEngineMode enum in tesseractclass.h)", + this->params()), STRING_MEMBER(tessedit_char_blacklist, "", - "Blacklist of chars not to recognize"), + "Blacklist of chars not to recognize", this->params()), STRING_MEMBER(tessedit_char_whitelist, "", - "Whitelist of chars to recognize"), - BOOL_MEMBER(global_tessedit_ambigs_training, false, - "Perform training for ambiguities"), + "Whitelist of chars to recognize", this->params()), + BOOL_INIT_MEMBER(tessedit_ambigs_training, false, + "Perform training for ambiguities", this->params()), + STRING_MEMBER(tessedit_write_params_to_file, "", + "Write all parameters to the given file.", this->params()), + BOOL_MEMBER(tessedit_adapt_to_char_fragments, true, + "Adapt to words that contain " + " a character composed form fragments", this->params()), + BOOL_MEMBER(tessedit_adaption_debug, false, "Generate and print debug" + " information for adaption", this->params()), + BOOL_MEMBER(applybox_rebalance, TRUE, "Drop dead", this->params()), + INT_MEMBER(applybox_debug, 1, "Debug level", this->params()), + INT_MEMBER(applybox_page, 0, + "Page number to apply boxes 
from", this->params()), + STRING_MEMBER(applybox_test_exclusions, "", + "Chars ignored for testing", this->params()), + double_MEMBER(applybox_error_band, 0.15, + "Err band as fract of xht", this->params()), + STRING_MEMBER(applybox_exposure_pattern, ".exp", "Exposure value follows" + " this pattern in the image filename. The name of the image" + " files are expected to be in the form" + " [lang].[fontname].exp[num].tif", this->params()), + BOOL_MEMBER(applybox_learn_chars_and_char_frags_mode, false, + "Learn both character fragments (as is done in the" + " special low exposure mode) as well as unfragmented" + " characters.", this->params()), + BOOL_MEMBER(applybox_learn_ngrams_mode, false, "Each bounding box" + " is assumed to contain ngrams. Only learn the ngrams" + " whose outlines overlap horizontally.", this->params()), + BOOL_MEMBER(tessedit_print_text, false, + "Write text to stdout", this->params()), + BOOL_MEMBER(tessedit_draw_words, false, + "Draw source words", this->params()), + BOOL_MEMBER(tessedit_draw_outwords, false, + "Draw output words", this->params()), + BOOL_MEMBER(tessedit_training_tess, false, + "Call Tess to learn blobs", this->params()), + BOOL_MEMBER(tessedit_dump_choices, false, + "Dump char choices", this->params()), + BOOL_MEMBER(tessedit_fix_fuzzy_spaces, true, + "Try to improve fuzzy spaces", this->params()), + BOOL_MEMBER(tessedit_unrej_any_wd, false, + "Dont bother with word plausibility", this->params()), + BOOL_MEMBER(tessedit_fix_hyphens, true, + "Crunch double hyphens?", this->params()), + BOOL_MEMBER(tessedit_redo_xheight, true, + "Check/Correct x-height", this->params()), + BOOL_MEMBER(tessedit_enable_doc_dict, true, + "Add words to the document dictionary", this->params()), + BOOL_MEMBER(tessedit_debug_fonts, false, + "Output font info per char", this->params()), + BOOL_MEMBER(tessedit_debug_block_rejection, false, + "Block and Row stats", this->params()), + INT_MEMBER(debug_x_ht_level, 0, "Reestimate debug", this->params()), 
+ BOOL_MEMBER(debug_acceptable_wds, false, + "Dump word pass/fail chk", this->params()), + STRING_MEMBER(chs_leading_punct, "('`\"", + "Leading punctuation", this->params()), + STRING_MEMBER(chs_trailing_punct1, ").,;:?!", + "1st Trailing punctuation", this->params()), + STRING_MEMBER(chs_trailing_punct2, ")'`\"", + "2nd Trailing punctuation", this->params()), + double_MEMBER(quality_rej_pc, 0.08, + "good_quality_doc lte rejection limit", this->params()), + double_MEMBER(quality_blob_pc, 0.0, + "good_quality_doc gte good blobs limit", this->params()), + double_MEMBER(quality_outline_pc, 1.0, + "good_quality_doc lte outline error limit", this->params()), + double_MEMBER(quality_char_pc, 0.95, + "good_quality_doc gte good char limit", this->params()), + INT_MEMBER(quality_min_initial_alphas_reqd, 2, + "alphas in a good word", this->params()), + BOOL_MEMBER(tessedit_tess_adapt_to_rejmap, false, + "Use reject map to control Tesseract adaption", this->params()), + INT_MEMBER(tessedit_tess_adaption_mode, 0x27, + "Adaptation decision algorithm for tess", this->params()), + BOOL_MEMBER(tessedit_minimal_rej_pass1, false, + "Do minimal rejection on pass 1 output", this->params()), + BOOL_MEMBER(tessedit_test_adaption, false, + "Test adaption criteria", this->params()), + BOOL_MEMBER(tessedit_matcher_log, false, + "Log matcher activity", this->params()), + INT_MEMBER(tessedit_test_adaption_mode, 3, + "Adaptation decision algorithm for tess", this->params()), + BOOL_MEMBER(save_best_choices, false, + "Save the results of the recognition step (blob_choices)" + " within the corresponding WERD_CHOICE", this->params()), + BOOL_MEMBER(test_pt, false, "Test for point", this->params()), + double_MEMBER(test_pt_x, 99999.99, "xcoord", this->params()), + double_MEMBER(test_pt_y, 99999.99, "ycoord", this->params()), + INT_MEMBER(cube_debug_level, 1, "Print cube debug info.", this->params()), + STRING_MEMBER(outlines_odd, "%| ", "Non standard number of outlines", + this->params()), + 
STRING_MEMBER(outlines_2, "ij!?%\":;", + "Non standard number of outlines", this->params()), + BOOL_MEMBER(docqual_excuse_outline_errs, false, + "Allow outline errs in unrejection?", this->params()), + BOOL_MEMBER(tessedit_good_quality_unrej, true, + "Reduce rejection on good docs", this->params()), + BOOL_MEMBER(tessedit_use_reject_spaces, true, + "Reject spaces?", this->params()), + double_MEMBER(tessedit_reject_doc_percent, 65.00, + "%rej allowed before rej whole doc", this->params()), + double_MEMBER(tessedit_reject_block_percent, 45.00, + "%rej allowed before rej whole block", this->params()), + double_MEMBER(tessedit_reject_row_percent, 40.00, + "%rej allowed before rej whole row", this->params()), + double_MEMBER(tessedit_whole_wd_rej_row_percent, 70.00, + "Number of row rejects in whole word rejects" + "which prevents whole row rejection", this->params()), + BOOL_MEMBER(tessedit_preserve_blk_rej_perfect_wds, true, + "Only rej partially rejected words in block rejection", + this->params()), + BOOL_MEMBER(tessedit_preserve_row_rej_perfect_wds, true, + "Only rej partially rejected words in row rejection", + this->params()), + BOOL_MEMBER(tessedit_dont_blkrej_good_wds, false, + "Use word segmentation quality metric", this->params()), + BOOL_MEMBER(tessedit_dont_rowrej_good_wds, false, + "Use word segmentation quality metric", this->params()), + INT_MEMBER(tessedit_preserve_min_wd_len, 2, + "Only preserve wds longer than this", this->params()), + BOOL_MEMBER(tessedit_row_rej_good_docs, true, + "Apply row rejection to good docs", this->params()), + double_MEMBER(tessedit_good_doc_still_rowrej_wd, 1.1, + "rej good doc wd if more than this fraction rejected", + this->params()), + BOOL_MEMBER(tessedit_reject_bad_qual_wds, true, + "Reject all bad quality wds", this->params()), + BOOL_MEMBER(tessedit_debug_doc_rejection, false, + "Page stats", this->params()), + BOOL_MEMBER(tessedit_debug_quality_metrics, false, + "Output data to debug file", this->params()), + 
BOOL_MEMBER(bland_unrej, false, + "unrej potential with no chekcs", this->params()), + double_MEMBER(quality_rowrej_pc, 1.1, + "good_quality_doc gte good char limit", this->params()), + BOOL_MEMBER(unlv_tilde_crunching, true, + "Mark v.bad words for tilde crunch", this->params()), + BOOL_MEMBER(crunch_early_merge_tess_fails, true, + "Before word crunch?", this->params()), + BOOL_MEMBER(crunch_early_convert_bad_unlv_chs, false, + "Take out ~^ early?", this->params()), + double_MEMBER(crunch_terrible_rating, 80.0, + "crunch rating lt this", this->params()), + BOOL_MEMBER(crunch_terrible_garbage, true, "As it says", this->params()), + double_MEMBER(crunch_poor_garbage_cert, -9.0, + "crunch garbage cert lt this", this->params()), + double_MEMBER(crunch_poor_garbage_rate, 60, + "crunch garbage rating lt this", this->params()), + double_MEMBER(crunch_pot_poor_rate, 40, + "POTENTIAL crunch rating lt this", this->params()), + double_MEMBER(crunch_pot_poor_cert, -8.0, + "POTENTIAL crunch cert lt this", this->params()), + BOOL_MEMBER(crunch_pot_garbage, true, + "POTENTIAL crunch garbage", this->params()), + double_MEMBER(crunch_del_rating, 60, + "POTENTIAL crunch rating lt this", this->params()), + double_MEMBER(crunch_del_cert, -10.0, + "POTENTIAL crunch cert lt this", this->params()), + double_MEMBER(crunch_del_min_ht, 0.7, + "Del if word ht lt xht x this", this->params()), + double_MEMBER(crunch_del_max_ht, 3.0, + "Del if word ht gt xht x this", this->params()), + double_MEMBER(crunch_del_min_width, 3.0, + "Del if word width lt xht x this", this->params()), + double_MEMBER(crunch_del_high_word, 1.5, + "Del if word gt xht x this above bl", this->params()), + double_MEMBER(crunch_del_low_word, 0.5, + "Del if word gt xht x this below bl", this->params()), + double_MEMBER(crunch_small_outlines_size, 0.6, + "Small if lt xht x this", this->params()), + INT_MEMBER(crunch_rating_max, 10, + "For adj length in rating per ch", this->params()), + INT_MEMBER(crunch_pot_indicators, 1, 
+ "How many potential indicators needed", this->params()), + BOOL_MEMBER(crunch_leave_ok_strings, true, + "Dont touch sensible strings", this->params()), + BOOL_MEMBER(crunch_accept_ok, true, + "Use acceptability in okstring", this->params()), + BOOL_MEMBER(crunch_leave_accept_strings, false, + "Dont pot crunch sensible strings", this->params()), + BOOL_MEMBER(crunch_include_numerals, false, + "Fiddle alpha figures", this->params()), + INT_MEMBER(crunch_leave_lc_strings, 4, + "Dont crunch words with long lower case strings", + this->params()), + INT_MEMBER(crunch_leave_uc_strings, 4, + "Dont crunch words with long lower case strings", + this->params()), + INT_MEMBER(crunch_long_repetitions, 3, + "Crunch words with long repetitions", this->params()), + INT_MEMBER(crunch_debug, 0, "As it says", this->params()), + INT_MEMBER(fixsp_non_noise_limit, 1, + "How many non-noise blbs either side?", this->params()), + double_MEMBER(fixsp_small_outlines_size, 0.28, + "Small if lt xht x this", this->params()), + BOOL_MEMBER(tessedit_prefer_joined_punct, false, + "Reward punctation joins", this->params()), + INT_MEMBER(fixsp_done_mode, 1, + "What constitues done for spacing", this->params()), + INT_MEMBER(debug_fix_space_level, 0, + "Contextual fixspace debug", this->params()), + STRING_MEMBER(numeric_punctuation, ".,", + "Punct. 
chs expected WITHIN numbers", this->params()), + INT_MEMBER(x_ht_acceptance_tolerance, 8, + "Max allowed deviation of blob top outside of font data", + this->params()), + INT_MEMBER(x_ht_min_change, 8, + "Min change in xht before actually trying it", this->params()), + BOOL_MEMBER(tessedit_write_block_separators, false, + "Write block separators in output", this->params()), + BOOL_MEMBER(tessedit_write_raw_output, false, + "Write raw stuff to name.raw", this->params()), + BOOL_MEMBER(tessedit_write_output, false, + "Write text to name.txt", this->params()), + BOOL_MEMBER(tessedit_write_ratings, false, + "Return ratings in IPEOCRAPI data", this->params()), + BOOL_MEMBER(tessedit_write_rep_codes, false, + "Write repetition char code", this->params()), + BOOL_MEMBER(tessedit_write_unlv, false, + "Write .unlv output file", this->params()), + BOOL_MEMBER(tessedit_create_hocr, false, + "Write .html hOCR output file", this->params()), + STRING_MEMBER(unrecognised_char, "|", + "Output char for unidentified blobs", this->params()), + INT_MEMBER(suspect_level, 99, "Suspect marker level", this->params()), + INT_MEMBER(suspect_space_level, 100, + "Min suspect level for rejecting spaces", this->params()), + INT_MEMBER(suspect_short_words, 2, + "Dont Suspect dict wds longer than this", this->params()), + BOOL_MEMBER(suspect_constrain_1Il, false, + "UNLV keep 1Il chars rejected", this->params()), + double_MEMBER(suspect_rating_per_ch, 999.9, + "Dont touch bad rating limit", this->params()), + double_MEMBER(suspect_accept_rating, -999.9, + "Accept good rating limit", this->params()), + BOOL_MEMBER(tessedit_minimal_rejection, false, + "Only reject tess failures", this->params()), + BOOL_MEMBER(tessedit_zero_rejection, false, + "Dont reject ANYTHING", this->params()), + BOOL_MEMBER(tessedit_word_for_word, false, + "Make output have exactly one word per WERD", this->params()), + BOOL_MEMBER(tessedit_zero_kelvin_rejection, false, + "Dont reject ANYTHING AT ALL", this->params()), + 
BOOL_MEMBER(tessedit_consistent_reps, true, + "Force all rep chars the same", this->params()), + INT_MEMBER(tessedit_reject_mode, 0, "Rejection algorithm", this->params()), + INT_MEMBER(tessedit_ok_mode, 5, + "Acceptance decision algorithm", this->params()), + BOOL_MEMBER(tessedit_rejection_debug, false, + "Adaption debug", this->params()), + BOOL_MEMBER(tessedit_flip_0O, true, + "Contextual 0O O0 flips", this->params()), + double_MEMBER(tessedit_lower_flip_hyphen, 1.5, + "Aspect ratio dot/hyphen test", this->params()), + double_MEMBER(tessedit_upper_flip_hyphen, 1.8, + "Aspect ratio dot/hyphen test", this->params()), + BOOL_MEMBER(rej_trust_doc_dawg, false, + "Use DOC dawg in 11l conf. detector", this->params()), + BOOL_MEMBER(rej_1Il_use_dict_word, false, + "Use dictword test", this->params()), + BOOL_MEMBER(rej_1Il_trust_permuter_type, true, + "Dont double check", this->params()), + BOOL_MEMBER(rej_use_tess_accepted, true, + "Individual rejection control", this->params()), + BOOL_MEMBER(rej_use_tess_blanks, true, + "Individual rejection control", this->params()), + BOOL_MEMBER(rej_use_good_perm, true, + "Individual rejection control", this->params()), + BOOL_MEMBER(rej_use_sensible_wd, false, + "Extend permuter check", this->params()), + BOOL_MEMBER(rej_alphas_in_number_perm, false, + "Extend permuter check", this->params()), + double_MEMBER(rej_whole_of_mostly_reject_word_fract, 0.85, + "if >this fract", this->params()), + INT_MEMBER(tessedit_image_border, 2, + "Rej blbs near image edge limit", this->params()), + STRING_MEMBER(ok_repeated_ch_non_alphanum_wds, "-?*\075", + "Allow NN to unrej", this->params()), + STRING_MEMBER(conflict_set_I_l_1, "Il1[]", + "Il1 conflict set", this->params()), + INT_MEMBER(min_sane_x_ht_pixels, 8, + "Reject any x-ht lt or eq than this", this->params()), + BOOL_MEMBER(tessedit_create_boxfile, false, + "Output text with boxes", this->params()), + BOOL_MEMBER(tessedit_read_image, true, + "Ensure the image is read", this->params()), 
+ INT_MEMBER(tessedit_serial_unlv, 0, "0->Whole page, 1->serial" + " no adapt, 2->serial with adapt", this->params()), + INT_MEMBER(tessedit_page_number, -1, "-1 -> All pages" + " , else specifc page to process", this->params()), + BOOL_MEMBER(tessedit_write_images, false, + "Capture the image from the IPE", this->params()), + BOOL_MEMBER(interactive_mode, false, "Run interactively?", this->params()), + STRING_MEMBER(file_type, ".tif", "Filename extension", this->params()), + INT_MEMBER(testedit_match_debug, 0, + "Integer match debug ctrl", this->params()), + BOOL_MEMBER(tessedit_override_permuter, true, + "According to dict_word", this->params()), + INT_INIT_MEMBER(tessdata_manager_debug_level, 0, "Debug level for" + " TessdataManager functions.", this->params()), + double_MEMBER(min_orientation_margin, 12.0, + "Min acceptable orientation margin", this->params()), + backup_config_file_(NULL), pix_binary_(NULL), + pix_grey_(NULL), + orig_image_changed_(false), + textord_(this), + right_to_left_(false), deskew_(1.0f, 0.0f), reskew_(1.0f, 0.0f), - hindi_image_(false) { + cube_cntxt_(NULL), + tess_cube_combiner_(NULL) { } Tesseract::~Tesseract() { Clear(); + // Delete cube objects. 
+ if (cube_cntxt_ != NULL) { + delete cube_cntxt_; + cube_cntxt_ = NULL; + } + if (tess_cube_combiner_ != NULL) { + delete tess_cube_combiner_; + tess_cube_combiner_ = NULL; + } } void Tesseract::Clear() { #ifdef HAVE_LIBLEPT if (pix_binary_ != NULL) pixDestroy(&pix_binary_); + if (pix_grey_ != NULL) + pixDestroy(&pix_grey_); #endif - deskew_ = FCOORD(1.0f, 0.0f); - reskew_ = FCOORD(1.0f, 0.0f); + deskew_ = FCOORD(1.0f, 0.0f); + reskew_ = FCOORD(1.0f, 0.0f); + orig_image_changed_ = false; } void Tesseract::SetBlackAndWhitelist() { diff --git a/ccmain/tesseractclass.h b/ccmain/tesseractclass.h index 8203a36750..5474d9ceb8 100644 --- a/ccmain/tesseractclass.h +++ b/ccmain/tesseractclass.h @@ -21,17 +21,18 @@ #ifndef TESSERACT_CCMAIN_TESSERACTCLASS_H__ #define TESSERACT_CCMAIN_TESSERACTCLASS_H__ -#include "varable.h" +#include "allheaders.h" +#include "params.h" #include "wordrec.h" #include "ocrclass.h" #include "control.h" #include "docqual.h" +#include "textord.h" -class CHAR_SAMPLES_LIST; -class CHAR_SAMPLE_LIST; class PAGE_RES; class PAGE_RES_IT; class BLOCK_LIST; +class CharSamp; class TO_BLOCK_LIST; class IMAGE; class WERD_RES; @@ -42,6 +43,7 @@ struct Pix; class WERD_CHOICE; class WERD; class BLOB_CHOICE_LIST_CLIST; +struct OSResults; // Top-level class for all tesseract global instance data. @@ -55,22 +57,76 @@ class BLOB_CHOICE_LIST_CLIST; // know about the content of a higher-level directory. // The following scheme will grant the easiest access to lower-level // global members without creating a cyclic dependency: -// ccmain inherits wordrec, includes textord as a member -// wordrec inherits classify -// classify inherits ccstruct, includes dict as a member -// ccstruct inherits c_util, includes image as a member -// c_util inherits cc_util -// textord has a pointer to ccstruct, but doesn't own it. -// dict has a pointer to ccstruct, but doesn't own it. 
+// +// Class Hierarchy (^ = inheritance): +// +// CCUtil (ccutil/ccutil.h) +// ^ Members include: UNICHARSET +// CUtil (cutil/cutil_class.h) +// ^ Members include: TBLOB*, TEXTBLOCK* +// CCStruct (ccstruct/ccstruct.h) +// ^ Members include: Image +// Classify (classify/classify.h) +// ^ Members include: Dict +// WordRec (wordrec/wordrec.h) +// ^ Members include: WERD*, DENORM* +// Tesseract (ccmain/tesseractclass.h) +// Members include: Pix*, CubeRecoContext*, +// TesseractCubeCombiner* +// +// Other important classes: +// +// TessBaseAPI (api/baseapi.h) +// Members include: BLOCK_LIST*, PAGE_RES*, +// Tesseract*, ImageThresholder* +// Dict (dict/dict.h) +// Members include: Image* (private) // // NOTE: that each level contains members that correspond to global // data that is defined (and used) at that level, not necessarily where // the type is defined so for instance: -// BOOL_VAR (textord_show_blobs, FALSE, "Display unsorted blobs"); +// BOOL_VAR_H(textord_show_blobs, false, "Display unsorted blobs"); // goes inside the Textord class, not the cc_util class. namespace tesseract { +class CubeLineObject; +class CubeObject; +class CubeRecoContext; +class TesseractCubeCombiner; + +// A collection of various variables for statistics and debugging. 
+struct TesseractStats { + TesseractStats() + : adaption_word_number(0), + doc_blob_quality(0), + doc_outline_errs(0), + doc_char_quality(0), + good_char_count(0), + doc_good_char_quality(0), + word_count(0), + dict_words(0), + tilde_crunch_written(false), + last_char_was_newline(true), + last_char_was_tilde(false), + write_results_empty_block(true) {} + + inT32 adaption_word_number; + inT16 doc_blob_quality; + inT16 doc_outline_errs; + inT16 doc_char_quality; + inT16 good_char_count; + inT16 doc_good_char_quality; + inT32 word_count; // count of word in the document + inT32 dict_words; // number of dicitionary words in the document + STRING dump_words_str; // accumulator used by dump_words() + // Flags used by write_results() + bool tilde_crunch_written; + bool last_char_was_newline; + bool last_char_was_tilde; + bool write_results_empty_block; +}; + class Tesseract : public Wordrec { public: Tesseract(); @@ -90,89 +146,112 @@ class Tesseract : public Wordrec { Pix* pix_binary() const { return pix_binary_; } + Pix* pix_grey() const { + return pix_grey_; + } + void set_pix_grey(Pix* grey_pix) { + pix_grey_ = grey_pix; + } + int ImageWidth() const { + return pixGetWidth(pix_binary_); + } + int ImageHeight() const { + return pixGetHeight(pix_binary_); + } + + const Textord& textord() const { + return textord_; + } + Textord* mutable_textord() { + return &textord_; + } + + bool right_to_left() const { + return right_to_left_; + } void SetBlackAndWhitelist(); - int SegmentPage(const STRING* input_file, - IMAGE* image, BLOCK_LIST* blocks); - int AutoPageSeg(int width, int height, int resolution, - bool single_column, IMAGE* image, - BLOCK_LIST* blocks, TO_BLOCK_LIST* to_blocks); + + int SegmentPage(const STRING* input_file, BLOCK_LIST* blocks, + Tesseract* osd_tess, OSResults* osr); + void SetupWordScripts(BLOCK_LIST* blocks); + int AutoPageSeg(int resolution, bool single_column, + bool osd, bool only_osd, + BLOCK_LIST* blocks, TO_BLOCK_LIST* to_blocks, + Tesseract* 
osd_tess, OSResults* osr); //// control.h ///////////////////////////////////////////////////////// - void recog_all_words( //process words - PAGE_RES *page_res, //page structure - //progress monitor - volatile ETEXT_DESC *monitor, - TBOX *target_word_box=0L, - inT16 dopasses=0 - ); + bool ProcessTargetWord(const TBOX& word_box, const TBOX& target_word_box, + const char* word_config, int pass); + void recog_all_words(PAGE_RES* page_res, + ETEXT_DESC* monitor, + const TBOX* target_word_box, + const char* word_config, + int dopasses); void classify_word_pass1( //recog one word WERD_RES *word, //word to do ROW *row, - BLOCK* block, - BOOL8 cluster_adapt, - CHAR_SAMPLES_LIST *char_clusters, - CHAR_SAMPLE_LIST *chars_waiting); - void recog_pseudo_word( //recognize blobs - BLOCK_LIST *block_list, //blocks to check + BLOCK* block); + void recog_pseudo_word(PAGE_RES* page_res, // blocks to check TBOX &selection_box); - // This method returns all the blobs in the specified blocks. - // It's the caller's responsibility to destroy the returned list. - C_BLOB_LIST* get_blobs_from_blocks(BLOCK_LIST* blocks // blocks to look at. - ); - - // This method can be used to perform word-level training using box files. - // TODO: this can be modified to perform training in general case too. - void train_word_level_with_boxes( - const STRING& box_file, // File with boxes. - const STRING& out_file, // Output file. - BLOCK_LIST* blocks // Blocks to use. - ); - void fix_rep_char(WERD_RES *word); - void fix_quotes( //make double quotes - WERD_CHOICE *choice, //choice to fix - WERD *word, //word to do //char choices + void fix_rep_char(PAGE_RES_IT* page_res_it); + void ExplodeRepeatedWord(BLOB_CHOICE* best_choice, PAGE_RES_IT* page_res_it); + + // Callback helper for fix_quotes returns a double quote if both + // arguments are quote, otherwise INVALID_UNICHAR_ID. 
+ UNICHAR_ID BothQuotes(UNICHAR_ID id1, UNICHAR_ID id2); + void fix_quotes(WERD_RES* word_res, BLOB_CHOICE_LIST_CLIST *blob_choices); ACCEPTABLE_WERD_TYPE acceptable_word_string(const char *s, const char *lengths); void match_word_pass2( //recog one word WERD_RES *word, //word to do ROW *row, - BLOCK* block, - float x_height); + BLOCK* block); void classify_word_pass2( //word to do WERD_RES *word, BLOCK* block, ROW *row); - BOOL8 recog_interactive( //recognize blobs - BLOCK *block, //block - ROW *row, //row of word - WERD *word //word to recognize - ); - void fix_hyphens( //crunch double hyphens - WERD_CHOICE *choice, //choice to fix - WERD *word, //word to do //char choices + void ReportXhtFixResult(bool accept_new_word, float new_x_ht, + WERD_RES* word, WERD_RES* new_word); + bool RunOldFixXht(WERD_RES *word, BLOCK* block, ROW *row); + bool TrainedXheightFix(WERD_RES *word, BLOCK* block, ROW *row); + BOOL8 recog_interactive(BLOCK* block, ROW* row, WERD_RES* word_res); + + // Callback helper for fix_hyphens returns UNICHAR_ID of - if both + // arguments are hyphen, otherwise INVALID_UNICHAR_ID. + UNICHAR_ID BothHyphens(UNICHAR_ID id1, UNICHAR_ID id2); + // Callback helper for fix_hyphens returns true if box1 and box2 overlap + // (assuming both on the same textline, are in order and a chopped em dash.) 
+ bool HyphenBoxesOverlap(const TBOX& box1, const TBOX& box2); + void fix_hyphens(WERD_RES* word_res, BLOB_CHOICE_LIST_CLIST *blob_choices); void set_word_fonts( WERD_RES *word, // word to adapt to BLOB_CHOICE_LIST_CLIST *blob_choices); // detailed results void font_recognition_pass( //good chars in word PAGE_RES_IT &page_res_it); - + BOOL8 check_debug_pt(WERD_RES *word, int location); + //// cube_control.cpp /////////////////////////////////////////////////// + bool init_cube_objects(bool load_combiner, + TessdataManager *tessdata_manager); + void run_cube(PAGE_RES *page_res); + void cube_recognize(CubeObject *cube_obj, PAGE_RES_IT *page_res_it); + void fill_werd_res(const BoxWord& cube_box_word, + WERD_CHOICE* cube_werd_choice, + const char* cube_best_str, + PAGE_RES_IT *page_res_it); + bool extract_cube_state(CubeObject* cube_obj, int* num_chars, + Boxa** char_boxes, CharSamp*** char_samples); + bool create_cube_box_word(Boxa *char_boxes, int num_chars, + TBOX word_box, BoxWord* box_word); //// output.h ////////////////////////////////////////////////////////// - void output_pass( //Tess output pass //send to api - PAGE_RES_IT &page_res_it, - BOOL8 write_to_shm, - TBOX *target_word_box); - FILE *open_outfile( //open .map & .unlv file - const char *extension); - void write_results( //output a word - PAGE_RES_IT &page_res_it, //full info - char newline_type, //type of newline - BOOL8 force_eol, //override tilde crunch? - BOOL8 write_to_shm //send to api + void output_pass(PAGE_RES_IT &page_res_it, const TBOX *target_word_box); + void write_results(PAGE_RES_IT &page_res_it, // full info + char newline_type, // type of newline + BOOL8 force_eol // override tilde crunch? ); void set_unlv_suspects(WERD_RES *word); UNICHAR_ID get_rep_char(WERD_RES *word); // what char is repeated? 
@@ -181,53 +260,55 @@ class Tesseract : public Wordrec { inT16 count_alphanums(const WERD_CHOICE &word); inT16 count_alphas(const WERD_CHOICE &word); //// tessedit.h //////////////////////////////////////////////////////// - void read_config_file(const char *filename, bool global_only); + void read_config_file(const char *filename, bool init_only); int init_tesseract(const char *arg0, const char *textbase, const char *language, + OcrEngineMode oem, char **configs, int configs_size, - bool configs_global_only); + bool configs_init_only); + int init_tesseract(const char *datapath, + const char *language, + OcrEngineMode oem) { + return init_tesseract(datapath, NULL, language, oem, NULL, 0, false); + } int init_tesseract_lm(const char *arg0, const char *textbase, const char *language); - // Initializes the tesseract classifier without loading language models. - int init_tesseract_classifier(const char *arg0, - const char *textbase, - const char *language, - char **configs, - int configs_size, - bool configs_global_only); - void recognize_page(STRING& image_name); void end_tesseract(); bool init_tesseract_lang_data(const char *arg0, const char *textbase, const char *language, + OcrEngineMode oem, char **configs, int configs_size, - bool configs_global_only); + bool configs_init_only); //// pgedit.h ////////////////////////////////////////////////////////// SVMenuNode *build_menu_new(); - void pgeditor_main(BLOCK_LIST *blocks); + void pgeditor_main(int width, int height, PAGE_RES* page_res); void process_image_event( // action in image win const SVEvent &event); - void pgeditor_read_file( // of serialised file - STRING &filename, - BLOCK_LIST *blocks // block list to add to - ); - void do_new_source( // serialise - ); BOOL8 process_cmd_win_event( // UI command semantics inT32 cmd_event, // which menu item? 
char *new_value // any prompt data ); + void debug_word(PAGE_RES* page_res, const TBOX &selection_box); + void do_re_display( + BOOL8 (tesseract::Tesseract::*word_painter)(BLOCK* block, + ROW* row, + WERD_RES* word_res)); + BOOL8 word_display(BLOCK* block, ROW* row, WERD_RES* word_res); + BOOL8 word_bln_display(BLOCK* block, ROW* row, WERD_RES* word_res); + BOOL8 word_blank_and_set_display(BLOCK* block, ROW* row, WERD_RES* word_res); + BOOL8 word_set_display(BLOCK* block, ROW* row, WERD_RES* word_res); + BOOL8 word_dumper(BLOCK* block, ROW* row, WERD_RES* word_res); //// reject.h ////////////////////////////////////////////////////////// - const char *char_ambiguities(char c); void make_reject_map( //make rej map for wd //detailed results WERD_RES *word, BLOB_CHOICE_LIST_CLIST *blob_choices, @@ -246,8 +327,6 @@ class Tesseract : public Wordrec { void dont_allow_1Il(WERD_RES *word); inT16 count_alphanums( //how many alphanums WERD_RES *word); - BOOL8 repeated_ch_string(const char *rep_ch_str, - const char *lengths); void flip_0O(WERD_RES *word); BOOL8 non_0_digit(UNICHAR_ID unichar_id); BOOL8 non_O_upper(UNICHAR_ID unichar_id); @@ -263,77 +342,21 @@ class Tesseract : public Wordrec { inT16 pass); inT16 safe_dict_word(const WERD_CHOICE &word); void flip_hyphens(WERD_RES *word); + void reject_I_1_L(WERD_RES *word); + void reject_edge_blobs(WERD_RES *word); + void reject_mostly_rejects(WERD_RES *word); //// adaptions.h /////////////////////////////////////////////////////// - void adapt_to_good_ems(WERD_RES *word, - CHAR_SAMPLES_LIST *char_clusters, - CHAR_SAMPLE_LIST *chars_waiting); - void adapt_to_good_samples(WERD_RES *word, - CHAR_SAMPLES_LIST *char_clusters, - CHAR_SAMPLE_LIST *chars_waiting); BOOL8 word_adaptable( //should we adapt? 
WERD_RES *word, uinT16 mode); - void reject_suspect_ems(WERD_RES *word); - void collect_ems_for_adaption(WERD_RES *word, - CHAR_SAMPLES_LIST *char_clusters, - CHAR_SAMPLE_LIST *chars_waiting); - void collect_characters_for_adaption(WERD_RES *word, - CHAR_SAMPLES_LIST *char_clusters, - CHAR_SAMPLE_LIST *chars_waiting); - void check_wait_list(CHAR_SAMPLE_LIST *chars_waiting, - CHAR_SAMPLE *sample, - CHAR_SAMPLES *best_cluster); - void cluster_sample(CHAR_SAMPLE *sample, - CHAR_SAMPLES_LIST *char_clusters, - CHAR_SAMPLE_LIST *chars_waiting); - void complete_clustering(CHAR_SAMPLES_LIST *char_clusters, - CHAR_SAMPLE_LIST *chars_waiting); //// tfacepp.cpp /////////////////////////////////////////////////////// - WERD_CHOICE *recog_word_recursive( //recog one owrd - WERD *word, //word to do - DENORM *denorm, //de-normaliser - //matcher function - POLY_MATCHER matcher, - //tester function - POLY_TESTER tester, - //trainer function - POLY_TESTER trainer, - BOOL8 testing, //true if answer driven - //raw result - WERD_CHOICE *&raw_choice, - //list of blob lists - BLOB_CHOICE_LIST_CLIST *blob_choices, - WERD *&outword //bln word output - ); - WERD_CHOICE *recog_word( //recog one owrd - WERD *word, //word to do - DENORM *denorm, //de-normaliser - POLY_MATCHER matcher, //matcher function - POLY_TESTER tester, //tester function - POLY_TESTER trainer, //trainer function - BOOL8 testing, //true if answer driven - WERD_CHOICE *&raw_choice, //raw result - //list of blob lists - BLOB_CHOICE_LIST_CLIST *blob_choices, - WERD *&outword //bln word output - ); - WERD_CHOICE *split_and_recog_word( //recog one owrd - WERD *word, //word to do - DENORM *denorm, //de-normaliser - //matcher function - POLY_MATCHER matcher, - //tester function - POLY_TESTER tester, - //trainer function - POLY_TESTER trainer, - BOOL8 testing, //true if answer driven - //raw result - WERD_CHOICE *&raw_choice, - //list of blob lists - BLOB_CHOICE_LIST_CLIST *blob_choices, - WERD *&outword //bln word output - ); + 
void recog_word_recursive(WERD_RES* word, + BLOB_CHOICE_LIST_CLIST *blob_choices); + void recog_word(WERD_RES *word, + BLOB_CHOICE_LIST_CLIST *blob_choices); + void split_and_recog_word(WERD_RES* word, + BLOB_CHOICE_LIST_CLIST *blob_choices); //// fixspace.cpp /////////////////////////////////////////////////////// BOOL8 digit_or_numeric_punct(WERD_RES *word, int char_position); inT16 eval_word_spacing(WERD_RES_LIST &word_res_list); @@ -345,10 +368,17 @@ class Tesseract : public Wordrec { ROW *row, BLOCK* block); void fix_sp_fp_word(WERD_RES_IT &word_res_it, ROW *row, BLOCK* block); - void fix_fuzzy_spaces( //find fuzzy words - volatile ETEXT_DESC *monitor, //progress monitor - inT32 word_count, //count of words in doc + void fix_fuzzy_spaces( //find fuzzy words + ETEXT_DESC *monitor, //progress monitor + inT32 word_count, //count of words in doc PAGE_RES *page_res); + void dump_words(WERD_RES_LIST &perm, inT16 score, + inT16 mode, BOOL8 improved); + BOOL8 uniformly_spaced(WERD_RES *word); + BOOL8 fixspace_thinks_word_done(WERD_RES *word); + inT16 worst_noise_blob(WERD_RES *word_res, float *worst_noise_score); + float blob_noise_score(TBLOB *blob); + void break_noisiest_blob_word(WERD_RES_LIST &words); //// docqual.cpp //////////////////////////////////////////////////////// GARBAGE_LEVEL garbage_word(WERD_RES *word, BOOL8 ok_dict_word); BOOL8 potential_word_crunch(WERD_RES *word, @@ -363,176 +393,422 @@ class Tesseract : public Wordrec { void quality_based_rejection(PAGE_RES_IT &page_res_it, BOOL8 good_quality_doc); void convert_bad_unlv_chs(WERD_RES *word_res); + // Callback helper for merge_tess_fails returns a space if both + // arguments are space, otherwise INVALID_UNICHAR_ID. 
+ UNICHAR_ID BothSpaces(UNICHAR_ID id1, UNICHAR_ID id2); void merge_tess_fails(WERD_RES *word_res); void tilde_delete(PAGE_RES_IT &page_res_it); - void insert_rej_cblobs(WERD_RES *word); + inT16 word_blob_quality(WERD_RES *word, ROW *row); + void word_char_quality(WERD_RES *word, ROW *row, inT16 *match_count, + inT16 *accepted_match_count); + void unrej_good_chs(WERD_RES *word, ROW *row); + inT16 count_outline_errs(char c, inT16 outline_count); + inT16 word_outline_errs(WERD_RES *word); + BOOL8 terrible_word_crunch(WERD_RES *word, GARBAGE_LEVEL garbage_level); + CRUNCH_MODE word_deletable(WERD_RES *word, inT16 &delete_mode); + inT16 failure_count(WERD_RES *word); + BOOL8 noise_outlines(TWERD *word); //// pagewalk.cpp /////////////////////////////////////////////////////// void process_selected_words ( - BLOCK_LIST * block_list, //blocks to check + PAGE_RES* page_res, // blocks to check //function to call TBOX & selection_box, - BOOL8 (tesseract::Tesseract::*word_processor) ( - BLOCK *, - ROW *, - WERD *)); + BOOL8 (tesseract::Tesseract::*word_processor) (BLOCK* block, + ROW* row, + WERD_RES* word_res)); //// tessbox.cpp /////////////////////////////////////////////////////// void tess_add_doc_word( //test acceptability WERD_CHOICE *word_choice //after context ); - void tess_adapter( //adapt to word - WERD *word, //bln word - DENORM *denorm, //de-normalise - const WERD_CHOICE& choice, //string for word - const WERD_CHOICE& raw_choice, //before context - const char *rejmap //reject map - ); - WERD_CHOICE *test_segment_pass2( //recog one word - WERD *word, //bln word to do - DENORM *denorm, //de-normaliser - POLY_MATCHER matcher, //matcher function - POLY_TESTER tester, //tester function - //raw result - WERD_CHOICE *&raw_choice, - //list of blob lists - BLOB_CHOICE_LIST_CLIST *blob_choices, - WERD *&outword //bln word output - ); - WERD_CHOICE *tess_segment_pass1( //recog one word - WERD *word, //bln word to do - DENORM *denorm, //de-normaliser - POLY_MATCHER 
matcher, //matcher function - //raw result - WERD_CHOICE *&raw_choice, - //list of blob lists - BLOB_CHOICE_LIST_CLIST *blob_choices, - WERD *&outword //bln word output - ); - WERD_CHOICE *tess_segment_pass2( //recog one word - WERD *word, //bln word to do - DENORM *denorm, //de-normaliser - POLY_MATCHER matcher, //matcher function - //raw result - WERD_CHOICE *&raw_choice, - //list of blob lists - BLOB_CHOICE_LIST_CLIST *blob_choices, - WERD *&outword //bln word output - ); - WERD_CHOICE *correct_segment_pass2( //recog one word - WERD *word, //bln word to do - DENORM *denorm, //de-normaliser - POLY_MATCHER matcher, //matcher function - POLY_TESTER tester, //tester function - //raw result - WERD_CHOICE *&raw_choice, - //list of blob lists - BLOB_CHOICE_LIST_CLIST *blob_choices, - WERD *&outword //bln word output - ); - void tess_default_matcher( //call tess - PBLOB *pblob, //previous blob - PBLOB *blob, //blob to match - PBLOB *nblob, //next blob - WERD *word, //word it came from - DENORM *denorm, //de-normaliser - BLOB_CHOICE_LIST *ratings, //list of results - const char* script - ); - void tess_bn_matcher( //call tess - PBLOB *pblob, //previous blob - PBLOB *blob, //blob to match - PBLOB *nblob, //next blob - WERD *word, //word it came from - DENORM *denorm, //de-normaliser - BLOB_CHOICE_LIST *ratings //list of results - ); - void tess_cn_matcher( //call tess - PBLOB *pblob, //previous blob - PBLOB *blob, //blob to match - PBLOB *nblob, //next blob - WERD *word, //word it came from - DENORM *denorm, //de-normaliser - BLOB_CHOICE_LIST *ratings, //list of results - // Sorted array of CP_RESULT_STRUCT from class pruner. 
- CLASS_PRUNER_RESULTS cpresults - ); - BOOL8 tess_adaptable_word( //test adaptability - WERD *word, //word to test - WERD_CHOICE *word_choice, //after context - WERD_CHOICE *raw_choice //before context - ); + void tess_segment_pass1(WERD_RES *word, + BLOB_CHOICE_LIST_CLIST *blob_choices); + void tess_segment_pass2(WERD_RES *word, + BLOB_CHOICE_LIST_CLIST *blob_choices); BOOL8 tess_acceptable_word( //test acceptability WERD_CHOICE *word_choice, //after context WERD_CHOICE *raw_choice //before context ); //// applybox.cpp ////////////////////////////////////////////////////// - void apply_box_testing(BLOCK_LIST *block_list); - void apply_boxes(const STRING& fname, - BLOCK_LIST *block_list //real blocks - ); - // converts an array of boxes to a block list - int Boxes2BlockList(int box_cnt, TBOX *boxes, BLOCK_LIST *block_list, - bool right2left); - //// blobcmp.cpp /////////////////////////////////////////////////////// - float compare_tess_blobs(TBLOB *blob1, - TEXTROW *row1, - TBLOB *blob2, - TEXTROW *row2); - //// paircmp.cpp /////////////////////////////////////////////////////// - float compare_bln_blobs( //match 2 blobs - PBLOB *blob1, //first blob - DENORM *denorm1, - PBLOB *blob2, //other blob - DENORM *denorm2); - float compare_blobs( //match 2 blobs - PBLOB *blob1, //first blob - ROW *row1, //row it came from - PBLOB *blob2, //other blob - ROW *row2); - BOOL8 compare_blob_pairs( //blob processor - BLOCK *, - ROW *row, //row it came from - WERD *, - PBLOB *blob //blob to compare - ); - //// fixxht.cpp /////////////////////////////////////////////////////// - void check_block_occ(WERD_RES *word_res); + // Applies the box file based on the image name fname, and resegments + // the words in the block_list (page), with: + // blob-mode: one blob per line in the box file, words as input. + // word/line-mode: one blob per space-delimited unit after the #, and one word + // per line in the box file. (See comment above for box file format.) 
+ // If find_segmentation is true, (word/line mode) then the classifier is used + // to re-segment words/lines to match the space-delimited truth string for + // each box. In this case, the input box may be for a word or even a whole + // text line, and the output words will contain multiple blobs corresponding + // to the space-delimited input string. + // With find_segmentation false, no classifier is needed, but the chopper + // can still be used to correctly segment touching characters with the help + // of the input boxes. + // In the returned PAGE_RES, the WERD_RES are setup as they would be returned + // from normal classification, ie. with a word, chopped_word, rebuild_word, + // seam_array, denorm, box_word, and best_state, but NO best_choice or + // raw_choice, as they would require a UNICHARSET, which we aim to avoid. + // Instead, the correct_text member of WERD_RES is set, and this may be later + // converted to a best_choice using CorrectClassifyWords. CorrectClassifyWords + // is not required before calling ApplyBoxTraining. + PAGE_RES* ApplyBoxes(const STRING& fname, bool find_segmentation, + BLOCK_LIST *block_list); + // Builds a PAGE_RES from the block_list in the way required for ApplyBoxes: + // All fuzzy spaces are removed, and all the words are maximally chopped. + PAGE_RES* SetupApplyBoxes(BLOCK_LIST *block_list); + // Tests the chopper by exhaustively running chop_one_blob. + // The word_res will contain filled chopped_word, seam_array, denorm, + // box_word and best_state for the maximally chopped word. + void MaximallyChopWord(BLOCK* block, ROW* row, WERD_RES* word_res); + // Gather consecutive blobs that match the given box into the best_state + // and corresponding correct_text. + // Fights over which box owns which blobs are settled by pre-chopping and + // applying the blobs to box or next_box with the least non-overlap. 
+ // Returns false if the box was in error, which can only be caused by + // failing to find an appropriate blob for a box. + // This means that occasionally, blobs may be incorrectly segmented if the + // chopper fails to find a suitable chop point. + bool ResegmentCharBox(PAGE_RES* page_res, + const TBOX& box, const TBOX& next_box, + const char* correct_text); + // Consume all source blobs that strongly overlap the given box, + // putting them into a new word, with the correct_text label. + // Fights over which box owns which blobs are settled by + // applying the blobs to box or next_box with the least non-overlap. + // Returns false if the box was in error, which can only be caused by + // failing to find an overlapping blob for a box. + bool ResegmentWordBox(BLOCK_LIST *block_list, + const TBOX& box, const TBOX& next_box, + const char* correct_text); + // Resegments the words by running the classifier in an attempt to find the + // correct segmentation that produces the required string. + void ReSegmentByClassification(PAGE_RES* page_res); + // Converts the space-delimited string of utf8 text to a vector of UNICHAR_ID. + // Returns false if an invalid UNICHAR_ID is encountered. + bool ConvertStringToUnichars(const char* utf8, + GenericVector* class_ids); + // Resegments the word to achieve the target_text from the classifier. + // Returns false if the re-segmentation fails. + // Uses brute-force combination of upto kMaxGroupSize adjacent blobs, and + // applies a full search on the classifier results to find the best classified + // segmentation. As a compromise to obtain better recall, 1-1 ambigiguity + // substitutions ARE used. + bool FindSegmentation(const GenericVector& target_text, + WERD_RES* word_res); + // Recursive helper to find a match to the target_text (from text_index + // position) in the choices (from choices_pos position). 
+ // Choices is an array of GenericVectors, of length choices_length, with each + // element representing a starting position in the word, and the + // GenericVector holding classification results for a sequence of consecutive + // blobs, with index 0 being a single blob, index 1 being 2 blobs etc. + void SearchForText(const GenericVector* choices, + int choices_pos, int choices_length, + const GenericVector& target_text, + int text_index, + float rating, GenericVector* segmentation, + float* best_rating, GenericVector* best_segmentation); + // Counts up the labelled words and the blobs within. + // Deletes all unused or emptied words, counting the unused ones. + // Resets W_BOL and W_EOL flags correctly. + // Builds the rebuild_word and rebuilds the box_word. + void TidyUp(PAGE_RES* page_res); + // Logs a bad box by line in the box file and box coords. + void ReportFailedBox(int boxfile_lineno, TBOX box, const char *box_ch, + const char *err_msg); + // Creates a fake best_choice entry in each WERD_RES with the correct text. + void CorrectClassifyWords(PAGE_RES* page_res); + // Call LearnWord to extract features for labelled blobs within each word. + // Features are written to the given filename. + void ApplyBoxTraining(const STRING& filename, PAGE_RES* page_res); + + //// fixxht.cpp /////////////////////////////////////////////////////// + // Returns the number of misfit blob tops in this word. + int CountMisfitTops(WERD_RES *word_res); + // Returns a new x-height in pixels (original image coords) that is + // maximally compatible with the result in word_res. + // Returns 0.0f if no x-height is found that is better than the current + // estimate. + float ComputeCompatibleXheight(WERD_RES *word_res); //// Data members /////////////////////////////////////////////////////// + // TODO(ocr-team): Remove obsolete parameters. 
BOOL_VAR_H(tessedit_resegment_from_boxes, false, "Take segmentation and labeling from box file"); + BOOL_VAR_H(tessedit_resegment_from_line_boxes, false, + "Conversion of word/line box file to char box file"); BOOL_VAR_H(tessedit_train_from_boxes, false, "Generate training data from boxed chars"); + BOOL_VAR_H(tessedit_make_boxes_from_boxes, false, + "Generate more boxes from boxed chars"); BOOL_VAR_H(tessedit_dump_pageseg_images, false, - "Dump itermediate images made during page segmentation"); - INT_VAR_H(tessedit_pageseg_mode, 2, - "Page seg mode: 0=auto, 1=col, 2=block, 3=line, 4=word, 6=char" - " (Values from PageSegMode enum in baseapi.h)"); - INT_VAR_H(tessedit_accuracyvspeed, 0, - "Accuracy V Speed tradeoff: 0 fastest, 100 most accurate" - " (Values from AccuracyVSpeed enum in baseapi.h)"); - BOOL_VAR_H(tessedit_train_from_boxes_word_level, false, - "Generate training data from boxed chars at word level."); + "Dump intermediate images made during page segmentation"); + INT_VAR_H(tessedit_pageseg_mode, PSM_SINGLE_BLOCK, + "Page seg mode: 0=osd only, 1=auto+osd, 2=auto, 3=col, 4=block," + " 5=line, 6=word, 7=char" + " (Values from PageSegMode enum in publictypes.h)"); + INT_VAR_H(tessedit_ocr_engine_mode, tesseract::OEM_TESSERACT_ONLY, + "Which OCR engine(s) to run (Tesseract, Cube, both). Defaults" + " to loading and running only Tesseract (no Cube, no combiner)." 
+ " (Values from OcrEngineMode enum in tesseractclass.h)"); STRING_VAR_H(tessedit_char_blacklist, "", "Blacklist of chars not to recognize"); STRING_VAR_H(tessedit_char_whitelist, "", "Whitelist of chars to recognize"); - BOOL_VAR_H(global_tessedit_ambigs_training, false, + BOOL_VAR_H(tessedit_ambigs_training, false, "Perform training for ambiguities"); + STRING_VAR_H(tessedit_write_params_to_file, "", + "Write all parameters to the given file."); + BOOL_VAR_H(tessedit_adapt_to_char_fragments, true, + "Adapt to words that contain " + " a character composed form fragments"); + BOOL_VAR_H(tessedit_adaption_debug, false, + "Generate and print debug information for adaption"); + BOOL_VAR_H(applybox_rebalance, true, "Drop dead"); + INT_VAR_H(applybox_debug, 1, "Debug level"); + INT_VAR_H(applybox_page, 0, "Page number to apply boxes from"); + STRING_VAR_H(applybox_test_exclusions, "", "Chars ignored for testing"); + double_VAR_H(applybox_error_band, 0.15, "Err band as fract of xht"); + STRING_VAR_H(applybox_exposure_pattern, ".exp", + "Exposure value follows this pattern in the image" + " filename. The name of the image files are expected" + " to be in the form [lang].[fontname].exp[num].tif"); + BOOL_VAR_H(applybox_learn_chars_and_char_frags_mode, false, + "Learn both character fragments (as is done in the" + " special low exposure mode) as well as unfragmented" + " characters."); + BOOL_VAR_H(applybox_learn_ngrams_mode, false, + "Each bounding box is assumed to contain ngrams. 
Only" + " learn the ngrams whose outlines overlap horizontally."); + BOOL_VAR_H(tessedit_print_text, false, "Write text to stdout"); + BOOL_VAR_H(tessedit_draw_words, false, "Draw source words"); + BOOL_VAR_H(tessedit_draw_outwords, false, "Draw output words"); + BOOL_VAR_H(tessedit_training_tess, false, "Call Tess to learn blobs"); + BOOL_VAR_H(tessedit_dump_choices, false, "Dump char choices"); + BOOL_VAR_H(tessedit_fix_fuzzy_spaces, true, + "Try to improve fuzzy spaces"); + BOOL_VAR_H(tessedit_unrej_any_wd, false, + "Dont bother with word plausibility"); + BOOL_VAR_H(tessedit_fix_hyphens, true, "Crunch double hyphens?"); + BOOL_VAR_H(tessedit_redo_xheight, true, "Check/Correct x-height"); + BOOL_VAR_H(tessedit_enable_doc_dict, true, + "Add words to the document dictionary"); + BOOL_VAR_H(tessedit_debug_fonts, false, "Output font info per char"); + BOOL_VAR_H(tessedit_debug_block_rejection, false, "Block and Row stats"); + INT_VAR_H(debug_x_ht_level, 0, "Reestimate debug"); + BOOL_VAR_H(debug_acceptable_wds, false, "Dump word pass/fail chk"); + STRING_VAR_H(chs_leading_punct, "('`\"", "Leading punctuation"); + STRING_VAR_H(chs_trailing_punct1, ").,;:?!", "1st Trailing punctuation"); + STRING_VAR_H(chs_trailing_punct2, ")'`\"", "2nd Trailing punctuation"); + double_VAR_H(quality_rej_pc, 0.08, "good_quality_doc lte rejection limit"); + double_VAR_H(quality_blob_pc, 0.0, "good_quality_doc gte good blobs limit"); + double_VAR_H(quality_outline_pc, 1.0, + "good_quality_doc lte outline error limit"); + double_VAR_H(quality_char_pc, 0.95, "good_quality_doc gte good char limit"); + INT_VAR_H(quality_min_initial_alphas_reqd, 2, "alphas in a good word"); + BOOL_VAR_H(tessedit_tess_adapt_to_rejmap, false, + "Use reject map to control Tesseract adaption"); + INT_VAR_H(tessedit_tess_adaption_mode, 0x27, + "Adaptation decision algorithm for tess"); + BOOL_VAR_H(tessedit_minimal_rej_pass1, false, + "Do minimal rejection on pass 1 output"); + BOOL_VAR_H(tessedit_test_adaption, 
false, "Test adaption criteria"); + BOOL_VAR_H(tessedit_matcher_log, false, "Log matcher activity"); + INT_VAR_H(tessedit_test_adaption_mode, 3, + "Adaptation decision algorithm for tess"); + BOOL_VAR_H(save_best_choices, false, + "Save the results of the recognition step" + " (blob_choices) within the corresponding WERD_CHOICE"); + BOOL_VAR_H(test_pt, false, "Test for point"); + double_VAR_H(test_pt_x, 99999.99, "xcoord"); + double_VAR_H(test_pt_y, 99999.99, "ycoord"); + INT_VAR_H(cube_debug_level, 1, "Print cube debug info."); + STRING_VAR_H(outlines_odd, "%| ", "Non standard number of outlines"); + STRING_VAR_H(outlines_2, "ij!?%\":;", "Non standard number of outlines"); + BOOL_VAR_H(docqual_excuse_outline_errs, false, + "Allow outline errs in unrejection?"); + BOOL_VAR_H(tessedit_good_quality_unrej, true, + "Reduce rejection on good docs"); + BOOL_VAR_H(tessedit_use_reject_spaces, true, "Reject spaces?"); + double_VAR_H(tessedit_reject_doc_percent, 65.00, + "%rej allowed before rej whole doc"); + double_VAR_H(tessedit_reject_block_percent, 45.00, + "%rej allowed before rej whole block"); + double_VAR_H(tessedit_reject_row_percent, 40.00, + "%rej allowed before rej whole row"); + double_VAR_H(tessedit_whole_wd_rej_row_percent, 70.00, + "Number of row rejects in whole word rejects" + "which prevents whole row rejection"); + BOOL_VAR_H(tessedit_preserve_blk_rej_perfect_wds, true, + "Only rej partially rejected words in block rejection"); + BOOL_VAR_H(tessedit_preserve_row_rej_perfect_wds, true, + "Only rej partially rejected words in row rejection"); + BOOL_VAR_H(tessedit_dont_blkrej_good_wds, false, + "Use word segmentation quality metric"); + BOOL_VAR_H(tessedit_dont_rowrej_good_wds, false, + "Use word segmentation quality metric"); + INT_VAR_H(tessedit_preserve_min_wd_len, 2, + "Only preserve wds longer than this"); + BOOL_VAR_H(tessedit_row_rej_good_docs, true, + "Apply row rejection to good docs"); + double_VAR_H(tessedit_good_doc_still_rowrej_wd, 1.1, + "rej 
good doc wd if more than this fraction rejected"); + BOOL_VAR_H(tessedit_reject_bad_qual_wds, true, + "Reject all bad quality wds"); + BOOL_VAR_H(tessedit_debug_doc_rejection, false, "Page stats"); + BOOL_VAR_H(tessedit_debug_quality_metrics, false, + "Output data to debug file"); + BOOL_VAR_H(bland_unrej, false, "unrej potential with no chekcs"); + double_VAR_H(quality_rowrej_pc, 1.1, + "good_quality_doc gte good char limit"); + BOOL_VAR_H(unlv_tilde_crunching, true, + "Mark v.bad words for tilde crunch"); + BOOL_VAR_H(crunch_early_merge_tess_fails, true, "Before word crunch?"); + BOOL_VAR_H(crunch_early_convert_bad_unlv_chs, false, "Take out ~^ early?"); + double_VAR_H(crunch_terrible_rating, 80.0, "crunch rating lt this"); + BOOL_VAR_H(crunch_terrible_garbage, true, "As it says"); + double_VAR_H(crunch_poor_garbage_cert, -9.0, + "crunch garbage cert lt this"); + double_VAR_H(crunch_poor_garbage_rate, 60, "crunch garbage rating lt this"); + double_VAR_H(crunch_pot_poor_rate, 40, "POTENTIAL crunch rating lt this"); + double_VAR_H(crunch_pot_poor_cert, -8.0, "POTENTIAL crunch cert lt this"); + BOOL_VAR_H(crunch_pot_garbage, true, "POTENTIAL crunch garbage"); + double_VAR_H(crunch_del_rating, 60, "POTENTIAL crunch rating lt this"); + double_VAR_H(crunch_del_cert, -10.0, "POTENTIAL crunch cert lt this"); + double_VAR_H(crunch_del_min_ht, 0.7, "Del if word ht lt xht x this"); + double_VAR_H(crunch_del_max_ht, 3.0, "Del if word ht gt xht x this"); + double_VAR_H(crunch_del_min_width, 3.0, "Del if word width lt xht x this"); + double_VAR_H(crunch_del_high_word, 1.5, + "Del if word gt xht x this above bl"); + double_VAR_H(crunch_del_low_word, 0.5, "Del if word gt xht x this below bl"); + double_VAR_H(crunch_small_outlines_size, 0.6, "Small if lt xht x this"); + INT_VAR_H(crunch_rating_max, 10, "For adj length in rating per ch"); + INT_VAR_H(crunch_pot_indicators, 1, "How many potential indicators needed"); + BOOL_VAR_H(crunch_leave_ok_strings, true, "Dont touch sensible 
strings"); + BOOL_VAR_H(crunch_accept_ok, true, "Use acceptability in okstring"); + BOOL_VAR_H(crunch_leave_accept_strings, false, + "Dont pot crunch sensible strings"); + BOOL_VAR_H(crunch_include_numerals, false, "Fiddle alpha figures"); + INT_VAR_H(crunch_leave_lc_strings, 4, + "Dont crunch words with long lower case strings"); + INT_VAR_H(crunch_leave_uc_strings, 4, + "Dont crunch words with long lower case strings"); + INT_VAR_H(crunch_long_repetitions, 3, "Crunch words with long repetitions"); + INT_VAR_H(crunch_debug, 0, "As it says"); + INT_VAR_H(fixsp_non_noise_limit, 1, + "How many non-noise blbs either side?"); + double_VAR_H(fixsp_small_outlines_size, 0.28, "Small if lt xht x this"); + BOOL_VAR_H(tessedit_prefer_joined_punct, false, "Reward punctation joins"); + INT_VAR_H(fixsp_done_mode, 1, "What constitues done for spacing"); + INT_VAR_H(debug_fix_space_level, 0, "Contextual fixspace debug"); + STRING_VAR_H(numeric_punctuation, ".,", + "Punct. chs expected WITHIN numbers"); + INT_VAR_H(x_ht_acceptance_tolerance, 8, + "Max allowed deviation of blob top outside of font data"); + INT_VAR_H(x_ht_min_change, 8, "Min change in xht before actually trying it"); + BOOL_VAR_H(tessedit_write_block_separators, false, + "Write block separators in output"); + BOOL_VAR_H(tessedit_write_raw_output, false, + "Write raw stuff to name.raw"); + BOOL_VAR_H(tessedit_write_output, false, "Write text to name.txt"); + BOOL_VAR_H(tessedit_write_ratings, false, + "Return ratings in IPEOCRAPI data"); + BOOL_VAR_H(tessedit_write_rep_codes, false, + "Write repetition char code"); + BOOL_VAR_H(tessedit_write_unlv, false, "Write .unlv output file"); + BOOL_VAR_H(tessedit_create_hocr, false, "Write .html hOCR output file"); + STRING_VAR_H(unrecognised_char, "|", + "Output char for unidentified blobs"); + INT_VAR_H(suspect_level, 99, "Suspect marker level"); + INT_VAR_H(suspect_space_level, 100, + "Min suspect level for rejecting spaces"); + INT_VAR_H(suspect_short_words, 2, + "Dont 
Suspect dict wds longer than this"); + BOOL_VAR_H(suspect_constrain_1Il, false, "UNLV keep 1Il chars rejected"); + double_VAR_H(suspect_rating_per_ch, 999.9, "Dont touch bad rating limit"); + double_VAR_H(suspect_accept_rating, -999.9, "Accept good rating limit"); + BOOL_VAR_H(tessedit_minimal_rejection, false, "Only reject tess failures"); + BOOL_VAR_H(tessedit_zero_rejection, false, "Dont reject ANYTHING"); + BOOL_VAR_H(tessedit_word_for_word, false, + "Make output have exactly one word per WERD"); + BOOL_VAR_H(tessedit_zero_kelvin_rejection, false, + "Dont reject ANYTHING AT ALL"); + BOOL_VAR_H(tessedit_consistent_reps, true, "Force all rep chars the same"); + INT_VAR_H(tessedit_reject_mode, 0, "Rejection algorithm"); + INT_VAR_H(tessedit_ok_mode, 5, "Acceptance decision algorithm"); + BOOL_VAR_H(tessedit_rejection_debug, false, "Adaption debug"); + BOOL_VAR_H(tessedit_flip_0O, true, "Contextual 0O O0 flips"); + double_VAR_H(tessedit_lower_flip_hyphen, 1.5, + "Aspect ratio dot/hyphen test"); + double_VAR_H(tessedit_upper_flip_hyphen, 1.8, + "Aspect ratio dot/hyphen test"); + BOOL_VAR_H(rej_trust_doc_dawg, false, "Use DOC dawg in 11l conf. 
detector"); + BOOL_VAR_H(rej_1Il_use_dict_word, false, "Use dictword test"); + BOOL_VAR_H(rej_1Il_trust_permuter_type, true, "Dont double check"); + BOOL_VAR_H(rej_use_tess_accepted, true, "Individual rejection control"); + BOOL_VAR_H(rej_use_tess_blanks, true, "Individual rejection control"); + BOOL_VAR_H(rej_use_good_perm, true, "Individual rejection control"); + BOOL_VAR_H(rej_use_sensible_wd, false, "Extend permuter check"); + BOOL_VAR_H(rej_alphas_in_number_perm, false, "Extend permuter check"); + double_VAR_H(rej_whole_of_mostly_reject_word_fract, 0.85, "if >this fract"); + INT_VAR_H(tessedit_image_border, 2, "Rej blbs near image edge limit"); + STRING_VAR_H(ok_repeated_ch_non_alphanum_wds, "-?*\075", + "Allow NN to unrej"); + STRING_VAR_H(conflict_set_I_l_1, "Il1[]", "Il1 conflict set"); + INT_VAR_H(min_sane_x_ht_pixels, 8, "Reject any x-ht lt or eq than this"); + BOOL_VAR_H(tessedit_create_boxfile, false, "Output text with boxes"); + BOOL_VAR_H(tessedit_read_image, true, "Ensure the image is read"); + INT_VAR_H(tessedit_serial_unlv, 0, + "0->Whole page, 1->serial no adapt, 2->serial with adapt"); + INT_VAR_H(tessedit_page_number, -1, + "-1 -> All pages, else specifc page to process"); + BOOL_VAR_H(tessedit_write_images, false, "Capture the image from the IPE"); + BOOL_VAR_H(interactive_mode, false, "Run interactively?"); + STRING_VAR_H(file_type, ".tif", "Filename extension"); + INT_VAR_H(testedit_match_debug, 0, "Integer match debug ctrl"); + BOOL_VAR_H(tessedit_override_permuter, true, "According to dict_word"); + INT_VAR_H(tessdata_manager_debug_level, 0, + "Debug level for TessdataManager functions."); + // Min acceptable orientation margin (difference in scores between top and 2nd + // choice in OSResults::orientations) to believe the page orientation. 
+ double_VAR_H(min_orientation_margin, 12.0, + "Min acceptable orientation margin"); + //// ambigsrecog.cpp ///////////////////////////////////////////////////////// - FILE *init_ambigs_training(const STRING &fname); - void ambigs_training_segmented(const STRING &fname, - PAGE_RES *page_res, - volatile ETEXT_DESC *monitor, - FILE *output_file); - void ambigs_classify_and_output(PAGE_RES_IT *page_res_it, + FILE *init_recog_training(const STRING &fname); + void recog_training_segmented(const STRING &fname, + PAGE_RES *page_res, + volatile ETEXT_DESC *monitor, + FILE *output_file); + void ambigs_classify_and_output(WERD_RES *werd_res, + ROW_RES *row_res, + BLOCK_RES *block_res, const char *label, FILE *output_file); + + inline CubeRecoContext *GetCubeRecoContext() { return cube_cntxt_; } + private: + // The filename of a backup config file. If not null, then we currently + // have a temporary debug config file loaded, and backup_config_file_ + // will be loaded, and set to null when debug is complete. + const char* backup_config_file_; + // The filename of a config file to read when processing a debug word. + STRING word_config_; Pix* pix_binary_; + Pix* pix_grey_; + // The boolean records if the currently set + // pix_binary_ member has been modified due to any processing so that this + // may hurt Cube's recognition phase. + bool orig_image_changed_; + // Page segmentation/layout + Textord textord_; + // True if the primary language uses right_to_left reading order. + bool right_to_left_; FCOORD deskew_; FCOORD reskew_; - bool hindi_image_; + TesseractStats stats_; + // Cube objects. 
+ CubeRecoContext* cube_cntxt_; + TesseractCubeCombiner *tess_cube_combiner_; }; } // namespace tesseract diff --git a/ccmain/tessio.h b/ccmain/tessio.h deleted file mode 100644 index c48ec0f6b7..0000000000 --- a/ccmain/tessio.h +++ /dev/null @@ -1,210 +0,0 @@ -/********************************************************************** - * File: tessio.h (Formerly tessread.h) - * Description: Read/write Tesseract format row files. - * Author: Ray Smith - * Created: Wed Oct 09 15:02:46 BST 1991 - * - * (C) Copyright 1991, Hewlett-Packard Ltd. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. 
- * - **********************************************************************/ - -#ifndef TESSIO_H -#define TESSIO_H - -#include -#include "tessclas.h" -#include "notdll.h" - -/** - * open read & close - * @param name file name - * @param topright corner - */ -TEXTROW *get_tess_row_file( - const char *name, - TPOINT *topright - ); -/** - * open read & close - * @param name file name - * @param topright corner - */ -TBLOB *get_tess_blob_file( - const char *name, - TPOINT *topright - ); -/** - * read row file - * @param gphfd file to read - * @param count number expected - * @param imagesize size of image - */ -TEXTROW *readrows( - int gphfd, - int count, - TPOINT *imagesize - ); -/** - * read some words - * @param gphfd file to read - * @param count number expected - * @param row row it comes from - * @param imagesize size of image - */ -TWERD *readwords( - int gphfd, - int count, - TEXTROW *row, - TPOINT *imagesize - ); -/** - * read some blobs - * @param gphfd file to read - * @param count number expected - * @param imagesize size of image - */ -TBLOB *readblobs( - int gphfd, - int count, - TPOINT *imagesize - ); -/** - * get a string - * @param gphfd file to read - * @param ratingspace size to read - */ -char *readratings( - int gphfd, - int ratingspace - ); -/** - * read some outlines - * @param gphfd file to read - * @param outlines array of ptrs - * @param outlinecount no to read - */ -void readoutlines( - int gphfd, - TESSLINE **outlines, - int outlinecount - ); -/** - * read with testing - * @param fd file to read - * @param start buffer to write - * @param size amount to write - * @param checkeof give error on eof? 
- */ -int readgph( - int fd, - void *start, - int size, - int checkeof - ); -/** - * write a row - * @param name file name - * @param row row to write - */ -void write_row( - FILE *name, - TEXTROW *row - ); -/** - * write special row - * @param name file name - * @param row row to write - * @param wordcount number of words to go - */ -void write_error_row( - FILE *name, - TEXTROW *row, - int wordcount - ); -/** - * write special blob - * @param name file name - * @param blob blob to write - * @param charlist true chars - * @param charcount number of true chars - */ -void write_error_blob( - FILE *name, - TBLOB *blob, - char *charlist, - int charcount - ); -/** - * write special word - * @param name file name - * @param word word to write - * @param charlist true chars - * @param charcount number of true chars - */ -void write_error_word( - FILE *name, - TWERD *word, - char *charlist, - int charcount - ); -/** - * write a blob - * @param name file to write - * @param blob blob to write - */ -void writeblob( - FILE *name, - TBLOB *blob - ); -/** - * serialize - * @param name file to write to - * @param blob current blob - * @param outline current outline - * @param outlineno current serial no - */ -void serial_outlines( - FILE *name, - TBLOB *blob, - register TESSLINE *outline, - int *outlineno - ); -/** - * count loopsize - * @param vector vectors to count - */ -int countloop( - register BYTEVEC *vector - ); -/** - * get serial no - * @param outline start of search - * @param target outline to find - * @param serial serial no so far - */ -int outlineserial( - register TESSLINE *outline, - register TESSLINE *target, - int serial - ); -/** - * Interface to fwrite - * @param name file to write - * @param start buffer to write - * @param size amount to write - */ -void writegph( - FILE *name, - void *start, - int size - ); -#endif diff --git a/ccmain/tessvars.cpp b/ccmain/tessvars.cpp index 62bedf4aec..ee938010cf 100644 --- a/ccmain/tessvars.cpp +++ 
b/ccmain/tessvars.cpp @@ -17,22 +17,9 @@ * **********************************************************************/ -#include "mfcpch.h" -#include "tessvars.h" - -#define EXTERN +#include -EXTERN INT_VAR (tessedit_adapt_kludge, 0, -"Use acceptable result or dangambigs"); -EXTERN BOOL_VAR (interactive_mode, FALSE, "Run interactively?"); -EXTERN BOOL_VAR (edit_variables, FALSE, "Variables Editor Window?"); -// xiaofan EXTERN STRING_VAR(file_type,".bl","Filename extension"); -EXTERN STRING_VAR (file_type, ".tif", "Filename extension"); -INT_VAR (testedit_match_debug, 0, "Integer match debug ctrl"); -EXTERN INT_VAR (tessedit_dangambigs_chop, FALSE, -"Use UnicharAmbigs to direct chop"); -EXTERN INT_VAR (tessedit_dangambigs_assoc, FALSE, -"Use UnicharAmbigs to direct assoc"); +#include "mfcpch.h" +#include "tessvars.h" -EXTERN IMAGE page_image; //image of page -EXTERN FILE *debug_fp = stderr; //write debug stuff here +FILE *debug_fp = stderr; // write debug stuff here diff --git a/ccmain/tessvars.h b/ccmain/tessvars.h index d0f0ec259b..eeaa358573 100644 --- a/ccmain/tessvars.h +++ b/ccmain/tessvars.h @@ -20,29 +20,10 @@ #ifndef TESSVARS_H #define TESSVARS_H -#include "varable.h" +#include + #include "img.h" -#include "tordmain.h" #include "notdll.h" -extern INT_VAR_H (tessedit_adapt_kludge, 0, -"Use acceptable result or dangambigs"); -extern BOOL_VAR_H (interactive_mode, FALSE, "Run interactively?"); -extern BOOL_VAR_H (edit_variables, FALSE, "Variables Editor Window?"); -//xiaofan extern STRING_VAR_H(file_type,".bl","Filename extension"); -extern STRING_VAR_H (file_type, ".tif", "Filename extension"); -extern INT_VAR_H (tessedit_truncate_wordchoice_log, 10, -"Max words to keep in list"); -extern INT_VAR_H (testedit_match_debug, 0, "Integer match debug ctrl"); -extern INT_VAR_H (tessedit_truncate_chopper, 1, -"Shorten chopper seam search"); -extern INT_VAR_H (tessedit_fix_sideways_chops, 1, -"Fix sideways chop problem"); -extern INT_VAR_H (tessedit_dangambigs_chop, FALSE, 
-"Use UnicharAmbigs to direct chop"); -extern INT_VAR_H (tessedit_dangambigs_assoc, FALSE, -"Use UnicharAmbigs to direct assoc"); - -extern IMAGE page_image; //image of page -extern FILE *debug_fp; //write debug stuff here +extern FILE *debug_fp; // write debug stuff here #endif diff --git a/ccmain/tfacep.h b/ccmain/tfacep.h index 6041f7bee6..80c9bd8fad 100644 --- a/ccmain/tfacep.h +++ b/ccmain/tfacep.h @@ -17,45 +17,23 @@ * **********************************************************************/ -#ifndef TFACEP_H -#define TFACEP_H +#ifndef TFACEP_H +#define TFACEP_H -#include "hosthplb.h" -#include "tessclas.h" -#include "tessarray.h" -#include "tstruct.h" -#include "notdll.h" -#include "choices.h" +#include "hosthplb.h" +#include "blobs.h" +#include "tessarray.h" +#include "tstruct.h" +#include "notdll.h" #include "oldlist.h" -#include "tface.h" #include "permute.h" -#include "adaptmatch.h" #include "blobclass.h" #include "stopper.h" #include "associate.h" #include "chop.h" -#include "expandblob.h" -#include "tordvars.h" -#include "metrics.h" -#include "tface.h" -#include "badwords.h" #include "structures.h" typedef void (*TESS_TESTER) (TBLOB *, BOOL8, char *, inT32, LIST); -typedef LIST (*TESS_MATCHER) (TBLOB *, TBLOB *, TBLOB *, void *, TEXTROW *); +typedef LIST (*TESS_MATCHER) (TBLOB *, TBLOB *, TBLOB *); -extern TEXTROW normalized_row; -extern int display_ratings; - -#if 0 -#define strsave(s) \ - ((s) ? 
\ - ((char*) strcpy ((char*)alloc_string (strlen(s)+1), s)) : \ - (NULL)) -#endif - -#define BOLD_ON "&dB(s3B" -#define BOLD_OFF "&d@(s0B" -#define UNDERLINE_ON "&dD" -#define UNDERLINE_OFF "&d@" #endif diff --git a/ccmain/tfacepp.cpp b/ccmain/tfacepp.cpp index 57c085e4e5..72c3661baa 100644 --- a/ccmain/tfacepp.cpp +++ b/ccmain/tfacepp.cpp @@ -39,11 +39,6 @@ #include "reject.h" #include "tesseractclass.h" -#define EXTERN - -EXTERN BOOL_VAR (tessedit_override_permuter, TRUE, "According to dict_word"); - - #define MAX_UNDIVIDED_LENGTH 24 @@ -55,70 +50,52 @@ EXTERN BOOL_VAR (tessedit_override_permuter, TRUE, "According to dict_word"); * Convert the output back to editor form. **********************************************************************/ namespace tesseract { -WERD_CHOICE *Tesseract::recog_word( //recog one owrd - WERD *word, //word to do - DENORM *denorm, //de-normaliser - //matcher function - POLY_MATCHER matcher, - POLY_TESTER tester, //tester function - POLY_TESTER trainer, //trainer function - BOOL8 testing, //true if answer driven - //raw result - WERD_CHOICE *&raw_choice, - //list of blob lists - BLOB_CHOICE_LIST_CLIST *blob_choices, - WERD *&outword //bln word output - ) { - WERD_CHOICE *word_choice; - uinT8 perm_type; - uinT8 real_dict_perm_type; - - if (word->blob_list ()->empty ()) { - word_choice = new WERD_CHOICE("", NULL, 10.0f, -1.0f, - TOP_CHOICE_PERM, unicharset); - raw_choice = new WERD_CHOICE("", NULL, 10.0f, -1.0f, - TOP_CHOICE_PERM, unicharset); - outword = word->poly_copy (denorm->row ()->x_height ()); - } - else - word_choice = recog_word_recursive (word, denorm, matcher, tester, - trainer, testing, raw_choice, - blob_choices, outword); - if ((word_choice->length() != outword->blob_list()->length()) || - (word_choice->length() != blob_choices->length())) { - tprintf - ("recog_word ASSERT FAIL String:\"%s\"; Strlen=%d; #Blobs=%d; #Choices=%d\n", - word_choice->debug_string(unicharset).string(), - word_choice->length(), 
outword->blob_list()->length(), - blob_choices->length()); +void Tesseract::recog_word(WERD_RES *word, + BLOB_CHOICE_LIST_CLIST *blob_choices) { + ASSERT_HOST(word->chopped_word->blobs != NULL); + recog_word_recursive(word, blob_choices); + word->SetupBoxWord(); + if ((word->best_choice->length() != word->box_word->length()) || + (word->best_choice->length() != blob_choices->length())) { + tprintf("recog_word ASSERT FAIL String:\"%s\"; " + "Strlen=%d; #Blobs=%d; #Choices=%d\n", + word->best_choice->debug_string(unicharset).string(), + word->best_choice->length(), word->box_word->length(), + blob_choices->length()); } - ASSERT_HOST(word_choice->length() == outword->blob_list()->length()); - ASSERT_HOST(word_choice->length() == blob_choices->length()); - - /* Copy any reject blobs into the outword */ - outword->rej_blob_list()->deep_copy(word->rej_blob_list(), &PBLOB::deep_copy); - + ASSERT_HOST(word->best_choice->length() == word->box_word->length()); + ASSERT_HOST(word->best_choice->length() == blob_choices->length()); if (tessedit_override_permuter) { /* Override the permuter type if a straight dictionary check disagrees. 
*/ - perm_type = word_choice->permuter(); + uinT8 perm_type = word->best_choice->permuter(); if ((perm_type != SYSTEM_DAWG_PERM) && (perm_type != FREQ_DAWG_PERM) && (perm_type != USER_DAWG_PERM)) { - real_dict_perm_type = dict_word(*word_choice); + uinT8 real_dict_perm_type = dict_word(*word->best_choice); if (((real_dict_perm_type == SYSTEM_DAWG_PERM) || (real_dict_perm_type == FREQ_DAWG_PERM) || (real_dict_perm_type == USER_DAWG_PERM)) && - (alpha_count(word_choice->unichar_string().string(), - word_choice->unichar_lengths().string()) > 0)) { - word_choice->set_permuter (real_dict_perm_type); // use dict perm + (alpha_count(word->best_choice->unichar_string().string(), + word->best_choice->unichar_lengths().string()) > 0)) { + word->best_choice->set_permuter(real_dict_perm_type); // use dict perm } } - if (tessedit_rejection_debug && perm_type != word_choice->permuter ()) { - tprintf ("Permuter Type Flipped from %d to %d\n", - perm_type, word_choice->permuter ()); + if (tessedit_rejection_debug && + perm_type != word->best_choice->permuter()) { + tprintf("Permuter Type Flipped from %d to %d\n", + perm_type, word->best_choice->permuter()); } } - assert ((word_choice == NULL) == (raw_choice == NULL)); - return word_choice; + // Factored out from control.cpp + ASSERT_HOST((word->best_choice == NULL) == (word->raw_choice == NULL)); + if (word->best_choice == NULL || word->best_choice->length() == 0 || + strspn(word->best_choice->unichar_string().string(), " ") == + word->best_choice->length()) { + word->tess_failed = true; + word->reject_map.initialise(word->box_word->length()); + word->reject_map.rej_word_tess_failure(); + } else { + word->tess_failed = false; + } } @@ -128,105 +105,65 @@ WERD_CHOICE *Tesseract::recog_word( //recog one owrd * Convert the word to tess form and pass it to the tess segmenter. * Convert the output back to editor form. 
**********************************************************************/ -WERD_CHOICE * -Tesseract::recog_word_recursive( - WERD *word, // word to do - DENORM *denorm, // de-normaliser - POLY_MATCHER matcher, // matcher function - POLY_TESTER tester, // tester function - POLY_TESTER trainer, // trainer function - BOOL8 testing, // true if answer driven - WERD_CHOICE *&raw_choice, // raw result - BLOB_CHOICE_LIST_CLIST *blob_choices, // list of blob lists - WERD *&outword // bln word output - ) { - inT32 initial_blob_choice_len; - inT32 word_length; // no of blobs - STRING word_string; // converted from tess - STRING word_string_lengths; - BLOB_CHOICE_LIST_VECTOR *tess_ratings; // tess results - TWERD *tessword; // tess format - BLOB_CHOICE_LIST_C_IT blob_choices_it; // iterator - - tess_matcher = matcher; // install matcher - tess_tester = testing ? tester : NULL; - tess_trainer = testing ? trainer : NULL; - tess_denorm = denorm; - tess_word = word; - // blob_matchers[1]=call_matcher; - if (word->blob_list ()->length () > MAX_UNDIVIDED_LENGTH) { - return split_and_recog_word (word, denorm, matcher, tester, trainer, - testing, raw_choice, blob_choices, - outword); - } else { - UNICHAR_ID space_id = unicharset.unichar_to_id(" "); - WERD_CHOICE *best_choice = new WERD_CHOICE(); - raw_choice = new WERD_CHOICE(); - initial_blob_choice_len = blob_choices->length(); - tessword = make_tess_word (word, NULL); - tess_ratings = cc_recog(tessword, best_choice, raw_choice, - testing && tester != NULL, - testing && trainer != NULL, - word->flag(W_EOL)); - - outword = make_ed_word (tessword, word); // convert word - if (outword == NULL) { - outword = word->poly_copy (denorm->row ()->x_height ()); - } - delete_word(tessword); // get rid of it - word_length = outword->blob_list()->length(); // no of blobs +void Tesseract::recog_word_recursive(WERD_RES *word, + BLOB_CHOICE_LIST_CLIST *blob_choices) { + int word_length = word->chopped_word->NumBlobs(); // no of blobs + if (word_length 
> MAX_UNDIVIDED_LENGTH) { + return split_and_recog_word(word, blob_choices); + } + int initial_blob_choice_len = blob_choices->length(); + BLOB_CHOICE_LIST_VECTOR* tess_ratings = cc_recog(word); - // Put BLOB_CHOICE_LISTs from tess_ratings into blob_choices. - blob_choices_it.set_to_list(blob_choices); - for (int i = 0; i < tess_ratings->length(); ++i) { - blob_choices_it.add_to_end(tess_ratings->get(i)); - } - delete tess_ratings; + // Put BLOB_CHOICE_LISTs from tess_ratings into blob_choices. + BLOB_CHOICE_LIST_C_IT blob_choices_it(blob_choices); + for (int i = 0; i < tess_ratings->length(); ++i) { + blob_choices_it.add_to_end(tess_ratings->get(i)); + } + delete tess_ratings; - // Pad raw_choice with spaces if needed. - if (raw_choice->length() < word_length) { - while (raw_choice->length() < word_length) { - raw_choice->append_unichar_id(space_id, 1, 0.0, - raw_choice->certainty()); - } - raw_choice->populate_unichars(unicharset); + word_length = word->rebuild_word->NumBlobs(); // No of blobs in output. + // Pad raw_choice with spaces if needed. + if (word->raw_choice->length() < word_length) { + UNICHAR_ID space_id = unicharset.unichar_to_id(" "); + while (word->raw_choice->length() < word_length) { + word->raw_choice->append_unichar_id(space_id, 1, 0.0, + word->raw_choice->certainty()); } + word->raw_choice->populate_unichars(unicharset); + } - // Do sanity checks and minor fixes on best_choice. - if (best_choice->length() > word_length) { - tprintf("recog_word: Discarded long string \"%s\"" - " (%d characters vs %d blobs)\n", - best_choice->unichar_string().string (), - best_choice->length(), word_length); - best_choice->make_bad(); // should never happen - tprintf("Word is at (%g,%g)\n", - denorm->origin(), - denorm->y(word->bounding_box().bottom(), 0.0)); + // Do sanity checks and minor fixes on best_choice. 
+ if (word->best_choice->length() > word_length) { + word->best_choice->make_bad(); // should never happen + tprintf("recog_word: Discarded long string \"%s\"" + " (%d characters vs %d blobs)\n", + word->best_choice->unichar_string().string(), + word->best_choice->length(), word_length); + tprintf("Word is at:"); + word->word->bounding_box().print(); + } + if (blob_choices->length() - initial_blob_choice_len != word_length) { + word->best_choice->make_bad(); // force rejection + tprintf("recog_word: Choices list len:%d; blob lists len:%d\n", + blob_choices->length(), word_length); + blob_choices_it.set_to_list(blob_choices); // list of lists + while (blob_choices->length() - initial_blob_choice_len < word_length) { + blob_choices_it.add_to_end(new BLOB_CHOICE_LIST()); // add a fake one + tprintf("recog_word: Added dummy choice list\n"); } - if (blob_choices->length() - initial_blob_choice_len != word_length) { - best_choice->make_bad(); // force rejection - tprintf ("recog_word: Choices list len:%d; blob lists len:%d\n", - blob_choices->length(), word_length); - blob_choices_it.set_to_list(blob_choices); // list of lists - while (blob_choices->length() - initial_blob_choice_len < word_length) { - blob_choices_it.add_to_end(new BLOB_CHOICE_LIST()); // add a fake one - tprintf("recog_word: Added dummy choice list\n"); - } - while (blob_choices->length() - initial_blob_choice_len > word_length) { - blob_choices_it.move_to_last(); // should never happen - delete blob_choices_it.extract(); - tprintf("recog_word: Deleted choice list\n"); - } + while (blob_choices->length() - initial_blob_choice_len > word_length) { + blob_choices_it.move_to_last(); // should never happen + delete blob_choices_it.extract(); + tprintf("recog_word: Deleted choice list\n"); } - if (best_choice->length() < word_length) { - while (best_choice->length() < word_length) { - best_choice->append_unichar_id(space_id, 1, 0.0, - best_choice->certainty()); - } - 
best_choice->populate_unichars(unicharset); + } + if (word->best_choice->length() < word_length) { + UNICHAR_ID space_id = unicharset.unichar_to_id(" "); + while (word->best_choice->length() < word_length) { + word->best_choice->append_unichar_id(space_id, 1, 0.0, + word->best_choice->certainty()); } - - return best_choice; + word->best_choice->populate_unichars(unicharset); } } @@ -234,143 +171,76 @@ Tesseract::recog_word_recursive( /********************************************************************** * split_and_recog_word * - * Convert the word to tess form and pass it to the tess segmenter. - * Convert the output back to editor form. + * Split the word into 2 smaller pieces at the largest gap. + * Recognize the pieces and stick the results back together. **********************************************************************/ -WERD_CHOICE * -Tesseract::split_and_recog_word( //recog one owrd - WERD *word, //word to do - DENORM *denorm, //de-normaliser - POLY_MATCHER matcher, //matcher function - POLY_TESTER tester, //tester function - POLY_TESTER trainer, //trainer function - BOOL8 testing, //true if answer driven - //raw result - WERD_CHOICE *&raw_choice, - //list of blob lists - BLOB_CHOICE_LIST_CLIST *blob_choices, - WERD *&outword //bln word output - ) { - // inT32 outword1_len; - // inT32 outword2_len; - WERD *first_word; //poly copy of word - WERD *second_word; //fabricated word - WERD *outword2; //2nd output word - PBLOB *blob; - WERD_CHOICE *result; //return value - WERD_CHOICE *result2; //output of 2nd word - WERD_CHOICE *raw_choice2; //raw version of 2nd - float gap; //blob gap - float bestgap; //biggest gap - PBLOB_LIST new_blobs; //list of gathered blobs - PBLOB_IT blob_it; - //iterator - PBLOB_IT new_blob_it = &new_blobs; - - first_word = word->poly_copy (denorm->row ()->x_height ()); - blob_it.set_to_list (first_word->blob_list ()); - bestgap = (float) -MAX_INT32; - while (!blob_it.at_last ()) { - blob = blob_it.data (); - //gap to next - gap = 
(float) blob_it.data_relative(1)->bounding_box().left() - - blob->bounding_box().right(); - blob_it.forward (); - if (gap > bestgap) { - bestgap = gap; //find biggest - new_blob_it = blob_it; //save position +void Tesseract::split_and_recog_word(WERD_RES *word, + BLOB_CHOICE_LIST_CLIST *blob_choices) { + // Find the biggest blob gap in the chopped_word. + int bestgap = -MAX_INT32; + TPOINT best_split_pt; + TBLOB* best_end = NULL; + TBLOB* prev_blob = NULL; + for (TBLOB* blob = word->chopped_word->blobs; blob != NULL; + blob = blob->next) { + if (prev_blob != NULL) { + TBOX prev_box = prev_blob->bounding_box(); + TBOX blob_box = blob->bounding_box(); + int gap = blob_box.left() - prev_box.right(); + if (gap > bestgap) { + bestgap = gap; + best_end = prev_blob; + best_split_pt.x = (prev_box.right() + blob_box.left()) / 2; + best_split_pt.y = (prev_box.top() + prev_box.bottom() + + blob_box.top() + blob_box.bottom()) / 4; + } } + prev_blob = blob; } - //take 2nd half - new_blobs.assign_to_sublist (&new_blob_it, &blob_it); - //make it a word - second_word = new WERD (&new_blobs, 1, NULL); - ASSERT_HOST (word->blob_list ()->length () == - first_word->blob_list ()->length () + - second_word->blob_list ()->length ()); - - result = recog_word_recursive (first_word, denorm, matcher, - tester, trainer, testing, raw_choice, - blob_choices, outword); - delete first_word; //done that one - result2 = recog_word_recursive (second_word, denorm, matcher, - tester, trainer, testing, raw_choice2, - blob_choices, outword2); - delete second_word; //done that too - *result += *result2; //combine ratings - delete result2; - *raw_choice += *raw_choice2; - delete raw_choice2; //finished with it - // outword1_len= outword->blob_list()->length(); - // outword2_len= outword2->blob_list()->length(); - outword->join_on (outword2); //join words - delete outword2; - // if ( outword->blob_list()->length() != outword1_len + outword2_len ) - // tprintf( "Split&Recog: part1len=%d; part2len=%d; 
combinedlen=%d\n", - // outword1_len, outword2_len, outword->blob_list()->length() ); - // ASSERT_HOST( outword->blob_list()->length() == outword1_len + outword2_len ); - return result; + ASSERT_HOST(best_end != NULL); + + // Make a copy of the word to put the 2nd half in. + WERD_RES* word2 = new WERD_RES(*word); + // Blow away the copied chopped_word, as we want to work with the blobs + // from the input chopped_word so the seam_arrays can be merged. + delete word2->chopped_word; + word2->chopped_word = new TWERD; + word2->chopped_word->blobs = best_end->next; + best_end->next = NULL; + // Make a new seamarray on both words. + free_seam_list(word->seam_array); + word->seam_array = start_seam_list(word->chopped_word->blobs); + word2->seam_array = start_seam_list(word2->chopped_word->blobs); + // Recognize the first part of the word. + recog_word_recursive(word, blob_choices); + // Recognize the second part of the word. + recog_word_recursive(word2, blob_choices); + // Tack the word2 outputs onto the end of the word outputs. + // New blobs might have appeared on the end of word1. + for (best_end = word->chopped_word->blobs; best_end->next != NULL; + best_end = best_end->next); + best_end->next = word2->chopped_word->blobs; + TBLOB* blob; + for (blob = word->rebuild_word->blobs; blob->next != NULL; blob = blob->next); + blob->next = word2->rebuild_word->blobs; + word2->chopped_word->blobs = NULL; + word2->rebuild_word->blobs = NULL; + // Copy the seams onto the end of the word1 seam_array. + // Since the seam list is one element short, an empty seam marking the + // end of the last blob in the first word is needed first. 
+ word->seam_array = add_seam(word->seam_array, + new_seam(0.0, best_split_pt, NULL, NULL, NULL)); + for (int i = 0; i < array_count(word2->seam_array); ++i) { + SEAM* seam = reinterpret_cast(array_value(word2->seam_array, i)); + array_value(word2->seam_array, i) = NULL; + word->seam_array = add_seam(word->seam_array, seam); + } + word->best_state += word2->best_state; + // Append the word choices. + *word->best_choice += *word2->best_choice; + *word->raw_choice += *word2->raw_choice; + delete word2; } } // namespace tesseract - -/********************************************************************** - * call_tester - * - * Called from Tess with a blob in tess form. - * Convert the blob to editor form. - * Call the tester setup by the segmenter in tess_tester. - **********************************************************************/ -#if 0 // dead code -void call_tester( //call a tester - const STRING& filename, - TBLOB *tessblob, //blob to test - BOOL8 correct_blob, //true if good - char *text, //source text - inT32 count, //chars in text - LIST result //output of matcher - ) { - PBLOB *blob; //converted blob - BLOB_CHOICE_LIST ratings; //matcher result - - blob = make_ed_blob (tessblob);//convert blob - if (blob == NULL) - return; - //make it right type - convert_choice_list(result, ratings); - if (tess_tester != NULL) - (*tess_tester) (filename, blob, tess_denorm, correct_blob, text, count, &ratings); - delete blob; //don't need that now -} -#endif - -/********************************************************************** - * call_train_tester - * - * Called from Tess with a blob in tess form. - * Convert the blob to editor form. - * Call the trainer setup by the segmenter in tess_trainer. 
- **********************************************************************/ -#if 0 // dead code -void call_train_tester( //call a tester - const STRING& filename, - TBLOB *tessblob, //blob to test - BOOL8 correct_blob, //true if good - char *text, //source text - inT32 count, //chars in text - LIST result //output of matcher - ) { - PBLOB *blob; //converted blob - BLOB_CHOICE_LIST ratings; //matcher result - - blob = make_ed_blob (tessblob);//convert blob - if (blob == NULL) - return; - //make it right type - convert_choice_list(result, ratings); - if (tess_trainer != NULL) - (*tess_trainer) (filename, blob, tess_denorm, correct_blob, text, count, &ratings); - delete blob; //don't need that now -} -#endif diff --git a/ccmain/tfacepp.h b/ccmain/tfacepp.h index b72c8b7016..c9fd31a325 100644 --- a/ccmain/tfacepp.h +++ b/ccmain/tfacepp.h @@ -20,15 +20,12 @@ #ifndef TFACEPP_H #define TFACEPP_H -#include "varable.h" #include "tstruct.h" #include "ratngs.h" -#include "tessclas.h" +#include "blobs.h" #include "notdll.h" #include "tesseractclass.h" -extern BOOL_VAR_H (tessedit_override_permuter, TRUE, -"According to dict_word"); void call_tester( //call a tester TBLOB *tessblob, //blob to test BOOL8 correct_blob, //true if good diff --git a/ccmain/thresholder.cpp b/ccmain/thresholder.cpp index f647d22941..e8d9807f3c 100644 --- a/ccmain/thresholder.cpp +++ b/ccmain/thresholder.cpp @@ -230,6 +230,11 @@ void ImageThresholder::ThresholdToPix(Pix** pix) { } } +// Common initialization shared between SetImage methods. +void ImageThresholder::Init() { + SetRectangle(0, 0, image_width_, image_height_); +} + // Get a clone/copy of the source image rectangle. // The returned Pix must be pixDestroyed. // This function will be used in the future by the page layout analysis, and @@ -253,12 +258,24 @@ Pix* ImageThresholder::GetPixRect() { RawRectToPix(&raw_pix); return raw_pix; } -#endif -// Common initialization shared between SetImage methods. 
-void ImageThresholder::Init() { - SetRectangle(0, 0, image_width_, image_height_); +// Get a clone/copy of the source image rectangle, reduced to greyscale. +// The returned Pix must be pixDestroyed. +// This function will be used in the future by the page layout analysis, and +// the layout analysis that uses it will only be available with Leptonica, +// so there is no raw equivalent. +Pix* ImageThresholder::GetPixRectGrey() { + Pix* pix = GetPixRect(); // May have to be reduced to grey. + int depth = pixGetDepth(pix); + if (depth != 8) { + Pix* result = depth < 8 ? pixConvertTo8(pix, false) + : pixConvertRGBToLuminance(pix); + pixDestroy(&pix); + return result; + } + return pix; } +#endif // Otsu threshold the rectangle, taking everything except the image buffer // pointer from the class, to the output IMAGE. diff --git a/ccmain/thresholder.h b/ccmain/thresholder.h index 7022a46b17..7d11f64528 100644 --- a/ccmain/thresholder.h +++ b/ccmain/thresholder.h @@ -66,7 +66,7 @@ class ImageThresholder { virtual void GetImageSizes(int* left, int* top, int* width, int* height, int* imagewidth, int* imageheight); - /// Return true if HAVE_LIBLEPT and this thresholder implements the Pix + /// Return true if this thresholder implements the Pix /// interface. virtual bool HasThresholdToPix() const; @@ -75,11 +75,15 @@ class ImageThresholder { return image_bytespp_ >= 3; } + /// Returns true if the source image is binary. + bool IsBinary() const { + return image_bytespp_ == 0; + } + /// Threshold the source image as efficiently as possible to the output /// tesseract IMAGE class. virtual void ThresholdToIMAGE(IMAGE* image); -#ifdef HAVE_LIBLEPT /// Pix vs raw, which to use? /// Implementations should provide the ability to source and target Pix /// where possible. A future version of Tesseract may choose to use Pix @@ -101,7 +105,13 @@ class ImageThresholder { /// the layout analysis that uses it will only be available with Leptonica, /// so there is no raw equivalent. 
Pix* GetPixRect(); -#endif + + /// Get a clone/copy of the source image rectangle, reduced to greyscale. + /// The returned Pix must be pixDestroyed. + /// This function will be used in the future by the page layout analysis, and + /// the layout analysis that uses it will only be available with Leptonica, + /// so there is no raw equivalent. + Pix* GetPixRectGrey(); protected: // ---------------------------------------------------------------------- @@ -133,7 +143,6 @@ class ImageThresholder { /// output IMAGE. void CopyBinaryRectRawToIMAGE(IMAGE* image) const; -#ifdef HAVE_LIBLEPT /// Otsu threshold the rectangle, taking everything except the image buffer /// pointer from the class, to the output Pix. void OtsuThresholdRectToPix(const unsigned char* imagedata, @@ -152,14 +161,11 @@ class ImageThresholder { /// Cut out the requested rectangle of the binary image to the output IMAGE. void CopyBinaryRectPixToIMAGE(IMAGE* image) const; -#endif protected: -#ifdef HAVE_LIBLEPT /// Clone or other copy of the source Pix. /// The pix will always be PixDestroy()ed on destruction of the class. Pix* pix_; -#endif /// Exactly one of pix_ and image_data_ is not NULL. const unsigned char* image_data_; //< Raw source image. 
@@ -178,4 +184,3 @@ class ImageThresholder { #endif // TESSERACT_CCMAIN_THRESHOLDER_H__ - diff --git a/ccmain/tstruct.cpp b/ccmain/tstruct.cpp index 4c15ca4529..9f6074e8d2 100644 --- a/ccmain/tstruct.cpp +++ b/ccmain/tstruct.cpp @@ -18,370 +18,12 @@ **********************************************************************/ #include "mfcpch.h" - -#ifdef _MSC_VER -#pragma warning(disable:4244) // Conversion warnings -#endif - -#include "tfacep.h" -#include "tstruct.h" -#include "makerow.h" -#include "ocrblock.h" -//#include "structures.h" - -static ERRCODE BADFRAGMENTS = "Couldn't find matching fragment ends"; - -ELISTIZE (FRAGMENT) -//extern /*"C"*/ oldoutline(TESSLINE*); -/********************************************************************** - * FRAGMENT::FRAGMENT - * - * Constructor for fragments. - **********************************************************************/ -FRAGMENT::FRAGMENT ( //constructor -EDGEPT * head_pt, //start point -EDGEPT * tail_pt //end point -):head (head_pt->pos.x, head_pt->pos.y), tail (tail_pt->pos.x, -tail_pt->pos.y) { - headpt = head_pt; // save ptrs - tailpt = tail_pt; -} - -// Helper function to make a fake PBLOB formed from the bounding box -// of the given old-format outline. -static PBLOB* MakeRectBlob(TESSLINE* ol) { - POLYPT_LIST poly_list; - POLYPT_IT poly_it = &poly_list; - FCOORD pos, vec; - POLYPT *polypt; - - // Create points at each of the 4 corners of the rectangle in turn. 
- pos = FCOORD(ol->topleft.x, ol->topleft.y); - vec = FCOORD(0.0f, ol->botright.y - ol->topleft.y); - polypt = new POLYPT(pos, vec); - poly_it.add_after_then_move(polypt); - pos = FCOORD(ol->topleft.x, ol->botright.y); - vec = FCOORD(ol->botright.x - ol->topleft.x, 0.0f); - polypt = new POLYPT(pos, vec); - poly_it.add_after_then_move(polypt); - pos = FCOORD(ol->botright.x, ol->botright.y); - vec = FCOORD(0.0f, ol->topleft.y - ol->botright.y); - polypt = new POLYPT(pos, vec); - poly_it.add_after_then_move(polypt); - pos = FCOORD(ol->botright.x, ol->topleft.y); - vec = FCOORD(ol->topleft.x - ol->botright.x, 0.0f); - polypt = new POLYPT(pos, vec); - poly_it.add_after_then_move(polypt); - - OUTLINE_LIST out_list; - OUTLINE_IT out_it = &out_list; - out_it.add_after_then_move(new OUTLINE(&poly_it)); - return new PBLOB(&out_list); -} - -/********************************************************************** - * make_ed_word - * - * Make an editor format word from the tess style word. - **********************************************************************/ - -WERD *make_ed_word( //construct word - TWERD *tessword, //word to convert - WERD *clone //clone this one - ) { - WERD *word; //converted word - TBLOB *tblob; //current blob - PBLOB *blob; //new blob - PBLOB_LIST blobs; //list of blobs - PBLOB_IT blob_it = &blobs; //iterator - - for (tblob = tessword->blobs; tblob != NULL; tblob = tblob->next) { - blob = make_ed_blob (tblob); - if (blob == NULL && tblob->outlines != NULL) { - // Make a fake blob using the bounding box rectangle of the 1st outline. - blob = MakeRectBlob(tblob->outlines); - } - if (blob != NULL) { - blob_it.add_after_then_move (blob); - } - } - if (!blobs.empty ()) - word = new WERD (&blobs, clone); - else - word = NULL; - return word; -} - - -/********************************************************************** - * make_ed_blob - * - * Make an editor format blob from the tess style blob. 
- **********************************************************************/ - -PBLOB *make_ed_blob( //construct blob - TBLOB *tessblob //blob to convert - ) { - TESSLINE *tessol; //tess outline - FRAGMENT_LIST fragments; //list of fragments - OUTLINE *outline; //current outline - OUTLINE_LIST out_list; //list of outlines - OUTLINE_IT out_it = &out_list; //iterator - - for (tessol = tessblob->outlines; tessol != NULL; tessol = tessol->next) { - //stick in list - register_outline(tessol, &fragments); - } - while (!fragments.empty ()) { - outline = make_ed_outline (&fragments); - if (outline != NULL) { - out_it.add_after_then_move (outline); - } - } - if (out_it.empty()) - return NULL; //couldn't do it - return new PBLOB (&out_list); //turn to blob -} - - -/********************************************************************** - * make_ed_outline - * - * Make an editor format outline from the list of fragments. - **********************************************************************/ - -OUTLINE *make_ed_outline( //constructoutline - FRAGMENT_LIST *list //list of fragments - ) { - FRAGMENT *fragment; //current fragment - EDGEPT *edgept; //current point - ICOORD headpos; //coords of head - ICOORD tailpos; //coords of tail - FCOORD pos; //coords of edgept - FCOORD vec; //empty - POLYPT *polypt; //current point - POLYPT_LIST poly_list; //list of point - POLYPT_IT poly_it = &poly_list;//iterator - FRAGMENT_IT fragment_it = list;//fragment - - headpos = fragment_it.data ()->head; - do { - fragment = fragment_it.data (); - edgept = fragment->headpt; //start of segment - do { - pos = FCOORD (edgept->pos.x, edgept->pos.y); - vec = FCOORD (edgept->vec.x, edgept->vec.y); - polypt = new POLYPT (pos, vec); - //add to list - poly_it.add_after_then_move (polypt); - edgept = edgept->next; - } - while (edgept != fragment->tailpt); - tailpos = ICOORD (edgept->pos.x, edgept->pos.y); - //get rid of it - delete fragment_it.extract (); - if (tailpos != headpos) { - if (fragment_it.empty ()) 
{ - return NULL; - } - fragment_it.forward (); - //find next segment - for (fragment_it.mark_cycle_pt (); !fragment_it.cycled_list () && - fragment_it.data ()->head != tailpos; - fragment_it.forward ()); - if (fragment_it.data ()->head != tailpos) { - // It is legitimate for the heads to not all match to tails, - // since not all combinations of seams always make sense. - for (fragment_it.mark_cycle_pt (); - !fragment_it.cycled_list (); fragment_it.forward ()) { - fragment = fragment_it.extract (); - delete fragment; - } - return NULL; //can't do it - } - } - } - while (tailpos != headpos); - return new OUTLINE (&poly_it); //turn to outline -} - - -/********************************************************************** - * register_outline - * - * Add the fragments in the given outline to the list - **********************************************************************/ - -void register_outline( //add fragments - TESSLINE *outline, //tess format - FRAGMENT_LIST *list //list to add to - ) { - EDGEPT *startpt; //start of outline - EDGEPT *headpt; //start of fragment - EDGEPT *tailpt; //end of fragment - FRAGMENT *fragment; //new fragment - FRAGMENT_IT it = list; //iterator - - startpt = outline->loop; - do { - startpt = startpt->next; - if (startpt == NULL) - return; //illegal! - } - while (startpt->flags[0] == 0 && startpt != outline->loop); - headpt = startpt; - do - startpt = startpt->next; - while (startpt->flags[0] != 0 && startpt != headpt); - if (startpt->flags[0] != 0) - return; //all hidden! 
- - headpt = startpt; - do { - tailpt = headpt; - do - tailpt = tailpt->next; - while (tailpt->flags[0] == 0 && tailpt != startpt); - fragment = new FRAGMENT (headpt, tailpt); - it.add_after_then_move (fragment); - while (tailpt->flags[0] != 0) - tailpt = tailpt->next; - headpt = tailpt; - } - while (tailpt != startpt); -} - - -/********************************************************************** - * make_tess_row - * - * Make a fake row structure to pass to the tesseract matchers. - **********************************************************************/ - -void make_tess_row( //make fake row - DENORM *denorm, //row info - TEXTROW *tessrow //output row - ) { - tessrow->baseline.segments = 1; - tessrow->baseline.xstarts[0] = -32767; - tessrow->baseline.xstarts[1] = 32767; - tessrow->baseline.quads[0].a = 0; - tessrow->baseline.quads[0].b = 0; - tessrow->baseline.quads[0].c = bln_baseline_offset; - tessrow->xheight.segments = 1; - tessrow->xheight.xstarts[0] = -32767; - tessrow->xheight.xstarts[1] = 32767; - tessrow->xheight.quads[0].a = 0; - tessrow->xheight.quads[0].b = 0; - tessrow->xheight.quads[0].c = bln_x_height + bln_baseline_offset; - tessrow->lineheight = bln_x_height; - if (denorm != NULL) { - tessrow->ascrise = denorm->row ()->ascenders () * denorm->scale (); - tessrow->descdrop = denorm->row ()->descenders () * denorm->scale (); - } else { - tessrow->ascrise = bln_baseline_offset; - tessrow->descdrop = -bln_baseline_offset; - } -} - - -/********************************************************************** - * make_tess_word - * - * Convert the word to Tess format. 
- **********************************************************************/ - -TWERD *make_tess_word( //convert word - WERD *word, //word to do - TEXTROW *row //fake row - ) { - TWERD *tessword; //tess format - - tessword = newword (); //use old allocator - tessword->row = row; //give them something - //copy string - tessword->correct = strsave (word->text ()); - tessword->guess = NULL; - tessword->blobs = make_tess_blobs (word->blob_list ()); - tessword->blanks = 1; - tessword->blobcount = word->blob_list ()->length (); - tessword->next = NULL; - return tessword; -} - - -/********************************************************************** - * make_tess_blobs - * - * Make Tess style blobs from a list of BLOBs. - **********************************************************************/ - -TBLOB *make_tess_blobs( //make tess blobs - PBLOB_LIST *bloblist //list to convert - ) { - PBLOB_IT it = bloblist; //iterator - PBLOB *blob; //current blob - TBLOB *head; //output list - TBLOB *tail; //end of list - TBLOB *tessblob; - - head = NULL; - tail = NULL; - for (it.mark_cycle_pt (); !it.cycled_list (); it.forward ()) { - blob = it.data (); - tessblob = make_tess_blob (blob, TRUE); - if (head) - tail->next = tessblob; - else - head = tessblob; - tail = tessblob; - } - return head; -} - -/********************************************************************** - * make_rotated_tess_blob - * - * Make a single Tess style blob, applying the given rotation and - * renormalizing. 
- **********************************************************************/ -TBLOB *make_rotated_tess_blob(const DENORM* denorm, PBLOB *blob, - BOOL8 flatten) { - if (denorm != NULL && denorm->block() != NULL && - denorm->block()->classify_rotation().y() != 0.0) { - TBOX box = blob->bounding_box(); - int src_width = box.width(); - int src_height = box.height(); - src_width = static_cast(src_width / denorm->scale() + 0.5); - src_height = static_cast(src_height / denorm->scale() + 0.5); - int x_middle = (box.left() + box.right()) / 2; - int y_middle = (box.top() + box.bottom()) / 2; - PBLOB* rotated_blob = PBLOB::deep_copy(blob); - rotated_blob->move(FCOORD(-x_middle, -y_middle)); - rotated_blob->rotate(denorm->block()->classify_rotation()); - ICOORD median_size = denorm->block()->median_size(); - int tolerance = median_size.x() / 8; - // TODO(dsl/rays) find a better normalization solution. In the mean time - // make it work for CJK by normalizing for Cap height in the same way - // as is applied in compute_block_xheight when the row is presumed to - // be ALLCAPS, i.e. 
the x-height is the fixed fraction - // blob height * textord_merge_x / (textord_merge_x + textord_merge_asc) - if (NearlyEqual(src_width, static_cast(median_size.x()), tolerance) && - NearlyEqual(src_height, static_cast(median_size.y()), tolerance)) { - float target_height = bln_x_height * (textord_merge_x + textord_merge_asc) - / textord_merge_x; - rotated_blob->scale(target_height / box.width()); - rotated_blob->move(FCOORD(0.0f, - bln_baseline_offset - - rotated_blob->bounding_box().bottom())); - } - TBLOB* result = make_tess_blob(rotated_blob, flatten); - delete rotated_blob; - return result; - } else { - return make_tess_blob(blob, flatten); - } -} +#include "ccstruct.h" +#include "helpers.h" +#include "tfacep.h" +#include "tstruct.h" +#include "makerow.h" +#include "ocrblock.h" /********************************************************************** * make_tess_blob @@ -389,24 +31,9 @@ TBLOB *make_rotated_tess_blob(const DENORM* denorm, PBLOB *blob, * Make a single Tess style blob **********************************************************************/ -TBLOB *make_tess_blob( //make tess blob - PBLOB *blob, //blob to convert - BOOL8 flatten //flatten outline structure - ) { - inT32 index; - TBLOB *tessblob; - - tessblob = newblob (); - tessblob->outlines = (struct olinestruct *) - make_tess_outlines (blob->out_list (), flatten); - for (index = 0; index < TBLOBFLAGS; index++) - tessblob->flags[index] = 0; //!! - tessblob->correct = 0; - tessblob->guess = 0; - for (index = 0; index < MAX_WO_CLASSES; index++) { - tessblob->classes[index] = 0; - tessblob->values[index] = 0; - } +TBLOB *make_tess_blob(PBLOB *blob) { + TBLOB* tessblob = new TBLOB; + tessblob->outlines = make_tess_outlines(blob->out_list(), false); tessblob->next = NULL; return tessblob; } @@ -418,10 +45,8 @@ TBLOB *make_tess_blob( //make tess blob * Make Tess style outlines from a list of OUTLINEs. 
**********************************************************************/ -TESSLINE *make_tess_outlines( //make tess outlines - OUTLINE_LIST *outlinelist, //list to convert - BOOL8 flatten //flatten outline structure - ) { +TESSLINE *make_tess_outlines(OUTLINE_LIST *outlinelist, // List to convert. + bool is_holes) { // These are hole outlines. OUTLINE_IT it = outlinelist; //iterator OUTLINE *outline; //current outline TESSLINE *head; //output list @@ -430,31 +55,21 @@ TESSLINE *make_tess_outlines( //make tess outlines head = NULL; tail = NULL; - for (it.mark_cycle_pt (); !it.cycled_list (); it.forward ()) { - outline = it.data (); - tessoutline = newoutline (); - tessoutline->compactloop = NULL; - tessoutline->loop = make_tess_edgepts (outline->polypts (), - tessoutline->topleft, - tessoutline->botright); + for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { + outline = it.data(); + tessoutline = new TESSLINE; + tessoutline->loop = make_tess_edgepts(outline->polypts(), + tessoutline->topleft, + tessoutline->botright); if (tessoutline->loop == NULL) { - oldoutline(tessoutline); + delete tessoutline; continue; } tessoutline->start = tessoutline->loop->pos; - tessoutline->node = NULL; tessoutline->next = NULL; - tessoutline->child = NULL; - if (!outline->child ()->empty ()) { - if (flatten) - tessoutline->next = (struct olinestruct *) - make_tess_outlines (outline->child (), flatten); - else { - tessoutline->next = NULL; - tessoutline->child = (struct olinestruct *) - make_tess_outlines (outline->child (), flatten); - } - } + tessoutline->is_hole = is_holes; + if (!outline->child()->empty()) + tessoutline->next = make_tess_outlines(outline->child(), true); else tessoutline->next = NULL; if (head) @@ -492,22 +107,17 @@ EDGEPT *make_tess_edgepts( //make tess edgepts tl.y = -MAX_INT16; br.x = -MAX_INT16; br.y = MAX_INT16; - for (it.mark_cycle_pt (); !it.cycled_list ();) { - edgept = it.data (); - tessedgept = newedgept (); - tessedgept->pos.x = (inT16) 
edgept->pos.x (); - tessedgept->pos.y = (inT16) edgept->pos.y (); - if (tessedgept->pos.x < tl.x) - tl.x = tessedgept->pos.x; - if (tessedgept->pos.x > br.x) - br.x = tessedgept->pos.x; - if (tessedgept->pos.y > tl.y) - tl.y = tessedgept->pos.y; - if (tessedgept->pos.y < br.y) - br.y = tessedgept->pos.y; - if (head != NULL && tessedgept->pos.x == tail->pos.x - && tessedgept->pos.y == tail->pos.y) { - oldedgept(tessedgept); + for (it.mark_cycle_pt(); !it.cycled_list ();) { + edgept = it.data(); + tessedgept = new EDGEPT; + tessedgept->pos.x = (inT16) edgept->pos.x(); + tessedgept->pos.y = (inT16) edgept->pos.y(); + UpdateRange(tessedgept->pos.x, &tl.x, &br.x); + UpdateRange(tessedgept->pos.y, &br.y, &tl.y); + if (head != NULL && + tessedgept->pos.x == tail->pos.x && + tessedgept->pos.y == tail->pos.y) { + delete tessedgept; } else { for (index = 0; index < EDGEPTFLAGS; index++) @@ -530,7 +140,7 @@ EDGEPT *make_tess_edgepts( //make tess edgepts tail->vec.x = head->pos.x - tail->pos.x; tail->vec.y = head->pos.y - tail->pos.y; if (head == tail) { - oldedgept(head); + delete head; return NULL; //empty } return head; diff --git a/ccmain/tstruct.h b/ccmain/tstruct.h index ccacaa86f3..204f94c53c 100644 --- a/ccmain/tstruct.h +++ b/ccmain/tstruct.h @@ -21,62 +21,13 @@ #define TSTRUCT_H #include "werd.h" -#include "tessclas.h" +#include "blobs.h" #include "ratngs.h" #include "notdll.h" -class FRAGMENT:public ELIST_LINK -{ - public: - FRAGMENT() { //constructor - } - FRAGMENT(EDGEPT *head_pt, //start - EDGEPT *tail_pt); //end - - ICOORD head; //coords of start - ICOORD tail; //coords of end - EDGEPT *headpt; //start point - EDGEPT *tailpt; //end point - - NEWDELETE2 (FRAGMENT) -}; - -ELISTIZEH (FRAGMENT) -WERD *make_ed_word( //construct word - TWERD *tessword, //word to convert - WERD *clone //clone this one - ); -PBLOB *make_ed_blob( //construct blob - TBLOB *tessblob //blob to convert - ); -OUTLINE *make_ed_outline( //constructoutline - FRAGMENT_LIST *list //list of 
fragments - ); -void register_outline( //add fragments - TESSLINE *outline, //tess format - FRAGMENT_LIST *list //list to add to - ); -void make_tess_row( //make fake row - DENORM *denorm, //row info - TEXTROW *tessrow //output row - ); -TWERD *make_tess_word( //convert owrd - WERD *word, //word to do - TEXTROW *row //fake row - ); -TBLOB *make_tess_blobs( //make tess blobs - PBLOB_LIST *bloblist //list to convert - ); -TBLOB *make_rotated_tess_blob(const DENORM* denorm, PBLOB *blob, - BOOL8 flatten); -TBLOB *make_tess_blob( //make tess blob - PBLOB *blob, //blob to convert - BOOL8 flatten //flatten outline structure - ); -TESSLINE *make_tess_outlines( //make tess outlines - OUTLINE_LIST *outlinelist, //list to convert - BOOL8 flatten //flatten outline structure - ); +TBLOB *make_tess_blob(PBLOB *blob); +TESSLINE *make_tess_outlines(OUTLINE_LIST *outlinelist, // List to convert + bool is_holes); // These are hole outlines. EDGEPT *make_tess_edgepts( //make tess edgepts POLYPT_LIST *edgeptlist, //list to convert TPOINT &tl, //bounding box diff --git a/ccmain/varabled.h b/ccmain/varabled.h deleted file mode 100644 index 0a5a034900..0000000000 --- a/ccmain/varabled.h +++ /dev/null @@ -1,139 +0,0 @@ -/////////////////////////////////////////////////////////////////////// -// File: varabled.cpp -// Description: Variables Editor -// Author: Joern Wanke -// Created: Wed Jul 18 10:05:01 PDT 2007 -// -// (C) Copyright 2007, Google Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-// See the License for the specific language governing permissions and -// limitations under the License. -// -/////////////////////////////////////////////////////////////////////// - -/** - * @file varabled.h - * The variables editor is used to edit all the variables used within - * tesseract from the ui. - */ -#ifndef GRAPHICS_DISABLED -#ifndef VARABLED_H -#define VARABLED_H - -#include "elst.h" -#include "scrollview.h" -#include "varable.h" -#include "tesseractclass.h" - -class SVMenuNode; - -/** A list of all possible variable types used. */ -enum VarType { - VT_INTEGER, - VT_BOOLEAN, - VT_STRING, - VT_DOUBLE -}; - -/** - * A rather hackish helper structure which can take any kind of variable input - * (defined by VarType) and do a couple of common operations on them, like - * comparisond or getting its value. It is used in the context of the - * VariablesEditor as a bridge from the internal tesseract variables to the - * ones displayed by the ScrollView server. - */ -class VariableContent : public ELIST_LINK { - public: - /** Compare two VC objects by their name. */ - static int Compare(const void* v1, const void* v2); - - /** Gets a VC object identified by its ID. */ - static VariableContent* GetVariableContentById(int id); - - /** Constructors for the various VarTypes. */ - VariableContent() { - } - VariableContent(STRING_VARIABLE* it); - VariableContent(INT_VARIABLE* it); - VariableContent(BOOL_VARIABLE* it); - VariableContent(double_VARIABLE* it); - - - /** Getters and Setters. */ - void SetValue(const char* val); - const char* GetValue() const; - const char* GetName() const; - const char* GetDescription() const; - - int GetId() { return my_id_; } - bool HasChanged() { return changed_; } - - private: - /** The unique ID of this VC object. */ - int my_id_; - /** Whether the variable was changed_ and thus needs to be rewritten. */ - bool changed_; - /** The actual vartype of this VC object. 
*/ - VarType var_type_; - - STRING_VARIABLE* sIt; - INT_VARIABLE* iIt; - BOOL_VARIABLE* bIt; - double_VARIABLE* dIt; -}; - -ELISTIZEH(VariableContent) - -/** - * The variables editor enables the user to edit all the variables used within - * tesseract. It can be invoked on its own, but is supposed to be invoked by - * the program editor. - */ -class VariablesEditor : public SVEventHandler { - public: - /** - * Integrate the variables editor as popupmenu into the existing scrollview - * window (usually the pg editor). If sv == null, create a new empty - * empty window and attach the variables editor to that window (ugly). - */ - VariablesEditor(const tesseract::Tesseract*, ScrollView* sv = NULL); - - /** Event listener. Waits for SVET_POPUP events and processes them. */ - void Notify(const SVEvent* sve); - - private: - /** - * Gets the up to the first 3 prefixes from s (split by _). - * For example, tesseract_foo_bar will be split into tesseract, foo, and bar. - */ - void GetPrefixes(const char* s, STRING* level_one, - STRING* level_two, STRING* level_three); - - /** - * Gets the first n words (split by _) and puts them in t. - * For example, tesseract_foo_bar with N=2 will yield tesseract_foo_. - */ - void GetFirstWords(const char *s, // source string - int n, // number of words - char *t); // target string - - /** - * Find all editable variables used within tesseract and create a - * SVMenuNode tree from it. - */ - SVMenuNode *BuildListOfAllLeaves(); - - /** Write all (changed_) variables to a config file. */ - void WriteVars(char* filename, bool changes_only); - - ScrollView* sv_window_; -}; - -#endif -#endif diff --git a/ccmain/werdit.cpp b/ccmain/werdit.cpp index e6fd01518a..34e0581760 100644 --- a/ccmain/werdit.cpp +++ b/ccmain/werdit.cpp @@ -1,3 +1,4 @@ + /********************************************************************** * File: werdit.cpp (Formerly wordit.c) * Description: An iterator for passing over all the words in a document. 
@@ -18,99 +19,7 @@ **********************************************************************/ #include "mfcpch.h" -#include "werdit.h" - -#define EXTERN - -//EXTERN BOOL_VAR(wordit_linearc,FALSE,"Pass poly of linearc to Tess"); - -/********************************************************************** - * WERDIT::start_page - * - * Get ready to iterate over the page by setting the iterators. - **********************************************************************/ - -void WERDIT::start_page( //set iterators - BLOCK_LIST *block_list //blocks to check - ) { - block_it.set_to_list (block_list); - block_it.mark_cycle_pt (); - do { - while (block_it.data ()->row_list ()->empty () - && !block_it.cycled_list ()) { - block_it.forward (); - } - if (!block_it.data ()->row_list ()->empty ()) { - row_it.set_to_list (block_it.data ()->row_list ()); - row_it.mark_cycle_pt (); - while (row_it.data ()->word_list ()->empty () - && !row_it.cycled_list ()) { - row_it.forward (); - } - if (!row_it.data ()->word_list ()->empty ()) { - word_it.set_to_list (row_it.data ()->word_list ()); - word_it.mark_cycle_pt (); - } - } - } - while (!block_it.cycled_list () && row_it.data ()->word_list ()->empty ()); -} - - -/********************************************************************** - * WERDIT::forward - * - * Give the next word on the page, or NULL if none left. - * This code assumes all rows to be non-empty, but blocks are allowed - * to be empty as eventually we will have non-text blocks. - * The output is always a copy and needs to be deleted by somebody. 
- **********************************************************************/ - -WERD *WERDIT::forward() { //use iterators - WERD *word; //actual word - // WERD *larc_word; //linearc copy - WERD *result; //output word - ROW *row; //row of word - - if (word_it.cycled_list ()) { - return NULL; //finished page - } - else { - word = word_it.data (); - row = row_it.data (); - word_it.forward (); - if (word_it.cycled_list ()) { - row_it.forward (); //finished row - if (row_it.cycled_list ()) { - do { - block_it.forward (); //finished block - if (!block_it.cycled_list ()) { - row_it.set_to_list (block_it.data ()->row_list ()); - row_it.mark_cycle_pt (); - } - } - //find non-empty block - while (!block_it.cycled_list () - && row_it.cycled_list ()); - } - if (!row_it.cycled_list ()) { - word_it.set_to_list (row_it.data ()->word_list ()); - word_it.mark_cycle_pt (); - } - } - - // if (wordit_linearc && !word->flag(W_POLYGON)) - // { - // larc_word=word->larc_copy(row->x_height()); - // result=larc_word->poly_copy(row->x_height()); - // delete larc_word; - // } - // else - result = word->poly_copy (row->x_height ()); - return result; - } -} - +#include "werdit.h" /********************************************************************** * make_pseudo_word @@ -119,74 +28,33 @@ WERD *WERDIT::forward() { //use iterators * The word is always a copy and needs to be deleted. **********************************************************************/ -WERD *make_pseudo_word( //make fake word - BLOCK_LIST *block_list, //blocks to check //block of selection +WERD *make_pseudo_word(PAGE_RES* page_res, // Blocks to check. 
TBOX &selection_box, BLOCK *&pseudo_block, - ROW *&pseudo_row //row of selection - ) { - BLOCK_IT block_it(block_list); - BLOCK *block; - ROW_IT row_it; - ROW *row; - WERD_IT word_it; - WERD *word; - PBLOB_IT blob_it; - PBLOB *blob; - PBLOB_LIST new_blobs; //list of gathered blobs - //iterator - PBLOB_IT new_blob_it = &new_blobs; - WERD *pseudo_word; //fabricated word - WERD *poly_word; //poly copy of word - // WERD *larc_word; //linearc copy - - for (block_it.mark_cycle_pt (); - !block_it.cycled_list (); block_it.forward ()) { - block = block_it.data (); - if (block->bounding_box ().overlap (selection_box)) { - pseudo_block = block; - row_it.set_to_list (block->row_list ()); - for (row_it.mark_cycle_pt (); - !row_it.cycled_list (); row_it.forward ()) { - row = row_it.data (); - if (row->bounding_box ().overlap (selection_box)) { - word_it.set_to_list (row->word_list ()); - for (word_it.mark_cycle_pt (); - !word_it.cycled_list (); word_it.forward ()) { - word = word_it.data (); - if (word->bounding_box ().overlap (selection_box)) { - // if (wordit_linearc && !word->flag(W_POLYGON)) - // { - // larc_word=word->larc_copy(row->x_height()); - // poly_word=larc_word->poly_copy(row->x_height()); - // delete larc_word; - // } - // else - poly_word = word->poly_copy (row->x_height ()); - blob_it.set_to_list (poly_word->blob_list ()); - for (blob_it.mark_cycle_pt (); - !blob_it.cycled_list (); blob_it.forward ()) { - blob = blob_it.data (); - if (blob->bounding_box (). - overlap (selection_box)) { - new_blob_it.add_after_then_move (blob_it. - extract - ()); - //steal off list - pseudo_row = row; - } - } - delete poly_word; //get rid of it - } - } + ROW *&pseudo_row) { // Row of selection. 
+ PAGE_RES_IT pr_it(page_res); + C_BLOB_LIST new_blobs; // list of gathered blobs + C_BLOB_IT new_blob_it = &new_blobs; // iterator + WERD *pseudo_word; // fabricated word + + for (WERD_RES* word_res = pr_it.word(); word_res != NULL; + word_res = pr_it.forward()) { + WERD* word = word_res->word; + if (word->bounding_box().overlap(selection_box)) { + C_BLOB_IT blob_it(word->cblob_list()); + for (blob_it.mark_cycle_pt(); + !blob_it.cycled_list(); blob_it.forward()) { + C_BLOB* blob = blob_it.data(); + if (blob->bounding_box().overlap(selection_box)) { + new_blob_it.add_after_then_move(C_BLOB::deep_copy(blob)); + pseudo_row = pr_it.row()->row; + pseudo_block = pr_it.block()->block; } } } } - if (!new_blobs.empty ()) { - //make new word - pseudo_word = new WERD (&new_blobs, 1, NULL); - } + if (!new_blobs.empty()) + pseudo_word = new WERD(&new_blobs, 1, NULL); else pseudo_word = NULL; return pseudo_word; diff --git a/ccmain/werdit.h b/ccmain/werdit.h index e509685fdd..b3f6181d6e 100644 --- a/ccmain/werdit.h +++ b/ccmain/werdit.h @@ -20,48 +20,12 @@ #ifndef WERDIT_H #define WERDIT_H -#include "varable.h" -#include "ocrblock.h" +#include "pageres.h" #include "notdll.h" -class WERDIT -{ - public: - WERDIT() { - } //empty contructor - WERDIT( //empty contructor - BLOCK_LIST *blocklist) { //blocks on page - start_page(blocklist); //ready to scan - } - - void start_page( //get ready - BLOCK_LIST *blocklist); //blocks on page - - WERD *forward(); //get next word - WERD *next_word() { //get next word - return word_it.data (); //already at next - } - ROW *row() { //get current row - return word_it.cycled_list ()? 
NULL : row_it.data (); - } - ROW *next_row() { //get next row - return row_it.data_relative (1); - } - BLOCK *block() { //get current block - return block_it.data (); - } - - private: - BLOCK_IT block_it; //iterators - ROW_IT row_it; - WERD_IT word_it; -}; - -//extern BOOL_VAR_H(wordit_linearc,FALSE,"Pass poly of linearc to Tess"); -WERD *make_pseudo_word( //make fake word - BLOCK_LIST *block_list, //blocks to check //block of selection +WERD *make_pseudo_word(PAGE_RES* page_res, // blocks to check TBOX &selection_box, BLOCK *&pseudo_block, - ROW *&pseudo_row //row of selection - ); + ROW *&pseudo_row); + #endif diff --git a/ccstruct/Makefile.am b/ccstruct/Makefile.am index 49ba051e74..5b6ef71de4 100644 --- a/ccstruct/Makefile.am +++ b/ccstruct/Makefile.am @@ -3,28 +3,26 @@ AM_CPPFLAGS = \ -I$(top_srcdir)/ccutil -I$(top_srcdir)/cutil \ -I$(top_srcdir)/image -I$(top_srcdir)/viewer -EXTRA_DIST = ccstruct.vcproj - include_HEADERS = \ - blckerr.h blobbox.h blobs.h blread.h ccstruct.h coutln.h crakedge.h \ - detlinefit.h genblob.h hpddef.h hpdsizes.h ipoints.h \ - labls.h linlsq.h lmedsq.h mod128.h normalis.h \ + blckerr.h blobbox.h blobs.h blread.h boxword.h ccstruct.h coutln.h crakedge.h \ + detlinefit.h dppoint.h genblob.h hpddef.h hpdsizes.h ipoints.h \ + linlsq.h matrix.h mod128.h normalis.h \ ocrblock.h ocrrow.h otsuthr.h \ pageres.h pdblock.h points.h polyaprx.h polyblk.h \ - polyblob.h polyvert.h poutline.h \ + polyblob.h polyvert.h poutline.h publictypes.h \ quadlsq.h quadratc.h quspline.h ratngs.h rect.h rejctmap.h \ - statistc.h stepblob.h vecfuncs.h werd.h + seam.h split.h statistc.h stepblob.h vecfuncs.h werd.h lib_LTLIBRARIES = libtesseract_ccstruct.la libtesseract_ccstruct_la_SOURCES = \ - blobbox.cpp blobs.cpp blread.cpp callcpp.cpp ccstruct.cpp coutln.cpp \ - detlinefit.cpp genblob.cpp \ - labls.cpp linlsq.cpp lmedsq.cpp mod128.cpp normalis.cpp \ + blobbox.cpp blobs.cpp blread.cpp boxword.cpp callcpp.cpp ccstruct.cpp coutln.cpp \ + detlinefit.cpp 
dppoint.cpp genblob.cpp \ + linlsq.cpp matrix.cpp mod128.cpp normalis.cpp \ ocrblock.cpp ocrrow.cpp otsuthr.cpp \ pageres.cpp pdblock.cpp points.cpp polyaprx.cpp polyblk.cpp \ - polyblob.cpp polyvert.cpp poutline.cpp \ + polyblob.cpp polyvert.cpp poutline.cpp publictypes.cpp \ quadlsq.cpp quadratc.cpp quspline.cpp ratngs.cpp rect.cpp rejctmap.cpp \ - statistc.cpp stepblob.cpp \ + seam.cpp split.cpp statistc.cpp stepblob.cpp \ vecfuncs.cpp werd.cpp libtesseract_ccstruct_la_LDFLAGS = -version-info $(GENERIC_LIBRARY_VERSION) diff --git a/ccstruct/Makefile.in b/ccstruct/Makefile.in index d101e209a3..6cb1deda83 100644 --- a/ccstruct/Makefile.in +++ b/ccstruct/Makefile.in @@ -72,12 +72,13 @@ am__installdirs = "$(DESTDIR)$(libdir)" "$(DESTDIR)$(includedir)" LTLIBRARIES = $(lib_LTLIBRARIES) libtesseract_ccstruct_la_LIBADD = am_libtesseract_ccstruct_la_OBJECTS = blobbox.lo blobs.lo blread.lo \ - callcpp.lo ccstruct.lo coutln.lo detlinefit.lo genblob.lo \ - labls.lo linlsq.lo lmedsq.lo mod128.lo normalis.lo ocrblock.lo \ - ocrrow.lo otsuthr.lo pageres.lo pdblock.lo points.lo \ - polyaprx.lo polyblk.lo polyblob.lo polyvert.lo poutline.lo \ - quadlsq.lo quadratc.lo quspline.lo ratngs.lo rect.lo \ - rejctmap.lo statistc.lo stepblob.lo vecfuncs.lo werd.lo + boxword.lo callcpp.lo ccstruct.lo coutln.lo detlinefit.lo \ + dppoint.lo genblob.lo linlsq.lo matrix.lo mod128.lo \ + normalis.lo ocrblock.lo ocrrow.lo otsuthr.lo pageres.lo \ + pdblock.lo points.lo polyaprx.lo polyblk.lo polyblob.lo \ + polyvert.lo poutline.lo publictypes.lo quadlsq.lo quadratc.lo \ + quspline.lo ratngs.lo rect.lo rejctmap.lo seam.lo split.lo \ + statistc.lo stepblob.lo vecfuncs.lo werd.lo libtesseract_ccstruct_la_OBJECTS = \ $(am_libtesseract_ccstruct_la_OBJECTS) libtesseract_ccstruct_la_LINK = $(LIBTOOL) --tag=CXX \ @@ -252,7 +253,6 @@ libdir = @libdir@ libexecdir = @libexecdir@ localedir = @localedir@ localstatedir = @localstatedir@ -lt_ECHO = @lt_ECHO@ mandir = @mandir@ mkdir_p = @mkdir_p@ oldincludedir 
= @oldincludedir@ @@ -273,27 +273,26 @@ AM_CPPFLAGS = \ -I$(top_srcdir)/ccutil -I$(top_srcdir)/cutil \ -I$(top_srcdir)/image -I$(top_srcdir)/viewer -EXTRA_DIST = ccstruct.vcproj include_HEADERS = \ - blckerr.h blobbox.h blobs.h blread.h ccstruct.h coutln.h crakedge.h \ - detlinefit.h genblob.h hpddef.h hpdsizes.h ipoints.h \ - labls.h linlsq.h lmedsq.h mod128.h normalis.h \ + blckerr.h blobbox.h blobs.h blread.h boxword.h ccstruct.h coutln.h crakedge.h \ + detlinefit.h dppoint.h genblob.h hpddef.h hpdsizes.h ipoints.h \ + linlsq.h matrix.h mod128.h normalis.h \ ocrblock.h ocrrow.h otsuthr.h \ pageres.h pdblock.h points.h polyaprx.h polyblk.h \ - polyblob.h polyvert.h poutline.h \ + polyblob.h polyvert.h poutline.h publictypes.h \ quadlsq.h quadratc.h quspline.h ratngs.h rect.h rejctmap.h \ - statistc.h stepblob.h vecfuncs.h werd.h + seam.h split.h statistc.h stepblob.h vecfuncs.h werd.h lib_LTLIBRARIES = libtesseract_ccstruct.la libtesseract_ccstruct_la_SOURCES = \ - blobbox.cpp blobs.cpp blread.cpp callcpp.cpp ccstruct.cpp coutln.cpp \ - detlinefit.cpp genblob.cpp \ - labls.cpp linlsq.cpp lmedsq.cpp mod128.cpp normalis.cpp \ + blobbox.cpp blobs.cpp blread.cpp boxword.cpp callcpp.cpp ccstruct.cpp coutln.cpp \ + detlinefit.cpp dppoint.cpp genblob.cpp \ + linlsq.cpp matrix.cpp mod128.cpp normalis.cpp \ ocrblock.cpp ocrrow.cpp otsuthr.cpp \ pageres.cpp pdblock.cpp points.cpp polyaprx.cpp polyblk.cpp \ - polyblob.cpp polyvert.cpp poutline.cpp \ + polyblob.cpp polyvert.cpp poutline.cpp publictypes.cpp \ quadlsq.cpp quadratc.cpp quspline.cpp ratngs.cpp rect.cpp rejctmap.cpp \ - statistc.cpp stepblob.cpp \ + seam.cpp split.cpp statistc.cpp stepblob.cpp \ vecfuncs.cpp werd.cpp libtesseract_ccstruct_la_LDFLAGS = -version-info $(GENERIC_LIBRARY_VERSION) @@ -374,14 +373,15 @@ distclean-compile: @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/blobbox.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/blobs.Plo@am__quote@ @AMDEP_TRUE@@am__include@ 
@am__quote@./$(DEPDIR)/blread.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/boxword.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/callcpp.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ccstruct.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/coutln.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/detlinefit.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/dppoint.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/genblob.Plo@am__quote@ -@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/labls.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/linlsq.Plo@am__quote@ -@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/lmedsq.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/matrix.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mod128.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/normalis.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ocrblock.Plo@am__quote@ @@ -395,12 +395,15 @@ distclean-compile: @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/polyblob.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/polyvert.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/poutline.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/publictypes.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/quadlsq.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/quadratc.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/quspline.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ratngs.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/rect.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/rejctmap.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/seam.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/split.Plo@am__quote@ @AMDEP_TRUE@@am__include@ 
@am__quote@./$(DEPDIR)/statistc.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/stepblob.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/vecfuncs.Plo@am__quote@ diff --git a/ccstruct/blobbox.cpp b/ccstruct/blobbox.cpp index 1872cb0221..f8dff991ab 100644 --- a/ccstruct/blobbox.cpp +++ b/ccstruct/blobbox.cpp @@ -18,17 +18,45 @@ **********************************************************************/ #include "mfcpch.h" -#include "blobbox.h" +#include "blobbox.h" +#include "helpers.h" #define PROJECTION_MARGIN 10 //arbitrary #define EXTERN -EXTERN double_VAR (textord_error_weight, 3, -"Weighting for error in believability"); -EXTERN BOOL_VAR (pitsync_projection_fix, TRUE, -"Fix bug in projection profile"); - ELISTIZE (BLOBNBOX) ELIST2IZE (TO_ROW) ELISTIZE (TO_BLOCK) + +// Upto 30 degrees is allowed for rotations of diacritic blobs. +const double kCosSmallAngle = 0.866; +// Min aspect ratio for a joined word to indicate an obvious flow direction. +const double kDefiniteAspectRatio = 2.0; +// Multiple of short length in perimeter to make a joined word. +const double kComplexShapePerimeterRatio = 1.5; + +void BLOBNBOX::rotate(FCOORD rotation) { + cblob_ptr->rotate(rotation); + rotate_box(rotation); + compute_bounding_box(); +} + +// Rotate the box by the angle given by rotation. +// If the blob is a diacritic, then only small rotations for skew +// correction can be applied. 
+void BLOBNBOX::rotate_box(FCOORD rotation) { + if (IsDiacritic()) { + ASSERT_HOST(rotation.x() >= kCosSmallAngle) + ICOORD top_pt((box.left() + box.right()) / 2, base_char_top_); + ICOORD bottom_pt(top_pt.x(), base_char_bottom_); + top_pt.rotate(rotation); + base_char_top_ = top_pt.y(); + bottom_pt.rotate(rotation); + base_char_bottom_ = bottom_pt.y(); + box.rotate(rotation); + } else { + box.rotate(rotation); + set_diacritic_box(box); + } +} /********************************************************************** * BLOBNBOX::merge * @@ -38,10 +66,22 @@ void BLOBNBOX::merge( //merge blobs BLOBNBOX *nextblob //blob to join with ) { box += nextblob->box; //merge boxes + set_diacritic_box(box); nextblob->joined = TRUE; } +// Merge this with other, taking the outlines from other. +// Other is not deleted, but left for the caller to handle. +void BLOBNBOX::really_merge(BLOBNBOX* other) { + if (cblob_ptr != NULL && other->cblob_ptr != NULL) { + C_OUTLINE_IT ol_it(cblob_ptr->out_list()); + ol_it.add_list_after(other->cblob_ptr->out_list()); + } + compute_bounding_box(); +} + + /********************************************************************** * BLOBNBOX::chop * @@ -88,10 +128,7 @@ void BLOBNBOX::chop( //chop blobs rightx, /*rotation, */ test_ymin, test_ymax); blob_it.forward (); - if (test_ymin < ymin) - ymin = test_ymin; - if (test_ymax > ymax) - ymax = test_ymax; + UpdateRange(test_ymin, test_ymax, &ymin, &ymax); } while (blob != end_it->data ()); if (ymin < ymax) { @@ -107,6 +144,8 @@ void BLOBNBOX::chop( //chop blobs //box is all it has newblob->box = TBOX (bl, tr); //stay on current + newblob->base_char_top_ = tr.y(); + newblob->base_char_bottom_ = bl.y(); end_it->add_after_stay_put (newblob); } } @@ -114,6 +153,201 @@ void BLOBNBOX::chop( //chop blobs } } +// Returns the box gaps between this and its neighbours_ in an array +// indexed by BlobNeighbourDir. 
+void BLOBNBOX::NeighbourGaps(int gaps[BND_COUNT]) const { + for (int dir = 0; dir < BND_COUNT; ++dir) { + gaps[dir] = MAX_INT16; + BLOBNBOX* neighbour = neighbours_[dir]; + if (neighbour != NULL) { + TBOX n_box = neighbour->bounding_box(); + if (dir == BND_LEFT || dir == BND_RIGHT) { + gaps[dir] = box.x_gap(n_box); + } else { + gaps[dir] = box.y_gap(n_box); + } + } + } +} +// Returns the min and max horizontal and vertical gaps (from NeighbourGaps) +// modified so that if the max exceeds the max dimension of the blob, and +// the min is less, the max is replaced with the min. +// The objective is to catch cases where there is only a single neighbour +// and avoid reporting the other gap as a ridiculously large number +void BLOBNBOX::MinMaxGapsClipped(int* h_min, int* h_max, + int* v_min, int* v_max) const { + int max_dimension = MAX(box.width(), box.height()); + int gaps[BND_COUNT]; + NeighbourGaps(gaps); + *h_min = MIN(gaps[BND_LEFT], gaps[BND_RIGHT]); + *h_max = MAX(gaps[BND_LEFT], gaps[BND_RIGHT]); + if (*h_max > max_dimension && *h_min < max_dimension) *h_max = *h_min; + *v_min = MIN(gaps[BND_ABOVE], gaps[BND_BELOW]); + *v_max = MAX(gaps[BND_ABOVE], gaps[BND_BELOW]); + if (*v_max > max_dimension && *v_min < max_dimension) *v_max = *v_min; +} + +// Returns positive if there is at least one side neighbour that has a similar +// stroke width and is not on the other side of a rule line. +int BLOBNBOX::GoodTextBlob() const { + int score = 0; + for (int dir = 0; dir < BND_COUNT; ++dir) { + BlobNeighbourDir bnd = static_cast(dir); + if (good_stroke_neighbour(bnd)) + ++score; + } + return score; +} + +// Returns true, and sets vert_possible/horz_possible if the blob has some +// feature that makes it individually appear to flow one way. +// eg if it has a high aspect ratio, yet has a complex shape, such as a +// joined word in Latin, Arabic, or Hindi, rather than being a -, I, l, 1 etc. 
+bool BLOBNBOX::DefiniteIndividualFlow() { + int box_perimeter = 2 * (box.height() + box.width()); + if (box.width() > box.height() * kDefiniteAspectRatio) { + // Attempt to distinguish a wide joined word from a dash. + // If it is a dash, then its perimeter is approximately + // 2 * (box width + stroke width), but more if the outline is noisy, + // so perimeter - 2*(box width + stroke width) should be close to zero. + // A complex shape such as a joined word should have a much larger value. + int perimeter = cblob()->perimeter(); + if (vert_stroke_width() > 0) + perimeter -= 2 * vert_stroke_width(); + else + perimeter -= 4 * cblob()->area() / perimeter; + perimeter -= 2 * box.width(); + // Use a multiple of the box perimeter as a threshold. + if (perimeter > kComplexShapePerimeterRatio * box_perimeter) { + set_vert_possible(false); + set_horz_possible(true); + return true; + } + } + if (box.height() > box.width() * kDefiniteAspectRatio) { + // As above, but for a putative vertical word vs a I/1/l. + int perimeter = cblob()->perimeter(); + if (horz_stroke_width() > 0) + perimeter -= 2 * horz_stroke_width(); + else + perimeter -= 4 * cblob()->area() / perimeter; + perimeter -= 2 * box.height(); + if (perimeter > kComplexShapePerimeterRatio * box_perimeter) { + set_vert_possible(true); + set_horz_possible(false); + return true; + } + } + return false; +} + +// Returns true if there is no tabstop violation in merging this and other. +bool BLOBNBOX::ConfirmNoTabViolation(const BLOBNBOX& other) const { + if (box.left() < other.box.left() && box.left() < other.left_rule_) + return false; + if (other.box.left() < box.left() && other.box.left() < left_rule_) + return false; + if (box.right() > other.box.right() && box.right() > other.right_rule_) + return false; + if (other.box.right() > box.right() && other.box.right() > right_rule_) + return false; + return true; +} + +// Returns true if other has a similar stroke width to this. 
+bool BLOBNBOX::MatchingStrokeWidth(const BLOBNBOX& other, + double fractional_tolerance, + double constant_tolerance) const { + // The perimeter-based width is used as a backup in case there is + // no information in the blob. + double p_width = area_stroke_width(); + double n_p_width = other.area_stroke_width(); + float h_tolerance = horz_stroke_width_ * fractional_tolerance + + constant_tolerance; + float v_tolerance = vert_stroke_width_ * fractional_tolerance + + constant_tolerance; + double p_tolerance = p_width * fractional_tolerance + + constant_tolerance; + bool h_zero = horz_stroke_width_ == 0.0f || other.horz_stroke_width_ == 0.0f; + bool v_zero = vert_stroke_width_ == 0.0f || other.vert_stroke_width_ == 0.0f; + bool h_ok = !h_zero && NearlyEqual(horz_stroke_width_, + other.horz_stroke_width_, h_tolerance); + bool v_ok = !v_zero && NearlyEqual(vert_stroke_width_, + other.vert_stroke_width_, v_tolerance); + bool p_ok = h_zero && v_zero && NearlyEqual(p_width, n_p_width, p_tolerance); + // For a match, at least one of the horizontal and vertical widths + // must match, and the other one must either match or be zero. + // Only if both are zero will we look at the perimeter metric. + return p_ok || ((v_ok || h_ok) && (h_ok || h_zero) && (v_ok || v_zero)); +} + +// Returns a bounding box of the outline contained within the +// given horizontal range. 
+TBOX BLOBNBOX::BoundsWithinLimits(int left, int right) { + FCOORD no_rotation(1.0f, 0.0f); + float top, bottom; + if (cblob_ptr != NULL) { + find_cblob_limits(cblob_ptr, static_cast(left), + static_cast(right), no_rotation, + bottom, top); + } else { + find_blob_limits(blob_ptr, static_cast(left), + static_cast(right), no_rotation, + bottom, top); + } + + if (top < bottom) { + top = box.top(); + bottom = box.bottom(); + } + FCOORD bot_left(left, bottom); + FCOORD top_right(right, top); + TBOX shrunken_box(bot_left); + TBOX shrunken_box2(top_right); + shrunken_box += shrunken_box2; + return shrunken_box; +} + +#ifndef GRAPHICS_DISABLED +ScrollView::Color BLOBNBOX::TextlineColor(BlobRegionType region_type, + BlobTextFlowType flow_type) { + switch (region_type) { + case BRT_HLINE: + return ScrollView::BROWN; + case BRT_VLINE: + return ScrollView::DARK_GREEN; + case BRT_RECTIMAGE: + return ScrollView::RED; + case BRT_POLYIMAGE: + return ScrollView::ORANGE; + case BRT_UNKNOWN: + return flow_type == BTFT_NONTEXT ? ScrollView::CYAN : ScrollView::WHITE; + case BRT_VERT_TEXT: + if (flow_type == BTFT_STRONG_CHAIN || flow_type == BTFT_TEXT_ON_IMAGE) + return ScrollView::GREEN; + if (flow_type == BTFT_CHAIN) + return ScrollView::LIME_GREEN; + return ScrollView::YELLOW; + case BRT_TEXT: + if (flow_type == BTFT_STRONG_CHAIN) + return ScrollView::BLUE; + if (flow_type == BTFT_TEXT_ON_IMAGE) + return ScrollView::LIGHT_BLUE; + if (flow_type == BTFT_CHAIN) + return ScrollView::MEDIUM_BLUE; + if (flow_type == BTFT_LEADER) + return ScrollView::WHEAT; + return ScrollView::MAGENTA; + default: + return ScrollView::GREY; + } +} + +// Keep in sync with BlobRegionType. 
+ScrollView::Color BLOBNBOX::BoxColor() const { + return TextlineColor(region_type_, flow_); +} +#endif /********************************************************************** * find_blob_limits @@ -152,26 +386,15 @@ void find_blob_limits( //get y limits if ((pos.x () < leftx && pos.x () + vec.x () > leftx) || (pos.x () > leftx && pos.x () + vec.x () < leftx)) { testy = pos.y () + vec.y () * (leftx - pos.x ()) / vec.x (); - //intercept of boundary - if (testy < ymin) - ymin = testy; - if (testy > ymax) - ymax = testy; + UpdateRange(testy, &ymin, &ymax); } if (pos.x () >= leftx && pos.x () <= rightx) { - if (pos.y () > ymax) - ymax = pos.y (); - if (pos.y () < ymin) - ymin = pos.y (); + UpdateRange(pos.y(), &ymin, &ymax); } if ((pos.x () > rightx && pos.x () + vec.x () < rightx) || (pos.x () < rightx && pos.x () + vec.x () > rightx)) { testy = pos.y () + vec.y () * (rightx - pos.x ()) / vec.x (); - //intercept of boundary - if (testy < ymin) - ymin = testy; - if (testy > ymax) - ymax = testy; + UpdateRange(testy, &ymin, &ymax); } } } @@ -208,10 +431,7 @@ void find_cblob_limits( //get y limits for (stepindex = 0; stepindex < outline->pathlength (); stepindex++) { //inside if (pos.x () >= leftx && pos.x () <= rightx) { - if (pos.y () > ymax) - ymax = pos.y (); - if (pos.y () < ymin) - ymin = pos.y (); + UpdateRange(pos.y(), &ymin, &ymax); } vec = outline->step (stepindex); vec.rotate (rotation); @@ -249,10 +469,7 @@ void find_cblob_vlimits( //get y limits for (stepindex = 0; stepindex < outline->pathlength (); stepindex++) { //inside if (pos.x () >= leftx && pos.x () <= rightx) { - if (pos.y () > ymax) - ymax = pos.y (); - if (pos.y () < ymin) - ymin = pos.y (); + UpdateRange(pos.y(), &ymin, &ymax); } vec = outline->step (stepindex); pos += vec; //move to next @@ -289,10 +506,7 @@ void find_cblob_hlimits( //get x limits for (stepindex = 0; stepindex < outline->pathlength (); stepindex++) { //inside if (pos.y () >= bottomy && pos.y () <= topy) { - if (pos.x () > xmax) 
- xmax = pos.x (); - if (pos.x () < xmin) - xmin = pos.x (); + UpdateRange(pos.x(), &xmin, &xmax); } vec = outline->step (stepindex); pos += vec; //move to next @@ -351,7 +565,7 @@ PBLOB *rotate_cblob( //rotate it OUTLINE_IT out_it; POLYPT_IT poly_it; //outline pts - copy = new PBLOB (blob, xheight); + copy = new PBLOB (blob); out_it.set_to_list (copy->out_list ()); for (out_it.mark_cycle_pt (); !out_it.cycled_list (); out_it.forward ()) { //get points @@ -458,7 +672,12 @@ BLOBNBOX * blob, //first blob float top, //corrected top float bottom, //of row float row_size //ideal -): y_min(bottom), y_max(top), initial_y_min(bottom), num_repeated_sets_(-1) { +) { + clear(); + y_min = bottom; + y_max = top; + initial_y_min = bottom; + float diff; //in size BLOBNBOX_IT it = &blobs; //list of blobs @@ -572,6 +791,46 @@ void TO_ROW::compute_vertical_projection() { //project whole row } +/********************************************************************** + * TO_ROW::clear + * + * Zero out all scalar members. 
+ **********************************************************************/ +void TO_ROW::clear() { + all_caps = 0; + used_dm_model = 0; + projection_left = 0; + projection_right = 0; + pitch_decision = PITCH_DUNNO; + fixed_pitch = 0.0; + fp_space = 0.0; + fp_nonsp = 0.0; + pr_space = 0.0; + pr_nonsp = 0.0; + spacing = 0.0; + xheight = 0.0; + xheight_evidence = 0; + ascrise = 0.0; + descdrop = 0.0; + min_space = 0; + max_nonspace = 0; + space_threshold = 0; + kern_size = 0.0; + space_size = 0.0; + y_min = 0.0; + y_max = 0.0; + initial_y_min = 0.0; + m = 0.0; + c = 0.0; + error = 0.0; + para_c = 0.0; + para_error = 0.0; + y_origin = 0.0; + credibility = 0.0; + num_repeated_sets_ = -1; +} + + /********************************************************************** * vertical_blob_projection * @@ -722,16 +981,9 @@ void vertical_coutline_projection( //project outlines for (stepindex = 0; stepindex < length; stepindex++) { step = outline->step (stepindex); if (step.x () > 0) { - if (pitsync_projection_fix) - stats->add (pos.x (), -pos.y ()); - else - stats->add (pos.x (), pos.y ()); - } - else if (step.x () < 0) { - if (pitsync_projection_fix) - stats->add (pos.x () - 1, pos.y ()); - else - stats->add (pos.x () - 1, -pos.y ()); + stats->add (pos.x (), -pos.y ()); + } else if (step.x () < 0) { + stats->add (pos.x () - 1, pos.y ()); } pos += step; } @@ -751,6 +1003,7 @@ void vertical_coutline_projection( //project outlines TO_BLOCK::TO_BLOCK( //make a block BLOCK *src_block //real block ) { + clear(); block = src_block; } @@ -767,6 +1020,32 @@ static void clear_blobnboxes(BLOBNBOX_LIST* boxes) { } } +/********************************************************************** + * TO_BLOCK::clear + * + * Zero out all scalar members. 
+ **********************************************************************/ +void TO_BLOCK::clear() { + block = NULL; + pitch_decision = PITCH_DUNNO; + line_spacing = 0.0; + line_size = 0.0; + max_blob_size = 0.0; + baseline_offset = 0.0; + xheight = 0.0; + fixed_pitch = 0.0; + kern_size = 0.0; + space_size = 0.0; + min_space = 0; + max_nonspace = 0; + fp_space = 0.0; + fp_nonsp = 0.0; + pr_space = 0.0; + pr_nonsp = 0.0; + key_row = NULL; +} + + TO_BLOCK::~TO_BLOCK() { // Any residual BLOBNBOXes at this stage own their blobs, so delete them. clear_blobnboxes(&blobs); @@ -802,6 +1081,4 @@ void plot_blob_list(ScrollView* win, // window to draw in it.data()->plot(win, body_colour, child_colour); } } - #endif //GRAPHICS_DISABLED - diff --git a/ccstruct/blobbox.h b/ccstruct/blobbox.h index 79c2cc3013..63056af308 100644 --- a/ccstruct/blobbox.h +++ b/ccstruct/blobbox.h @@ -20,16 +20,12 @@ #ifndef BLOBBOX_H #define BLOBBOX_H -#include "varable.h" #include "clst.h" #include "elst2.h" #include "werd.h" #include "ocrblock.h" #include "statistc.h" -extern double_VAR_H (textord_error_weight, 3, -"Weighting for error in believability"); - enum PITCH_TYPE { PITCH_DUNNO, //insufficient data @@ -53,10 +49,12 @@ enum TabType { // The possible region types of a BLOBNBOX. // Note: keep all the text types > BRT_UNKNOWN and all the image types less. -// Keep in sync with kBlobTypes in colpartition.cpp and BoxColor below. +// Keep in sync with kBlobTypes in colpartition.cpp and BoxColor, and the +// *Type static functions below. enum BlobRegionType { BRT_NOISE, // Neither text nor image. BRT_HLINE, // Horizontal separator line. + BRT_VLINE, // Vertical separator line. BRT_RECTIMAGE, // Rectangular image. BRT_POLYIMAGE, // Non-rectangular image. BRT_UNKNOWN, // Not determined yet. @@ -66,6 +64,46 @@ enum BlobRegionType { BRT_COUNT // Number of possibilities. }; +// enum for elements of arrays that refer to neighbours. 
+enum BlobNeighbourDir { + BND_LEFT, + BND_BELOW, + BND_RIGHT, + BND_ABOVE, + BND_COUNT +}; + +// BlobTextFlowType indicates the quality of neighbouring information +// related to a chain of connected components, either horizontally or +// vertically. Also used by ColPartition for the collection of blobs +// within, which should all have the same value in most cases. +enum BlobTextFlowType { + BTFT_NONE, // No text flow set yet. + BTFT_NONTEXT, // Flow too poor to be likely text. + BTFT_NEIGHBOURS, // Neighbours support flow in this direction. + BTFT_CHAIN, // There is a weak chain of text in this direction. + BTFT_STRONG_CHAIN, // There is a strong chain of text in this direction. + BTFT_TEXT_ON_IMAGE, // There is a strong chain of text on an image. + BTFT_LEADER, // Leader dots/dashes etc. + BTFT_COUNT +}; + +// Returns true if type1 dominates type2 in a merge. Mostly determined by the +// ordering of the enum, but NONTEXT dominates everything else, and LEADER +// dominates nothing. +// The function is anti-symmetric (t1 > t2) === !(t2 > t1), except that +// this cannot be true if t1 == t2, so the result is undefined. +inline bool DominatesInMerge(BlobTextFlowType type1, BlobTextFlowType type2) { + // NONTEXT dominates everything. + if (type1 == BTFT_NONTEXT) return true; + if (type2 == BTFT_NONTEXT) return false; + // LEADER always loses. + if (type1 == BTFT_LEADER) return false; + if (type2 == BTFT_LEADER) return true; + // With those out of the way, the ordering of the enum determines the result. 
+ return type1 >= type2; +} + namespace tesseract { class ColPartition; } @@ -76,46 +114,84 @@ class BLOBNBOX:public ELIST_LINK { public: BLOBNBOX() { - blob_ptr = NULL; - cblob_ptr = NULL; - area = 0; - Init(); + ConstructionInit(); } explicit BLOBNBOX(PBLOB *srcblob) { + box = srcblob->bounding_box(); + ConstructionInit(); blob_ptr = srcblob; - cblob_ptr = NULL; - box = srcblob->bounding_box (); - area = (int) srcblob->area (); - Init(); + area = static_cast(srcblob->area()); } explicit BLOBNBOX(C_BLOB *srcblob) { - blob_ptr = NULL; + box = srcblob->bounding_box(); + ConstructionInit(); cblob_ptr = srcblob; - box = srcblob->bounding_box (); - area = (int) srcblob->area (); - Init(); + area = static_cast(srcblob->area()); } - - void rotate_box(FCOORD vec) { - box.rotate(vec); + static BLOBNBOX* RealBlob(C_OUTLINE* outline) { + C_BLOB* blob = new C_BLOB(outline); + return new BLOBNBOX(blob); } + + void rotate_box(FCOORD rotation); + void rotate(FCOORD rotation); void translate_box(ICOORD v) { - box.move(v); + if (IsDiacritic()) { + box.move(v); + base_char_top_ += v.y(); + base_char_bottom_ += v.y(); + } else { + box.move(v); + set_diacritic_box(box); + } } void merge(BLOBNBOX *nextblob); + void really_merge(BLOBNBOX* other); void chop( // fake chop blob BLOBNBOX_IT *start_it, // location of this BLOBNBOX_IT *blob_it, // iterator FCOORD rotation, // for landscape float xheight); // line height + void NeighbourGaps(int gaps[BND_COUNT]) const; + void MinMaxGapsClipped(int* h_min, int* h_max, + int* v_min, int* v_max) const; + int GoodTextBlob() const; + + // Returns true, and sets vert_possible/horz_possible if the blob has some + // feature that makes it individually appear to flow one way. + // eg if it has a high aspect ratio, yet has a complex shape, such as a + // joined word in Latin, Arabic, or Hindi, rather than being a -, I, l, 1. + bool DefiniteIndividualFlow(); + + // Returns true if there is no tabstop violation in merging this and other. 
+ bool ConfirmNoTabViolation(const BLOBNBOX& other) const; + + // Returns true if other has a similar stroke width to this. + bool MatchingStrokeWidth(const BLOBNBOX& other, + double fractional_tolerance, + double constant_tolerance) const; + + // Returns a bounding box of the outline contained within the + // given horizontal range. + TBOX BoundsWithinLimits(int left, int right); + // Simple accessors. const TBOX& bounding_box() const { return box; } + // Set the bounding box. Use with caution. + // Normally use compute_bounding_box instead. + void set_bounding_box(const TBOX& new_box) { + box = new_box; + base_char_top_ = box.top(); + base_char_bottom_ = box.bottom(); + } void compute_bounding_box() { box = cblob_ptr != NULL ? cblob_ptr->bounding_box() : blob_ptr->bounding_box(); + base_char_top_ = box.top(); + base_char_bottom_ = box.bottom(); } const TBOX& reduced_box() const { return red_box; @@ -163,6 +239,24 @@ class BLOBNBOX:public ELIST_LINK void set_region_type(BlobRegionType new_type) { region_type_ = new_type; } + BlobTextFlowType flow() const { + return flow_; + } + void set_flow(BlobTextFlowType value) { + flow_ = value; + } + bool vert_possible() const { + return vert_possible_; + } + void set_vert_possible(bool value) { + vert_possible_ = value; + } + bool horz_possible() const { + return horz_possible_; + } + void set_horz_possible(bool value) { + horz_possible_ = value; + } int left_rule() const { return left_rule_; } @@ -199,40 +293,80 @@ class BLOBNBOX:public ELIST_LINK void set_vert_stroke_width(float width) { vert_stroke_width_ = width; } + float area_stroke_width() const { + return area_stroke_width_; + } tesseract::ColPartition* owner() const { return owner_; } void set_owner(tesseract::ColPartition* new_owner) { owner_ = new_owner; } - void set_noise_flag(bool flag) { - noise_flag_ = flag; + bool leader_on_left() const { + return leader_on_left_; + } + void set_leader_on_left(bool flag) { + leader_on_left_ = flag; + } + bool 
leader_on_right() const { + return leader_on_right_; + } + void set_leader_on_right(bool flag) { + leader_on_right_ = flag; + } + BLOBNBOX* neighbour(BlobNeighbourDir n) const { + return neighbours_[n]; + } + bool good_stroke_neighbour(BlobNeighbourDir n) const { + return good_stroke_neighbours_[n]; + } + void set_neighbour(BlobNeighbourDir n, BLOBNBOX* neighbour, bool good) { + neighbours_[n] = neighbour; + good_stroke_neighbours_[n] = good; } - bool noise_flag() const { - return noise_flag_; + bool IsDiacritic() const { + return base_char_top_ != box.top() || base_char_bottom_ != box.bottom(); + } + int base_char_top() const { + return base_char_top_; + } + int base_char_bottom() const { + return base_char_bottom_; + } + void set_diacritic_box(const TBOX& diacritic_box) { + base_char_top_ = diacritic_box.top(); + base_char_bottom_ = diacritic_box.bottom(); + } + bool UniquelyVertical() const { + return vert_possible_ && !horz_possible_; + } + bool UniquelyHorizontal() const { + return horz_possible_ && !vert_possible_; + } + + // Returns true if the region type is text. + static bool IsTextType(BlobRegionType type) { + return type == BRT_TEXT || type == BRT_VERT_TEXT; + } + // Returns true if the region type is image. + static bool IsImageType(BlobRegionType type) { + return type == BRT_RECTIMAGE || type == BRT_POLYIMAGE; + } + // Returns true if the region type is line. + static bool IsLineType(BlobRegionType type) { + return type == BRT_HLINE || type == BRT_VLINE; + } + // Returns true if the region type cannot be merged. + static bool UnMergeableType(BlobRegionType type) { + return IsLineType(type) || IsImageType(type); } + static ScrollView::Color TextlineColor(BlobRegionType region_type, + BlobTextFlowType flow_type); + #ifndef GRAPHICS_DISABLED // Keep in sync with BlobRegionType. 
- ScrollView::Color BoxColor() const { - switch (region_type_) { - case BRT_HLINE: - return ScrollView::YELLOW; - case BRT_RECTIMAGE: - return ScrollView::RED; - case BRT_POLYIMAGE: - return ScrollView::ORANGE; - case BRT_UNKNOWN: - return ScrollView::CYAN; - case BRT_VERT_TEXT: - return ScrollView::GREEN; - case BRT_TEXT: - return ScrollView::BLUE; - case BRT_NOISE: - default: - return ScrollView::GREY; - } - } + ScrollView::Color BoxColor() const; void plot(ScrollView* window, // window to draw in ScrollView::Color blob_colour, // for outer bits @@ -244,27 +378,53 @@ class BLOBNBOX:public ELIST_LINK } #endif - NEWDELETE2(BLOBNBOX) + NEWDELETE2(BLOBNBOX) - private: - // Initializes the bulk of the members to default values. - void Init() { + // Initializes the bulk of the members to default values for use at + // construction time. + void ConstructionInit() { + blob_ptr = NULL; + cblob_ptr = NULL; + area = 0; + area_stroke_width_ = 0.0f; + horz_stroke_width_ = 0.0f; + vert_stroke_width_ = 0.0f; + ReInit(); + } + // Initializes members set by StrokeWidth and beyond, without discarding + // stored area and strokewidth values, which are expensive to calculate. 
+ void ReInit() { joined = false; reduced = false; repeated_set_ = 0; left_tab_type_ = TT_NONE; right_tab_type_ = TT_NONE; region_type_ = BRT_UNKNOWN; + flow_ = BTFT_NONE; left_rule_ = 0; right_rule_ = 0; left_crossing_rule_ = 0; right_crossing_rule_ = 0; - horz_stroke_width_ = 0.0f; - vert_stroke_width_ = 0.0f; + if (area_stroke_width_ == 0.0f && area > 0 && cblob() != NULL) + area_stroke_width_ = 2.0f * area / cblob()->perimeter(); owner_ = NULL; - noise_flag_ = false; + base_char_top_ = box.top(); + base_char_bottom_ = box.bottom(); + horz_possible_ = false; + vert_possible_ = false; + leader_on_left_ = false; + leader_on_right_ = false; + ClearNeighbours(); + } + + void ClearNeighbours() { + for (int n = 0; n < BND_COUNT; ++n) { + neighbours_[n] = NULL; + good_stroke_neighbours_[n] = false; + } } + private: PBLOB *blob_ptr; // poly blob C_BLOB *cblob_ptr; // edgestep blob TBOX box; // bounding box @@ -276,22 +436,32 @@ class BLOBNBOX:public ELIST_LINK TabType left_tab_type_; // Indicates tab-stop assessment TabType right_tab_type_; // Indicates tab-stop assessment BlobRegionType region_type_; // Type of region this blob belongs to + BlobTextFlowType flow_; // Quality of text flow. inT16 left_rule_; // x-coord of nearest but not crossing rule line inT16 right_rule_; // x-coord of nearest but not crossing rule line inT16 left_crossing_rule_; // x-coord of nearest or crossing rule line inT16 right_crossing_rule_; // x-coord of nearest or crossing rule line + inT16 base_char_top_; // y-coord of top/bottom of diacritic base, + inT16 base_char_bottom_; // if it exists else top/bottom of this blob. float horz_stroke_width_; // Median horizontal stroke width float vert_stroke_width_; // Median vertical stroke width + float area_stroke_width_; // Stroke width from area/perimeter ratio. 
tesseract::ColPartition* owner_; // Who will delete me when I am not needed - // Was the blob flagged as noise in the initial filtering step - bool noise_flag_; + BLOBNBOX* neighbours_[BND_COUNT]; + bool good_stroke_neighbours_[BND_COUNT]; + bool horz_possible_; // Could be part of horizontal flow. + bool vert_possible_; // Could be part of vertical flow. + bool leader_on_left_; // There is a leader to the left. + bool leader_on_right_; // There is a leader to the right. }; -class TO_ROW:public ELIST2_LINK +class TO_ROW: public ELIST2_LINK { public: + static const int kErrorWeight = 3; + TO_ROW() { - num_repeated_sets_ = -1; + clear(); } //empty TO_ROW( //constructor BLOBNBOX *blob, //from first blob @@ -359,7 +529,7 @@ class TO_ROW:public ELIST2_LINK para_c = new_c; para_error = new_error; credibility = - (float) (blobs.length () - textord_error_weight * new_error); + (float) (blobs.length () - kErrorWeight * new_error); y_origin = (float) (new_c / sqrt (1 + gradient * gradient)); //real intercept } @@ -413,6 +583,8 @@ class TO_ROW:public ELIST2_LINK STATS projection; // vertical projection private: + void clear(); // clear all values to reasonable defaults + BLOBNBOX_LIST blobs; //blobs in row float y_min; //coords float y_max; @@ -432,16 +604,45 @@ ELIST2IZEH (TO_ROW) class TO_BLOCK:public ELIST_LINK { public: - TO_BLOCK() { + TO_BLOCK() : pitch_decision(PITCH_DUNNO) { + clear(); } //empty TO_BLOCK( //constructor BLOCK *src_block); //real block ~TO_BLOCK(); + void clear(); // clear all scalar members. + TO_ROW_LIST *get_rows() { //access function return &row_list; } + // Rotate all the blobnbox lists and the underlying block. Then update the + // median size statistic from the blobs list. 
+ void rotate(const FCOORD& rotation) { + BLOBNBOX_LIST* blobnbox_list[] = {&blobs, &underlines, &noise_blobs, + &small_blobs, &large_blobs, NULL}; + for (BLOBNBOX_LIST** list = blobnbox_list; *list != NULL; ++list) { + BLOBNBOX_IT it(*list); + for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { + it.data()->rotate(rotation); + } + } + // Rotate the block + ASSERT_HOST(block->poly_block() != NULL); + block->rotate(rotation); + // Update the median size statistic from the blobs list. + STATS widths(0, block->bounding_box().width()); + STATS heights(0, block->bounding_box().height()); + BLOBNBOX_IT blob_it(&blobs); + for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) { + widths.add(blob_it.data()->bounding_box().width(), 1); + heights.add(blob_it.data()->bounding_box().height(), 1); + } + block->set_median_size(static_cast(widths.median() + 0.5), + static_cast(heights.median() + 0.5)); + } + void print_rows() { //debug info TO_ROW_IT row_it = &row_list; TO_ROW *row; @@ -468,6 +669,11 @@ class TO_BLOCK:public ELIST_LINK BLOCK *block; //real block PITCH_TYPE pitch_decision; //how strong is decision float line_spacing; //estimate + // line_size is a lower-bound estimate of the font size in pixels of + // the text in the block (with ascenders and descenders), being a small + // (1.25) multiple of the median height of filtered blobs. + // In most cases the font size will be bigger, but it will be closer + // if the text is allcaps, or in a no-x-height script. 
float line_size; //estimate float max_blob_size; //line assignment limit float baseline_offset; //phase shift diff --git a/ccstruct/blobs.cpp b/ccstruct/blobs.cpp index 73fd09e11b..420e4e4d61 100644 --- a/ccstruct/blobs.cpp +++ b/ccstruct/blobs.cpp @@ -30,11 +30,434 @@ #include "blobs.h" #include "cutil.h" #include "emalloc.h" +#include "helpers.h" +#include "ndminx.h" +#include "normalis.h" +#include "ocrrow.h" +#include "points.h" +#include "polyaprx.h" #include "structures.h" +#include "werd.h" + +// A Vector representing the "vertical" direction when measuring the +// divisiblity of blobs into multiple blobs just by separating outlines. +// See divisible_blob below for the use. +const TPOINT kDivisibleVerticalUpright = {0, 1}; +// A vector representing the "vertical" direction for italic text for use +// when separating outlines. Using it actually deteriorates final accuracy, +// so it is only used for ApplyBoxes chopping to get a better segmentation. +const TPOINT kDivisibleVerticalItalic = {1, 5}; /*---------------------------------------------------------------------- F u n c t i o n s ----------------------------------------------------------------------*/ +// Consume the circular list of EDGEPTs to make a TESSLINE. +TESSLINE* TESSLINE::BuildFromOutlineList(EDGEPT* outline) { + TESSLINE* result = new TESSLINE; + result->loop = outline; + result->SetupFromPos(); + return result; +} + +// Copies the data and the outline, but leaves next untouched. 
+void TESSLINE::CopyFrom(const TESSLINE& src) { + Clear(); + topleft = src.topleft; + botright = src.botright; + start = src.start; + is_hole = src.is_hole; + if (src.loop != NULL) { + EDGEPT* prevpt = NULL; + EDGEPT* newpt = NULL; + EDGEPT* srcpt = src.loop; + do { + newpt = new EDGEPT(*srcpt); + if (prevpt == NULL) { + loop = newpt; + } else { + newpt->prev = prevpt; + prevpt->next = newpt; + } + prevpt = newpt; + srcpt = srcpt->next; + } while (srcpt != src.loop); + loop->prev = newpt; + newpt->next = loop; + } +} + +// Deletes owned data. +void TESSLINE::Clear() { + if (loop == NULL) + return; + + EDGEPT* this_edge = loop; + do { + EDGEPT* next_edge = this_edge->next; + delete this_edge; + this_edge = next_edge; + } while (this_edge != loop); + loop = NULL; +} + +// Rotates by the given rotation in place. +void TESSLINE::Rotate(const FCOORD rot) { + EDGEPT* pt = loop; + do { + int tmp = static_cast(floor(pt->pos.x * rot.x() - + pt->pos.y * rot.y() + 0.5)); + pt->pos.y = static_cast(floor(pt->pos.y * rot.x() + + pt->pos.x * rot.y() + 0.5)); + pt->pos.x = tmp; + pt = pt->next; + } while (pt != loop); + SetupFromPos(); +} + +// Moves by the given vec in place. +void TESSLINE::Move(const ICOORD vec) { + EDGEPT* pt = loop; + do { + pt->pos.x += vec.x(); + pt->pos.y += vec.y(); + pt = pt->next; + } while (pt != loop); + SetupFromPos(); +} + +// Scales by the given factor in place. +void TESSLINE::Scale(float factor) { + EDGEPT* pt = loop; + do { + pt->pos.x = static_cast(floor(pt->pos.x * factor + 0.5)); + pt->pos.y = static_cast(floor(pt->pos.y * factor + 0.5)); + pt = pt->next; + } while (pt != loop); + SetupFromPos(); +} + +// Sets up the start and vec members of the loop from the pos members. 
+void TESSLINE::SetupFromPos() { + EDGEPT* pt = loop; + do { + pt->vec.x = pt->next->pos.x - pt->pos.x; + pt->vec.y = pt->next->pos.y - pt->pos.y; + pt = pt->next; + } while (pt != loop); + start = pt->pos; + ComputeBoundingBox(); +} + +// Recomputes the bounding box from the points in the loop. +void TESSLINE::ComputeBoundingBox() { + int minx = MAX_INT32; + int miny = MAX_INT32; + int maxx = -MAX_INT32; + int maxy = -MAX_INT32; + + // Find boundaries. + start = loop->pos; + EDGEPT* this_edge = loop; + do { + if (!this_edge->IsHidden() || !this_edge->prev->IsHidden()) { + if (this_edge->pos.x < minx) + minx = this_edge->pos.x; + if (this_edge->pos.y < miny) + miny = this_edge->pos.y; + if (this_edge->pos.x > maxx) + maxx = this_edge->pos.x; + if (this_edge->pos.y > maxy) + maxy = this_edge->pos.y; + } + this_edge = this_edge->next; + } while (this_edge != loop); + // Reset bounds. + topleft.x = minx; + topleft.y = maxy; + botright.x = maxx; + botright.y = miny; +} + +// Computes the min and max cross product of the outline points with the +// given vec and returns the results in min_xp and max_xp. Geometrically +// this is the left and right edge of the outline perpendicular to the +// given direction, but to get the distance units correct, you would +// have to divide by the modulus of vec. 
+void TESSLINE::MinMaxCrossProduct(const TPOINT vec, + int* min_xp, int* max_xp) const { + *min_xp = MAX_INT32; + *max_xp = MIN_INT32; + EDGEPT* this_edge = loop; + do { + if (!this_edge->IsHidden() || !this_edge->prev->IsHidden()) { + int product = CROSS(this_edge->pos, vec); + UpdateRange(product, min_xp, max_xp); + } + this_edge = this_edge->next; + } while (this_edge != loop); +} + +TBOX TESSLINE::bounding_box() const { + return TBOX(topleft.x, botright.y, botright.x, topleft.y); +} + +void TESSLINE::plot(ScrollView* window, ScrollView::Color color, + ScrollView::Color child_color) { + if (is_hole) + window->Pen(child_color); + else + window->Pen(color); + window->SetCursor(start.x, start.y); + EDGEPT* pt = loop; + do { + bool prev_hidden = pt->IsHidden(); + pt = pt->next; + if (prev_hidden) + window->SetCursor(pt->pos.x, pt->pos.y); + else + window->DrawTo(pt->pos.x, pt->pos.y); + } while (pt != loop); +} + +// Iterate the given list of outlines, converting to TESSLINE by polygonal +// approximation and recursively any children, returning the current tail +// of the resulting list of TESSLINEs. +static TESSLINE** ApproximateOutlineList(C_OUTLINE_LIST* outlines, + bool children, + TESSLINE** tail) { + C_OUTLINE_IT ol_it(outlines); + for (ol_it.mark_cycle_pt(); !ol_it.cycled_list(); ol_it.forward()) { + C_OUTLINE* outline = ol_it.data(); + TESSLINE* tessline = ApproximateOutline(outline); + tessline->is_hole = children; + *tail = tessline; + tail = &tessline->next; + if (!outline->child()->empty()) { + tail = ApproximateOutlineList(outline->child(), true, tail); + } + } + return tail; +} + +// Factory to build a TBLOB from a C_BLOB with polygonal +// approximation along the way. +TBLOB* TBLOB::PolygonalCopy(C_BLOB* src) { + C_OUTLINE_IT ol_it = src->out_list(); + TBLOB* tblob = new TBLOB; + ApproximateOutlineList(src->out_list(), false, &tblob->outlines); + return tblob; +} + +// Copies the data and the outline, but leaves next untouched. 
+void TBLOB::CopyFrom(const TBLOB& src) { + Clear(); + TESSLINE* prev_outline = NULL; + for (TESSLINE* srcline = src.outlines; srcline != NULL; + srcline = srcline->next) { + TESSLINE* new_outline = new TESSLINE(*srcline); + if (outlines == NULL) + outlines = new_outline; + else + prev_outline->next = new_outline; + prev_outline = new_outline; + } +} + +// Deletes owned data. +void TBLOB::Clear() { + for (TESSLINE* next_outline = NULL; outlines != NULL; + outlines = next_outline) { + next_outline = outlines->next; + delete outlines; + } +} + +// Rotates by the given rotation in place. +void TBLOB::Rotate(const FCOORD rotation) { + for (TESSLINE* outline = outlines; outline != NULL; outline = outline->next) { + outline->Rotate(rotation); + } +} + +// Moves by the given vec in place. +void TBLOB::Move(const ICOORD vec) { + for (TESSLINE* outline = outlines; outline != NULL; outline = outline->next) { + outline->Move(vec); + } +} + +// Scales by the given factor in place. +void TBLOB::Scale(float factor) { + for (TESSLINE* outline = outlines; outline != NULL; outline = outline->next) { + outline->Scale(factor); + } +} + +// Recomputes the bounding boxes of the outlines. +void TBLOB::ComputeBoundingBoxes() { + for (TESSLINE* outline = outlines; outline != NULL; outline = outline->next) { + outline->ComputeBoundingBox(); + } +} + +// Returns the number of outlines. 
+int TBLOB::NumOutlines() const { + int result = 0; + for (TESSLINE* outline = outlines; outline != NULL; outline = outline->next) + ++result; + return result; +} + +TBOX TBLOB::bounding_box() const { + TPOINT topleft; + TPOINT botright; + blob_bounding_box(this, &topleft, &botright); + TBOX box(topleft.x, botright.y, botright.x, topleft.y); + return box; +} + +void TBLOB::plot(ScrollView* window, ScrollView::Color color, + ScrollView::Color child_color) { + for (TESSLINE* outline = outlines; outline != NULL; outline = outline->next) + outline->plot(window, color, child_color); +} + +// Factory to build a TWERD from a (C_BLOB) WERD, with polygonal +// approximation along the way. +TWERD* TWERD::PolygonalCopy(WERD* src) { + TWERD* tessword = new TWERD; + tessword->latin_script = src->flag(W_SCRIPT_IS_LATIN); + C_BLOB_IT b_it(src->cblob_list()); + TBLOB *tail = NULL; + for (b_it.mark_cycle_pt(); !b_it.cycled_list(); b_it.forward()) { + C_BLOB* blob = b_it.data(); + TBLOB* tblob = TBLOB::PolygonalCopy(blob); + if (tail == NULL) { + tessword->blobs = tblob; + } else { + tail->next = tblob; + } + tail = tblob; + } + return tessword; +} + +// Normalize in-place and record the normalization in the DENORM. 
+void TWERD::Normalize(ROW* row, float x_height, bool numeric_mode, + DENORM* denorm) { + TBOX word_box = bounding_box(); + DENORM antidote((word_box.left() + word_box.right()) / 2.0, + kBlnXHeight / x_height, row); + if (row == NULL) { + antidote = DENORM(antidote.origin(), antidote.scale(), 0.0, + word_box.bottom(), 0, NULL, false, NULL); + } + int num_segments = 0; + DENORM_SEG *segs = new DENORM_SEG[NumBlobs()]; + for (TBLOB* blob = blobs; blob != NULL; blob = blob->next) { + TBOX blob_box = blob->bounding_box(); + ICOORD translation(-static_cast(floor(antidote.origin() + 0.5)), + -blob_box.bottom()); + float factor = antidote.scale(); + if (numeric_mode) { + factor = ClipToRange(kBlnXHeight * 4.0f / (3 * blob_box.height()), + factor, factor * 1.5f); + segs[num_segments].xstart = blob->bounding_box().left(); + segs[num_segments].ycoord = blob_box.bottom(); + segs[num_segments++].scale_factor = factor; + } else { + float blob_x_center = (blob_box.left() + blob_box.right()) / 2.0; + float y_shift = antidote.yshift_at_orig_x(blob_x_center); + translation.set_y(-static_cast(floor(y_shift + 0.5))); + } + blob->Move(translation); + blob->Scale(factor); + blob->Move(ICOORD(0, kBlnBaselineOffset)); + } + if (num_segments > 0) { + antidote.set_segments(segs, num_segments); + } + delete [] segs; + if (denorm != NULL) + *denorm = antidote; +} + +// Copies the data and the blobs, but leaves next untouched. +void TWERD::CopyFrom(const TWERD& src) { + Clear(); + latin_script = src.latin_script; + TBLOB* prev_blob = NULL; + for (TBLOB* srcblob = src.blobs; srcblob != NULL; srcblob = srcblob->next) { + TBLOB* new_blob = new TBLOB(*srcblob); + if (blobs == NULL) + blobs = new_blob; + else + prev_blob->next = new_blob; + prev_blob = new_blob; + } +} + +// Deletes owned data. +void TWERD::Clear() { + for (TBLOB* next_blob = NULL; blobs != NULL; blobs = next_blob) { + next_blob = blobs->next; + delete blobs; + } +} + +// Recomputes the bounding boxes of the blobs. 
+void TWERD::ComputeBoundingBoxes() { + for (TBLOB* blob = blobs; blob != NULL; blob = blob->next) { + blob->ComputeBoundingBoxes(); + } +} + +TBOX TWERD::bounding_box() const { + TBOX result; + for (TBLOB* blob = blobs; blob != NULL; blob = blob->next) { + TBOX box = blob->bounding_box(); + result += box; + } + return result; +} + +// Merges the blobs from start to end, not including end, and deletes +// the blobs between start and end. +void TWERD::MergeBlobs(int start, int end) { + TBLOB* blob = blobs; + for (int i = 0; i < start && blob != NULL; ++i) + blob = blob->next; + if (blob == NULL || blob->next == NULL) + return; + TBLOB* next_blob = blob->next; + TESSLINE* outline = blob->outlines; + for (int i = start + 1; i < end && next_blob != NULL; ++i) { + // Take the outlines from the next blob. + if (outline == NULL) { + blob->outlines = next_blob->outlines; + outline = blob->outlines; + } else { + while (outline->next != NULL) + outline = outline->next; + outline->next = next_blob->outlines; + next_blob->outlines = NULL; + } + // Delete the next blob and move on. + TBLOB* dead_blob = next_blob; + next_blob = next_blob->next; + blob->next = next_blob; + delete dead_blob; + } +} + +void TWERD::plot(ScrollView* window) { + ScrollView::Color color = WERD::NextColor(ScrollView::BLACK); + for (TBLOB* blob = blobs; blob != NULL; blob = blob->next) { + blob->plot(window, color, ScrollView::BROWN); + color = WERD::NextColor(color); + } +} + /********************************************************************** * blob_origin * @@ -61,32 +484,23 @@ void blob_origin(TBLOB *blob, /*blob to compute on */ * max coordinate value of the bounding boxes of all the top-level * outlines in the box. 
**********************************************************************/ -void blob_bounding_box(TBLOB *blob, /*blob to compute on */ - register TPOINT *topleft, /*bounding box */ - register TPOINT *botright) { - register TESSLINE *outline; /*current outline */ +void blob_bounding_box(const TBLOB *blob, // blob to compute on. + TPOINT *topleft, // bounding box. + TPOINT *botright) { + register TESSLINE *outline; // Current outline. if (blob == NULL || blob->outlines == NULL) { topleft->x = topleft->y = 0; - *botright = *topleft; /*default value */ - } - else { + *botright = *topleft; // Default value. + } else { outline = blob->outlines; *topleft = outline->topleft; *botright = outline->botright; for (outline = outline->next; outline != NULL; outline = outline->next) { - if (outline->topleft.x < topleft->x) - /*find extremes */ - topleft->x = outline->topleft.x; - if (outline->botright.x > botright->x) - /*find extremes */ - botright->x = outline->botright.x; - if (outline->topleft.y > topleft->y) - /*find extremes */ - topleft->y = outline->topleft.y; - if (outline->botright.y < botright->y) - /*find extremes */ - botright->y = outline->botright.y; + UpdateRange(outline->topleft.x, outline->botright.x, + &topleft->x, &botright->x); + UpdateRange(outline->botright.y, outline->topleft.y, + &botright->y, &topleft->y); } } } @@ -100,11 +514,10 @@ void blob_bounding_box(TBLOB *blob, /*blob to compute on */ void blobs_bounding_box(TBLOB *blobs, TPOINT *topleft, TPOINT *botright) { TPOINT tl; TPOINT br; - TBLOB *blob; /* Start with first blob */ blob_bounding_box(blobs, topleft, botright); - iterate_blobs(blob, blobs) { + for (TBLOB* blob = blobs; blob != NULL; blob = blob->next) { blob_bounding_box(blob, &tl, &br); if (tl.x < topleft->x) @@ -148,7 +561,6 @@ WIDTH_RECORD *blobs_widths(TBLOB *blobs) { /*blob to compute on */ WIDTH_RECORD *width_record; TPOINT topleft; /*bounding box */ TPOINT botright; - TBLOB *blob; /*blob to compute on */ int i = 0; int blob_end; int 
num_blobs = count_blobs (blobs); @@ -162,7 +574,7 @@ WIDTH_RECORD *blobs_widths(TBLOB *blobs) { /*blob to compute on */ /* First width */ blob_end = botright.x; - iterate_blobs (blob, blobs->next) { + for (TBLOB* blob = blobs->next; blob != NULL; blob = blob->next) { blob_bounding_box(blob, &topleft, &botright); width_record->widths[i++] = topleft.x - blob_end; width_record->widths[i++] = botright.x - topleft.x; @@ -178,70 +590,102 @@ WIDTH_RECORD *blobs_widths(TBLOB *blobs) { /*blob to compute on */ * Return a count of the number of blobs attached to this one. **********************************************************************/ int count_blobs(TBLOB *blobs) { - TBLOB *b; int x = 0; - iterate_blobs (b, blobs) x++; - return (x); + for (TBLOB* b = blobs; b != NULL; b = b->next) + x++; + return x; } - /********************************************************************** - * delete_word + * divisible_blob * - * Reclaim the memory taken by this word structure and all of its - * lower level structures. + * Returns true if the blob contains multiple outlines than can be + * separated using divide_blobs. Sets the location to be used in the + * call to divide_blobs. **********************************************************************/ -void delete_word(TWERD *word) { - TBLOB *blob; - TBLOB *nextblob; - TESSLINE *outline; - TESSLINE *nextoutline; - TESSLINE *child; - TESSLINE *nextchild; - - for (blob = word->blobs; blob; blob = nextblob) { - nextblob = blob->next; - - for (outline = blob->outlines; outline; outline = nextoutline) { - nextoutline = outline->next; - - delete_edgepts (outline->loop); - - for (child = outline->child; child; child = nextchild) { - nextchild = child->next; - - delete_edgepts (child->loop); - - oldoutline(child); +bool divisible_blob(TBLOB *blob, bool italic_blob, TPOINT* location) { + if (blob->outlines == NULL || blob->outlines->next == NULL) + return false; // Need at least 2 outlines for it to be possible. 
+ int max_gap = 0; + TPOINT vertical = italic_blob ? kDivisibleVerticalItalic + : kDivisibleVerticalUpright; + for (TESSLINE* outline1 = blob->outlines; outline1 != NULL; + outline1 = outline1->next) { + if (outline1->is_hole) + continue; // Holes do not count as separable. + TPOINT mid_pt1 = {(outline1->topleft.x + outline1->botright.x) / 2, + (outline1->topleft.y + outline1->botright.y) / 2}; + int mid_prod1 = CROSS(mid_pt1, vertical); + int min_prod1, max_prod1; + outline1->MinMaxCrossProduct(vertical, &min_prod1, &max_prod1); + for (TESSLINE* outline2 = outline1->next; outline2 != NULL; + outline2 = outline2->next) { + if (outline2->is_hole) + continue; // Holes do not count as separable. + TPOINT mid_pt2 = { (outline2->topleft.x + outline2->botright.x) / 2, + (outline2->topleft.y + outline2->botright.y) / 2}; + int mid_prod2 = CROSS(mid_pt2, vertical); + int min_prod2, max_prod2; + outline2->MinMaxCrossProduct(vertical, &min_prod2, &max_prod2); + int mid_gap = abs(mid_prod2 - mid_prod1); + int overlap = MIN(max_prod1, max_prod2) - MAX(min_prod1, min_prod2); + if (mid_gap - overlap / 2 > max_gap) { + max_gap = mid_gap - overlap / 2; + *location = mid_pt1; + *location += mid_pt2; + *location /= 2; } - oldoutline(outline); } - oldblob(blob); } - if (word->correct != NULL) - strfree (word->correct); /* Reclaim memory */ - oldword(word); + // Use the y component of the vertical vector as an approximation to its + // length. + return max_gap > vertical.y; } - /********************************************************************** - * delete_edgepts + * divide_blobs * - * Delete a list of EDGEPT structures. + * Create two blobs by grouping the outlines in the appropriate blob. + * The outlines that are beyond the location point are moved to the + * other blob. The ones whose x location is less than that point are + * retained in the original blob. 
**********************************************************************/ -void delete_edgepts(register EDGEPT *edgepts) { - register EDGEPT *this_edge; - register EDGEPT *next_edge; - - if (edgepts == NULL) - return; - - this_edge = edgepts; - do { - next_edge = this_edge->next; - oldedgept(this_edge); - this_edge = next_edge; +void divide_blobs(TBLOB *blob, TBLOB *other_blob, bool italic_blob, + const TPOINT& location) { + TPOINT vertical = italic_blob ? kDivisibleVerticalItalic + : kDivisibleVerticalUpright; + TESSLINE *outline1 = NULL; + TESSLINE *outline2 = NULL; + + TESSLINE *outline = blob->outlines; + blob->outlines = NULL; + int location_prod = CROSS(location, vertical); + + while (outline != NULL) { + TPOINT mid_pt = {(outline->topleft.x + outline->botright.x) / 2, + (outline->topleft.y + outline->botright.y) / 2}; + int mid_prod = CROSS(mid_pt, vertical); + if (mid_prod < location_prod) { + // Outline is in left blob. + if (outline1) + outline1->next = outline; + else + blob->outlines = outline; + outline1 = outline; + } else { + // Outline is in right blob. 
+ if (outline2) + outline2->next = outline; + else + other_blob->outlines = outline; + outline2 = outline; + } + outline = outline->next; } - while (this_edge != edgepts); + + if (outline1) + outline1->next = NULL; + if (outline2) + outline2->next = NULL; } diff --git a/ccstruct/blobs.h b/ccstruct/blobs.h index 16c64b423a..c55ed56890 100644 --- a/ccstruct/blobs.h +++ b/ccstruct/blobs.h @@ -29,18 +29,225 @@ /*---------------------------------------------------------------------- I n c l u d e s ----------------------------------------------------------------------*/ -#include "vecfuncs.h" -#include "tessclas.h" +#include "rect.h" +#include "vecfuncs.h" + +class C_BLOB; +class DENORM; +class ROW; +class WERD; /*---------------------------------------------------------------------- T y p e s ----------------------------------------------------------------------*/ +#define EDGEPTFLAGS 4 /*concavity,length etc. */ + typedef struct { /* Widths of pieces */ int num_chars; int widths[1]; } WIDTH_RECORD; +struct TPOINT { + void operator+=(const TPOINT& other) { + x += other.x; + y += other.y; + } + void operator/=(int divisor) { + x /= divisor; + y /= divisor; + } + + inT16 x; // absolute x coord. + inT16 y; // absolute y coord. +}; +typedef TPOINT VECTOR; // structure for coordinates. + +struct EDGEPT { + EDGEPT() : next(NULL), prev(NULL) { + memset(flags, 0, EDGEPTFLAGS * sizeof(flags[0])); + } + EDGEPT(const EDGEPT& src) : next(NULL), prev(NULL) { + CopyFrom(src); + } + EDGEPT& operator=(const EDGEPT& src) { + CopyFrom(src); + return *this; + } + // Copies the data elements, but leaves the pointers untouched. + void CopyFrom(const EDGEPT& src) { + pos = src.pos; + vec = src.vec; + memcpy(flags, src.flags, EDGEPTFLAGS * sizeof(flags[0])); + } + // Accessors to hide or reveal a cut edge from feature extractors. 
+ void Hide() { + flags[0] = true; + } + void Reveal() { + flags[0] = false; + } + bool IsHidden() const { + return flags[0] != 0; + } + + TPOINT pos; // position + VECTOR vec; // vector to next point + // TODO(rays) Remove flags and replace with + // is_hidden, runlength, dir, and fixed. The only use + // of the flags other than is_hidden is in polyaprx.cpp. + char flags[EDGEPTFLAGS]; // concavity, length etc + EDGEPT* next; // anticlockwise element + EDGEPT* prev; // clockwise element +}; + +struct TESSLINE { + TESSLINE() : is_hole(false), loop(NULL), next(NULL) {} + TESSLINE(const TESSLINE& src) : loop(NULL), next(NULL) { + CopyFrom(src); + } + ~TESSLINE() { + Clear(); + } + TESSLINE& operator=(const TESSLINE& src) { + CopyFrom(src); + return *this; + } + // Consume the circular list of EDGEPTs to make a TESSLINE. + static TESSLINE* BuildFromOutlineList(EDGEPT* outline); + // Copies the data and the outline, but leaves next untouched. + void CopyFrom(const TESSLINE& src); + // Deletes owned data. + void Clear(); + // Rotates by the given rotation in place. + void Rotate(const FCOORD rotation); + // Moves by the given vec in place. + void Move(const ICOORD vec); + // Scales by the given factor in place. + void Scale(float factor); + // Sets up the start and vec members of the loop from the pos members. + void SetupFromPos(); + // Recomputes the bounding box from the points in the loop. + void ComputeBoundingBox(); + // Computes the min and max cross product of the outline points with the + // given vec and returns the results in min_xp and max_xp. Geometrically + // this is the left and right edge of the outline perpendicular to the + // given direction, but to get the distance units correct, you would + // have to divide by the modulus of vec. + void MinMaxCrossProduct(const TPOINT vec, int* min_xp, int* max_xp) const; + + TBOX bounding_box() const; + // Returns true if the point is contained within the outline box. 
+ bool Contains(const TPOINT& pt) { + return topleft.x <= pt.x && pt.x <= botright.x && + botright.y <= pt.y && pt.y <= topleft.y; + } + + void plot(ScrollView* window, ScrollView::Color color, + ScrollView::Color child_color); + + int BBArea() const { + return (botright.x - topleft.x) * (topleft.y - botright.y); + } + + TPOINT topleft; // Top left of loop. + TPOINT botright; // Bottom right of loop. + TPOINT start; // Start of loop. + bool is_hole; // True if this is a hole/child outline. + EDGEPT *loop; // Edgeloop. + TESSLINE *next; // Next outline in blob. +}; // Outline structure. + +struct TBLOB { + TBLOB() : outlines(NULL), next(NULL) {} + TBLOB(const TBLOB& src) : outlines(NULL), next(NULL) { + CopyFrom(src); + } + ~TBLOB() { + Clear(); + } + TBLOB& operator=(const TBLOB& src) { + CopyFrom(src); + return *this; + } + // Factory to build a TBLOB from a C_BLOB with polygonal + // approximation along the way. + static TBLOB* PolygonalCopy(C_BLOB* src); + // Copies the data and the outlines, but leaves next untouched. + void CopyFrom(const TBLOB& src); + // Deletes owned data. + void Clear(); + // Rotates by the given rotation in place. + void Rotate(const FCOORD rotation); + // Moves by the given vec in place. + void Move(const ICOORD vec); + // Scales by the given factor in place. + void Scale(float factor); + // Recomputes the bounding boxes of the outlines. + void ComputeBoundingBoxes(); + + // Returns the number of outlines. + int NumOutlines() const; + + TBOX bounding_box() const; + + void plot(ScrollView* window, ScrollView::Color color, + ScrollView::Color child_color); + + int BBArea() const { + int total_area = 0; + for (TESSLINE* outline = outlines; outline != NULL; outline = outline->next) + total_area += outline->BBArea(); + return total_area; + } + + TESSLINE *outlines; // List of outlines in blob. + TBLOB *next; // Next blob in block. +}; // Blob structure. 
+ +int count_blobs(TBLOB *blobs); + +struct TWERD { + TWERD() : blobs(NULL), latin_script(false), next(NULL) {} + TWERD(const TWERD& src) : blobs(NULL), next(NULL) { + CopyFrom(src); + } + ~TWERD() { + Clear(); + } + TWERD& operator=(const TWERD& src) { + CopyFrom(src); + return *this; + } + // Factory to build a TWERD from a (C_BLOB) WERD, with polygonal + // approximation along the way. + static TWERD* PolygonalCopy(WERD* src); + // Normalize in-place and record the normalization in the DENORM. + void Normalize(ROW* row, float x_height, bool numeric_mode, DENORM* denorm); + // Copies the data and the blobs, but leaves next untouched. + void CopyFrom(const TWERD& src); + // Deletes owned data. + void Clear(); + // Recomputes the bounding boxes of the blobs. + void ComputeBoundingBoxes(); + + // Returns the number of blobs in the word. + int NumBlobs() const { + return count_blobs(blobs); + } + TBOX bounding_box() const; + + // Merges the blobs from start to end, not including end, and deletes + // the blobs between start and end. + void MergeBlobs(int start, int end); + + void plot(ScrollView* window); + + TBLOB* blobs; // blobs in word. + bool latin_script; // This word is in a latin-based script. + TWERD* next; // next word. 
+}; + /*---------------------------------------------------------------------- M a c r o s ----------------------------------------------------------------------*/ @@ -55,13 +262,17 @@ if (w) memfree (w) /*---------------------------------------------------------------------- F u n c t i o n s ----------------------------------------------------------------------*/ +// TODO(rays) This will become a member of TBLOB when TBLOB's definition +// moves to blobs.h +TBOX TBLOB_bounding_box(const TBLOB* blob); + void blob_origin(TBLOB *blob, /*blob to compute on */ TPOINT *origin); /*return value */ /*blob to compute on */ -void blob_bounding_box(TBLOB *blob, - register TPOINT *topleft, /*bounding box */ - register TPOINT *botright); +void blob_bounding_box(const TBLOB *blob, + TPOINT *topleft, // Bounding box. + TPOINT *botright); void blobs_bounding_box(TBLOB *blobs, TPOINT *topleft, TPOINT *botright); @@ -71,49 +282,9 @@ void blobs_origin(TBLOB *blobs, /*blob to compute on */ /*blob to compute on */ WIDTH_RECORD *blobs_widths(TBLOB *blobs); -int count_blobs(TBLOB *blobs); - -void delete_word(TWERD *word); - -void delete_edgepts(register EDGEPT *edgepts); - -/* -#if defined(__STDC__) || defined(__cplusplus) -# define _ARGS(s) s -#else -# define _ARGS(s) () -#endif*/ - -/* blobs.c -void blob_origin - _ARGS((BLOB *blob, - TPOINT *origin)); - -void blob_bounding_box - _ARGS((BLOB *blob, - TPOINT *topleft, - TPOINT *botright)); - -void blobs_bounding_box - _ARGS((BLOB *blobs, - TPOINT *topleft, - TPOINT *botright)); - -void blobs_origin - _ARGS((BLOB *blobs, - TPOINT *origin)); - -WIDTH_RECORD *blobs_widths - _ARGS((BLOB *blobs)); - -int count_blobs - _ARGS((BLOB *blobs)); +bool divisible_blob(TBLOB *blob, bool italic_blob, TPOINT* location); -void delete_word - _ARGS((TWERD *word)); +void divide_blobs(TBLOB *blob, TBLOB *other_blob, bool italic_blob, + const TPOINT& location); -void delete_edgepts - _ARGS((EDGEPT *edgepts)); -#undef _ARGS -*/ #endif diff --git 
a/ccstruct/blread.h b/ccstruct/blread.h index 5500a76ffa..3969d00230 100644 --- a/ccstruct/blread.h +++ b/ccstruct/blread.h @@ -20,7 +20,7 @@ #ifndef BLREAD_H #define BLREAD_H -#include "varable.h" +#include "params.h" #include "ocrblock.h" bool read_unlv_file( //print list of sides diff --git a/ccstruct/boxword.cpp b/ccstruct/boxword.cpp new file mode 100644 index 0000000000..4ce33e4609 --- /dev/null +++ b/ccstruct/boxword.cpp @@ -0,0 +1,214 @@ +/////////////////////////////////////////////////////////////////////// +// File: boxword.h +// Description: Class to represent the bounding boxes of the output. +// Author: Ray Smith +// Created: Tue May 25 14:18:14 PDT 2010 +// +// (C) Copyright 2010, Google Inc. +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +/////////////////////////////////////////////////////////////////////// + +#include "blobs.h" +#include "boxword.h" +#include "normalis.h" +#include "ocrblock.h" +#include "pageres.h" + +namespace tesseract { + +// Clip output boxes to input blob boxes for bounds that are within this +// tolerance. Otherwise, the blob may be chopped and we have to just use +// the word bounding box. 
+const int kBoxClipTolerance = 2; + +BoxWord::BoxWord() : length_(0) { +} + +BoxWord::BoxWord(const BoxWord& src) { + CopyFrom(src); +} + +BoxWord::~BoxWord() { +} + +BoxWord& BoxWord::operator=(const BoxWord& src) { + CopyFrom(src); + return *this; +} + +void BoxWord::CopyFrom(const BoxWord& src) { + bbox_ = src.bbox_; + length_ = src.length_; + boxes_.clear(); + boxes_.reserve(length_); + for (int i = 0; i < length_; ++i) + boxes_.push_back(src.boxes_[i]); +} + +// Factory to build a BoxWord from a TWERD and the DENORM to switch +// back to original image coordinates. +// If the denorm is not NULL, then the output is denormalized and rotated +// back to the original image coordinates. +BoxWord* BoxWord::CopyFromNormalized(const DENORM* denorm, + TWERD* tessword) { + const BLOCK* block = denorm != NULL ? denorm->block() : NULL; + BoxWord* boxword = new BoxWord(); + // Count the blobs. + boxword->length_ = 0; + for (TBLOB* tblob = tessword->blobs; tblob != NULL; tblob = tblob->next) + ++boxword->length_; + // Allocate memory. + boxword->boxes_.reserve(boxword->length_); + + for (TBLOB* tblob = tessword->blobs; tblob != NULL; tblob = tblob->next) { + TBOX blob_box; + for (TESSLINE* outline = tblob->outlines; outline != NULL; + outline = outline->next) { + EDGEPT* edgept = outline->loop; + // Iterate over the edges. 
+ do { + if (!edgept->IsHidden() || !edgept->prev->IsHidden()) { + ICOORD pos(edgept->pos.x, edgept->pos.y); + if (denorm != NULL) { + FCOORD denormed(denorm->x(edgept->pos.x), + denorm->y(edgept->pos.y, edgept->pos.x)); + if (block != NULL) + denormed.rotate(block->re_rotation()); + pos.set_x(static_cast(floor(denormed.x() + 0.5))); + pos.set_y(static_cast(floor(denormed.y() + 0.5))); + } + TBOX pt_box(pos, pos); + blob_box += pt_box; + } + edgept = edgept->next; + } while (edgept != outline->loop); + } + boxword->boxes_.push_back(blob_box); + } + boxword->ComputeBoundingBox(); + return boxword; +} + +BoxWord* BoxWord::CopyFromPBLOBs(PBLOB_LIST* blobs) { + BoxWord* boxword = new BoxWord(); + // Count the blobs. + boxword->length_ = blobs->length(); + // Allocate memory. + boxword->boxes_.reserve(boxword->length_); + // Copy the boxes. + PBLOB_IT pb_it(blobs); + int i = 0; + for (pb_it.mark_cycle_pt(); !pb_it.cycled_list(); pb_it.forward(), ++i) { + boxword->boxes_.push_back(pb_it.data()->bounding_box()); + } + boxword->ComputeBoundingBox(); + return boxword; +} + +// Clean up the bounding boxes from the polygonal approximation by +// expanding slightly, then clipping to the blobs from the original_word +// that overlap. If not null, the block provides the inverse rotation. +void BoxWord::ClipToOriginalWord(const BLOCK* block, WERD* original_word) { + for (int i = 0; i < length_; ++i) { + TBOX box = boxes_[i]; + // Expand by a single pixel, as the poly approximation error is 1 pixel. + box = TBOX(box.left() - 1, box.bottom() - 1, + box.right() + 1, box.top() + 1); + // Now find the original box that matches. 
+ TBOX original_box; + C_BLOB_IT b_it(original_word->cblob_list()); + for (b_it.mark_cycle_pt(); !b_it.cycled_list(); b_it.forward()) { + TBOX blob_box = b_it.data()->bounding_box(); + if (block != NULL) + blob_box.rotate(block->re_rotation()); + if (blob_box.major_overlap(box)) { + original_box += blob_box; + } + } + if (!original_box.null_box()) { + if (NearlyEqual(original_box.left(), box.left(), kBoxClipTolerance)) + box.set_left(original_box.left()); + if (NearlyEqual(original_box.right(), box.right(), + kBoxClipTolerance)) + box.set_right(original_box.right()); + if (NearlyEqual(original_box.top(), box.top(), kBoxClipTolerance)) + box.set_top(original_box.top()); + if (NearlyEqual(original_box.bottom(), box.bottom(), + kBoxClipTolerance)) + box.set_bottom(original_box.bottom()); + } + boxes_[i] = box.intersection(original_word->bounding_box()); + } + ComputeBoundingBox(); +} + +// Merges the boxes from start to end, not including end, and deletes +// the boxes between start and end. +void BoxWord::MergeBoxes(int start, int end) { + start = ClipToRange(start, 0, length_); + end = ClipToRange(end, 0, length_); + if (end <= start + 1) + return; + for (int i = start + 1; i < end; ++i) { + boxes_[start] += boxes_[i]; + } + int shrinkage = end - 1 - start; + length_ -= shrinkage; + for (int i = start + 1; i < length_; ++i) + boxes_[i] = boxes_[i + shrinkage]; + boxes_.truncate(length_); +} + +// Inserts a new box before the given index. +// Recomputes the bounding box. +void BoxWord::InsertBox(int index, const TBOX& box) { + if (index < length_) + boxes_.insert(box, index); + else + boxes_.push_back(box); + length_ = boxes_.size(); + ComputeBoundingBox(); +} + +// Deletes the box with the given index, and shuffles up the rest. +// Recomputes the bounding box. +void BoxWord::DeleteBox(int index) { + ASSERT_HOST(0 <= index && index < length_); + boxes_.remove(index); + --length_; + ComputeBoundingBox(); +} + +// Computes the bounding box of the word. 
+void BoxWord::ComputeBoundingBox() { + bbox_ = TBOX(); + for (int i = 0; i < length_; ++i) + bbox_ += boxes_[i]; +} + +// This and other putatively are the same, so call the (permanent) callback +// for each blob index where the bounding boxes match. +// The callback is deleted on completion. +void BoxWord::ProcessMatchedBlobs(const TWERD& other, + TessCallback1* cb) const { + TBLOB* blob = other.blobs; + for (int i = 0; i < length_ && blob != NULL; ++i, blob = blob->next) { + TBOX blob_box = blob->bounding_box(); + if (blob_box == boxes_[i]) + cb->Run(i); + } + delete cb; +} + +} // namespace tesseract. + + diff --git a/ccstruct/boxword.h b/ccstruct/boxword.h new file mode 100644 index 0000000000..b8fb18ef9e --- /dev/null +++ b/ccstruct/boxword.h @@ -0,0 +1,98 @@ +/////////////////////////////////////////////////////////////////////// +// File: boxword.h +// Description: Class to represent the bounding boxes of the output. +// Author: Ray Smith +// Created: Tue May 25 14:18:14 PDT 2010 +// +// (C) Copyright 2010, Google Inc. +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +/////////////////////////////////////////////////////////////////////// + +#ifndef TESSERACT_CSTRUCT_BOXWORD_H__ +#define TESSERACT_CSTRUCT_BOXWORD_H__ + +#include "genericvector.h" +#include "rect.h" + +class BLOCK; +class DENORM; +class PBLOB_LIST; +struct TWERD; +class WERD; +class WERD_RES; + +namespace tesseract { + +// Class to hold an array of bounding boxes for an output word and +// the bounding box of the whole word. +class BoxWord { + public: + BoxWord(); + explicit BoxWord(const BoxWord& src); + ~BoxWord(); + + BoxWord& operator=(const BoxWord& src); + + void CopyFrom(const BoxWord& src); + + // Factory to build a BoxWord from a TWERD and the DENORM to switch + // back to original image coordinates. + // If the denorm is not NULL, then the output is denormalized and rotated + // back to the original image coordinates. + static BoxWord* CopyFromNormalized(const DENORM* denorm, + TWERD* tessword); + static BoxWord* CopyFromPBLOBs(PBLOB_LIST* blobs); + + // Clean up the bounding boxes from the polygonal approximation by + // expanding slightly, then clipping to the blobs from the original_word + // that overlap. If not null, the block provides the inverse rotation. + void ClipToOriginalWord(const BLOCK* block, WERD* original_word); + + // Merges the boxes from start to end, not including end, and deletes + // the boxes between start and end. + void MergeBoxes(int start, int end); + + // Inserts a new box before the given index. + // Recomputes the bounding box. + void InsertBox(int index, const TBOX& box); + + // Deletes the box with the given index, and shuffles up the rest. + // Recomputes the bounding box. + void DeleteBox(int index); + + // This and other putatively are the same, so call the (permanent) callback + // for each blob index where the bounding boxes match. + // The callback is deleted on completion. 
+ void ProcessMatchedBlobs(const TWERD& other, TessCallback1* cb) const; + + const TBOX& bounding_box() const { + return bbox_; + } + const int length() const { + return length_; + } + const TBOX& BlobBox(int index) const { + return boxes_[index]; + } + + private: + void ComputeBoundingBox(); + + TBOX bbox_; + int length_; + GenericVector boxes_; +}; + +} // namespace tesseract. + + +#endif // TESSERACT_CSTRUCT_BOXWORD_H__ diff --git a/ccstruct/callcpp.cpp b/ccstruct/callcpp.cpp index 9894bcead0..c0cc089735 100644 --- a/ccstruct/callcpp.cpp +++ b/ccstruct/callcpp.cpp @@ -18,20 +18,18 @@ **********************************************************************/ #include "mfcpch.h" -#include "errcode.h" +#include "errcode.h" #ifdef __UNIX__ -#include +#include #include #endif -#include -#include "memry.h" -#include "scrollview.h" -//#include "evnts.h" -#include "varable.h" -#include "callcpp.h" -#include "tprintf.h" -//#include "strace.h" -#include "host.h" +#include +#include "memry.h" +#include "scrollview.h" +#include "params.h" +#include "callcpp.h" +#include "tprintf.h" +#include "host.h" #include "unichar.h" // Include automatically generated configuration file if running autoconf. @@ -39,30 +37,6 @@ #include "config_auto.h" #endif -//extern "C" { - -INT_VAR (tess_cp_mapping0, 0, "Mappings for class pruner distance"); -INT_VAR (tess_cp_mapping1, 1, "Mappings for class pruner distance"); -INT_VAR (tess_cp_mapping2, 2, "Mappings for class pruner distance"); -INT_VAR (tess_cp_mapping3, 3, "Mappings for class pruner distance"); -INT_VAR (record_matcher_output, 0, "Record detailed matcher info"); -INT_VAR (il1_adaption_test, 0, "Dont adapt to i/I at beginning of word"); -double_VAR (permuter_pending_threshold, 0.0, -"Worst conf for using pending dictionary"); -//Global matcher info from the class pruner. 
-inT32 cp_maps[4]; -//Global info to control writes of matcher info -char blob_answer[UNICHAR_LEN + 1]; //correct char -char *word_answer; //correct word -inT32 bits_in_states; //no of bits in states - -void setup_cp_maps() { - cp_maps[0] = tess_cp_mapping0; - cp_maps[1] = tess_cp_mapping1; - cp_maps[2] = tess_cp_mapping2; - cp_maps[3] = tess_cp_mapping3; -} - void cprintf ( //Trace printf const char *format, ... //special message @@ -172,6 +146,3 @@ void reverse16(void *ptr) { *cptr = *(cptr + 1); *(cptr + 1) = tmp; } - - -//}; diff --git a/ccstruct/ccstruct.cpp b/ccstruct/ccstruct.cpp index a3934d9ce6..013463d55d 100644 --- a/ccstruct/ccstruct.cpp +++ b/ccstruct/ccstruct.cpp @@ -19,6 +19,15 @@ #include "ccstruct.h" namespace tesseract { + +// APPROXIMATIONS of the fractions of the character cell taken by +// the descenders, ascenders, and x-height. +const double CCStruct::kDescenderFraction = 0.25; +const double CCStruct::kXHeightFraction = 0.5; +const double CCStruct::kAscenderFraction = 0.25; +const double CCStruct::kXHeightCapRatio = CCStruct::kXHeightFraction / + (CCStruct::kXHeightFraction + CCStruct::kAscenderFraction); + CCStruct::CCStruct() : image_(this) { } diff --git a/ccstruct/ccstruct.h b/ccstruct/ccstruct.h index b143c19983..9564c39f3e 100644 --- a/ccstruct/ccstruct.h +++ b/ccstruct/ccstruct.h @@ -22,17 +22,21 @@ #include "cutil.h" #include "image.h" -class PBLOB; -class DENORM; -class WERD; -class BLOB_CHOICE_LIST; - namespace tesseract { class CCStruct : public CUtil { public: CCStruct(); ~CCStruct(); + // Globally accessible constants. + // APPROXIMATIONS of the fractions of the character cell taken by + // the descenders, ascenders, and x-height. + static const double kDescenderFraction; // = 0.25; + static const double kXHeightFraction; // = 0.5; + static const double kAscenderFraction; // = 0.25; + // Derived value giving the x-height as a fraction of cap-height. + static const double kXHeightCapRatio; // = XHeight/(XHeight + Ascender). 
+ protected: Image image_; }; @@ -40,13 +44,5 @@ class CCStruct : public CUtil { class Tesseract; } // namespace tesseract -typedef void (tesseract::Tesseract::*POLY_MATCHER) - (PBLOB *, PBLOB *, PBLOB *, WERD *, - DENORM *, BLOB_CHOICE_LIST *, const char*); -/* - typedef void (tesseract::Tesseract::*POLY_TESTER) - (const STRING&, PBLOB *, DENORM *, BOOL8, char *, - inT32, BLOB_CHOICE_LIST *); -*/ #endif // TESSERACT_CCSTRUCT_CCSTRUCT_H__ diff --git a/ccstruct/coutln.cpp b/ccstruct/coutln.cpp index 04f0af11f2..68b2460ff2 100644 --- a/ccstruct/coutln.cpp +++ b/ccstruct/coutln.cpp @@ -18,11 +18,12 @@ **********************************************************************/ #include "mfcpch.h" -#include +#include #ifdef __UNIX__ -#include +#include #endif -#include "coutln.h" +#include "coutln.h" +#include "allheaders.h" // Include automatically generated configuration file if running autoconf. #ifdef HAVE_CONFIG_H @@ -620,6 +621,23 @@ void C_OUTLINE::RemoveSmallRecursive(int min_size, C_OUTLINE_IT* it) { } } +// Renders the outline to the given pix, with left and top being +// the coords of the upper-left corner of the pix. 
+void C_OUTLINE::render(int left, int top, Pix* pix) { + ICOORD pos = start; + for (int stepindex = 0; stepindex < stepcount; ++stepindex) { + ICOORD next_step = step(stepindex); + if (next_step.y() < 0) { + pixRasterop(pix, 0, top - pos.y(), pos.x() - left, 1, + PIX_NOT(PIX_DST), NULL, 0, 0); + } else if (next_step.y() > 0) { + pixRasterop(pix, 0, top - pos.y() - 1, pos.x() - left, 1, + PIX_NOT(PIX_DST), NULL, 0, 0); + } + pos += next_step; + } +} + /********************************************************************** * C_OUTLINE::plot * @@ -628,15 +646,14 @@ void C_OUTLINE::RemoveSmallRecursive(int min_size, C_OUTLINE_IT* it) { #ifndef GRAPHICS_DISABLED void C_OUTLINE::plot( //draw it - ScrollView* window, //window to draw in - ScrollView::Color colour //colour to draw in + ScrollView* window, // window to draw in + ScrollView::Color colour // colour to draw in ) const { - inT16 stepindex; //index to cstep - ICOORD pos; //current position - DIR128 stepdir; //direction of step - DIR128 oldstepdir; //previous stepdir + inT16 stepindex; // index to cstep + ICOORD pos; // current position + DIR128 stepdir; // direction of step - pos = start; //current position + pos = start; // current position window->Pen(colour); if (stepcount == 0) { window->Rectangle(box.left(), box.top(), box.right(), box.bottom()); @@ -645,19 +662,17 @@ void C_OUTLINE::plot( //draw it window->SetCursor(pos.x(), pos.y()); stepindex = 0; - stepdir = step_dir (0); //get direction while (stepindex < stepcount) { - do { - pos += step (stepindex); //step to next - stepindex++; //count steps - oldstepdir = stepdir; - //new direction - stepdir = step_dir (stepindex); + pos += step(stepindex); // step to next + stepdir = step_dir(stepindex); + stepindex++; // count steps + // merge straight lines + while (stepindex < stepcount && + stepdir.get_dir() == step_dir(stepindex).get_dir()) { + pos += step(stepindex); + stepindex++; } - while (stepindex < stepcount - && oldstepdir.get_dir () == 
stepdir.get_dir ()); - //merge straight lines - window->DrawTo(pos.x(), pos.y()); + window->DrawTo(pos.x(), pos.y()); } } #endif diff --git a/ccstruct/coutln.h b/ccstruct/coutln.h index 1174e6ae0a..4aa46ebb72 100644 --- a/ccstruct/coutln.h +++ b/ccstruct/coutln.h @@ -38,6 +38,7 @@ enum C_OUTLINE_FLAGS }; class DLLSYM C_OUTLINE; //forward declaration +struct Pix; ELISTIZEH_S (C_OUTLINE) class DLLSYM C_OUTLINE:public ELIST_LINK @@ -149,6 +150,10 @@ class DLLSYM C_OUTLINE:public ELIST_LINK // then this is extracted from *it, so an iteration can continue. void RemoveSmallRecursive(int min_size, C_OUTLINE_IT* it); + // Renders the outline to the given pix, with left and top being + // the coords of the upper-left corner of the pix. + void render(int left, int top, Pix* pix); + void plot( //draw one ScrollView* window, //window to draw in ScrollView::Color colour) const; //colour to draw it diff --git a/ccstruct/detlinefit.cpp b/ccstruct/detlinefit.cpp index 2fed662209..1bd6533563 100644 --- a/ccstruct/detlinefit.cpp +++ b/ccstruct/detlinefit.cpp @@ -103,6 +103,104 @@ double DetLineFit::Fit(ICOORD* pt1, ICOORD* pt2) { return best_uq > 0.0 ? sqrt(best_uq) : best_uq; } +// Backwards compatible fit returning a gradient and constant. +// Deprecated. Prefer Fit(ICOORD*, ICOORD*) where possible, but use this +// function in preference to the LMS class. +double DetLineFit::Fit(float* m, float* c) { + ICOORD start, end; + double error = Fit(&start, &end); + if (end.x() != start.x()) { + *m = static_cast(end.y() - start.y()) / (end.x() - start.x()); + *c = start.y() - *m * start.x(); + } else { + *m = 0.0f; + *c = 0.0f; + } + return error; +} + +// Helper function to compute a fictitious end point that is on a line +// of a given gradient through the given start. +ICOORD ComputeEndFromGradient(const ICOORD& start, double m) { + if (m > 1.0 || m < -1.0) { + // dy dominates. 
Force it to have the opposite sign of start.y() and + // compute dx based on dy being as large as possible + int dx = static_cast(floor(MAX_INT16 / m)); + if (dx < 0) ++dx; // Truncate towards 0. + if (start.y() > 0) dx = - dx; // Force dy to be opposite to start.y(). + // Constrain dx so the result fits in an inT16. + while (start.x() + dx > MAX_INT16 || start.x() + dx < -MAX_INT16) + dx /= 2; + if (-1 <= dx && dx <= 1) { + return ICOORD(start.x(), start.y() + 1); // Too steep for anything else. + } + int y = start.y() + static_cast(floor(dx * m + 0.5)); + ASSERT_HOST(-MAX_INT16 <= y && y <= MAX_INT16); + return ICOORD(start.x() + dx, y); + } else { + // dx dominates. Force it to have the opposite sign of start.x() and + // compute dy based on dx being as large as possible. + int dy = static_cast(floor(MAX_INT16 * m)); + if (dy < 0) ++dy; // Truncate towards 0. + if (start.x() > 0) dy = - dy; // Force dx to be opposite to start.x(). + // Constrain dy so the result fits in an inT16. + while (start.y() + dy > MAX_INT16 || start.y() + dy < -MAX_INT16) + dy /= 2; + if (-1 <= dy && dy <= 1) { + return ICOORD(start.x() + 1, start.y()); // Too flat for anything else. + } + int x = start.x() + static_cast(floor(dy / m + 0.5)); + ASSERT_HOST(-MAX_INT16 <= x && x <= MAX_INT16); + return ICOORD(x, start.y() + dy); + } +} + +// Backwards compatible constrained fit with a supplied gradient. +double DetLineFit::ConstrainedFit(double m, float* c) { + ICOORDELT_IT it(&pt_list_); + // Do something sensible with no points. + if (pt_list_.empty()) { + *c = 0.0f; + return 0.0; + } + // Count the points and find the first and last kNumEndPoints. + // Put the ends in a single array to make their use easier later. 
+ ICOORD* pts[kNumEndPoints * 2]; + int pt_count = 0; + for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { + if (pt_count < kNumEndPoints) { + pts[pt_count] = it.data(); + pts[kNumEndPoints + pt_count] = pts[pt_count]; + } else { + for (int i = 1; i < kNumEndPoints; ++i) + pts[kNumEndPoints + i - 1] = pts[kNumEndPoints + i]; + pts[kNumEndPoints * 2 - 1] = it.data(); + } + ++pt_count; + } + while (pt_count < kNumEndPoints) { + pts[pt_count] = NULL; + pts[kNumEndPoints + pt_count++] = NULL; + } + int* distances = new int[pt_count]; + double best_uq = -1.0; + // Iterate each pair of points and find the best fitting line. + for (int i = 0; i < kNumEndPoints * 2; ++i) { + ICOORD* start = pts[i]; + if (start == NULL) continue; + ICOORD end = ComputeEndFromGradient(*start, m); + // Compute the upper quartile error from the line. + double dist = ComputeErrors(*start, end, distances); + if (dist < best_uq || best_uq < 0.0) { + best_uq = dist; + *c = start->y() - start->x() * m; + } + } + delete [] distances; + // Finally compute the square root to return the true distance. + return best_uq > 0.0 ? sqrt(best_uq) : best_uq; +} + // Comparator function used by the nth_item funtion. static int CompareInts(const void *p1, const void *p2) { const int* i1 = reinterpret_cast(p1); diff --git a/ccstruct/detlinefit.h b/ccstruct/detlinefit.h index 6a2279b4c4..9f43098ca5 100644 --- a/ccstruct/detlinefit.h +++ b/ccstruct/detlinefit.h @@ -67,6 +67,14 @@ class DetLineFit { // points, and the upper quartile error. double Fit(ICOORD* pt1, ICOORD* pt2); + // Backwards compatible fit returning a gradient and constant. + // Deprecated. Prefer Fit(ICOORD*, ICOORD*) where possible, but use this + // function in preference to the LMS class. + double Fit(float* m, float* c); + + // Backwards compatible constrained fit with a supplied gradient. 
+ double ConstrainedFit(double m, float* c); + private: double ComputeErrors(const ICOORD start, const ICOORD end, int* distances); diff --git a/ccstruct/dppoint.cpp b/ccstruct/dppoint.cpp new file mode 100644 index 0000000000..7325c9cb1e --- /dev/null +++ b/ccstruct/dppoint.cpp @@ -0,0 +1,98 @@ +/********************************************************************** + * File: dppoint.cpp + * Description: Simple generic dynamic programming class. + * Author: Ray Smith + * Created: Wed Mar 25 19:08:01 PDT 2009 + * + * (C) Copyright 2009, Google Inc. + ** Licensed under the Apache License, Version 2.0 (the "License"); + ** you may not use this file except in compliance with the License. + ** You may obtain a copy of the License at + ** http://www.apache.org/licenses/LICENSE-2.0 + ** Unless required by applicable law or agreed to in writing, software + ** distributed under the License is distributed on an "AS IS" BASIS, + ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + ** See the License for the specific language governing permissions and + ** limitations under the License. + * + **********************************************************************/ + +#include "dppoint.h" +#include "tprintf.h" + +namespace tesseract { + +// Solve the dynamic programming problem for the given array of points, with +// the given size and cost function. +// Steps backwards are limited to being between min_step and max_step +// inclusive. +// The return value is the tail of the best path. +DPPoint* DPPoint::Solve(int min_step, int max_step, bool debug, + CostFunc cost_func, int size, DPPoint* points) { + if (size <= 0 || max_step < min_step || min_step >= size) + return NULL; // Degenerate, but not necessarily an error. + ASSERT_HOST(min_step > 0); // Infinite loop possible if this is not true. + if (debug) + tprintf("min = %d, max=%d\n", + min_step, max_step); + // Evaluate the total cost at each point. 
+ for (int i = 0; i < size; ++i) { + for (int offset = min_step; offset <= max_step; ++offset) { + DPPoint* prev = offset <= i ? points + i - offset : NULL; + inT64 new_cost = (points[i].*cost_func)(prev); + if (points[i].best_prev_ != NULL && offset > min_step * 2 && + new_cost > points[i].total_cost_) + break; // Find only the first minimum if going over twice the min. + } + points[i].total_cost_ += points[i].local_cost_; + if (debug) { + tprintf("At point %d, local cost=%d, total_cost=%d, steps=%d\n", + i, points[i].local_cost_, points[i].total_cost_, + points[i].total_steps_); + } + } + // Now find the end of the best path and return it. + int best_cost = points[size - 1].total_cost_; + int best_end = size - 1; + for (int end = best_end - 1; end >= size - min_step; --end) { + int cost = points[end].total_cost_; + if (cost < best_cost) { + best_cost = cost; + best_end = end; + } + } + return points + best_end; +} + +// A CostFunc that takes the variance of step into account in the cost. +inT64 DPPoint::CostWithVariance(const DPPoint* prev) { + if (prev == NULL || prev == this) { + UpdateIfBetter(0, 1, NULL, 0, 0, 0); + return 0; + } + + int delta = this - prev; + inT32 n = prev->n_ + 1; + inT32 sig_x = prev->sig_x_ + delta; + inT64 sig_xsq = prev->sig_xsq_ + delta * delta; + inT64 cost = (sig_xsq - sig_x * sig_x / n) / n; + cost += prev->total_cost_; + UpdateIfBetter(cost, prev->total_steps_ + 1, prev, n, sig_x, sig_xsq); + return cost; +} + +// Update the other members if the cost is lower. +void DPPoint::UpdateIfBetter(inT64 cost, inT32 steps, const DPPoint* prev, + inT32 n, inT32 sig_x, inT64 sig_xsq) { + if (cost < total_cost_) { + total_cost_ = cost; + total_steps_ = steps; + best_prev_ = prev; + n_ = n; + sig_x_ = sig_x; + sig_xsq_ = sig_xsq; + } +} + +} // namespace tesseract. 
+ diff --git a/ccstruct/dppoint.h b/ccstruct/dppoint.h new file mode 100644 index 0000000000..fd87bb9127 --- /dev/null +++ b/ccstruct/dppoint.h @@ -0,0 +1,102 @@ +/********************************************************************** + * File: dppoint.h + * Description: Simple generic dynamic programming class. + * Author: Ray Smith + * Created: Wed Mar 25 18:57:01 PDT 2009 + * + * (C) Copyright 2009, Google Inc. + ** Licensed under the Apache License, Version 2.0 (the "License"); + ** you may not use this file except in compliance with the License. + ** You may obtain a copy of the License at + ** http://www.apache.org/licenses/LICENSE-2.0 + ** Unless required by applicable law or agreed to in writing, software + ** distributed under the License is distributed on an "AS IS" BASIS, + ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + ** See the License for the specific language governing permissions and + ** limitations under the License. + * + **********************************************************************/ + +#ifndef TESSERACT_CCSTRUCT_DPPOINT_H__ +#define TESSERACT_CCSTRUCT_DPPOINT_H__ + +#include "host.h" + +namespace tesseract { + +// A simple class to provide a dynamic programming solution to a class of +// 1st-order problems in which the cost is dependent only on the current +// step and the best cost to that step, with a possible special case +// of using the variance of the steps, and only the top choice is required. +// Useful for problems such as finding the optimal cut points in a fixed-pitch +// (vertical or horizontal) situation. 
+// Skeletal Example: +// DPPoint* array = new DPPoint[width]; +// for (int i = 0; i < width; i++) { +// array[i].AddLocalCost(cost_at_i) +// } +// DPPoint* best_end = DPPoint::Solve(..., array); +// while (best_end != NULL) { +// int cut_index = best_end - array; +// best_end = best_end->best_prev(); +// } +// delete [] array; +class DPPoint { + public: + // The cost function evaluates the total cost at this (excluding this's + // local_cost) and if it beats this's total_cost, then + // replace the appropriate values in this. + typedef inT64 (DPPoint::*CostFunc)(const DPPoint* prev); + + DPPoint() + : local_cost_(0), total_cost_(MAX_INT32), total_steps_(1), best_prev_(NULL), + n_(0), sig_x_(0), sig_xsq_(0) { + } + + // Solve the dynamic programming problem for the given array of points, with + // the given size and cost function. + // Steps backwards are limited to being between min_step and max_step + // inclusive. + // The return value is the tail of the best path. + static DPPoint* Solve(int min_step, int max_step, bool debug, + CostFunc cost_func, int size, DPPoint* points); + + // A CostFunc that takes the variance of step into account in the cost. + inT64 CostWithVariance(const DPPoint* prev); + + // Accessors. + int total_cost() const { + return total_cost_; + } + int Pathlength() const { + return total_steps_; + } + const DPPoint* best_prev() const { + return best_prev_; + } + void AddLocalCost(int new_cost) { + local_cost_ += new_cost; + } + + private: + // Code common to different cost functions. + + // Update the other members if the cost is lower. + void UpdateIfBetter(inT64 cost, inT32 steps, const DPPoint* prev, + inT32 n, inT32 sig_x, inT64 sig_xsq); + + inT32 local_cost_; // Cost of this point on its own. + inT32 total_cost_; // Sum of all costs in best path to here. + // During cost calculations local_cost is excluded. + inT32 total_steps_; // Number of steps in best path to here. 
+ const DPPoint* best_prev_; // Pointer to prev point in best path from here. + // Information for computing the variance part of the cost. + inT32 n_; // Number of steps in best path to here for variance. + inT32 sig_x_; // Sum of step sizes for computing variance. + inT64 sig_xsq_; // Sum of squares of steps for computing variance. +}; + +} // namespace tesseract. + +#endif // TESSERACT_CCSTRUCT_DPPOINT_H__ + diff --git a/ccstruct/labls.cpp b/ccstruct/labls.cpp deleted file mode 100644 index af01c30c3c..0000000000 --- a/ccstruct/labls.cpp +++ /dev/null @@ -1,193 +0,0 @@ -/********************************************************************** - * File: labls.c (Formerly labels.c) - * Description: Attribute definition tables - * Author: Sheelagh Lloyd? - * Created: - * - * (C) Copyright 1993, Hewlett-Packard Ltd. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. - * - **********************************************************************/ - -// Include automatically generated configuration file if running autoconf. 
-#ifdef HAVE_CONFIG_H -#include "config_auto.h" -#endif - -#include "mfcpch.h" -#include "hpdsizes.h" -#include "labls.h" - -/****************************************************************************** - * TEXT REGIONS - *****************************************************************************/ -DLLSYM inT32 tn[NUM_TEXT_ATTR] = { - 3, //T_HORIZONTAL - 4, //T_TEXT - 2, //T_SERIF - 2, //T_PROPORTIONAL - 2, //T_NORMAL - 2, //T_UPRIGHT - 2, //T_SOLID - 3, //T_BLACK - 2, //T_NOTUNDER - 2, //T_NOTDROP -}; - -DLLSYM char tlabel[NUM_TEXT_ATTR][4][MAXLENGTH] = { { - //T_HORIZONTAL - "Horizontal", - "Vertical", - "Skew", - "" - }, - { //T_TEXT - "Text", - "Table", - "Form", - "Mixed" - }, - { //T_SERIF - "Serif", - "Sans-serif", - "", - "" - }, - { //T_PROPORTIONAL - "Proportional", - "Fixed pitch", - "", - "" - }, - { //T_NORMAL - "Normal", - "Bold", - "", - "" - }, - { //T_UPRIGHT - "Upright", - "Italic", - "", - "" - }, - { //T_SOLID - "Solid", - "Outline", - "", - "" - }, - { //T_BLACK - "Black", - "White", - "Coloured", - "" - }, - { //T_NOTUNDER - "Not underlined", - "Underlined", - "", - "" - }, - { //T_NOTDROP - "Not drop caps", - "Drop Caps", - "", - "" - } -}; - -DLLSYM inT32 bn[NUM_BLOCK_ATTR] = { - 4, //G_MONOCHROME - 2, //I_MONOCHROME - 2, //I_SMOOTH - 3, //R_SINGLE - 3, //R_BLACK - 3, //S_BLACK - 2 //W_TEXT -}; - -DLLSYM inT32 tvar[NUM_TEXT_ATTR]; -DLLSYM inT32 bvar[NUM_BLOCK_ATTR]; -DLLSYM char blabel[NUM_BLOCK_ATTR][4][MAXLENGTH] = { { - //G_MONOCHROME - - /**************************************************************************** - * GRAPHICS - ***************************************************************************/ - "Monochrome ", - "Two colour ", - "Spot colour", - "Multicolour" - }, - - /**************************************************************************** - * IMAGE - ***************************************************************************/ - { //I_MONOCHROME - "Monochrome ", - "Colour ", - "", - "" - }, - { //I_SMOOTH - "Smooth ", 
- "Grainy ", - "", - "" - }, - - /**************************************************************************** - * RULES - ***************************************************************************/ - { //R_SINGLE - "Single ", - "Double ", - "Multiple", - "" - }, - { //R_BLACK - "Black ", - "White ", - "Coloured", - "" - }, - - /**************************************************************************** - * SCRIBBLE - ***************************************************************************/ - { //S_BLACK - "Black ", - "White ", - "Coloured", - "" - }, - /**************************************************************************** - * WEIRD - ***************************************************************************/ - { //W_TEXT - "No text ", - "Contains text", - "", - "" - } -}; - -DLLSYM char backlabel[NUM_BACKGROUNDS][MAXLENGTH] = { - "White", //B_WHITE - "Black", //B_BLACK - "Coloured", //B_COLOURED - "Textured", //B_TEXTURED - "Patterned", //B_PATTERNED - "Gradient fill", //B_GRADIENTFILL - "Image", //B_IMAGE - "Text" //B_TEXT -}; diff --git a/ccstruct/labls.h b/ccstruct/labls.h deleted file mode 100644 index ece7190a45..0000000000 --- a/ccstruct/labls.h +++ /dev/null @@ -1,38 +0,0 @@ -/********************************************************************** - * File: labls.h (Formerly labels.h) - * Description: Attribute definition tables - * Author: Sheelagh Lloyd? - * Created: - * - * (C) Copyright 1993, Hewlett-Packard Ltd. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- ** See the License for the specific language governing permissions and - ** limitations under the License. - * - **********************************************************************/ -#ifndef LABLS_H -#define LABLS_H - -#include "host.h" -#include "hpdsizes.h" - -#include "hpddef.h" //must be last (handpd.dll) - -extern DLLSYM inT32 tn[NUM_TEXT_ATTR]; - -extern DLLSYM char tlabel[NUM_TEXT_ATTR][4][MAXLENGTH]; - -extern DLLSYM inT32 bn[NUM_BLOCK_ATTR]; - -extern DLLSYM inT32 tvar[NUM_TEXT_ATTR]; -extern DLLSYM inT32 bvar[NUM_BLOCK_ATTR]; -extern DLLSYM char blabel[NUM_BLOCK_ATTR][4][MAXLENGTH]; - -extern DLLSYM char backlabel[NUM_BACKGROUNDS][MAXLENGTH]; -#endif diff --git a/ccstruct/linlsq.cpp b/ccstruct/linlsq.cpp index 1f7bbd0c37..ad8b64e371 100644 --- a/ccstruct/linlsq.cpp +++ b/ccstruct/linlsq.cpp @@ -31,9 +31,8 @@ const ERRCODE EMPTY_LLSQ = "Can't delete from an empty LLSQ"; #define EXTERN -EXTERN double_VAR (pdlsq_posdir_ratio, 4e-6, "Mult of dir to cf pos"); -EXTERN double_VAR (pdlsq_threshold_angleavg, 0.1666666, -"Frac of pi for simple fit"); +const double kPdlsqPosdirRatio = 4e-6f; // Mult of dir to cf pos +const double kPdlsqThresholdAngleAvg = 0.166666f; // Frac of pi for simple fit /********************************************************************** * LLSQ::clear @@ -192,11 +191,11 @@ float PDLSQ::fit( //get fit if (pos.n > 0) { a = pos.sigxy - pos.sigx * pos.sigy / pos.n - + pdlsq_posdir_ratio * dir.sigxy; + + kPdlsqPosdirRatio * dir.sigxy; b = pos.sigxx - pos.sigyy + (pos.sigy * pos.sigy - pos.sigx * pos.sigx) / pos.n + - pdlsq_posdir_ratio * (dir.sigxx - dir.sigyy); + kPdlsqPosdirRatio * (dir.sigxx - dir.sigyy); if (dir.sigy != 0 || dir.sigx != 0) avg_angle = atan2 (dir.sigy, dir.sigx); else @@ -214,8 +213,8 @@ float PDLSQ::fit( //get fit error += M_PI; angle -= M_PI; } - if (error > M_PI * pdlsq_threshold_angleavg - || error < -M_PI * pdlsq_threshold_angleavg) + if (error > M_PI * kPdlsqThresholdAngleAvg || + error < -M_PI * 
kPdlsqThresholdAngleAvg) angle = avg_angle; //go simple //convert direction ang = (inT16) (angle * MODULUS / (2 * M_PI)); @@ -227,7 +226,7 @@ float PDLSQ::fit( //get fit // a,b,angle,r); error = dir.sigxx * sinx * sinx + dir.sigyy * cosx * cosx - 2 * dir.sigxy * sinx * cosx; - error *= pdlsq_posdir_ratio; + error *= kPdlsqPosdirRatio; error += sinx * sinx * pos.sigxx + cosx * cosx * pos.sigyy - 2 * sinx * cosx * pos.sigxy - 2 * r * (sinx * pos.sigx - cosx * pos.sigy) + r * r * pos.n; diff --git a/ccstruct/linlsq.h b/ccstruct/linlsq.h index 13a5db6937..ffe6900ac1 100644 --- a/ccstruct/linlsq.h +++ b/ccstruct/linlsq.h @@ -22,7 +22,7 @@ #include "points.h" #include "mod128.h" -#include "varable.h" +#include "params.h" class LLSQ { diff --git a/ccstruct/lmedsq.cpp b/ccstruct/lmedsq.cpp deleted file mode 100644 index 544a57ea80..0000000000 --- a/ccstruct/lmedsq.cpp +++ /dev/null @@ -1,458 +0,0 @@ -/********************************************************************** - * File: lmedsq.cpp (Formerly lms.c) - * Description: Code for the LMS class. - * Author: Ray Smith - * Created: Fri Aug 7 09:30:53 BST 1992 - * - * (C) Copyright 1992, Hewlett-Packard Ltd. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. 
- * - **********************************************************************/ - -#include "mfcpch.h" -#include -#include "statistc.h" -#include "memry.h" -#include "statistc.h" -#include "lmedsq.h" - -// Include automatically generated configuration file if running autoconf. -#ifdef HAVE_CONFIG_H -#include "config_auto.h" -#endif - -#define EXTERN - -EXTERN INT_VAR (lms_line_trials, 12, "Number of linew fits to do"); -#define SEED1 0x1234 //default seeds -#define SEED2 0x5678 -#define SEED3 0x9abc -#define LMS_MAX_FAILURES 3 - -#ifndef __UNIX__ -uinT32 nrand48( //get random number - uinT16 *seeds //seeds to use - ) { - static uinT32 seed = 0; //only seed - - if (seed == 0) { - seed = seeds[0] ^ (seeds[1] << 8) ^ (seeds[2] << 16); - srand(seed); - } - //make 32 bit one - return rand () | (rand () << 16); -} -#endif - -/********************************************************************** - * LMS::LMS - * - * Construct a LMS class, given the max no of samples to be given - **********************************************************************/ - -LMS::LMS ( //constructor -inT32 size //samplesize -):samplesize (size) { - samplecount = 0; - a = 0; - m = 0.0f; - c = 0.0f; - samples = (FCOORD *) alloc_mem (size * sizeof (FCOORD)); - errors = (float *) alloc_mem (size * sizeof (float)); - line_error = 0.0f; - fitted = FALSE; -} - - -/********************************************************************** - * LMS::~LMS - * - * Destruct a LMS class. - **********************************************************************/ - -LMS::~LMS ( //constructor -) { - free_mem(samples); - free_mem(errors); -} - - -/********************************************************************** - * LMS::clear - * - * Clear samples from array. 
- **********************************************************************/ - -void LMS::clear() { //clear sample - samplecount = 0; - fitted = FALSE; -} - - -/********************************************************************** - * LMS::add - * - * Add another sample. More than the constructed number will be ignored. - **********************************************************************/ - -void LMS::add( //add sample - FCOORD sample //sample coords - ) { - if (samplecount < samplesize) - //save it - samples[samplecount++] = sample; - fitted = FALSE; -} - - -/********************************************************************** - * LMS::fit - * - * Fit a line to the given sample points. - **********************************************************************/ - -void LMS::fit( //fit sample - float &out_m, //output line - float &out_c) { - inT32 index; //of median - inT32 trials; //no of medians - float test_m, test_c; //candidate line - float test_error; //error of test line - - switch (samplecount) { - case 0: - m = 0.0f; //no info - c = 0.0f; - line_error = 0.0f; - break; - - case 1: - m = 0.0f; - c = samples[0].y (); //horiz thru pt - line_error = 0.0f; - break; - - case 2: - if (samples[0].x () != samples[1].x ()) { - m = (samples[1].y () - samples[0].y ()) - / (samples[1].x () - samples[0].x ()); - c = samples[0].y () - m * samples[0].x (); - } - else { - m = 0.0f; - c = (samples[0].y () + samples[1].y ()) / 2; - } - line_error = 0.0f; - break; - - default: - pick_line(m, c); //use pts at random - compute_errors(m, c); //from given line - index = choose_nth_item (samplecount / 2, errors, samplecount); - line_error = errors[index]; - for (trials = 1; trials < lms_line_trials; trials++) { - //random again - pick_line(test_m, test_c); - compute_errors(test_m, test_c); - index = choose_nth_item (samplecount / 2, errors, samplecount); - test_error = errors[index]; - if (test_error < line_error) { - //find least median - line_error = test_error; - m = test_m; - 
c = test_c; - } - } - } - fitted = TRUE; - out_m = m; - out_c = c; - a = 0; -} - - -/********************************************************************** - * LMS::fit_quadratic - * - * Fit a quadratic to the given sample points. - **********************************************************************/ - -void LMS::fit_quadratic( //fit sample - float outlier_threshold, //min outlier size - double &out_a, //x squared - float &out_b, //output line - float &out_c) { - inT32 trials; //no of medians - double test_a; - float test_b, test_c; //candidate line - float test_error; //error of test line - - if (samplecount < 3) { - out_a = 0; - fit(out_b, out_c); - return; - } - pick_quadratic(a, m, c); - line_error = compute_quadratic_errors (outlier_threshold, a, m, c); - for (trials = 1; trials < lms_line_trials * 2; trials++) { - pick_quadratic(test_a, test_b, test_c); - test_error = compute_quadratic_errors (outlier_threshold, - test_a, test_b, test_c); - if (test_error < line_error) { - line_error = test_error; //find least median - a = test_a; - m = test_b; - c = test_c; - } - } - fitted = TRUE; - out_a = a; - out_b = m; - out_c = c; -} - - -/********************************************************************** - * LMS::constrained_fit - * - * Fit a line to the given sample points. - * The line must have the given gradient. 
- **********************************************************************/ - -void LMS::constrained_fit( //fit sample - float fixed_m, //forced gradient - float &out_c) { - inT32 index; //of median - inT32 trials; //no of medians - float test_c; //candidate line - static uinT16 seeds[3] = { SEED1, SEED2, SEED3 }; - //for nrand - float test_error; //error of test line - - m = fixed_m; - switch (samplecount) { - case 0: - c = 0.0f; - line_error = 0.0f; - break; - - case 1: - //horiz thru pt - c = samples[0].y () - m * samples[0].x (); - line_error = 0.0f; - break; - - case 2: - c = (samples[0].y () + samples[1].y () - - m * (samples[0].x () + samples[1].x ())) / 2; - line_error = m * samples[0].x () + c - samples[0].y (); - line_error *= line_error; - break; - - default: - index = (inT32) nrand48 (seeds) % samplecount; - //compute line - c = samples[index].y () - m * samples[index].x (); - compute_errors(m, c); //from given line - index = choose_nth_item (samplecount / 2, errors, samplecount); - line_error = errors[index]; - for (trials = 1; trials < lms_line_trials; trials++) { - index = (inT32) nrand48 (seeds) % samplecount; - test_c = samples[index].y () - m * samples[index].x (); - //compute line - compute_errors(m, test_c); - index = choose_nth_item (samplecount / 2, errors, samplecount); - test_error = errors[index]; - if (test_error < line_error) { - //find least median - line_error = test_error; - c = test_c; - } - } - } - fitted = TRUE; - out_c = c; - a = 0; -} - - -/********************************************************************** - * LMS::pick_line - * - * Fit a line to a random pair of sample points. 
- **********************************************************************/ - -void LMS::pick_line( //fit sample - float &line_m, //output gradient - float &line_c) { - inT16 trial_count; //no of attempts - static uinT16 seeds[3] = { SEED1, SEED2, SEED3 }; - //for nrand - inT32 index1; //picked point - inT32 index2; //picked point - - trial_count = 0; - do { - index1 = (inT32) nrand48 (seeds) % samplecount; - index2 = (inT32) nrand48 (seeds) % samplecount; - line_m = samples[index2].x () - samples[index1].x (); - trial_count++; - } - while (line_m == 0 && trial_count < LMS_MAX_FAILURES); - if (line_m == 0) { - line_c = (samples[index2].y () + samples[index1].y ()) / 2; - } - else { - line_m = (samples[index2].y () - samples[index1].y ()) / line_m; - line_c = samples[index1].y () - samples[index1].x () * line_m; - } -} - - -/********************************************************************** - * LMS::pick_quadratic - * - * Fit a quadratic to a random triplet of sample points. - **********************************************************************/ - -void LMS::pick_quadratic( //fit sample - double &line_a, //x suaread - float &line_m, //output gradient - float &line_c) { - inT16 trial_count; //no of attempts - static uinT16 seeds[3] = { SEED1, SEED2, SEED3 }; - //for nrand - inT32 index1; //picked point - inT32 index2; //picked point - inT32 index3; - FCOORD x1x2; //vector - FCOORD x1x3; - FCOORD x3x2; - double bottom; //of a - - trial_count = 0; - do { - if (trial_count >= LMS_MAX_FAILURES - 1) { - index1 = 0; - index2 = samplecount / 2; - index3 = samplecount - 1; - } - else { - index1 = (inT32) nrand48 (seeds) % samplecount; - index2 = (inT32) nrand48 (seeds) % samplecount; - index3 = (inT32) nrand48 (seeds) % samplecount; - } - x1x2 = samples[index2] - samples[index1]; - x1x3 = samples[index3] - samples[index1]; - x3x2 = samples[index2] - samples[index3]; - bottom = x1x2.x () * x1x3.x () * x3x2.x (); - trial_count++; - } - while (bottom == 0 && trial_count < 
LMS_MAX_FAILURES); - if (bottom == 0) { - line_a = 0; - pick_line(line_m, line_c); - } - else { - line_a = x1x3 * x1x2 / bottom; - line_m = x1x2.y () - line_a * x1x2.x () - * (samples[index2].x () + samples[index1].x ()); - line_m /= x1x2.x (); - line_c = samples[index1].y () - samples[index1].x () - * (samples[index1].x () * line_a + line_m); - } -} - - -/********************************************************************** - * LMS::compute_errors - * - * Compute the squared error from all the points. - **********************************************************************/ - -void LMS::compute_errors( //fit sample - float line_m, //input gradient - float line_c) { - inT32 index; //picked point - - for (index = 0; index < samplecount; index++) { - errors[index] = - line_m * samples[index].x () + line_c - samples[index].y (); - errors[index] *= errors[index]; - } -} - - -/********************************************************************** - * LMS::compute_quadratic_errors - * - * Compute the squared error from all the points. 
- **********************************************************************/ - -float LMS::compute_quadratic_errors( //fit sample - float outlier_threshold, //min outlier - double line_a, - float line_m, //input gradient - float line_c) { - inT32 outlier_count; //total outliers - inT32 index; //picked point - inT32 error_count; //no in total - double total_error; //summed squares - - total_error = 0; - outlier_count = 0; - error_count = 0; - for (index = 0; index < samplecount; index++) { - errors[error_count] = line_c + samples[index].x () - * (line_m + samples[index].x () * line_a) - samples[index].y (); - errors[error_count] *= errors[error_count]; - if (errors[error_count] > outlier_threshold) { - outlier_count++; - errors[samplecount - outlier_count] = errors[error_count]; - } - else { - total_error += errors[error_count++]; - } - } - if (outlier_count * 3 < error_count) - return total_error / error_count; - else { - index = choose_nth_item (outlier_count / 2, - errors + samplecount - outlier_count, - outlier_count); - //median outlier - return errors[samplecount - outlier_count + index]; - } -} - - -/********************************************************************** - * LMS::plot - * - * Plot the fitted line of a LMS. 
- **********************************************************************/ - -#ifndef GRAPHICS_DISABLED -void LMS::plot( //plot fit - ScrollView* win, //window - ScrollView::Color colour //colour to draw in - ) { - if (fitted) { - win->Pen(colour); - win->SetCursor(samples[0].x (), - c + samples[0].x () * (m + samples[0].x () * a)); - win->DrawTo(samples[samplecount - 1].x (), - c + samples[samplecount - 1].x () * (m + - samples[samplecount - - 1].x () * a)); - } -} -#endif diff --git a/ccstruct/lmedsq.h b/ccstruct/lmedsq.h deleted file mode 100644 index cf12f9766d..0000000000 --- a/ccstruct/lmedsq.h +++ /dev/null @@ -1,84 +0,0 @@ -/********************************************************************** - * File: lmedsq.h (Formerly lms.h) - * Description: Code for the LMS class. - * Author: Ray Smith - * Created: Fri Aug 7 09:30:53 BST 1992 - * - * (C) Copyright 1992, Hewlett-Packard Ltd. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. 
- * - **********************************************************************/ - -#ifndef LMEDSQ_H -#define LMEDSQ_H - -#include "points.h" -#include "varable.h" -#include "scrollview.h" -#include "notdll.h" - -class LMS -{ - public: - LMS( //constructor - inT32 size); //no of samples - ~LMS (); //destructor - void clear(); //clear samples - void add( //add sample - FCOORD sample); //sample coords - void fit( //generate fit - float &m, //output line - float &c); - void constrained_fit( //fixed gradient - float fixed_m, //forced gradient - float &out_c); //output line - void fit_quadratic( //easy quadratic - float outlier_threshold, //min outlier - double &a, //x squared - float &b, //x - float &c); //constant - void plot( //plot fit - ScrollView* win, //window - ScrollView::Color colour); //colour to draw in - float error() { //get error - return fitted ? line_error : -1; - } - - private: - - void pick_line( //random choice - float &m, //output line - float &c); - void pick_quadratic( //random choice - double &a, //output curve - float &b, - float &c); - void compute_errors( //find errors - float m, //from line - float c); - //find errors - float compute_quadratic_errors(float outlier_threshold, //min outlier - double a, //from curve - float m, - float c); - - BOOL8 fitted; //line parts valid - inT32 samplesize; //max samples - inT32 samplecount; //current sample size - FCOORD *samples; //array of samples - float *errors; //error distances - double a; //x squared - float m; //line gradient - float c; - float line_error; //error of fit -}; -extern INT_VAR_H (lms_line_trials, 12, "Number of linew fits to do"); -#endif diff --git a/wordrec/matrix.cpp b/ccstruct/matrix.cpp similarity index 61% rename from wordrec/matrix.cpp rename to ccstruct/matrix.cpp index cee6b96feb..82b27d7e3b 100644 --- a/wordrec/matrix.cpp +++ b/ccstruct/matrix.cpp @@ -27,35 +27,34 @@ ----------------------------------------------------------------------*/ #include "matrix.h" +#include 
"callcpp.h" #include "ratngs.h" +#include "tprintf.h" #include "unicharset.h" -#include "callcpp.h" // Print the best guesses out of the match rating matrix. -void MATRIX::print(const UNICHARSET ¤t_unicharset) { - cprintf("Ratings Matrix (top choices)\n"); - - /* Do each diagonal */ - for (int spread = 0; spread < this->dimension(); spread++) { - /* For each spot */ - for (int x = 0; x < this->dimension() - spread; x++) { - /* Process one square */ - BLOB_CHOICE_LIST *rating = this->get(x, x + spread); +void MATRIX::print(const UNICHARSET &unicharset) { + tprintf("Ratings Matrix (top choices)\n"); + int row, col; + for (col = 0; col < this->dimension(); ++col) tprintf("\t%d", col); + tprintf("\n"); + for (row = 0; row < this->dimension(); ++row) { + for (col = 0; col <= row; ++col) { + if (col == 0) tprintf("%d\t", row); + BLOB_CHOICE_LIST *rating = this->get(col, row); if (rating != NOT_CLASSIFIED) { - cprintf("\t[%d,%d] : ", x, x + spread); - // Print first 3 BLOB_CHOICES from ratings. - BLOB_CHOICE_IT rating_it; - rating_it.set_to_list(rating); - int count = 0; - for (rating_it.mark_cycle_pt(); - count < 3 && !rating_it.cycled_list(); - ++count, rating_it.forward()) { - UNICHAR_ID unichar_id = rating_it.data()->unichar_id(); - cprintf("%-10s%4.0f%s", current_unicharset.id_to_unichar(unichar_id), - rating_it.data()->rating(), - (!rating_it.at_last() && count+1 < 3) ? "\t|\t" : "\n"); + BLOB_CHOICE_IT b_it(rating); + int counter = 0; + for (b_it.mark_cycle_pt(); !b_it.cycled_list(); b_it.forward()) { + tprintf("%s ", unicharset.id_to_unichar(b_it.data()->unichar_id())); + ++counter; + if (counter == 3) break; } + tprintf("\t"); + } else { + tprintf(" \t"); } } + tprintf("\n"); } } diff --git a/wordrec/matrix.h b/ccstruct/matrix.h similarity index 83% rename from wordrec/matrix.h rename to ccstruct/matrix.h index 5317e02e2d..feed46572e 100644 --- a/wordrec/matrix.h +++ b/ccstruct/matrix.h @@ -22,13 +22,13 @@ ** limitations under the License. 
* *********************************************************************************/ -#ifndef MATRIX_H -#define MATRIX_H +#ifndef TESSERACT_CCSTRUCT_MATRIX_H__ +#define TESSERACT_CCSTRUCT_MATRIX_H__ #include "ratngs.h" #include "unicharset.h" -static BLOB_CHOICE_LIST* NOT_CLASSIFIED = NULL; +#define NOT_CLASSIFIED reinterpret_cast(NULL) // A generic class to store a matrix with entries of type T. template @@ -86,7 +86,22 @@ class MATRIX : public GENERIC_MATRIX { MATRIX(int dimension) : GENERIC_MATRIX(dimension, NOT_CLASSIFIED) {} // Print a shortened version of the contents of the matrix. - void print(const UNICHARSET ¤t_unicharset); + void print(const UNICHARSET &unicharset); }; -#endif +struct MATRIX_COORD { + static void Delete(void *arg) { + MATRIX_COORD *c = static_cast(arg); + delete c; + } + MATRIX_COORD(int c, int r): col(c), row(r) {} + ~MATRIX_COORD() {} + bool Valid(const MATRIX &m) const { + return (col >= 0 && row >= 0 && + col < m.dimension() && row < m.dimension()); + } + int col; + int row; +}; + +#endif // TESSERACT_CCSTRUCT_MATRIX_H__ diff --git a/ccstruct/mod128.cpp b/ccstruct/mod128.cpp index 72fd917812..40b138f6a1 100644 --- a/ccstruct/mod128.cpp +++ b/ccstruct/mod128.cpp @@ -20,7 +20,7 @@ #include "mfcpch.h" //precompiled headers #include "mod128.h" -static inT16 idirtab[] = { +const inT16 idirtab[] = { 1000, 0, 998, 49, 995, 98, 989, 146, 980, 195, 970, 242, 956, 290, 941, 336, 923, 382, 903, 427, 881, 471, 857, 514, @@ -55,7 +55,7 @@ static inT16 idirtab[] = { 980, -195, 989, -146, 995, -98, 998, -49 }; -static ICOORD *dirtab = (ICOORD *) idirtab; +const ICOORD *dirtab = (ICOORD *) idirtab; /********************************************************************** * DIR128::DIR128 diff --git a/ccstruct/normalis.cpp b/ccstruct/normalis.cpp index 5d33e86198..40ec8dfc8b 100644 --- a/ccstruct/normalis.cpp +++ b/ccstruct/normalis.cpp @@ -70,16 +70,20 @@ float DENORM::scale_at_x(float src_x) const { // In normalized coords. 
float DENORM::yshift_at_x(float src_x) const { // In normalized coords. if (segments != 0) { const DENORM_SEG* seg = binary_search_segment(src_x); - if (seg->ycoord == -MAX_INT32) { - if (base_is_row) - return source_row->base_line(x(src_x)); - else - return m * x(src_x) + c; - } else { + if (seg->ycoord != -MAX_INT32) { return seg->ycoord; } } - return source_row->base_line(x(src_x)); + return yshift_at_orig_x(x(src_x)); +} + +// Returns the y-shift at the original (un-normalized) x, assuming +// no segments. +float DENORM::yshift_at_orig_x(float orig_x) const { + if (base_is_row && source_row != NULL) + return source_row->base_line(orig_x); + else + return m * orig_x + c; } /********************************************************************** @@ -124,24 +128,12 @@ DENORM::DENORM(float x, //from same pieces DENORM_SEG *seg_pts, //actual segments BOOL8 using_row, //as baseline ROW *src) { + Init(); x_centre = x; //just copy scale_factor = scaling; source_row = src; - if (seg_count > 0) { - segs = new DENORM_SEG[seg_count]; - for (segments = 0; segments < seg_count; segments++) - segs[segments] = seg_pts[segments]; - // It is possible, if infrequent that the segments may be out of order. - // since we are searching with a binary search, keep them in order. 
- qsort(segs, segments, sizeof(DENORM_SEG), - reinterpret_cast( - &compare_seg_by_xstart)); - } - else { - segments = 0; - segs = NULL; - } - base_is_row = using_row; + set_segments(seg_pts, seg_count); + base_is_row = src != NULL && using_row; m = line_m; c = line_c; block_ = NULL; @@ -155,24 +147,33 @@ DENORM::DENORM(const DENORM &src) { } -DENORM & DENORM::operator= (const DENORM & src) { +DENORM & DENORM::operator=(const DENORM & src) { x_centre = src.x_centre; scale_factor = src.scale_factor; source_row = src.source_row; - if (segments > 0) - delete[]segs; - if (src.segments > 0) { - segs = new DENORM_SEG[src.segments]; - for (segments = 0; segments < src.segments; segments++) - segs[segments] = src.segs[segments]; - } - else { - segments = 0; - segs = NULL; - } + set_segments(src.segs, src.segments); base_is_row = src.base_is_row; m = src.m; c = src.c; block_ = src.block_; return *this; } + +void DENORM::set_segments(const DENORM_SEG* src_segs, int seg_count) { + if (segments > 0) + delete [] segs; + if (seg_count > 0) { + segs = new DENORM_SEG[seg_count]; + for (segments = 0; segments < seg_count; segments++) + segs[segments] = src_segs[segments]; + // It is possible, if infrequent that the segments may be out of order. + // since we are searching with a binary search, keep them in order. 
+ qsort(segs, segments, sizeof(DENORM_SEG), + reinterpret_cast( + &compare_seg_by_xstart)); + } else { + segments = 0; + segs = NULL; + } +} + diff --git a/ccstruct/normalis.h b/ccstruct/normalis.h index 870c201390..7f7e7cadaf 100644 --- a/ccstruct/normalis.h +++ b/ccstruct/normalis.h @@ -20,102 +20,109 @@ #ifndef NORMALIS_H #define NORMALIS_H -#include +#include +#include "host.h" class ROW; //forward decl class BLOCK; -class DENORM_SEG -{ - public: - DENORM_SEG() {} +class DENORM_SEG { + public: + DENORM_SEG() {} - inT32 xstart; // start of segment - inT32 ycoord; // y at segment - float scale_factor; // normalized_x/scale_factor + x_center == original_x + inT32 xstart; // start of segment + inT32 ycoord; // y at segment + float scale_factor; // normalized_x/scale_factor + x_center == original_x }; -class DENORM -{ - public: - DENORM() { //constructor - source_row = NULL; - x_centre = 0.0f; - scale_factor = 1.0f; - segments = 0; - segs = NULL; - base_is_row = TRUE; - m = c = 0; - block_ = NULL; - } - DENORM( //constructor - float x, //from same pieces - float scaling, - ROW *src) { - x_centre = x; //just copy - scale_factor = scaling; - source_row = src; - segments = 0; - segs = NULL; - base_is_row = TRUE; - m = c = 0; - block_ = NULL; - } - DENORM( // constructor - float x, // from same pieces - float scaling, - double line_m, // default line: y = mx + c - double line_c, - inT16 seg_count, // no of segments - DENORM_SEG *seg_pts, // actual segments - BOOL8 using_row, // as baseline - ROW *src); - DENORM(const DENORM &); - DENORM & operator= (const DENORM &); - ~DENORM() { - if (segments > 0) - delete[]segs; - } +class DENORM { + public: + DENORM() { + Init(); + } + DENORM(float x, // from same pieces + float scaling, + ROW *src) { + Init(); + x_centre = x; //just copy + scale_factor = scaling; + source_row = src; + base_is_row = src != NULL; + } + DENORM(float x, // from same pieces + float scaling, + double line_m, // default line: y = mx + c + double line_c, + 
inT16 seg_count, // no of segments + DENORM_SEG *seg_pts, // actual segments + BOOL8 using_row, // as baseline + ROW *src); + DENORM(const DENORM &); + DENORM& operator=(const DENORM&); + ~DENORM() { + if (segments > 0) + delete[] segs; + } - // Return the original x coordinate of the middle of the word - // (mapped to 0 in normalized coordinates). - float origin() const { return x_centre; } + // Setup default values. + void Init() { + base_is_row = false; + segments = 0; + m = c = 0.0; + x_centre = 0.0f; + scale_factor = 1.0f; + source_row = NULL; + segs = NULL; + block_ = NULL; + } - float scale() const { //get scale - return scale_factor; - } - ROW *row() const { //get row - return source_row; - } - const BLOCK* block() const { - return block_; - } - void set_block(const BLOCK* block) { - block_ = block; - } + // Return the original x coordinate of the middle of the word + // (mapped to 0 in normalized coordinates). + float origin() const { return x_centre; } - // normalized x -> original x - float x(float src_x) const; + float scale() const { //get scale + return scale_factor; + } + ROW *row() const { //get row + return source_row; + } + void set_row(ROW* row) { + source_row = row; + } + const BLOCK* block() const { + return block_; + } + void set_block(const BLOCK* block) { + block_ = block; + } - // Given a (y coordinate, x center of segment) in normalized coordinates, - // return the original y coordinate. - float y(float src_y, float src_x_centre) const; + // normalized x -> original x + float x(float src_x) const; - float scale_at_x( // Return scaling at this coord. - float src_x) const; - float yshift_at_x( // Return yshift at this coord. - float src_x) const; + // Given a (y coordinate, x center of segment) in normalized coordinates, + // return the original y coordinate. + float y(float src_y, float src_x_centre) const; - private: - const DENORM_SEG *binary_search_segment(float src_x) const; + float scale_at_x( // Return scaling at this coord. 
+ float src_x) const; + float yshift_at_x( // Return yshift at this coord. + float src_x) const; + // Returns the y-shift at the original (un-normalized) x, assuming + // no segments. + float yshift_at_orig_x(float orig_x) const; - BOOL8 base_is_row; // using row baseline? - inT16 segments; // no of segments - double c, m; // baseline: y = mx + c - float x_centre; // middle of word in original coordinates - float scale_factor; // normalized_x/scale_factor + x_center == original_x - ROW *source_row; // row it came from - DENORM_SEG *segs; // array of segments - const BLOCK* block_; // Block the word came from. + void set_segments(const DENORM_SEG* new_segs, int seg_count); + + private: + const DENORM_SEG *binary_search_segment(float src_x) const; + + BOOL8 base_is_row; // using row baseline? + inT16 segments; // no of segments + double c, m; // baseline: y = mx + c + float x_centre; // middle of word in original coordinates + float scale_factor; // normalized_x/scale_factor + x_center == original_x + ROW *source_row; // row it came from + DENORM_SEG *segs; // array of segments + const BLOCK* block_; // Block the word came from. }; #endif diff --git a/ccstruct/ocrblock.cpp b/ccstruct/ocrblock.cpp index 86d14f4d0e..56e05b5aee 100644 --- a/ccstruct/ocrblock.cpp +++ b/ccstruct/ocrblock.cpp @@ -47,6 +47,7 @@ BLOCK::BLOCK(const char *name, //< filename ICOORDELT_IT right_it = &rightside; proportional = prop; + right_to_left_ = false; kerning = kern; spacing = space; font_class = -1; //not assigned @@ -217,3 +218,117 @@ const BLOCK & source //from this skew_ = source.skew_; return *this; } + +/********************************************************************** + * PrintSegmentationStats + * + * Prints segmentation stats for the given block list. 
+ **********************************************************************/ + +void PrintSegmentationStats(BLOCK_LIST* block_list) { + int num_blocks = 0; + int num_rows = 0; + int num_words = 0; + int num_blobs = 0; + BLOCK_IT block_it(block_list); + for (block_it.mark_cycle_pt(); !block_it.cycled_list(); block_it.forward()) { + BLOCK* block = block_it.data(); + ++num_blocks; + ROW_IT row_it(block->row_list()); + for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) { + ++num_rows; + ROW* row = row_it.data(); + // Iterate over all werds in the row. + WERD_IT werd_it(row->word_list()); + for (werd_it.mark_cycle_pt(); !werd_it.cycled_list(); werd_it.forward()) { + WERD* werd = werd_it.data(); + ++num_words; + num_blobs += werd->cblob_list()->length(); + } + } + } + tprintf("Block list stats:\nBlocks = %d\nRows = %d\nWords = %d\nBlobs = %d\n", + num_blocks, num_rows, num_words, num_blobs); +} + +/********************************************************************** + * ExtractBlobsFromSegmentation + * + * Extracts blobs from the given block list and adds them to the output list. + * The block list must have been created by performing a page segmentation. + **********************************************************************/ + +void ExtractBlobsFromSegmentation(BLOCK_LIST* blocks, + C_BLOB_LIST* output_blob_list) { + C_BLOB_IT return_list_it(output_blob_list); + BLOCK_IT block_it(blocks); + for (block_it.mark_cycle_pt(); !block_it.cycled_list(); block_it.forward()) { + BLOCK* block = block_it.data(); + ROW_IT row_it(block->row_list()); + for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) { + ROW* row = row_it.data(); + // Iterate over all werds in the row. 
+ WERD_IT werd_it(row->word_list()); + for (werd_it.mark_cycle_pt(); !werd_it.cycled_list(); werd_it.forward()) { + WERD* werd = werd_it.data(); + return_list_it.move_to_last(); + return_list_it.add_list_after(werd->cblob_list()); + return_list_it.move_to_last(); + return_list_it.add_list_after(werd->rej_cblob_list()); + } + } + } +} + +/********************************************************************** + * RefreshWordBlobsFromNewBlobs() + * + * Refreshes the words in the block_list by using blobs in the + * new_blobs list. + * Block list must have word segmentation in it. + * It consumes the blobs provided in the new_blobs list. The blobs leftover in + * the new_blobs list after the call weren't matched to any blobs of the words + * in block list. + * The output not_found_blobs is a list of blobs from the original segmentation + * in the block_list for which no corresponding new blobs were found. + **********************************************************************/ + +void RefreshWordBlobsFromNewBlobs(BLOCK_LIST* block_list, + C_BLOB_LIST* new_blobs, + C_BLOB_LIST* not_found_blobs) { + // Now iterate over all the blobs in the segmentation_block_list_, and just + // replace the corresponding c-blobs inside the werds. + BLOCK_IT block_it(block_list); + for (block_it.mark_cycle_pt(); !block_it.cycled_list(); block_it.forward()) { + BLOCK* block = block_it.data(); + // Iterate over all rows in the block. + ROW_IT row_it(block->row_list()); + for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) { + ROW* row = row_it.data(); + // Iterate over all werds in the row. + WERD_IT werd_it(row->word_list()); + WERD_LIST new_words; + WERD_IT new_words_it(&new_words); + for (werd_it.mark_cycle_pt(); !werd_it.cycled_list(); werd_it.forward()) { + WERD* werd = werd_it.extract(); + WERD* new_werd = werd->ConstructWerdWithNewBlobs(new_blobs, + not_found_blobs); + if (new_werd) { + // Insert this new werd into the actual row's werd-list. 
Remove the + // existing one. + new_words_it.add_after_then_move(new_werd); + delete werd; + } else { + // Reinsert the older word back, for lack of better options. + // This is critical since dropping the words messes up segmentation: + // eg. 1st word in the row might otherwise have W_FUZZY_NON turned on. + new_words_it.add_after_then_move(werd); + } + } + // Get rid of the old word list & replace it with the new one. + row->word_list()->clear(); + werd_it.move_to_first(); + werd_it.add_list_after(&new_words); + } + } +} diff --git a/ccstruct/ocrblock.h b/ccstruct/ocrblock.h index 25db1fdf3d..36d70f7b90 100644 --- a/ccstruct/ocrblock.h +++ b/ccstruct/ocrblock.h @@ -37,6 +37,7 @@ class BLOCK:public ELIST_LINK, public PDBLK : re_rotation_(1.0f, 0.0f), classify_rotation_(1.0f, 0.0f), skew_(1.0f, 0.0f) { + right_to_left_ = false; hand_poly = NULL; } BLOCK(const char *name, //< filename @@ -79,6 +80,12 @@ class BLOCK:public ELIST_LINK, public PDBLK BOOL8 prop() const { return proportional; } + bool right_to_left() const { + return right_to_left_; + } + void set_right_to_left(bool value) { + right_to_left_ = value; + } /// return pitch inT32 fixed_pitch() const { return pitch; @@ -146,6 +153,10 @@ class BLOCK:public ELIST_LINK, public PDBLK median_size_.set_y(y); } + Pix* render_mask() { + return PDBLK::render_mask(re_rotation_); + } + void rotate(const FCOORD& rotation); /// decreasing y order @@ -198,6 +209,7 @@ class BLOCK:public ELIST_LINK, public PDBLK private: BOOL8 proportional; //< proportional + bool right_to_left_; //< major script is right to left. inT8 kerning; //< inter blob gap inT16 spacing; //< inter word gap inT16 pitch; //< pitch of non-props @@ -216,4 +228,24 @@ class BLOCK:public ELIST_LINK, public PDBLK int decreasing_top_order(const void *row1, const void *row2); +// A function to print segmentation stats for the given block list. 
+void PrintSegmentationStats(BLOCK_LIST* block_list); + +// Extracts blobs fromo the given block list and adds them to the output list. +// The block list must have been created by performing a page segmentation. +void ExtractBlobsFromSegmentation(BLOCK_LIST* blocks, + C_BLOB_LIST* output_blob_list); + +// Refreshes the words in the block_list by using blobs in the +// new_blobs list. +// Block list must have word segmentation in it. +// It consumes the blobs provided in the new_blobs list. The blobs leftover in +// the new_blobs list after the call weren't matched to any blobs of the words +// in block list. +// The output not_found_blobs is a list of blobs from the original segmentation +// in the block_list for which no corresponding new blobs were found. +void RefreshWordBlobsFromNewBlobs(BLOCK_LIST* block_list, + C_BLOB_LIST* new_blobs, + C_BLOB_LIST* not_found_blobs); + #endif diff --git a/ccstruct/ocrrow.h b/ccstruct/ocrrow.h index db045e77c4..4a71f6c11b 100644 --- a/ccstruct/ocrrow.h +++ b/ccstruct/ocrrow.h @@ -28,7 +28,7 @@ class TO_ROW; class ROW:public ELIST_LINK { - friend void tweak_row_baseline(ROW *); + friend void tweak_row_baseline(ROW *, double, double); public: ROW() { } //empty constructor diff --git a/ccstruct/pageres.cpp b/ccstruct/pageres.cpp index d678301262..85120c2466 100644 --- a/ccstruct/pageres.cpp +++ b/ccstruct/pageres.cpp @@ -22,7 +22,7 @@ #include #endif #include "pageres.h" -#include "notdll.h" +#include "blobs.h" ELISTIZE (BLOCK_RES) CLISTIZE (BLOCK_RES) ELISTIZE (ROW_RES) ELISTIZE (WERD_RES) @@ -31,9 +31,9 @@ CLISTIZE (BLOCK_RES) ELISTIZE (ROW_RES) ELISTIZE (WERD_RES) * * Constructor for page results *************************************************************************/ -PAGE_RES::PAGE_RES( //recursive construct - BLOCK_LIST *the_block_list //real page - ) { +PAGE_RES::PAGE_RES( + BLOCK_LIST *the_block_list, + WERD_CHOICE **prev_word_best_choice_ptr) { BLOCK_IT block_it(the_block_list); BLOCK_RES_IT 
block_res_it(&block_res_list); @@ -41,10 +41,12 @@ PAGE_RES::PAGE_RES( //recursive construct rej_count = 0; rejected = FALSE; - for (block_it.mark_cycle_pt (); - !block_it.cycled_list (); block_it.forward ()) { - block_res_it.add_to_end (new BLOCK_RES (block_it.data ())); + for (block_it.mark_cycle_pt(); + !block_it.cycled_list(); block_it.forward()) { + block_res_it.add_to_end(new BLOCK_RES(block_it.data())); } + + prev_word_best_choice = prev_word_best_choice_ptr; } @@ -54,9 +56,7 @@ PAGE_RES::PAGE_RES( //recursive construct * Constructor for BLOCK results *************************************************************************/ -BLOCK_RES::BLOCK_RES( //recursive construct - BLOCK *the_block //real BLOCK - ) { +BLOCK_RES::BLOCK_RES(BLOCK *the_block) { ROW_IT row_it (the_block->row_list ()); ROW_RES_IT row_res_it(&row_res_list); @@ -71,8 +71,9 @@ BLOCK_RES::BLOCK_RES( //recursive construct block = the_block; - for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) { - row_res_it.add_to_end (new ROW_RES (row_it.data ())); + for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) { + row_res_it.add_to_end(new ROW_RES(the_block->right_to_left(), + row_it.data())); } } @@ -83,81 +84,108 @@ BLOCK_RES::BLOCK_RES( //recursive construct * Constructor for ROW results *************************************************************************/ -ROW_RES::ROW_RES( //recursive construct - ROW *the_row //real ROW - ) { - WERD_IT word_it (the_row->word_list ()); +ROW_RES::ROW_RES(bool right_to_left, + ROW *the_row) { + WERD_IT word_it(the_row->word_list()); WERD_RES_IT word_res_it(&word_res_list); - WERD_RES *combo = NULL; //current combination of fuzzies - WERD_RES *word_res; //current word + WERD_RES *combo = NULL; // current combination of fuzzies + WERD_RES *word_res; // current word WERD *copy_word; char_count = 0; rej_count = 0; whole_word_rej_count = 0; - font_class = -1; - font_class_score = -1.0; - bold = FALSE; - italic = FALSE; row = 
the_row; - - for (word_it.mark_cycle_pt (); !word_it.cycled_list (); word_it.forward ()) { - word_res = new WERD_RES (word_it.data ()); - word_res->x_height = the_row->x_height(); - - if (word_res->word->flag (W_FUZZY_NON)) { - ASSERT_HOST (combo != NULL); - word_res->part_of_combo = TRUE; - combo->copy_on (word_res); - } - if (word_it.data_relative (1)->flag (W_FUZZY_NON)) { - if (combo == NULL) { + if (right_to_left) { + word_it.move_to_last(); + for (word_it.mark_cycle_pt(); !word_it.cycled_list(); word_it.backward()) { + word_res = new WERD_RES(word_it.data()); + word_res->x_height = the_row->x_height(); + // A FUZZY_NON marks the beginning of a combo if we are not in one. + if (combo == NULL && word_res->word->flag(W_FUZZY_NON)) { copy_word = new WERD; //deep copy - *copy_word = *(word_it.data ()); - combo = new WERD_RES (copy_word); + *copy_word = *(word_it.data()); + combo = new WERD_RES(copy_word); combo->x_height = the_row->x_height(); combo->combination = TRUE; - word_res_it.add_to_end (combo); + word_res_it.add_to_end(combo); + word_res->part_of_combo = TRUE; + } else if (combo != NULL) { + word_res->part_of_combo = TRUE; + combo->copy_on(word_res); + // The first non FUZZY_NON is the last word in the combo. 
+ if (!word_res->word->flag(W_FUZZY_NON)) + combo = NULL; } - word_res->part_of_combo = TRUE; + word_res_it.add_to_end(word_res); + } + } else { + for (word_it.mark_cycle_pt(); !word_it.cycled_list(); word_it.forward()) { + word_res = new WERD_RES(word_it.data()); + word_res->x_height = the_row->x_height(); + + if (word_res->word->flag(W_FUZZY_NON)) { + ASSERT_HOST(combo != NULL); + word_res->part_of_combo = TRUE; + combo->copy_on(word_res); + } + if (word_it.data_relative(1)->flag(W_FUZZY_NON)) { + if (combo == NULL) { + copy_word = new WERD; + //deep copy + *copy_word = *(word_it.data()); + combo = new WERD_RES(copy_word); + combo->x_height = the_row->x_height(); + combo->combination = TRUE; + word_res_it.add_to_end(combo); + } + word_res->part_of_combo = TRUE; + } else { + combo = NULL; + } + word_res_it.add_to_end(word_res); } - else - combo = NULL; - word_res_it.add_to_end (word_res); } } -WERD_RES & WERD_RES::operator= ( //assign word_res -const WERD_RES & source //from this -) { - this->ELIST_LINK::operator= (source); +WERD_RES& WERD_RES::operator=(const WERD_RES & source) { + this->ELIST_LINK::operator=(source); + Clear(); if (source.combination) { word = new WERD; - *word = *(source.word); //deep copy + *word = *(source.word); // deep copy + } else { + word = source.word; // pt to same word } - else - word = source.word; //pt to same word - - if (source.outword != NULL) { - outword = new WERD; - *outword = *(source.outword);//deep copy - } - else - outword = NULL; - + if (source.bln_boxes != NULL) + bln_boxes = new tesseract::BoxWord(*source.bln_boxes); + if (source.chopped_word != NULL) + chopped_word = new TWERD(*source.chopped_word); + if (source.rebuild_word != NULL) + rebuild_word = new TWERD(*source.rebuild_word); + // TODO(rays) Do we ever need to copy the seam_array? 
denorm = source.denorm; + if (source.box_word != NULL) + box_word = new tesseract::BoxWord(*source.box_word); + best_state = source.best_state; + correct_text = source.correct_text; + if (source.best_choice != NULL) { best_choice = new WERD_CHOICE; *best_choice = *(source.best_choice); raw_choice = new WERD_CHOICE; *raw_choice = *(source.raw_choice); + best_choice_fontinfo_ids = source.best_choice_fontinfo_ids; } else { best_choice = NULL; raw_choice = NULL; + if (!best_choice_fontinfo_ids.empty()) { + best_choice_fontinfo_ids.clear(); + } } if (source.ep_choice != NULL) { ep_choice = new WERD_CHOICE; @@ -166,6 +194,15 @@ const WERD_RES & source //from this else ep_choice = NULL; reject_map = source.reject_map; + combination = source.combination; + part_of_combo = source.part_of_combo; + CopySimpleFields(source); + return *this; +} + +// Copies basic fields that don't involve pointers that might be useful +// to copy when making one WERD_RES from another. +void WERD_RES::CopySimpleFields(const WERD_RES& source) { tess_failed = source.tess_failed; tess_accepted = source.tess_accepted; tess_would_adapt = source.tess_would_adapt; @@ -181,37 +218,327 @@ const WERD_RES & source //from this caps_height = source.caps_height; guessed_x_ht = source.guessed_x_ht; guessed_caps_ht = source.guessed_caps_ht; - combination = source.combination; - part_of_combo = source.part_of_combo; reject_spaces = source.reject_spaces; - return *this; +} + +// Sets up the members used in recognition: +// bln_boxes, chopped_word, seam_array, denorm, best_choice, raw_choice. +// Returns false if the word is empty and sets up fake results. 
+bool WERD_RES::SetupForRecognition(const UNICHARSET& unicharset, + bool numeric_mode, ROW *row, BLOCK* block) { + ClearResults(); + if (word->cblob_list()->empty()) { + tprintf("Initial word empty!\n"); + chopped_word = new TWERD; + rebuild_word = new TWERD; + bln_boxes = new tesseract::BoxWord; + box_word = new tesseract::BoxWord; + best_choice = new WERD_CHOICE("", NULL, 10.0f, -1.0f, + TOP_CHOICE_PERM, unicharset); + raw_choice = new WERD_CHOICE("", NULL, 10.0f, -1.0f, + TOP_CHOICE_PERM, unicharset); + tess_failed = true; + return false; + } + chopped_word = TWERD::PolygonalCopy(word); + chopped_word->Normalize(row, x_height, numeric_mode, &denorm); + if (block != NULL) + denorm.set_block(block); + bln_boxes = tesseract::BoxWord::CopyFromNormalized(NULL, chopped_word); + seam_array = start_seam_list(chopped_word->blobs); + best_choice = new WERD_CHOICE; + best_choice->make_bad(); + raw_choice = new WERD_CHOICE; + raw_choice->make_bad(); + return true; +} + +// Builds the rebuild_word from the chopped_word and the best_state. +void WERD_RES::RebuildBestState() { + if (rebuild_word != NULL) + delete rebuild_word; + rebuild_word = new TWERD; + TBLOB* prev_blob = NULL; + int start = 0; + for (int i = 0; i < best_state.size(); ++i) { + int length = best_state[i]; + join_pieces(chopped_word->blobs, seam_array, start, start + length - 1); + TBLOB* blob = chopped_word->blobs; + for (int i = 0; i < start; ++i) + blob = blob->next; + TBLOB* copy_blob = new TBLOB(*blob); + if (prev_blob == NULL) + rebuild_word->blobs = copy_blob; + else + prev_blob->next = copy_blob; + prev_blob = copy_blob; + break_pieces(blob, seam_array, start, start + length - 1); + start += length; + } +} + +// Copies the chopped_word to the rebuild_word, faking a best_state as well. +// Also sets up the output box_word. 
+void WERD_RES::CloneChoppedToRebuild() { + if (rebuild_word != NULL) + delete rebuild_word; + rebuild_word = new TWERD(*chopped_word); + SetupBoxWord(); + int word_len = box_word->length(); + best_state.reserve(word_len); + correct_text.reserve(word_len); + for (int i = 0; i < word_len; ++i) { + best_state.push_back(1); + correct_text.push_back(STRING("")); + } +} + +// Sets/replaces the box_word with one made from the rebuild_word. +void WERD_RES::SetupBoxWord() { + if (box_word != NULL) + delete box_word; + rebuild_word->ComputeBoundingBoxes(); + box_word = tesseract::BoxWord::CopyFromNormalized(&denorm, rebuild_word); + box_word->ClipToOriginalWord(denorm.block(), word); +} + +// Classifies the word with some already-calculated BLOB_CHOICEs. +// The choices are an array of blob_count pointers to BLOB_CHOICE, +// providing a single classifier result for each blob. +// The BLOB_CHOICEs are consumed and the word takes ownership. +// The number of blobs in the outword must match blob_count. +void WERD_RES::FakeClassifyWord(const UNICHARSET& unicharset, int blob_count, + BLOB_CHOICE** choices) { + // Setup the WERD_RES. + ASSERT_HOST(box_word != NULL); + ASSERT_HOST(blob_count == box_word->length()); + ASSERT_HOST(best_choice != NULL); + BLOB_CHOICE_LIST_CLIST* word_choices = new BLOB_CHOICE_LIST_CLIST; + BLOB_CHOICE_LIST_C_IT bc_it(word_choices); + for (int c = 0; c < blob_count; ++c) { + best_choice->append_unichar_id( + choices[c]->unichar_id(), 1, + choices[c]->rating(), choices[c]->certainty()); + BLOB_CHOICE_LIST* choice_list = new BLOB_CHOICE_LIST; + BLOB_CHOICE_IT choice_it(choice_list); + choice_it.add_after_then_move(choices[c]); + bc_it.add_after_then_move(choice_list); + } + best_choice->set_blob_choices(word_choices); + best_choice->populate_unichars(unicharset); + delete raw_choice; + raw_choice = new WERD_CHOICE(*best_choice); + reject_map.initialise(blob_count); +} + +// Copies the best_choice strings to the correct_text for adaption/training. 
+void WERD_RES::BestChoiceToCorrectText(const UNICHARSET& unicharset) { + correct_text.clear(); + ASSERT_HOST(best_choice != NULL); + for (int i = 0; i < best_choice->length(); ++i) { + UNICHAR_ID choice_id = best_choice->unichar_id(i); + const char* blob_choice = unicharset.id_to_unichar(choice_id); + correct_text.push_back(STRING(blob_choice)); + } +} + +// Merges 2 adjacent blobs in the result if the permanent callback +// class_cb returns other than INVALID_UNICHAR_ID, AND the permanent +// callback box_cb is NULL or returns true, setting the merged blob +// result to the class returned from class_cb. +// Returns true if anything was merged. +bool WERD_RES::ConditionalBlobMerge( + const UNICHARSET& unicharset, + TessResultCallback2* class_cb, + TessResultCallback2* box_cb, + + BLOB_CHOICE_LIST_CLIST *blob_choices) { + bool modified = false; + for (int i = 0; i + 1 < best_choice->length(); ++i) { + UNICHAR_ID new_id = class_cb->Run(best_choice->unichar_id(i), + best_choice->unichar_id(i+1)); + if (new_id != INVALID_UNICHAR_ID && + (box_cb == NULL || box_cb->Run(box_word->BlobBox(i), + box_word->BlobBox(i + 1)))) { + if (reject_map.length() == best_choice->length()) + reject_map.remove_pos(i); + best_choice->set_unichar_id(new_id, i); + best_choice->remove_unichar_id(i + 1); + raw_choice->set_unichar_id(new_id, i); + raw_choice->remove_unichar_id(i + 1); + modified = true; + rebuild_word->MergeBlobs(i, i + 2); + box_word->MergeBoxes(i, i + 2); + if (i + 1 < best_state.length()) { + best_state[i] += best_state[i + 1]; + best_state.remove(i + 1); + } + + BLOB_CHOICE_LIST_C_IT blob_choices_it(blob_choices); + for (int j = 0; j < i; ++j) + blob_choices_it.forward(); + BLOB_CHOICE_IT it1(blob_choices_it.data()); // first choices + BLOB_CHOICE_LIST* target_choices = blob_choices_it.data_relative(1); + BLOB_CHOICE_IT it2(target_choices); // second choices + float certainty = it2.data()->certainty(); + float rating = it2.data()->rating(); + if (it1.data()->certainty() < 
certainty) { + certainty = it1.data()->certainty(); + rating = it1.data()->rating(); + target_choices = blob_choices_it.data(); + blob_choices_it.forward(); + } + delete blob_choices_it.extract(); // get rid of spare + // TODO(rays) Fix the choices so they contain the desired result. + // Do we really need to ? Only needed for fix_quotes, which should be + // going away. + } + } + delete class_cb; + delete box_cb; + if (modified) { + best_choice->populate_unichars(unicharset); + raw_choice->populate_unichars(unicharset); + } + return modified; } WERD_RES::~WERD_RES () { - if (combination) + Clear(); +} + +void WERD_RES::InitPointers() { + word = NULL; + bln_boxes = NULL; + chopped_word = NULL; + rebuild_word = NULL; + box_word = NULL; + seam_array = NULL; + best_choice = NULL; + raw_choice = NULL; + ep_choice = NULL; +} + +void WERD_RES::Clear() { + if (word != NULL && combination) delete word; - if (outword != NULL) - delete outword; + word = NULL; + ClearResults(); +} + +void WERD_RES::ClearResults() { + done = false; + if (bln_boxes != NULL) { + delete bln_boxes; + bln_boxes = NULL; + } + if (chopped_word != NULL) { + delete chopped_word; + chopped_word = NULL; + } + if (rebuild_word != NULL) { + delete rebuild_word; + rebuild_word = NULL; + } + if (box_word != NULL) { + delete box_word; + box_word = NULL; + } + best_state.clear(); + correct_text.clear(); + if (seam_array != NULL) { + free_seam_list(seam_array); + seam_array = NULL; + } if (best_choice != NULL) { delete best_choice; delete raw_choice; + best_choice = NULL; + raw_choice = NULL; } if (ep_choice != NULL) { delete ep_choice; + ep_choice = NULL; } } +// Inserts the new_word and a corresponding WERD_RES before the current +// position. The simple fields of the WERD_RES are copied from clone_res and +// the resulting WERD_RES is returned for further setup with best_choice etc. +WERD_RES* PAGE_RES_IT::InsertCloneWord(const WERD_RES& clone_res, + WERD* new_word) { + // Insert new_word into the ROW. 
+ WERD_IT w_it(row()->row->word_list()); + for (w_it.mark_cycle_pt(); !w_it.cycled_list(); w_it.forward()) { + WERD* word = w_it.data(); + if (word == word_res->word) + break; + } + ASSERT_HOST(!w_it.cycled_list()); + w_it.add_before_then_move(new_word); + // Make a WERD_RES for the new_word. + WERD_RES* new_res = new WERD_RES(new_word); + new_res->CopySimpleFields(clone_res); + // Insert into the appropriate place in the ROW_RES. + WERD_RES_IT wr_it(&row()->word_res_list); + for (wr_it.mark_cycle_pt(); !wr_it.cycled_list(); wr_it.forward()) { + WERD_RES* word = wr_it.data(); + if (word == word_res) + break; + } + ASSERT_HOST(!wr_it.cycled_list()); + wr_it.add_before_then_move(new_res); + if (wr_it.at_first()) { + // This is the new first word, so reset the member iterator so it + // detects the cycled_list state correctly. + ResetWordIterator(); + } + return new_res; +} + +// Deletes the current WERD_RES and its underlying WERD. +void PAGE_RES_IT::DeleteCurrentWord() { + // Check that this word is as we expect. part_of_combos are NEVER iterated + // by the normal iterator, so we should never be trying to delete them. + ASSERT_HOST(!word_res->part_of_combo); + if (!word_res->combination) { + // Combinations own their own word, so we won't find the word on the + // row's word_list, but it is legitimate to try to delete them. + // Delete word from the ROW when not a combination. + WERD_IT w_it(row()->row->word_list()); + for (w_it.mark_cycle_pt(); !w_it.cycled_list(); w_it.forward()) { + if (w_it.data() == word_res->word) { + break; + } + } + ASSERT_HOST(!w_it.cycled_list()); + delete w_it.extract(); + } + // Remove the WERD_RES for the new_word. + // Remove the WORD_RES from the ROW_RES. 
+ WERD_RES_IT wr_it(&row()->word_res_list); + for (wr_it.mark_cycle_pt(); !wr_it.cycled_list(); wr_it.forward()) { + if (wr_it.data() == word_res) { + word_res = NULL; + break; + } + } + ASSERT_HOST(!wr_it.cycled_list()); + delete wr_it.extract(); + ResetWordIterator(); +} + /************************************************************************* * PAGE_RES_IT::restart_page * * Set things up at the start of the page *************************************************************************/ -WERD_RES *PAGE_RES_IT::restart_page() { - block_res_it.set_to_list (&page_res->block_res_list); - block_res_it.mark_cycle_pt (); +WERD_RES *PAGE_RES_IT::start_page(bool empty_ok) { + block_res_it.set_to_list(&page_res->block_res_list); + block_res_it.mark_cycle_pt(); prev_block_res = NULL; prev_row_res = NULL; prev_word_res = NULL; @@ -221,23 +548,47 @@ WERD_RES *PAGE_RES_IT::restart_page() { next_block_res = NULL; next_row_res = NULL; next_word_res = NULL; - internal_forward(TRUE); - return internal_forward (FALSE); + internal_forward(true, empty_ok); + return internal_forward(false, empty_ok); } +// Recovers from operations on the current word, such as in InsertCloneWord +// and DeleteCurrentWord. +// Resets the word_res_it so that it is one past the next_word_res, as +// it should be after internal_forward. If next_row_res != row_res, +// then the next_word_res is in the next row, so there is no need to do +// anything, since operations on the current word will not have disturbed +// the word_res_it. +void PAGE_RES_IT::ResetWordIterator() { + if (row_res == next_row_res) { + // Reset the member iterator so it can move forward and detect the + // cycled_list state correctly. 
+ word_res_it.move_to_first(); + word_res_it.mark_cycle_pt(); + while (!word_res_it.cycled_list() && word_res_it.data() != next_word_res) + word_res_it.forward(); + ASSERT_HOST(!word_res_it.cycled_list()); + word_res_it.forward(); + } +} /************************************************************************* * PAGE_RES_IT::internal_forward * - * Find the next word on the page. Empty blocks and rows are skipped. + * Find the next word on the page. If empty_ok is true, then non-text blocks + * and text blocks with no text are visited as if they contain a single + * imaginary word in a single imaginary row. (word() and row() both return NULL + * in such a block and the return value is NULL.) + * If empty_ok is false, the old behaviour is maintained. Each real word + * is visited and empty and non-text blocks and rows are skipped. + * new_block is used to initialize the iterators for a new block. * The iterator maintains pointers to block, row and word for the previous, * current and next words. These are correct, regardless of block/row * boundaries. NULL values denote start and end of the page. 
*************************************************************************/ -WERD_RES *PAGE_RES_IT::internal_forward(BOOL8 new_block) { - BOOL8 found_next_word = FALSE; - BOOL8 new_row = FALSE; +WERD_RES *PAGE_RES_IT::internal_forward(bool new_block, bool empty_ok) { + bool new_row = false; prev_block_res = block_res; prev_row_res = row_res; @@ -245,44 +596,50 @@ WERD_RES *PAGE_RES_IT::internal_forward(BOOL8 new_block) { block_res = next_block_res; row_res = next_row_res; word_res = next_word_res; + next_block_res = NULL; + next_row_res = NULL; + next_word_res = NULL; - while (!found_next_word && !block_res_it.cycled_list ()) { + while (!block_res_it.cycled_list()) { if (new_block) { - new_block = FALSE; - row_res_it.set_to_list (&block_res_it.data ()->row_res_list); - row_res_it.mark_cycle_pt (); - new_row = TRUE; + new_block = false; + row_res_it.set_to_list(&block_res_it.data()->row_res_list); + row_res_it.mark_cycle_pt(); + if (row_res_it.empty() && empty_ok) { + next_block_res = block_res_it.data(); + break; + } + new_row = true; } - while (!found_next_word && !row_res_it.cycled_list ()) { + while (!row_res_it.cycled_list()) { if (new_row) { - new_row = FALSE; - word_res_it.set_to_list (&row_res_it.data ()->word_res_list); - word_res_it.mark_cycle_pt (); + new_row = false; + word_res_it.set_to_list(&row_res_it.data()->word_res_list); + word_res_it.mark_cycle_pt(); } - while (!found_next_word && !word_res_it.cycled_list ()) { - next_block_res = block_res_it.data (); - next_row_res = row_res_it.data (); - next_word_res = word_res_it.data (); - found_next_word = TRUE; - do { - word_res_it.forward (); - } - while (word_res_it.data ()->part_of_combo); + // Skip any part_of_combo words. 
+ while (!word_res_it.cycled_list() && word_res_it.data()->part_of_combo) + word_res_it.forward(); + if (!word_res_it.cycled_list()) { + next_block_res = block_res_it.data(); + next_row_res = row_res_it.data(); + next_word_res = word_res_it.data(); + word_res_it.forward(); + goto foundword; } - if (!found_next_word) { //end of row reached - row_res_it.forward (); - new_row = TRUE; - } - } - if (!found_next_word) { //end of block reached - block_res_it.forward (); - new_block = TRUE; + // end of row reached + row_res_it.forward(); + new_row = true; } + // end of block reached + block_res_it.forward(); + new_block = true; } - if (!found_next_word) { //end of page reached - next_block_res = NULL; - next_row_res = NULL; - next_word_res = NULL; + foundword: + // Update prev_word_best_choice pointer. + if (page_res != NULL && page_res->prev_word_best_choice != NULL) { + *page_res->prev_word_best_choice = + (new_block || prev_word_res == NULL) ? NULL : prev_word_res->best_choice; } return word_res; } @@ -291,23 +648,14 @@ WERD_RES *PAGE_RES_IT::internal_forward(BOOL8 new_block) { /************************************************************************* * PAGE_RES_IT::forward_block * - * Move to the first word of the next block - * Can be followed by subsequent calls to forward() BUT at the first word in - * the block, the prev block, row and word are all NULL. + * Move to the beginning of the next block, allowing empty blocks. 
*************************************************************************/ WERD_RES *PAGE_RES_IT::forward_block() { - if (block_res == next_block_res) { - block_res_it.forward ();; - block_res = NULL; - row_res = NULL; - word_res = NULL; - next_block_res = NULL; - next_row_res = NULL; - next_word_res = NULL; - internal_forward(TRUE); + while (block_res == next_block_res) { + internal_forward(false, true); } - return internal_forward (FALSE); + return internal_forward(false, true); } diff --git a/ccstruct/pageres.h b/ccstruct/pageres.h index d1cf4b17a1..bc82a0afb5 100644 --- a/ccstruct/pageres.h +++ b/ccstruct/pageres.h @@ -19,14 +19,16 @@ #ifndef PAGERES_H #define PAGERES_H -#include "elst.h" -#include "ocrblock.h" -#include "ocrrow.h" -#include "werd.h" -#include "ratngs.h" -#include "rejctmap.h" -#include "notdll.h" -#include "notdll.h" +#include "blobs.h" +#include "boxword.h" +#include "elst.h" +#include "genericvector.h" +#include "ocrblock.h" +#include "ocrrow.h" +#include "ratngs.h" +#include "rejctmap.h" +#include "seam.h" +#include "werd.h" /* Forward declarations */ @@ -40,87 +42,77 @@ ELISTIZEH (ROW_RES) class WERD_RES; ELISTIZEH (WERD_RES) + /************************************************************************* * PAGE_RES - Page results *************************************************************************/ -class PAGE_RES //page result -{ - public: - inT32 char_count; - inT32 rej_count; - BLOCK_RES_LIST block_res_list; - BOOL8 rejected; - - PAGE_RES() { - } //empty constructor - - PAGE_RES( //simple constructor - BLOCK_LIST *block_list); //real blocks - - ~PAGE_RES () { //destructor - } +class PAGE_RES { // page result + public: + inT32 char_count; + inT32 rej_count; + BLOCK_RES_LIST block_res_list; + BOOL8 rejected; + // Updated every time PAGE_RES_IT iterating on this PAGE_RES moves to + // the next word. This pointer is not owned by PAGE_RES class. 
+ WERD_CHOICE **prev_word_best_choice; + + PAGE_RES() { + } // empty constructor + + PAGE_RES(BLOCK_LIST *block_list, // real blocks + WERD_CHOICE **prev_word_best_choice_ptr); + + ~PAGE_RES () { // destructor + } }; /************************************************************************* * BLOCK_RES - Block results *************************************************************************/ -class BLOCK_RES:public ELIST_LINK - //page block result -{ - public: - BLOCK * block; //real block - inT32 char_count; //chars in block - inT32 rej_count; //rejected chars - inT16 font_class; // - inT16 row_count; - float x_height; - BOOL8 font_assigned; // block already - // processed - BOOL8 bold; // all bold - BOOL8 italic; // all italic - - ROW_RES_LIST row_res_list; - - BLOCK_RES() { - } //empty constructor - - BLOCK_RES( //simple constructor - BLOCK *the_block); //real block - - ~BLOCK_RES () { //destructor - } +class BLOCK_RES:public ELIST_LINK { + public: + BLOCK * block; // real block + inT32 char_count; // chars in block + inT32 rej_count; // rejected chars + inT16 font_class; // + inT16 row_count; + float x_height; + BOOL8 font_assigned; // block already + // processed + BOOL8 bold; // all bold + BOOL8 italic; // all italic + + ROW_RES_LIST row_res_list; + + BLOCK_RES() { + } // empty constructor + + BLOCK_RES(BLOCK *the_block); // real block + + ~BLOCK_RES () { // destructor + } }; /************************************************************************* * ROW_RES - Row results *************************************************************************/ -class ROW_RES:public ELIST_LINK //row result -{ - public: - ROW * row; //real row - inT32 char_count; //chars in block - inT32 rej_count; //rejected chars - inT32 whole_word_rej_count; //rejs in total rej wds - WERD_RES_LIST word_res_list; - float font_class_score; - inT16 font_class; // - inT32 italic; - inT32 bold; - inT8 font1; //primary font - inT8 font1_count; //no of voters - inT8 font2; //secondary font - 
inT8 font2_count; //no of voters - - ROW_RES() { - } //empty constructor - - ROW_RES( //simple constructor - ROW *the_row); //real row - - ~ROW_RES () { //destructor - } +class ROW_RES:public ELIST_LINK { + public: + ROW * row; // real row + inT32 char_count; // chars in block + inT32 rej_count; // rejected chars + inT32 whole_word_rej_count; // rejs in total rej wds + WERD_RES_LIST word_res_list; + + ROW_RES() { + } // empty constructor + + ROW_RES(bool right_to_left, ROW *the_row); // real row + + ~ROW_RES() { // destructor + } }; /************************************************************************* @@ -134,180 +126,293 @@ enum CRUNCH_MODE CR_DELETE }; -class WERD_RES:public ELIST_LINK //word result -{ - public: - WERD * word; //non-bln real word - WERD *outword; //bln best choice - //segmentation - DENORM denorm; //for use on outword - WERD_CHOICE *best_choice; //tess output - WERD_CHOICE *raw_choice; //top choice permuter - WERD_CHOICE *ep_choice; //ep text - REJMAP reject_map; //best_choice rejects - BOOL8 tess_failed; - /* - If tess_failed is TRUE, one of the following tests failed when Tess - returned: - - The outword blob list was not the same length as the best_choice string; - - The best_choice string contained ALL blanks; - - The best_choice string was zero length - */ - BOOL8 tess_accepted; //Tess thinks its ok? - BOOL8 tess_would_adapt; //Tess would adapt? - BOOL8 done; //ready for output? - inT8 italic; - inT8 bold; - inT8 font1; //primary font - inT8 font1_count; //no of voters - inT8 font2; //secondary font - inT8 font2_count; //no of voters - CRUNCH_MODE unlv_crunch_mode; - float x_height; //Post match estimate - float caps_height; //Post match estimate - BOOL8 guessed_x_ht; - BOOL8 guessed_caps_ht; - /* - To deal with fuzzy spaces we need to be able to combine "words" to form - combinations when we suspect that the gap is a non-space. 
The (new) text - ord code generates separate words for EVERY fuzzy gap - flags in the word - indicate whether the gap is below the threshold (fuzzy kern) and is thus - NOT a real word break by default, or above the threshold (fuzzy space) and - this is a real word break by default. - - The WERD_RES list contains all these words PLUS "combination" words built - out of (copies of) the words split by fuzzy kerns. The separate parts have - their "part_of_combo" flag set true and should be IGNORED on a default - reading of the list. - - Combination words are FOLLOWED by the sequence of part_of_combo words - which they combine. - */ - BOOL8 combination; //of two fuzzy gap wds - BOOL8 part_of_combo; //part of a combo - BOOL8 reject_spaces; //Reject spacing? - - WERD_RES() { - } //empty constructor - - WERD_RES( //simple constructor - WERD *the_word) { //real word - word = the_word; - outword = NULL; - best_choice = NULL; - raw_choice = NULL; - ep_choice = NULL; - tess_failed = FALSE; - tess_accepted = FALSE; - tess_would_adapt = FALSE; - done = FALSE; - unlv_crunch_mode = CR_NONE; - italic = FALSE; - bold = FALSE; - font1 = -1; - font1_count = 0; - font2 = -1; - font2_count = 0; - x_height = 0.0; - caps_height = 0.0; - guessed_x_ht = TRUE; - guessed_caps_ht = TRUE; - combination = FALSE; - part_of_combo = FALSE; - reject_spaces = FALSE; - } - WERD_RES(const WERD_RES &source) { - *this = source; //see operator= - } - - ~WERD_RES (); //destructor - - WERD_RES& operator=(const WERD_RES& source); //from this - - static WERD_RES* deep_copy(const WERD_RES* src) { - return new WERD_RES(*src); - } - - void copy_on( //copy blobs onto word - WERD_RES *word_res) { //from this word - word->set_flag (W_EOL, word_res->word->flag (W_EOL)); - word->copy_on (word_res->word); - } +// WERD_RES is a collection of publicly accessible members that gathers +// information about a word result. +class WERD_RES : public ELIST_LINK { + public: + // Which word is which? 
+ // There are 3 coordinate spaces in use here: a possibly rotated pixel space, + // the original image coordinate space, and the BLN space in which the + // baseline of a word is at kBlnBaselineOffset, the xheight is kBlnXHeight, + // and the x-middle of the word is at 0. + // In the rotated pixel space, coordinates correspond to the input image, + // but may be rotated about the origin by a multiple of 90 degrees, + // and may therefore be negative. + // In any case a rotation by denorm.block()->re_rotation() will take them + // back to the original image. + // The other differences between words all represent different stages of + // processing. + // + // The word is the input C_BLOBs in the rotated pixel space. + // word is NOT owned by the WERD_RES unless combination is true. + // All the other word pointers ARE owned by the WERD_RES. + WERD* word; // Input C_BLOB word. + // The bln_boxes contains the bounding boxes (only) of the input word, in the + // BLN space. The lengths of word and bln_boxes + // match as they are both before any chopping. + // TODO(rays) determine if docqual does anything useful and delete bln_boxes + // if it doesn't. + tesseract::BoxWord* bln_boxes; // BLN input bounding boxes. + // The chopped_word is also in BLN space, and represents the fully chopped + // character fragments that make up the word. + // The length of chopped_word matches length of seam_array + 1 (if set). + TWERD* chopped_word; // BLN chopped fragments output. + SEAMS seam_array; // Seams matching chopped_word. + // The rebuild_word is also in BLN space, but represents the final best + // segmentation of the word. Its length is therefore the same as box_word. + TWERD* rebuild_word; // BLN best segmented word. + // The denorm provides the transformation to get back to the rotated image + // coords from the chopped_word/rebuild_word BLN coords. + DENORM denorm; // For use on chopped_word. + // The box_word is in the original image coordinate space. 
It is the + // bounding boxes of the rebuild_word, after denormalization. + // The length of box_word matches rebuild_word, best_state (if set) and + // correct_text (if set), as well as best_choice and represents the + // number of classified units in the output. + tesseract::BoxWord* box_word; // Denormalized output boxes. + // The best_state stores the relationship between chopped_word and + // rebuild_word. Each blob[i] in rebuild_word is composed of best_state[i] + // adjacent blobs in chopped_word. The seams in seam_array are hidden + // within a rebuild_word blob and revealed between them. + GenericVector best_state; // Number of blobs in each best blob. + // The correct_text is used during training and adaption to carry the + // text to the training system without the need for a unicharset. There + // is one entry in the vector for each blob in rebuild_word and box_word. + GenericVector correct_text; + WERD_CHOICE *best_choice; // tess output + WERD_CHOICE *raw_choice; // top choice permuter + WERD_CHOICE *ep_choice; // ep text TODO(rays) delete this. + REJMAP reject_map; // best_choice rejects + BOOL8 tess_failed; + /* + If tess_failed is TRUE, one of the following tests failed when Tess + returned: + - The outword blob list was not the same length as the best_choice string; + - The best_choice string contained ALL blanks; + - The best_choice string was zero length + */ + BOOL8 tess_accepted; //Tess thinks its ok? + BOOL8 tess_would_adapt; //Tess would adapt? + BOOL8 done; //ready for output? + inT8 italic; + inT8 bold; + inT8 font1; //primary font + inT8 font1_count; //no of voters + inT8 font2; //secondary font + inT8 font2_count; //no of voters + CRUNCH_MODE unlv_crunch_mode; + float x_height; //Post match estimate + float caps_height; //Post match estimate + BOOL8 guessed_x_ht; + BOOL8 guessed_caps_ht; + /* + To deal with fuzzy spaces we need to be able to combine "words" to form + combinations when we suspect that the gap is a non-space. 
The (new) text + ord code generates separate words for EVERY fuzzy gap - flags in the word + indicate whether the gap is below the threshold (fuzzy kern) and is thus + NOT a real word break by default, or above the threshold (fuzzy space) and + this is a real word break by default. + + The WERD_RES list contains all these words PLUS "combination" words built + out of (copies of) the words split by fuzzy kerns. The separate parts have + their "part_of_combo" flag set true and should be IGNORED on a default + reading of the list. + + Combination words are FOLLOWED by the sequence of part_of_combo words + which they combine. + */ + BOOL8 combination; //of two fuzzy gap wds + BOOL8 part_of_combo; //part of a combo + BOOL8 reject_spaces; //Reject spacing? + // FontInfo ids for each unichar in best_choice. + GenericVector best_choice_fontinfo_ids; + + WERD_RES() { + InitPointers(); + } + WERD_RES( //simple constructor + WERD *the_word) { //real word + InitPointers(); + word = the_word; + tess_failed = FALSE; + tess_accepted = FALSE; + tess_would_adapt = FALSE; + done = FALSE; + unlv_crunch_mode = CR_NONE; + italic = FALSE; + bold = FALSE; + font1 = -1; + font1_count = 0; + font2 = -1; + font2_count = 0; + x_height = 0.0; + caps_height = 0.0; + guessed_x_ht = TRUE; + guessed_caps_ht = TRUE; + combination = FALSE; + part_of_combo = FALSE; + reject_spaces = FALSE; + } + WERD_RES(const WERD_RES &source) { + InitPointers(); + *this = source; // see operator= + } + + ~WERD_RES(); + void InitPointers(); + void Clear(); + void ClearResults(); + + WERD_RES& operator=(const WERD_RES& source); //from this + + void CopySimpleFields(const WERD_RES& source); + + // Sets up the members used in recognition: + // bln_boxes, chopped_word, seam_array, denorm, best_choice, raw_choice. + // Returns false if the word is empty and sets up fake results. 
+ bool SetupForRecognition(const UNICHARSET& unicharset, + bool numeric_mode, ROW *row, BLOCK* block); + + // Builds the rebuild_word from the chopped_word and the best_state. + void RebuildBestState(); + + // Copies the chopped_word to the rebuild_word, faking a best_state as well. + // Also sets up the output box_word. + void CloneChoppedToRebuild(); + + // Sets/replaces the box_word with one made from the rebuild_word. + void SetupBoxWord(); + + // Classifies the word with some already-calculated BLOB_CHOICEs. + // The choices are an array of blob_count pointers to BLOB_CHOICE, + // providing a single classifier result for each blob. + // The BLOB_CHOICEs are consumed and the word takes ownership. + // The number of blobs in the outword must match blob_count. + void FakeClassifyWord(const UNICHARSET& unicharset, int blob_count, + BLOB_CHOICE** choices); + + // Copies the best_choice strings to the correct_text for adaption/training. + void BestChoiceToCorrectText(const UNICHARSET& unicharset); + + // Merges 2 adjacent blobs in the result if the permanent callback + // class_cb returns other than INVALID_UNICHAR_ID, AND the permanent + // callback box_cb is NULL or returns true, setting the merged blob + // result to the class returned from class_cb. + // Returns true if anything was merged. + bool ConditionalBlobMerge( + const UNICHARSET& unicharset, + TessResultCallback2* class_cb, + TessResultCallback2* box_cb, + BLOB_CHOICE_LIST_CLIST *blob_choices); + + static WERD_RES* deep_copy(const WERD_RES* src) { + return new WERD_RES(*src); + } + + // Copy blobs from word_res onto this word (eliminating spaces between). + // Since this may be called bidirectionally OR both the BOL and EOL flags. 
+ void copy_on(WERD_RES *word_res) { //from this word + word->set_flag(W_BOL, word->flag(W_BOL) || word_res->word->flag(W_BOL)); + word->set_flag(W_EOL, word->flag(W_EOL) || word_res->word->flag(W_EOL)); + word->copy_on(word_res->word); + } }; /************************************************************************* * PAGE_RES_IT - Page results iterator *************************************************************************/ -class PAGE_RES_IT -{ - public: - PAGE_RES * page_res; //page being iterated - - PAGE_RES_IT() { - } //empty contructor - - PAGE_RES_IT( //empty contructor - PAGE_RES *the_page_res) { //page result - page_res = the_page_res; - restart_page(); //ready to scan - } - - WERD_RES *restart_page(); //get ready - - WERD_RES *internal_forward( //get next word - BOOL8 new_block); - - WERD_RES *forward() { //get next word - return internal_forward (FALSE); - } - - WERD_RES *forward_block(); //get first word in - //next non-empty block - WERD_RES *prev_word() { //previous word - return prev_word_res; - } - ROW_RES *prev_row() { //row of prev word - return prev_row_res; - } - BLOCK_RES *prev_block() { //block of prev word - return prev_block_res; - } - WERD_RES *word() { //current word - return word_res; - } - ROW_RES *row() { //row of current word - return row_res; - } - BLOCK_RES *block() { //block of cur. word - return block_res; - } - WERD_RES *next_word() { //next word - return next_word_res; - } - ROW_RES *next_row() { //row of next word - return next_row_res; - } - BLOCK_RES *next_block() { //block of next word - return next_block_res; - } - void rej_stat_word(); //for page/block/row - - private: - WERD_RES * prev_word_res; //previous word - ROW_RES *prev_row_res; //row of prev word - BLOCK_RES *prev_block_res; //block of prev word - - WERD_RES *word_res; //current word - ROW_RES *row_res; //row of current word - BLOCK_RES *block_res; //block of cur. 
word - - WERD_RES *next_word_res; //next word - ROW_RES *next_row_res; //row of next word - BLOCK_RES *next_block_res; //block of next word - - BLOCK_RES_IT block_res_it; //iterators - ROW_RES_IT row_res_it; - WERD_RES_IT word_res_it; +class PAGE_RES_IT { + public: + PAGE_RES * page_res; // page being iterated + + PAGE_RES_IT() { + } // empty contructor + + PAGE_RES_IT(PAGE_RES *the_page_res) { // page result + page_res = the_page_res; + restart_page(); // ready to scan + } + + WERD_RES *restart_page() { + return start_page(false); // Skip empty blocks. + } + WERD_RES *restart_page_with_empties() { + return start_page(true); // Allow empty blocks. + } + WERD_RES *start_page(bool empty_ok); + + // ============ Methods that mutate the underling structures =========== + // Note that these methods will potentially invalidate other PAGE_RES_ITs + // and are intended to be used only while a single PAGE_RES_IT is active. + // This problem needs to be taken into account if these mutation operators + // are ever provided to PageIterator or its subclasses. + + // Inserts the new_word and a corresponding WERD_RES before the current + // position. The simple fields of the WERD_RES are copied from clone_res and + // the resulting WERD_RES is returned for further setup with best_choice etc. + WERD_RES* InsertCloneWord(const WERD_RES& clone_res, WERD* new_word); + + // Deletes the current WERD_RES and its underlying WERD. + void DeleteCurrentWord(); + + WERD_RES *forward() { // Get next word. + return internal_forward(false, false); + } + // Move forward, but allow empty blocks to show as single NULL words. 
+ WERD_RES *forward_with_empties() { + return internal_forward(false, true); + } + + WERD_RES *forward_block(); // get first word in + // next non-empty block + WERD_RES *prev_word() const { // previous word + return prev_word_res; + } + ROW_RES *prev_row() const { // row of prev word + return prev_row_res; + } + BLOCK_RES *prev_block() const { // block of prev word + return prev_block_res; + } + WERD_RES *word() const { // current word + return word_res; + } + ROW_RES *row() const { // row of current word + return row_res; + } + BLOCK_RES *block() const { // block of cur. word + return block_res; + } + WERD_RES *next_word() const { // next word + return next_word_res; + } + ROW_RES *next_row() const { // row of next word + return next_row_res; + } + BLOCK_RES *next_block() const { // block of next word + return next_block_res; + } + void rej_stat_word(); // for page/block/row + + private: + void ResetWordIterator(); + WERD_RES *internal_forward(bool new_block, bool empty_ok); + + WERD_RES * prev_word_res; // previous word + ROW_RES *prev_row_res; // row of prev word + BLOCK_RES *prev_block_res; // block of prev word + + WERD_RES *word_res; // current word + ROW_RES *row_res; // row of current word + BLOCK_RES *block_res; // block of cur. word + + WERD_RES *next_word_res; // next word + ROW_RES *next_row_res; // row of next word + BLOCK_RES *next_block_res; // block of next word + + BLOCK_RES_IT block_res_it; // iterators + ROW_RES_IT row_res_it; + WERD_RES_IT word_res_it; }; #endif diff --git a/ccstruct/pdblock.cpp b/ccstruct/pdblock.cpp index 02a7af8e3b..0b9f490426 100644 --- a/ccstruct/pdblock.cpp +++ b/ccstruct/pdblock.cpp @@ -19,6 +19,7 @@ #include "mfcpch.h" #include +#include "allheaders.h" #include "blckerr.h" #include "pdblock.h" #include "svshowim.h" @@ -127,6 +128,48 @@ void PDBLK::move( // reposition block box.move (vec); } +// Returns a binary Pix mask with a 1 pixel for every pixel within the +// block. 
Rotates the coordinate system by rerotation prior to rendering. +Pix* PDBLK::render_mask(const FCOORD& rerotation) { + TBOX rotated_box(box); + rotated_box.rotate(rerotation); + Pix* pix = pixCreate(rotated_box.width(), rotated_box.height(), 1); + if (hand_poly != NULL) { + // We are going to rotate, so get a deep copy of the points and + // make a new POLY_BLOCK with it. + ICOORDELT_LIST polygon; + polygon.deep_copy(hand_poly->points(), ICOORDELT::deep_copy); + POLY_BLOCK image_block(&polygon, hand_poly->isA()); + image_block.rotate(rerotation); + // Block outline is a polygon, so use a PB_LINE_IT to get the + // rasterized interior. (Runs of interior pixels on a line.) + PB_LINE_IT *lines = new PB_LINE_IT(&image_block); + for (int y = box.bottom(); y < box.top(); ++y) { + ICOORDELT_LIST* segments = lines->get_line(y); + if (!segments->empty()) { + ICOORDELT_IT s_it(segments); + // Each element of segments is a start x and x size of the + // run of interior pixels. + for (s_it.mark_cycle_pt(); !s_it.cycled_list(); s_it.forward()) { + int start = s_it.data()->x(); + int xext = s_it.data()->y(); + // Set the run of pixels to 1. + pixRasterop(pix, start - rotated_box.left(), + rotated_box.height() - 1 - (y - rotated_box.bottom()), + xext, 1, PIX_SET, NULL, 0, 0); + } + } + delete segments; + } + delete lines; + } else { + // Just fill the whole block as there is only a bounding box. + pixRasterop(pix, 0, 0, rotated_box.width(), rotated_box.height(), + PIX_SET, NULL, 0, 0); + } + return pix; +} + /********************************************************************** * PDBLK::plot diff --git a/ccstruct/pdblock.h b/ccstruct/pdblock.h index 5233d558e5..a3fb54ed4f 100644 --- a/ccstruct/pdblock.h +++ b/ccstruct/pdblock.h @@ -1,8 +1,8 @@ /********************************************************************** * File: pdblock.h (Formerly pdblk.h) * Description: Page block class definition. 
- * Author: Ray Smith - * Created: Thu Mar 14 17:32:01 GMT 1991 + * Author: Ray Smith + * Created: Thu Mar 14 17:32:01 GMT 1991 * * (C) Copyright 1991, Hewlett-Packard Ltd. ** Licensed under the Apache License, Version 2.0 (the "License"); @@ -20,6 +20,7 @@ #ifndef PDBLOCK_H #define PDBLOCK_H +#include "clst.h" #include "img.h" #include "strngs.h" #include "polyblk.h" @@ -27,10 +28,11 @@ #include "hpddef.h" //must be last (handpd.dll) class DLLSYM PDBLK; //forward decl +struct Pix; CLISTIZEH (PDBLK) ///page block -class DLLSYM PDBLK +class PDBLK { friend class BLOCK_RECT_IT; //< block iterator @@ -40,19 +42,19 @@ class DLLSYM PDBLK hand_poly = NULL; index_ = 0; } - ///simple constructor + ///simple constructor PDBLK(inT16 xmin, //< bottom left inT16 ymin, inT16 xmax, //< top right inT16 ymax); - ///set vertex lists - ///@param left list of left vertices - ///@param right list of right vertices + ///set vertex lists + ///@param left list of left vertices + ///@param right list of right vertices void set_sides(ICOORDELT_LIST *left, ICOORDELT_LIST *right); - ///destructor + ///destructor ~PDBLK () { if (hand_poly) delete hand_poly; } @@ -60,11 +62,11 @@ class DLLSYM PDBLK POLY_BLOCK *poly_block() { return hand_poly; } - ///set the poly block + ///set the poly block void set_poly_block(POLY_BLOCK *blk) { hand_poly = blk; } - ///get box + ///get box void bounding_box(ICOORD &bottom_left, //bottom left ICOORD &top_right) const { //topright bottom_left = box.botleft (); @@ -82,28 +84,31 @@ class DLLSYM PDBLK index_ = value; } - ///is pt inside block + ///is pt inside block BOOL8 contains(ICOORD pt); - /// reposition block + /// reposition block void move(const ICOORD vec); // by vector - ///draw histogram - ///@param window window to draw in - ///@param serial serial number - ///@param colour colour to draw in + // Returns a binary Pix mask with a 1 pixel for every pixel within the + // block. Rotates the coordinate system by rerotation prior to rendering. 
+ Pix* render_mask(const FCOORD& rerotation); + ///draw histogram + ///@param window window to draw in + ///@param serial serial number + ///@param colour colour to draw in void plot(ScrollView* window, inT32 serial, ScrollView::Color colour); - ///show image - ///@param image image to show - ///@param window window to show in + ///show image + ///@param image image to show + ///@param window window to show in void show(IMAGE *image, ScrollView* window); - ///assignment - ///@param source from this + ///assignment + ///@param source from this PDBLK & operator= (const PDBLK & source); protected: @@ -121,24 +126,25 @@ class DLLSYM BLOCK_RECT_IT //rectangle iterator ///@param blkptr block to iterate BLOCK_RECT_IT(PDBLK *blkptr); + NEWDELETE2 (BLOCK_RECT_IT) ///start (new) block - NEWDELETE2 (BLOCK_RECT_IT) void set_to_block ( + void set_to_block ( PDBLK * blkptr); //block to iterate - ///start iteration + ///start iteration void start_block(); - ///next rectangle + ///next rectangle void forward(); - ///test end + ///test end BOOL8 cycled_rects() { return left_it.cycled_list () && right_it.cycled_list (); } - ///current rectangle - ///@param bleft bottom left - ///@param tright top right + ///current rectangle + ///@param bleft bottom left + ///@param tright top right void bounding_box(ICOORD &bleft, ICOORD &tright) { //bottom left @@ -166,17 +172,18 @@ class DLLSYM BLOCK_LINE_IT block = blkptr; //remember block } + NEWDELETE2 (BLOCK_LINE_IT) ///start (new) block - ///@param blkptr block to start - NEWDELETE2 (BLOCK_LINE_IT) void set_to_block (PDBLK * blkptr) { + ///@param blkptr block to start + void set_to_block (PDBLK * blkptr) { block = blkptr; //remember block //set iterator rect_it.set_to_block (blkptr); } - ///get a line - ///@param y line to get - ///@param xext output extent + ///get a line + ///@param y line to get + ///@param xext output extent inT16 get_line(inT16 y, inT16 &xext); diff --git a/ccstruct/points.h b/ccstruct/points.h index 
31e4fc9337..6284c126ba 100644 --- a/ccstruct/points.h +++ b/ccstruct/points.h @@ -1,8 +1,8 @@ /********************************************************************** * File: points.h (Formerly coords.h) * Description: Coordinate class definitions. - * Author: Ray Smith - * Created: Fri Mar 15 08:32:45 GMT 1991 + * Author: Ray Smith + * Created: Fri Mar 15 08:32:45 GMT 1991 * * (C) Copyright 1991, Hewlett-Packard Ltd. ** Licensed under the Apache License, Version 2.0 (the "License"); @@ -23,12 +23,11 @@ #include #include #include "elst.h" -//#include "ipeerr.h" class FCOORD; ///integer coordinate -class DLLSYM ICOORD +class ICOORD { friend class FCOORD; @@ -37,15 +36,15 @@ class DLLSYM ICOORD ICOORD() { xcoord = ycoord = 0; //default zero } - ///constructor - ///@param xin x value - ///@param yin y value + ///constructor + ///@param xin x value + ///@param yin y value ICOORD(inT16 xin, inT16 yin) { xcoord = xin; ycoord = yin; } - ///destructor + ///destructor ~ICOORD () { } @@ -54,16 +53,16 @@ class DLLSYM ICOORD { return xcoord; } - ///access_function + ///access_function inT16 y() const { return ycoord; } - ///rewrite function + ///rewrite function void set_x(inT16 xin) { xcoord = xin; //write new value } - ///rewrite function + ///rewrite function void set_y(inT16 yin) { //value to set ycoord = yin; } @@ -71,17 +70,17 @@ class DLLSYM ICOORD /// Set from the given x,y, shrinking the vector to fit if needed. 
void set_with_shrink(int x, int y); - ///find sq length + ///find sq length float sqlength() const { return (float) (xcoord * xcoord + ycoord * ycoord); } - ///find length + ///find length float length() const { return (float) sqrt (sqlength ()); } - ///sq dist between pts + ///sq dist between pts float pt_to_pt_sqdist(const ICOORD &pt) const { ICOORD gap; @@ -90,55 +89,55 @@ class DLLSYM ICOORD return gap.sqlength (); } - ///Distance between pts + ///Distance between pts float pt_to_pt_dist(const ICOORD &pt) const { return (float) sqrt (pt_to_pt_sqdist (pt)); } - ///find angle + ///find angle float angle() const { return (float) atan2 ((double) ycoord, (double) xcoord); } - ///test equality + ///test equality BOOL8 operator== (const ICOORD & other) { return xcoord == other.xcoord && ycoord == other.ycoord; } - ///test inequality + ///test inequality BOOL8 operator!= (const ICOORD & other) { return xcoord != other.xcoord || ycoord != other.ycoord; } - ///rotate 90 deg anti + ///rotate 90 deg anti friend ICOORD operator! 
(const ICOORD &); ///unary minus - friend ICOORD operator- (const ICOORD &); - ///add - friend ICOORD operator+ (const ICOORD &, const ICOORD &); - ///add + friend ICOORD operator- (const ICOORD &); + ///add + friend ICOORD operator+ (const ICOORD &, const ICOORD &); + ///add friend ICOORD & operator+= (ICOORD &, const ICOORD &); ///subtract - friend ICOORD operator- (const ICOORD &, const ICOORD &); + friend ICOORD operator- (const ICOORD &, const ICOORD &); ///subtract friend ICOORD & operator-= (ICOORD &, const ICOORD &); - ///scalar product - friend inT32 operator% (const ICOORD &, const ICOORD &); - ///cross product + ///scalar product + friend inT32 operator% (const ICOORD &, const ICOORD &); + ///cross product friend inT32 operator *(const ICOORD &, const ICOORD &); - ///multiply + ///multiply friend ICOORD operator *(const ICOORD &, inT16); - ///multiply + ///multiply friend ICOORD operator *(inT16, const ICOORD &); - ///multiply + ///multiply friend ICOORD & operator*= (ICOORD &, inT16); ///divide friend ICOORD operator/ (const ICOORD &, inT16); ///divide friend ICOORD & operator/= (ICOORD &, inT16); - ///rotate - ///@param vec by vector + ///rotate + ///@param vec by vector void rotate(const FCOORD& vec); /// Setup for iterating over the pixels in a vector by the well-known @@ -149,9 +148,9 @@ class DLLSYM ICOORD void setup_render(ICOORD* major_step, ICOORD* minor_step, int* major, int* minor) const; - ///serialise to ascii + ///serialise to ascii void serialise_asc(FILE *f); - ///serialise from ascii + ///serialise from ascii void de_serialise_asc(FILE *f); protected: @@ -166,12 +165,12 @@ class DLLSYM ICOORDELT:public ELIST_LINK, public ICOORD ///empty constructor ICOORDELT() { } - ///constructor from ICOORD + ///constructor from ICOORD ICOORDELT (ICOORD icoord):ICOORD (icoord) { } - ///constructor - ///@param xin x value - ///@param yin y value + ///constructor + ///@param xin x value + ///@param yin y value ICOORDELT(inT16 xin, inT16 yin) { xcoord = 
xin; @@ -181,15 +180,15 @@ class DLLSYM ICOORDELT:public ELIST_LINK, public ICOORD /* Note that prep_serialise() dump() and de_dump() dont need to do anything more than terminate recursion. */ - ///set ptrs to counts + ///set ptrs to counts void prep_serialise() const { } - ///write external bits + ///write external bits void dump(FILE *) const { } - ///read external bits + ///read external bits void de_dump(FILE *) { } @@ -202,9 +201,9 @@ class DLLSYM ICOORDELT:public ELIST_LINK, public ICOORD return elt; } - ///serialise to ascii + ///serialise to ascii void serialise_asc(FILE * f); - ///deserialise from ascii + ///deserialise from ascii void de_serialise_asc(FILE *f); }; @@ -216,9 +215,9 @@ class DLLSYM FCOORD ///empty constructor FCOORD() { } - ///constructor - ///@param xvalue x value - ///@param yvalue y value + ///constructor + ///@param xvalue x value + ///@param yvalue y value FCOORD(float xvalue, float yvalue) { xcoord = xvalue; //set coords @@ -236,26 +235,26 @@ class DLLSYM FCOORD float y() const { return ycoord; } - ///rewrite function + ///rewrite function void set_x(float xin) { xcoord = xin; //write new value } - ///rewrite function + ///rewrite function void set_y(float yin) { //value to set ycoord = yin; } - ///find sq length + ///find sq length float sqlength() const { return xcoord * xcoord + ycoord * ycoord; } - ///find length + ///find length float length() const { return (float) sqrt (sqlength ()); } - ///sq dist between pts + ///sq dist between pts float pt_to_pt_sqdist(const FCOORD &pt) const { FCOORD gap; @@ -264,24 +263,24 @@ class DLLSYM FCOORD return gap.sqlength (); } - ///Distance between pts + ///Distance between pts float pt_to_pt_dist(const FCOORD &pt) const { return (float) sqrt (pt_to_pt_sqdist (pt)); } - ///find angle + ///find angle float angle() const { return (float) atan2 (ycoord, xcoord); } - ///Convert to unit vec + ///Convert to unit vec bool normalise(); - ///test equality + ///test equality BOOL8 operator== (const 
FCOORD & other) { return xcoord == other.xcoord && ycoord == other.ycoord; } - ///test inequality + ///test inequality BOOL8 operator!= (const FCOORD & other) { return xcoord != other.xcoord || ycoord != other.ycoord; } @@ -302,7 +301,7 @@ class DLLSYM FCOORD ///cross product friend float operator *(const FCOORD &, const FCOORD &); ///multiply - friend FCOORD operator *(const FCOORD &, float); + friend FCOORD operator *(const FCOORD &, float); ///multiply friend FCOORD operator *(float, const FCOORD &); @@ -310,8 +309,8 @@ class DLLSYM FCOORD friend FCOORD & operator*= (FCOORD &, float); ///divide friend FCOORD operator/ (const FCOORD &, float); - ///rotate - ///@param vec by vector + ///rotate + ///@param vec by vector void rotate(const FCOORD vec); ///divide friend FCOORD & operator/= (FCOORD &, float); diff --git a/ccstruct/polyaprx.cpp b/ccstruct/polyaprx.cpp index 67d0a0b5c7..e2f671474a 100644 --- a/ccstruct/polyaprx.cpp +++ b/ccstruct/polyaprx.cpp @@ -24,7 +24,7 @@ #endif #define FASTEDGELENGTH 256 #include "polyaprx.h" -#include "varable.h" +#include "params.h" #include "tprintf.h" #define EXTERN @@ -33,41 +33,33 @@ EXTERN BOOL_VAR (poly_debug, FALSE, "Debug old poly"); EXTERN BOOL_VAR (poly_wide_objects_better, TRUE, "More accurate approx on wide things"); -static int par1, par2; -#define CONVEX 1 /*OUTLINE point is convex */ -#define CONCAVE 2 /*used and set only in edges */ #define FIXED 4 /*OUTLINE point is fixed */ -#define ONHULL 8 /*on convex hull */ #define RUNLENGTH 1 /*length of run */ #define DIR 2 /*direction of run */ -#define CORRECTION 3 /*correction of run */ -//#define MAXSHORT 32767 /*max value of short*/ #define FLAGS 0 #define fixed_dist 20 //really an int_variable #define approx_dist 15 //really an int_variable -#define point_diff(p,p1,p2) (p).x = (p1).x - (p2).x ; (p).y = (p1).y - (p2).y -#define CROSS(a,b) ((a).x * (b).y - (a).y * (b).x) -#define LENGTH(a) ((a).x * (a).x + (a).y * (a).y) +const int par1 = 4500 / (approx_dist * 
approx_dist); +const int par2 = 6750 / (approx_dist * approx_dist); -#define DISTANCE(a,b) (((b).x-(a).x) * ((b).x-(a).x) \ - + ((b).y-(a).y) * ((b).y-(a).y)) /********************************************************************** * tesspoly_outline * - * Approximate an outline from c form using the old tess algorithm. + * Approximate an outline from chain codes form using the old tess algorithm. **********************************************************************/ -OUTLINE *tesspoly_outline( //old approximation - C_OUTLINE *c_outline, //input - float //xheight - ) { +#ifndef NO_PBLOB_POLY +// TODO(rays) This code is scheduled for deletion, but first all dependencies +// have to be removed or rewritten. NO_BLOB_POLY is used for finding the +// dependencies. +OUTLINE *tesspoly_outline(C_OUTLINE *c_outline) { EDGEPT *edgept; //converted steps EDGEPT *startpt; //start of outline TBOX loop_box; //bounding box @@ -94,11 +86,11 @@ OUTLINE *tesspoly_outline( //old approximation edgept = poly2 (edgepts, area);/*2nd approximation */ startpt = edgept; do { - pos = FCOORD (edgept->pos.x, edgept->pos.y); - vec = FCOORD (edgept->vec.x, edgept->vec.y); - polypt = new POLYPT (pos, vec); + pos = FCOORD(edgept->pos.x, edgept->pos.y); + vec = FCOORD(edgept->vec.x, edgept->vec.y); + polypt = new POLYPT(pos, vec); //add to list - poly_it.add_after_then_move (polypt); + poly_it.add_after_then_move(polypt); edgept = edgept->next; } while (edgept != startpt); @@ -109,6 +101,50 @@ OUTLINE *tesspoly_outline( //old approximation else return new OUTLINE(&poly_it); } +#endif + +TESSLINE* ApproximateOutline(C_OUTLINE* c_outline) { + EDGEPT *edgept; // converted steps + TBOX loop_box; // bounding box + inT32 area; // loop area + EDGEPT stack_edgepts[FASTEDGELENGTH]; // converted path + EDGEPT* edgepts = stack_edgepts; + + // Use heap memory if the stack buffer is not big enough. 
+ if (c_outline->pathlength() > FASTEDGELENGTH) + edgepts = new EDGEPT[c_outline->pathlength()]; + + loop_box = c_outline->bounding_box(); + area = loop_box.height(); + if (!poly_wide_objects_better && loop_box.width() > area) + area = loop_box.width(); + area *= area; + edgept = edgesteps_to_edgepts(c_outline, edgepts); + fix2(edgepts, area); + edgept = poly2 (edgepts, area); // 2nd approximation. + EDGEPT* startpt = edgept; + EDGEPT* result = NULL; + EDGEPT* prev_result = NULL; + do { + EDGEPT* new_pt = new EDGEPT; + new_pt->pos = edgept->pos; + new_pt->prev = prev_result; + if (prev_result == NULL) { + result = new_pt; + } else { + prev_result->next = new_pt; + new_pt->prev = prev_result; + } + prev_result = new_pt; + edgept = edgept->next; + } + while (edgept != startpt); + prev_result->next = result; + result->prev = prev_result; + if (edgepts != stack_edgepts) + delete [] edgepts; + return TESSLINE::BuildFromOutlineList(result); +} /********************************************************************** @@ -349,6 +385,11 @@ void fix2( //polygonal approx break; //already too few point_diff (d12vec, edgefix1->pos, edgefix2->pos); d12 = LENGTH (d12vec); + // TODO(rays) investigate this change: + // Only unfix a point if it is part of a low-curvature section + // of outline and the total angle change of the outlines is + // less than 90 degrees, ie the scalar product is positive. 
+ // if (d12 <= gapmin && SCALAR(edgefix0->vec, edgefix2->vec) > 0) { if (d12 <= gapmin) { point_diff (d01vec, edgefix0->pos, edgefix1->pos); d01 = LENGTH (d01vec); @@ -357,16 +398,10 @@ void fix2( //polygonal approx if (d01 > d23) { edgefix2->flags[FLAGS] &= ~FIXED; fixed_count--; - /* if ( plots[EDGE] & PATHS ) - mark(edgefd,edgefix2->pos.x,edgefix2->pos.y,PLUS); - */ } else { edgefix1->flags[FLAGS] &= ~FIXED; fixed_count--; - /* if ( plots[EDGE] & PATHS ) - mark(edgefd,edgefix1->pos.x,edgefix1->pos.y,PLUS); - */ edgefix1 = edgefix2; } } @@ -407,11 +442,6 @@ EDGEPT *poly2( //second poly if (area < 1200) area = 1200; /*minimum value */ - /*1200(4) */ - par1 = 4500 / (approx_dist * approx_dist); - /*1200(6) */ - par2 = 6750 / (approx_dist * approx_dist); - loopstart = NULL; /*not found it yet */ edgept = startpt; /*start of loop */ diff --git a/ccstruct/polyaprx.h b/ccstruct/polyaprx.h index 6e6feaef57..c7e261da2e 100644 --- a/ccstruct/polyaprx.h +++ b/ccstruct/polyaprx.h @@ -20,14 +20,13 @@ #ifndef POLYAPRX_H #define POLYAPRX_H -#include "tessclas.h" +#include "blobs.h" #include "poutline.h" #include "coutln.h" -OUTLINE *tesspoly_outline( //old approximation - C_OUTLINE *c_outline, //input - float //xheight - ); +// convert a chain-coded input to the old OUTLINE approximation +OUTLINE *tesspoly_outline(C_OUTLINE *c_outline); +TESSLINE* ApproximateOutline(C_OUTLINE *c_outline); EDGEPT *edgesteps_to_edgepts ( //convert outline C_OUTLINE * c_outline, //input EDGEPT edgepts[] //output is array @@ -44,8 +43,4 @@ void cutline( //recursive refine EDGEPT *last, int area /*area of object */ ); -#define fixed_dist 20 //really an int_variable -#define point_diff(p,p1,p2) (p).x = (p1).x - (p2).x ; (p).y = (p1).y - (p2).y -#define CROSS(a,b) ((a).x * (b).y - (a).y * (b).x) -#define LENGTH(a) ((a).x * (a).x + (a).y * (a).y) #endif diff --git a/ccstruct/polyblk.cpp b/ccstruct/polyblk.cpp index e071698fd0..a0ebe96b05 100644 --- a/ccstruct/polyblk.cpp +++ b/ccstruct/polyblk.cpp @@ 
-46,6 +46,19 @@ POLY_BLOCK::POLY_BLOCK(ICOORDELT_LIST *points, PolyBlockType t) { type = t; } +// Initialize from box coordinates. +POLY_BLOCK::POLY_BLOCK(const TBOX& box, PolyBlockType t) { + vertices.clear(); + ICOORDELT_IT v = &vertices; + v.move_to_first(); + v.add_to_end(new ICOORDELT(box.left(), box.top())); + v.add_to_end(new ICOORDELT(box.left(), box.top() + box.height())); + v.add_to_end(new ICOORDELT(box.left() + box.width(), + box.top() + box.height())); + v.add_to_end(new ICOORDELT(box.left(), box.top() + box.height())); + compute_bb(); + type = t; +} /** * @name POLY_BLOCK::compute_bb @@ -196,7 +209,7 @@ void POLY_BLOCK::rotate(FCOORD rotation) { * POLY_BLOCK::move * * Move the POLY_BLOCK. - * @param shift cos, sin of angle + * @param shift x,y translation vector */ void POLY_BLOCK::move(ICOORD shift) { @@ -393,24 +406,24 @@ void POLY_BLOCK::de_serialise_asc(FILE *f) { /// Returns a color to draw the given type. ScrollView::Color POLY_BLOCK::ColorForPolyBlockType(PolyBlockType type) { + // Keep kPBColors in sync with PolyBlockType. const ScrollView::Color kPBColors[PT_COUNT] = { - ScrollView::WHITE, - ScrollView::BLUE, - ScrollView::CYAN, - ScrollView::MEDIUM_BLUE, - ScrollView::MAGENTA, - ScrollView::YELLOW, - ScrollView::RED, - ScrollView::MAROON, - ScrollView::ORANGE, - ScrollView::GREEN, - ScrollView::LIME_GREEN, - ScrollView::DARK_GREEN, - ScrollView::GREY + ScrollView::WHITE, // Type is not yet known. Keep as the 1st element. + ScrollView::BLUE, // Text that lives inside a column. + ScrollView::CYAN, // Text that spans more than one column. + ScrollView::MEDIUM_BLUE, // Text that is in a cross-column pull-out region. + ScrollView::MAGENTA, // Partition belonging to a table region. + ScrollView::GREEN, // Text-line runs vertically. + ScrollView::LIGHT_BLUE, // Text that belongs to an image. + ScrollView::RED, // Image that lives inside a column. + ScrollView::YELLOW, // Image that spans more than one column. 
+ ScrollView::ORANGE, // Image in a cross-column pull-out region. + ScrollView::BROWN, // Horizontal Line. + ScrollView::DARK_GREEN, // Vertical Line. + ScrollView::GREY // Lies outside of any column. }; if (type >= 0 && type < PT_COUNT) { return kPBColors[type]; } return ScrollView::WHITE; } - diff --git a/ccstruct/polyblk.h b/ccstruct/polyblk.h index aeb6ddb491..0749645c92 100644 --- a/ccstruct/polyblk.h +++ b/ccstruct/polyblk.h @@ -19,37 +19,20 @@ #ifndef POLYBLK_H #define POLYBLK_H -#include "rect.h" -#include "points.h" -#include "scrollview.h" -#include "elst.h" +#include "publictypes.h" +#include "elst.h" +#include "points.h" +#include "rect.h" +#include "scrollview.h" #include "hpddef.h" // must be last (handpd.dll) -// Possible types for a POLY_BLOCK or ColPartition. Must be kept in sync with -// kPBColors. Used extensively by ColPartition, but polyblk is a lower-level -// file. -enum PolyBlockType { - PT_UNKNOWN, // Type is not yet known. Keep as the first element. - PT_FLOWING_TEXT, // Text that lives inside a column. - PT_HEADING_TEXT, // Text that spans more than one column. - PT_PULLOUT_TEXT, // Text that is in a cross-column pull-out region. - PT_TABLE, // Partition belonging to a table region. - PT_VERTICAL_TEXT, // Text-line runs vertically. - PT_FLOWING_IMAGE, // Image that lives inside a column. - PT_HEADING_IMAGE, // Image that spans more than one column. - PT_PULLOUT_IMAGE, // Image that is in a cross-column pull-out region. - PT_FLOWING_LINE, // H-Line that lives inside a column. - PT_HEADING_LINE, // H-Line that spans more than one column. - PT_PULLOUT_LINE, // H-Line that is in a cross-column pull-out region. - PT_NOISE, // Lies outside of any column. - PT_COUNT -}; - class DLLSYM POLY_BLOCK { public: POLY_BLOCK() { } + // Initialize from box coordinates. 
+ POLY_BLOCK(const TBOX& box, PolyBlockType type); POLY_BLOCK(ICOORDELT_LIST *points, PolyBlockType type); ~POLY_BLOCK () { } @@ -69,7 +52,7 @@ class DLLSYM POLY_BLOCK { } bool IsText() const { - return IsTextType(type); + return PTIsTextType(type); } // Rotate about the origin by the given rotation. (Analogous to @@ -112,23 +95,6 @@ class DLLSYM POLY_BLOCK { // Returns a color to draw the given type. static ScrollView::Color ColorForPolyBlockType(PolyBlockType type); - // Returns true if PolyBlockType is of horizontal line type - static bool IsLineType(PolyBlockType type) { - return (type == PT_FLOWING_LINE) || (type == PT_HEADING_LINE) || - (type == PT_PULLOUT_LINE); - } - // Returns true if PolyBlockType is of image type - static bool IsImageType(PolyBlockType type) { - return (type == PT_FLOWING_IMAGE) || (type == PT_HEADING_IMAGE) || - (type == PT_PULLOUT_IMAGE); - } - // Returns true if PolyBlockType is of text type - static bool IsTextType(PolyBlockType type) { - return (type == PT_FLOWING_TEXT) || (type == PT_HEADING_TEXT) || - (type == PT_PULLOUT_TEXT) || (type == PT_TABLE) || - (type == PT_VERTICAL_TEXT); - } - private: ICOORDELT_LIST vertices; // vertices TBOX box; // bounding box diff --git a/ccstruct/polyblob.cpp b/ccstruct/polyblob.cpp index d080976f39..0cad60d605 100644 --- a/ccstruct/polyblob.cpp +++ b/ccstruct/polyblob.cpp @@ -18,7 +18,7 @@ **********************************************************************/ #include "mfcpch.h" -#include "varable.h" +#include "params.h" #include "ocrrow.h" #include "polyblob.h" //#include "lapoly.h" @@ -31,9 +31,6 @@ #define EXTERN -EXTERN BOOL_VAR (polygon_tess_approximation, TRUE, -"Do tess poly instead of greyscale"); - ELISTIZE_S (PBLOB) /********************************************************************** * position_outline @@ -146,56 +143,41 @@ PBLOB::PBLOB( //constructor /********************************************************************** * approximate_outline_list * - * Convert a list of outlines to 
polygonal form. + * Convert a list of chain-coded outlines (srclist) to polygonal form. **********************************************************************/ -static void approximate_outline_list( //do list of outlines - C_OUTLINE_LIST *srclist, //list to convert - OUTLINE_LIST *destlist, //desstination list - float xheight //height of line - ) { - C_OUTLINE *src_outline; //outline from src list - OUTLINE *dest_outline; //result - C_OUTLINE_IT src_it = srclist; //source iterator - OUTLINE_IT dest_it = destlist; //iterator +static void approximate_outline_list(C_OUTLINE_LIST *srclist, + OUTLINE_LIST *destlist) { + C_OUTLINE *src_outline; // outline from src list + OUTLINE *dest_outline; // result + C_OUTLINE_IT src_it = srclist; // source iterator + OUTLINE_IT dest_it = destlist; // iterator do { src_outline = src_it.data (); - // if (polygon_tess_approximation) - dest_outline = tesspoly_outline (src_outline, xheight); - // else - // dest_outline=greypoly_outline(src_outline,xheight); + dest_outline = tesspoly_outline(src_outline); if (dest_outline != NULL) { - dest_it.add_after_then_move (dest_outline); - if (!src_outline->child ()->empty ()) - //do child list - approximate_outline_list (src_outline->child (), dest_outline->child (), xheight); + dest_it.add_after_then_move(dest_outline); + if (!src_outline->child()->empty()) + // do child list + approximate_outline_list(src_outline->child(), dest_outline->child()); } - src_it.forward (); + src_it.forward(); } - while (!src_it.at_first ()); + while (!src_it.at_first()); } /********************************************************************** * PBLOB::PBLOB * - * Constructor to build a PBLOB from a C_BLOB by polygonal approximation. + * Constructor to build a PBLOB (polygonal blob) from a C_BLOB + * (chain-coded blob) by polygonal approximation. 
**********************************************************************/ -PBLOB::PBLOB( //constructor - C_BLOB *cblob, //compact blob - float xheight //height of line - ) { - TBOX bbox; //bounding box - +PBLOB::PBLOB(C_BLOB *cblob) { if (!cblob->out_list ()->empty ()) { - //get bounding box - bbox = cblob->bounding_box (); - if (bbox.height () > xheight) - xheight = bbox.height (); //max of line and blob - //copy it - approximate_outline_list (cblob->out_list (), &outlines, xheight); + approximate_outline_list (cblob->out_list (), &outlines); } } @@ -253,12 +235,12 @@ PBLOB *PBLOB::baseline_normalise( //normalize blob float x_centre = (blob_box.left () + blob_box.right ()) / 2.0; PBLOB *bn_blob; //copied blob - *denorm = DENORM (x_centre, bln_x_height / row->x_height (), row); + *denorm = DENORM (x_centre, kBlnXHeight / row->x_height (), row); bn_blob = new PBLOB; //get one *bn_blob = *this; //deep copy bn_blob->move (FCOORD (-denorm->origin (), -row->base_line (x_centre))); bn_blob->scale (denorm->scale ()); - bn_blob->move (FCOORD (0.0, bln_baseline_offset)); + bn_blob->move (FCOORD (0.0, kBlnBaselineOffset)); return bn_blob; } @@ -269,18 +251,12 @@ PBLOB *PBLOB::baseline_normalise( //normalize blob * DeBaseline Normalise the blob properly with the given denorm. **********************************************************************/ -void PBLOB::baseline_denormalise( // Tess style BL Norm - const DENORM *denorm //antidote - ) { - float blob_x_left; // Left edge of blob. 
- TBOX blob_box; //blob bounding box - - move(FCOORD (0.0f, 0.0f - bln_baseline_offset)); - blob_box = bounding_box (); - blob_x_left = blob_box.left (); - scale (1.0 / denorm->scale_at_x (blob_x_left)); - move (FCOORD (denorm->origin (), - denorm->yshift_at_x (blob_x_left))); +void PBLOB::baseline_denormalise(const DENORM *denorm ) { + move(FCOORD(0.0f, 0.0f - kBlnBaselineOffset)); + TBOX blob_box = bounding_box(); + float blob_x_centre = (blob_box.left() + blob_box.right()) / 2.0f; + scale(1.0 / denorm->scale_at_x(blob_x_centre)); + move(FCOORD(denorm->origin(), denorm->yshift_at_x(blob_x_centre))); } diff --git a/ccstruct/polyblob.h b/ccstruct/polyblob.h index 790657efd9..334ab234ba 100644 --- a/ccstruct/polyblob.h +++ b/ccstruct/polyblob.h @@ -30,13 +30,13 @@ const int kBlnBaselineOffset = 64; // offset for baseline normalization class PBLOB : public ELIST_LINK { public: - PBLOB() { - } //empty constructor - PBLOB( //constructor - OUTLINE_LIST *outline_list); //in random order - PBLOB( //constructor - C_BLOB *cblob, //polygonal approx - float xheight); + PBLOB() {} + + // Create from a list of polygonal outlines. + PBLOB(OUTLINE_LIST *outline_list); + + // Create from a chain-coded form. + PBLOB(C_BLOB *cblob); OUTLINE_LIST *out_list() { //get outline list return &outlines; diff --git a/ccstruct/publictypes.cpp b/ccstruct/publictypes.cpp new file mode 100644 index 0000000000..e6795960b2 --- /dev/null +++ b/ccstruct/publictypes.cpp @@ -0,0 +1,38 @@ +/////////////////////////////////////////////////////////////////////// +// File: publictypes.cpp +// Description: Types used in both the API and internally +// Author: Ray Smith +// Created: Wed Mar 03 11:17:09 PST 2010 +// +// (C) Copyright 2010, Google Inc. +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +/////////////////////////////////////////////////////////////////////// + +#include "publictypes.h" + +// String name for each block type. Keep in sync with PolyBlockType. +const char* kPolyBlockNames[] = { + "Unknown", + "Flowing Text", + "Heading Text", + "Pullout Text", + "Table", + "Vertical Text", + "Caption Text", + "Flowing Image", + "Heading Image", + "Pullout Image", + "Horizontal Line", + "Vertical Line", + "Noise", + "" // End marker for testing that sizes match. +}; diff --git a/ccstruct/publictypes.h b/ccstruct/publictypes.h new file mode 100644 index 0000000000..4f5373a2a8 --- /dev/null +++ b/ccstruct/publictypes.h @@ -0,0 +1,148 @@ +/////////////////////////////////////////////////////////////////////// +// File: publictypes.h +// Description: Types used in both the API and internally +// Author: Ray Smith +// Created: Wed Mar 03 09:22:53 PST 2010 +// +// (C) Copyright 2010, Google Inc. +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +/////////////////////////////////////////////////////////////////////// + +#ifndef TESSERACT_CCSTRUCT_PUBLICTYPES_H__ +#define TESSERACT_CCSTRUCT_PUBLICTYPES_H__ + +// This file contains types that are used both by the API and internally +// to Tesseract. In order to decouple the API from Tesseract and prevent cyclic +// dependencies, THIS FILE SHOULD NOT DEPEND ON ANY OTHER PART OF TESSERACT. +// Restated: It is OK for low-level Tesseract files to include publictypes.h, +// but not for the low-level tesseract code to include top-level API code. +// This file should not use other Tesseract types, as that would drag +// their includes into the API-level. +// API-level code should include apitypes.h in preference to this file. + +// Number of printers' points in an inch. The unit of the pointsize return. +const int kPointsPerInch = 72; + +// Possible types for a POLY_BLOCK or ColPartition. +// Must be kept in sync with kPBColors in polyblk.cpp and PTIs*Type functions +// below, as well as kPolyBlockNames in publictypes.cpp. +// Used extensively by ColPartition, and POLY_BLOCK. +enum PolyBlockType { + PT_UNKNOWN, // Type is not yet known. Keep as the first element. + PT_FLOWING_TEXT, // Text that lives inside a column. + PT_HEADING_TEXT, // Text that spans more than one column. + PT_PULLOUT_TEXT, // Text that is in a cross-column pull-out region. + PT_TABLE, // Partition belonging to a table region. + PT_VERTICAL_TEXT, // Text-line runs vertically. + PT_CAPTION_TEXT, // Text that belongs to an image. + PT_FLOWING_IMAGE, // Image that lives inside a column. + PT_HEADING_IMAGE, // Image that spans more than one column. + PT_PULLOUT_IMAGE, // Image that is in a cross-column pull-out region. + PT_HORZ_LINE, // Horizontal Line. + PT_VERT_LINE, // Vertical Line. + PT_NOISE, // Lies outside of any column. 
+ PT_COUNT +}; + + +// Returns true if PolyBlockType is of horizontal line type +inline bool PTIsLineType(PolyBlockType type) { + return type == PT_HORZ_LINE || type == PT_VERT_LINE; +} +// Returns true if PolyBlockType is of image type +inline bool PTIsImageType(PolyBlockType type) { + return type == PT_FLOWING_IMAGE || type == PT_HEADING_IMAGE || + type == PT_PULLOUT_IMAGE; +} +// Returns true if PolyBlockType is of text type +inline bool PTIsTextType(PolyBlockType type) { + return type == PT_FLOWING_TEXT || type == PT_HEADING_TEXT || + type == PT_PULLOUT_TEXT || type == PT_TABLE || + type == PT_VERTICAL_TEXT || type == PT_CAPTION_TEXT; +} + +// String name for each block type. Keep in sync with PolyBlockType. +extern const char* kPolyBlockNames[]; + +namespace tesseract { + +// Possible modes for page layout analysis. These *must* be kept in order +// of decreasing amount of layout analysis to be done, except for OSD_ONLY, +// so that the inequality test macros below work. +enum PageSegMode { + PSM_OSD_ONLY, ///< Orientation and script detection only. + PSM_AUTO_OSD, ///< Automatic page segmentation with orientation and + ///< script detection. (OSD) + PSM_AUTO, ///< Fully automatic page segmentation, but no OSD. + PSM_SINGLE_COLUMN, ///< Assume a single column of text of variable sizes. + PSM_SINGLE_BLOCK_VERT_TEXT, ///< Assume a single uniform block of vertically + ///< aligned text. + PSM_SINGLE_BLOCK, ///< Assume a single uniform block of text. (Default.) + PSM_SINGLE_LINE, ///< Treat the image as a single text line. + PSM_SINGLE_WORD, ///< Treat the image as a single word. + PSM_CIRCLE_WORD, ///< Treat the image as a single word in a circle. + PSM_SINGLE_CHAR, ///< Treat the image as a single character. + + PSM_COUNT ///< Number of enum entries. +}; + +// Macros that act on a PageSegMode to determine whether components of +// layout analysis are enabled. 
+// *Depend critically on the order of elements of PageSegMode.* +#define PSM_OSD_ENABLED(pageseg_mode) ((pageseg_mode) <= PSM_AUTO_OSD) +#define PSM_COL_FIND_ENABLED(pageseg_mode) \ + ((pageseg_mode) >= PSM_AUTO_OSD && (pageseg_mode) <= PSM_AUTO) +#define PSM_BLOCK_FIND_ENABLED(pageseg_mode) \ + ((pageseg_mode) >= PSM_AUTO_OSD && (pageseg_mode) <= PSM_SINGLE_COLUMN) +#define PSM_LINE_FIND_ENABLED(pageseg_mode) \ + ((pageseg_mode) >= PSM_AUTO_OSD && (pageseg_mode) <= PSM_SINGLE_BLOCK) +#define PSM_WORD_FIND_ENABLED(pageseg_mode) \ + ((pageseg_mode) >= PSM_AUTO_OSD && (pageseg_mode) <= PSM_SINGLE_LINE) + +// enum of the elements of the page hierarchy, used in ResultIterator +// to provide functions that operate on each level without having to +// have 5x as many functions. +// NOTE: At present RIL_PARA and RIL_BLOCK are equivalent as there is +// no paragraph internally yet. +// TODO(rays) Add paragraph detection. +enum PageIteratorLevel { + RIL_BLOCK, // Block of text/image/separator line. + RIL_PARA, // Paragraph within a block. + RIL_TEXTLINE, // Line within a paragraph. + RIL_WORD, // Word within a textline. + RIL_SYMBOL // Symbol/character within a word. +}; + +// When Tesseract/Cube is initialized we can choose to instantiate/load/run +// only the Tesseract part, only the Cube part or both along with the combiner. +// The preference of which engine to use is stored in tessedit_ocr_engine_mode. +// +// ATTENTION: When modifying this enum, please make sure to make the +// appropriate changes to all the enums mirroring it (e.g. OCREngine in +// cityblock/workflow/detection/detection_storage.proto). Such enums will +// mention the connection to OcrEngineMode in the comments. 
+enum OcrEngineMode { + OEM_TESSERACT_ONLY, // Run Tesseract only - fastest + OEM_CUBE_ONLY, // Run Cube only - better accuracy, but slower + OEM_TESSERACT_CUBE_COMBINED, // Run both and combine results - best accuracy + OEM_DEFAULT // Specify this mode when calling init_*(), + // to indicate that any of the above modes + // should be automatically inferred from the + // variables in the language-specific config, + // command-line configs, or if not specified + // in any of the above should be set to the + // default OEM_TESSERACT_ONLY. +}; + +} // namespace tesseract. + +#endif // TESSERACT_CCSTRUCT_PUBLICTYPES_H__ diff --git a/ccstruct/quspline.cpp b/ccstruct/quspline.cpp index 5fd4e79d05..22609b41c6 100644 --- a/ccstruct/quspline.cpp +++ b/ccstruct/quspline.cpp @@ -1,8 +1,8 @@ /********************************************************************** * File: quspline.cpp (Formerly qspline.c) * Description: Code for the QSPLINE class. - * Author: Ray Smith - * Created: Tue Oct 08 17:16:12 BST 1991 + * Author: Ray Smith + * Created: Tue Oct 08 17:16:12 BST 1991 * * (C) Copyright 1991, Hewlett-Packard Ltd. 
** Licensed under the Apache License, Version 2.0 (the "License"); @@ -377,9 +377,9 @@ void QSPLINE::plot( //draw it x = xcoords[segment]; for (step = 0; step <= QSPLINE_PRECISION; step++) { if (segment == 0 && step == 0) - window->SetCursor(x, quadratics[segment].y (x)); + window->SetCursor(x, quadratics[segment].y (x)); else - window->DrawTo(x, quadratics[segment].y (x)); + window->DrawTo(x, quadratics[segment].y (x)); x += increment; } } diff --git a/ccstruct/quspline.h b/ccstruct/quspline.h index 7a16046259..edb81afc47 100644 --- a/ccstruct/quspline.h +++ b/ccstruct/quspline.h @@ -37,7 +37,7 @@ class QSPLINE QSPLINE *, float); friend void make_holed_baseline(TBOX *, int, QSPLINE *, QSPLINE *, float); - friend void tweak_row_baseline(ROW *); + friend void tweak_row_baseline(ROW *, double, double); public: QSPLINE() { //empty constructor segments = 0; diff --git a/ccstruct/ratngs.cpp b/ccstruct/ratngs.cpp index 30e4db2a8a..c64cc28f44 100644 --- a/ccstruct/ratngs.cpp +++ b/ccstruct/ratngs.cpp @@ -18,16 +18,15 @@ **********************************************************************/ #include "mfcpch.h" - #include "ratngs.h" + #include "callcpp.h" #include "genericvector.h" #include "unicharset.h" -extern FILE *matcher_fp; - ELISTIZE (BLOB_CHOICE) CLISTIZE (BLOB_CHOICE_LIST) CLISTIZE (WERD_CHOICE) -//extern FILE* matcher_fp; + +const float WERD_CHOICE::kBadRating = 100000.0; /** * BLOB_CHOICE::BLOB_CHOICE @@ -37,14 +36,17 @@ ELISTIZE (BLOB_CHOICE) CLISTIZE (BLOB_CHOICE_LIST) CLISTIZE (WERD_CHOICE) BLOB_CHOICE::BLOB_CHOICE(UNICHAR_ID src_unichar_id, //< character id float src_rating, //< rating float src_cert, //< certainty - inT8 src_config, //< config (font) + inT16 src_config, //< config (font) + inT16 src_config2, //< 2nd choice config. 
int src_script_id //< script ) { unichar_id_ = src_unichar_id; rating_ = src_rating; certainty_ = src_cert; config_ = src_config; + config2_ = src_config2; script_id_ = src_script_id; + language_model_state_ = NULL; } /** @@ -57,7 +59,9 @@ BLOB_CHOICE::BLOB_CHOICE(const BLOB_CHOICE &other) { rating_ = other.rating(); certainty_ = other.certainty(); config_ = other.config(); + config2_ = other.config2(); script_id_ = other.script_id(); + language_model_state_ = NULL; } /** @@ -69,12 +73,12 @@ BLOB_CHOICE::BLOB_CHOICE(const BLOB_CHOICE &other) { WERD_CHOICE::WERD_CHOICE(const char *src_string, const UNICHARSET &unicharset) { STRING src_lengths; - int len = strlen(src_string); const char *ptr = src_string; + const char *end = src_string + strlen(src_string); int step = unicharset.step(ptr); - for (; ptr < src_string + len && step > 0; + for (; ptr < end && step > 0; step = unicharset.step(ptr), src_lengths += step, ptr += step); - if (step != 0 && ptr == src_string + len) { + if (step != 0 && ptr == end) { this->init(src_string, src_lengths.string(), 0.0, 0.0, NO_PERM, unicharset); } else { // there must have been an invalid unichar in the string @@ -395,13 +399,8 @@ void print_ratings_list(const char *msg, BLOB_CHOICE_IT c_it; c_it.set_to_list(ratings); for (c_it.mark_cycle_pt(); !c_it.cycled_list(); c_it.forward()) { - tprintf("r%.2f c%.2f : %d %s", - c_it.data()->rating(), c_it.data()->certainty(), - c_it.data()->unichar_id(), - current_unicharset.debug_str(c_it.data()->unichar_id()).string()); - if (!c_it.at_last()) { - tprintf("\n"); - } + c_it.data()->print(&current_unicharset); + if (!c_it.at_last()) tprintf("\n"); } tprintf("\n"); fflush(stdout); @@ -423,11 +422,8 @@ void print_ratings_list(const char *msg, BLOB_CHOICE_LIST *ratings) { BLOB_CHOICE_IT c_it; c_it.set_to_list(ratings); for (c_it.mark_cycle_pt(); !c_it.cycled_list(); c_it.forward()) { - tprintf("r%.2f c%.2f : %d", c_it.data()->rating(), - c_it.data()->certainty(), c_it.data()->unichar_id()); - if
(!c_it.at_last()) { - tprintf("\n"); - } + c_it.data()->print(NULL); + if (!c_it.at_last()) tprintf("\n"); } tprintf("\n"); fflush(stdout); @@ -447,9 +443,6 @@ void print_ratings_info(FILE *fp, BLOB_CHOICE_LIST *ratings, const UNICHARSET ¤t_unicharset) { inT32 index; // to list - inT32 best_index; // to list - FLOAT32 best_rat; // rating - FLOAT32 best_cert; // certainty const char* first_char = NULL; // character FLOAT32 first_rat; // rating FLOAT32 first_cert; // certainty @@ -478,25 +471,12 @@ void print_ratings_info(FILE *fp, first_rat = -1; first_cert = -1; } - best_index = -1; - best_rat = -1; - best_cert = -1; - for (index = 0, c_it.mark_cycle_pt(); !c_it.cycled_list(); - c_it.forward(), index++) { - if (strcmp(current_unicharset.id_to_unichar(c_it.data()->unichar_id()), - blob_answer) == 0) { - best_index = index; - best_rat = c_it.data()->rating(); - best_cert = -c_it.data()->certainty(); - } - } if (first_char != NULL && (*first_char == '\0' || *first_char == ' ')) first_char = NULL; if (sec_char != NULL && (*sec_char == '\0' || *sec_char == ' ')) sec_char = NULL; - fprintf(matcher_fp, - " " INT32FORMAT " " INT32FORMAT " %g %g %s %g %g %s %g %g\n", - ratings->length(), best_index, best_rat, best_cert, + tprintf(" " INT32FORMAT " %s %g %g %s %g %g\n", + ratings->length(), first_char != NULL ? first_char : "~", first_rat, first_cert, sec_char != NULL ? 
sec_char : "~", sec_rat, sec_cert); @@ -513,9 +493,9 @@ void print_char_choices_list(const char *msg, for (int x = 0; x < char_choices.length(); ++x) { BLOB_CHOICE_IT c_it; c_it.set_to_list(char_choices.get(x)); - tprintf("char[%d]: %s\n", x, + tprintf("\nchar[%d]: %s\n", x, current_unicharset.debug_str( c_it.data()->unichar_id()).string()); if (detailed) - print_ratings_list(" ", char_choices.get(x), current_unicharset); + print_ratings_list("", char_choices.get(x), current_unicharset); } } diff --git a/ccstruct/ratngs.h b/ccstruct/ratngs.h index a72de82aef..9855f3f4ca 100644 --- a/ccstruct/ratngs.h +++ b/ccstruct/ratngs.h @@ -38,14 +38,17 @@ class BLOB_CHOICE: public ELIST_LINK rating_ = MAX_FLOAT32; certainty_ = -MAX_FLOAT32; script_id_ = -1; + language_model_state_ = NULL; } BLOB_CHOICE(UNICHAR_ID src_unichar_id, // character id float src_rating, // rating float src_cert, // certainty - inT8 src_config, // config (font) + inT16 src_config, // config (font) + inT16 src_config2, // 2nd choice config. 
int script_id); // script BLOB_CHOICE(const BLOB_CHOICE &other); ~BLOB_CHOICE() {} + UNICHAR_ID unichar_id() const { return unichar_id_; } @@ -55,12 +58,18 @@ class BLOB_CHOICE: public ELIST_LINK float certainty() const { return certainty_; } - inT8 config() const { + inT16 config() const { return config_; } + inT16 config2() const { + return config2_; + } int script_id() const { return script_id_; } + void *language_model_state() { + return language_model_state_; + } void set_unichar_id(UNICHAR_ID newunichar_id) { unichar_id_ = newunichar_id; @@ -71,27 +80,44 @@ class BLOB_CHOICE: public ELIST_LINK void set_certainty(float newrat) { certainty_ = newrat; } - void set_config(inT8 newfont) { + void set_config(inT16 newfont) { config_ = newfont; } + void set_config2(inT16 newfont) { + config2_ = newfont; + } void set_script(int newscript_id) { script_id_ = newscript_id; } + void set_language_model_state(void *language_model_state) { + language_model_state_ = language_model_state; + } static BLOB_CHOICE* deep_copy(const BLOB_CHOICE* src) { BLOB_CHOICE* choice = new BLOB_CHOICE; *choice = *src; return choice; } + void print(const UNICHARSET *unicharset) { + tprintf("r%.2f c%.2f : %d %s", rating_, certainty_, unichar_id_, + (unicharset == NULL) ? "" : + unicharset->debug_str(unichar_id_).string()); + } NEWDELETE private: UNICHAR_ID unichar_id_; // unichar id - char config_; // char config (font) + inT16 config_; // char config (font) + inT16 config2_; // 2nd choice config (font) inT16 junk2_; float rating_; // size related float certainty_; // absolute int script_id_; + // Stores language model information about this BLOB_CHOICE. Used during + // the segmentation search for BLOB_CHOICEs in BLOB_CHOICE_LISTs that are + // recorded in the ratings matrix. + // The pointer is owned/managed by the segmentation search. + void *language_model_state_; }; // Make BLOB_CHOICE listable. 
@@ -99,21 +125,25 @@ ELISTIZEH (BLOB_CHOICE) CLISTIZEH (BLOB_CHOICE_LIST) // Permuter codes used in WERD_CHOICEs. enum PermuterType { - NO_PERM, // 0 - PUNC_PERM, // 1 - TOP_CHOICE_PERM, // 2 - LOWER_CASE_PERM, // 3 - UPPER_CASE_PERM, // 4 - NUMBER_PERM, // 5 - SYSTEM_DAWG_PERM, // 6 - DOC_DAWG_PERM, // 7 - USER_DAWG_PERM, // 8 - FREQ_DAWG_PERM, // 9 - COMPOUND_PERM, // 10 + NO_PERM, // 0 + PUNC_PERM, // 1 + TOP_CHOICE_PERM, // 2 + LOWER_CASE_PERM, // 3 + UPPER_CASE_PERM, // 4 + NGRAM_PERM, // 5 + NUMBER_PERM, // 6 + USER_PATTERN_PERM, // 7 + SYSTEM_DAWG_PERM, // 8 + DOC_DAWG_PERM, // 9 + USER_DAWG_PERM, // 10 + FREQ_DAWG_PERM, // 11 + COMPOUND_PERM, // 12 }; class WERD_CHOICE { public: + static const float kBadRating; + WERD_CHOICE() { this->init(8); } WERD_CHOICE(int reserved) { this->init(reserved); } WERD_CHOICE(const char *src_string, @@ -168,6 +198,10 @@ class WERD_CHOICE { assert(index < length_); unichar_ids_[index] = unichar_id; } + inline void set_fragment_length(char flen, int index) { + assert(index < length_); + fragment_lengths_[index] = flen; + } inline void set_rating(float new_val) { rating_ = new_val; } @@ -180,6 +214,13 @@ class WERD_CHOICE { inline void set_fragment_mark(bool new_fragment_mark) { fragment_mark_ = new_fragment_mark; } + // Note: this function should only be used if all the fields + // are populated manually with set_* functions (rather than + // (copy)constructors and append_* functions). + inline void set_length(int len) { + ASSERT_HOST(reserved_ >= len); + length_ = len; + } void set_blob_choices(BLOB_CHOICE_LIST_CLIST *blob_choices); /// Make more space in unichar_id_ and fragment_lengths_ arrays. @@ -219,7 +260,7 @@ class WERD_CHOICE { /// Set the fields in this choice to be default (bad) values. 
inline void make_bad() { length_ = 0; - rating_ = MAX_FLOAT32; + rating_ = kBadRating; certainty_ = -MAX_FLOAT32; fragment_mark_ = false; unichar_string_ = ""; @@ -323,9 +364,6 @@ CLISTIZEH (WERD_CHOICE) typedef GenericVector<BLOB_CHOICE_LIST *> BLOB_CHOICE_LIST_VECTOR; typedef GenericVector<WERD_CHOICE *> WERD_CHOICE_LIST_VECTOR; -typedef void (*POLY_TESTER) (const STRING&, PBLOB *, DENORM *, BOOL8, - char *, inT32, BLOB_CHOICE_LIST *); - void print_ratings_list(const char *msg, BLOB_CHOICE_LIST *ratings); void print_ratings_list( const char *msg, // intro message diff --git a/ccstruct/rect.h b/ccstruct/rect.h index a2fc40a11d..d2da28e7df 100644 --- a/ccstruct/rect.h +++ b/ccstruct/rect.h @@ -46,6 +46,10 @@ class DLLSYM TBOX { // bounding box return ((left () >= right ()) || (top () <= bottom ())); } + bool operator==(const TBOX& other) { + return bot_left == other.bot_left && top_right == other.top_right; + } + inT16 top() const { // coord of top return top_right.y (); } @@ -192,6 +196,22 @@ class DLLSYM TBOX { // bounding box // Do boxes overlap on x axis. bool x_overlap(const TBOX &box) const; + // Return the horizontal gap between the boxes. If the boxes + // overlap horizontally then the return value is negative, indicating + // the amount of the overlap. + int x_gap(const TBOX& box) const { + return MAX(bot_left.x(), box.bot_left.x()) - + MIN(top_right.x(), box.top_right.x()); + } + + // Return the vertical gap between the boxes. If the boxes + // overlap vertically then the return value is negative, indicating + // the amount of the overlap. + int y_gap(const TBOX& box) const { + return MAX(bot_left.y(), box.bot_left.y()) - + MIN(top_right.y(), box.top_right.y()); + } + + // Do boxes overlap on x axis by more than // half of the width of the narrower box.
bool major_x_overlap(const TBOX &box) const; @@ -206,12 +226,26 @@ class DLLSYM TBOX { // bounding box // fraction of current box's area covered by other double overlap_fraction(const TBOX &box) const; + // fraction of the current box's projected area covered by the other's + double x_overlap_fraction(const TBOX& box) const; + + // fraction of the current box's projected area covered by the other's + double y_overlap_fraction(const TBOX& box) const; + TBOX intersection( // shared area box const TBOX &box) const; TBOX bounding_union( // box enclosing both const TBOX &box) const; + // Sets the box boundaries to the given coordinates. + void set_to_given_coords(int x_min, int y_min, int x_max, int y_max) { + bot_left.set_x(x_min); + bot_left.set_y(y_min); + top_right.set_x(x_max); + top_right.set_y(y_max); + } + void print() const { // print tprintf("Bounding box=(%d,%d)->(%d,%d)\n", left(), bottom(), right(), top()); @@ -233,7 +267,7 @@ class DLLSYM TBOX { // bounding box friend DLLSYM TBOX & operator+= (TBOX &, const TBOX &); // in place union friend DLLSYM TBOX & operator-= (TBOX &, const TBOX &); - // in place intrsection + // in place intersection void serialise_asc( // convert to ascii FILE *f); @@ -379,4 +413,49 @@ inline bool TBOX::major_y_overlap(const TBOX &box) const { } return (overlap >= box.height() / 2 || overlap >= this->height() / 2); } + +/********************************************************************** + * TBOX::x_overlap_fraction() Calculates the horizontal overlap of the + * given boxes as a fraction of this boxes + * width. 
+ * + **********************************************************************/ + +inline double TBOX::x_overlap_fraction(const TBOX& other) const { + int low = MAX(left(), other.left()); + int high = MIN(right(), other.right()); + int width = right() - left(); + if (width == 0) { + int x = left(); + if (other.left() <= x && x <= other.right()) + return 1.0; + else + return 0.0; + } else { + return MAX(0, static_cast<double>(high - low) / width); + } +} + +/********************************************************************** + * TBOX::y_overlap_fraction() Calculates the vertical overlap of the + * given boxes as a fraction of this boxes + * height. + * + **********************************************************************/ + +inline double TBOX::y_overlap_fraction(const TBOX& other) const { + int low = MAX(bottom(), other.bottom()); + int high = MIN(top(), other.top()); + int height = top() - bottom(); + if (height == 0) { + int y = bottom(); + if (other.bottom() <= y && y <= other.top()) + return 1.0; + else + return 0.0; + } else { + return MAX(0, static_cast<double>(high - low) / height); + } +} + #endif diff --git a/ccstruct/rejctmap.cpp b/ccstruct/rejctmap.cpp index 8402ffc35d..236d9d7a85 100644 --- a/ccstruct/rejctmap.cpp +++ b/ccstruct/rejctmap.cpp @@ -22,13 +22,7 @@ //#include "basefile.h" #include "rejctmap.h" #include "secname.h" - -#define EXTERN - -EXTERN BOOL_VAR (rejword_only_set_if_accepted, TRUE, "Mimic old reject_word"); -EXTERN BOOL_VAR (rejmap_allow_more_good_qual, FALSE, -"Use initial good qual setting"); -EXTERN BOOL_VAR (rej_use_1Il_rej, TRUE, "1Il rejection enabled"); +#include "params.h" BOOL8 REJ::perm_rejected() { //Is char perm reject? return (flag (R_TESS_FAILURE) ||
!flag (R_POOR_MATCH) && !flag (R_NOT_TESS_ACCEPTED) && !flag (R_CONTAINS_BLANKS) && - (rejmap_allow_more_good_qual || (!rej_between_nn_and_mm () && - !rej_between_mm_and_quality_accept () && - !rej_between_quality_and_minimal_rej_accept ()))); + !rej_between_mm_and_quality_accept () && + !rej_between_quality_and_minimal_rej_accept ())); } @@ -119,8 +112,7 @@ void REJ::setrej_edge_char() { //Close to image edge void REJ::setrej_1Il_conflict() { //Initial reject map - if (rej_use_1Il_rej) - set_flag(R_1IL_CONFLICT); + set_flag(R_1IL_CONFLICT); } @@ -449,8 +441,7 @@ void REJMAP::rej_word_not_tess_accepted() { //Reject whole word int i; for (i = 0; i < len; i++) { - if (!rejword_only_set_if_accepted || ptr[i].accepted ()) - ptr[i].setrej_not_tess_accepted (); + if (ptr[i].accepted()) ptr[i].setrej_not_tess_accepted(); } } @@ -459,8 +450,7 @@ void REJMAP::rej_word_contains_blanks() { //Reject whole word int i; for (i = 0; i < len; i++) { - if (!rejword_only_set_if_accepted || ptr[i].accepted ()) - ptr[i].setrej_contains_blanks (); + if (ptr[i].accepted()) ptr[i].setrej_contains_blanks(); } } @@ -469,8 +459,7 @@ void REJMAP::rej_word_bad_permuter() { //Reject whole word int i; for (i = 0; i < len; i++) { - if (!rejword_only_set_if_accepted || ptr[i].accepted ()) - ptr[i].setrej_bad_permuter (); + if (ptr[i].accepted()) ptr[i].setrej_bad_permuter (); } } @@ -479,8 +468,7 @@ void REJMAP::rej_word_xht_fixup() { //Reject whole word int i; for (i = 0; i < len; i++) { - if (!rejword_only_set_if_accepted || ptr[i].accepted ()) - ptr[i].setrej_xht_fixup (); + if (ptr[i].accepted()) ptr[i].setrej_xht_fixup(); } } @@ -489,8 +477,7 @@ void REJMAP::rej_word_no_alphanums() { //Reject whole word int i; for (i = 0; i < len; i++) { - if (!rejword_only_set_if_accepted || ptr[i].accepted ()) - ptr[i].setrej_no_alphanums (); + if (ptr[i].accepted()) ptr[i].setrej_no_alphanums(); } } @@ -499,8 +486,7 @@ void REJMAP::rej_word_mostly_rej() { //Reject whole word int i; for (i = 0; i < len; 
i++) { - if (!rejword_only_set_if_accepted || ptr[i].accepted ()) - ptr[i].setrej_mostly_rej (); + if (ptr[i].accepted()) ptr[i].setrej_mostly_rej(); } } @@ -509,8 +495,7 @@ void REJMAP::rej_word_bad_quality() { //Reject whole word int i; for (i = 0; i < len; i++) { - if (!rejword_only_set_if_accepted || ptr[i].accepted ()) - ptr[i].setrej_bad_quality (); + if (ptr[i].accepted()) ptr[i].setrej_bad_quality(); } } @@ -519,8 +504,7 @@ void REJMAP::rej_word_doc_rej() { //Reject whole word int i; for (i = 0; i < len; i++) { - if (!rejword_only_set_if_accepted || ptr[i].accepted ()) - ptr[i].setrej_doc_rej (); + if (ptr[i].accepted()) ptr[i].setrej_doc_rej(); } } @@ -529,8 +513,7 @@ void REJMAP::rej_word_block_rej() { //Reject whole word int i; for (i = 0; i < len; i++) { - if (!rejword_only_set_if_accepted || ptr[i].accepted ()) - ptr[i].setrej_block_rej (); + if (ptr[i].accepted()) ptr[i].setrej_block_rej(); } } @@ -539,7 +522,6 @@ void REJMAP::rej_word_row_rej() { //Reject whole word int i; for (i = 0; i < len; i++) { - if (!rejword_only_set_if_accepted || ptr[i].accepted ()) - ptr[i].setrej_row_rej (); + if (ptr[i].accepted()) ptr[i].setrej_row_rej(); } } diff --git a/ccstruct/rejctmap.h b/ccstruct/rejctmap.h index eff8eacfd8..3954cf4c2b 100644 --- a/ccstruct/rejctmap.h +++ b/ccstruct/rejctmap.h @@ -46,15 +46,9 @@ OF THIS IMPLIED TEMPORAL ORDERING OF THE FLAGS!!!! 
#endif #include "memry.h" #include "bits16.h" -#include "varable.h" +#include "params.h" #include "notdll.h" -extern BOOL_VAR_H (rejword_only_set_if_accepted, TRUE, -"Mimic old reject_word"); -extern BOOL_VAR_H (rejmap_allow_more_good_qual, FALSE, -"Use initial good qual setting"); -extern BOOL_VAR_H (rej_use_1Il_rej, TRUE, "1Il rejection enabled"); - enum REJ_FLAGS { /* Reject modes which are NEVER overridden */ diff --git a/wordrec/seam.cpp b/ccstruct/seam.cpp similarity index 50% rename from wordrec/seam.cpp rename to ccstruct/seam.cpp index 049c7c6bf5..ddfea81336 100644 --- a/wordrec/seam.cpp +++ b/ccstruct/seam.cpp @@ -26,9 +26,9 @@ I n c l u d e s ----------------------------------------------------------------------*/ #include "seam.h" +#include "blobs.h" #include "callcpp.h" #include "structures.h" -#include "makechop.h" #ifdef __UNIX__ #include @@ -38,10 +38,7 @@ V a r i a b l e s ----------------------------------------------------------------------*/ #define NUM_STARTING_SEAMS 20 - -#define SEAMBLOCK 100 /* Cells per block */ -makestructure (newseam, free_seam, printseam, SEAM, -freeseam, SEAMBLOCK, "SEAM", seamcount); +makestructure(newseam, free_seam, SEAM); /*---------------------------------------------------------------------- Public Function Code @@ -50,15 +47,15 @@ freeseam, SEAMBLOCK, "SEAM", seamcount); * @name point_in_split * * Check to see if either of these points are present in the current - * split. + * split. * @returns TRUE if one of them is split. */ bool point_in_split(SPLIT *split, EDGEPT *point1, EDGEPT *point2) { - return ((split) ? - ((exact_point (split->point1, point1) || - exact_point (split->point1, point2) || - exact_point (split->point2, point1) || - exact_point (split->point2, point2)) ? TRUE : FALSE) : FALSE); + return ((split) ? ((exact_point (split->point1, point1) || + exact_point (split->point1, point2) || + exact_point (split->point2, point1) || + exact_point (split->point2, point2)) ? 
TRUE : FALSE) + : FALSE); } @@ -66,13 +63,13 @@ bool point_in_split(SPLIT *split, EDGEPT *point1, EDGEPT *point2) { * @name point_in_seam * * Check to see if either of these points are present in the current - * seam. + * seam. * @returns TRUE if one of them is. */ bool point_in_seam(SEAM *seam, SPLIT *split) { - return (point_in_split (seam->split1, split->point1, split->point2) || - point_in_split (seam->split2, split->point1, split->point2) || - point_in_split (seam->split3, split->point1, split->point2)); + return (point_in_split(seam->split1, split->point1, split->point2) || + point_in_split(seam->split2, split->point1, split->point2) || + point_in_split(seam->split3, split->point1, split->point2)); } @@ -106,7 +103,7 @@ void combine_seams(SEAM *dest_seam, SEAM *source_seam) { else if (!dest_seam->split3) dest_seam->split3 = source_seam->split1; else - cprintf ("combine_seam: Seam is too crowded, can't be combined !\n"); + cprintf("combine_seam: Seam is too crowded, can't be combined !\n"); } if (source_seam->split2) { if (!dest_seam->split2) @@ -114,13 +111,13 @@ void combine_seams(SEAM *dest_seam, SEAM *source_seam) { else if (!dest_seam->split3) dest_seam->split3 = source_seam->split2; else - cprintf ("combine_seam: Seam is too crowded, can't be combined !\n"); + cprintf("combine_seam: Seam is too crowded, can't be combined !\n"); } if (source_seam->split3) { if (!dest_seam->split3) dest_seam->split3 = source_seam->split3; else - cprintf ("combine_seam: Seam is too crowded, can't be combined !\n"); + cprintf("combine_seam: Seam is too crowded, can't be combined !\n"); } free_seam(source_seam); } @@ -136,15 +133,48 @@ void delete_seam(void *arg) { //SEAM *seam) if (seam) { if (seam->split1) - delete_split (seam->split1); + delete_split(seam->split1); if (seam->split2) - delete_split (seam->split2); + delete_split(seam->split2); if (seam->split3) - delete_split (seam->split3); + delete_split(seam->split3); free_seam(seam); } } +/** + * @name start_seam_list + 
* + * Initialize a list of seams that match the original number of blobs + * present in the starting segmentation. Each of the seams created + * by this routine have location information only. + */ +SEAMS start_seam_list(TBLOB *blobs) { + TBLOB *blob; + SEAMS seam_list; + TPOINT topleft; + TPOINT botright; + TPOINT location; + /* Seam slot per char */ + seam_list = new_seam_list (); + + for (blob = blobs; blob->next != NULL; blob = blob->next) { + + blob_bounding_box(blob, &topleft, &botright); + location.x = botright.x; + location.y = botright.y + topleft.y; + blob_bounding_box (blob->next, &topleft, &botright); + location.x += topleft.x; + location.y += botright.y + topleft.y; + location.x /= 2; + location.y /= 4; + + seam_list = add_seam (seam_list, + new_seam (0.0, location, NULL, NULL, NULL)); + } + + return (seam_list); +} /** * @name free_seam_list @@ -155,7 +185,7 @@ void delete_seam(void *arg) { //SEAM *seam) void free_seam_list(SEAMS seam_list) { int x; - array_loop (seam_list, x) delete_seam (array_value (seam_list, x)); + array_loop(seam_list, x) delete_seam(array_value (seam_list, x)); array_free(seam_list); } @@ -175,17 +205,19 @@ bool test_insert_seam(SEAMS seam_list, int list_length; list_length = array_count (seam_list); - for (test_index = 0, blob = first_blob->next; - test_index < index; test_index++, blob = blob->next) { - test_seam = (SEAM *) array_value (seam_list, test_index); + for (test_index=0, blob=first_blob->next; + test_index < index; + test_index++, blob=blob->next) { + test_seam = (SEAM *) array_value(seam_list, test_index); if (test_index + test_seam->widthp < index && test_seam->widthp + test_index == index - 1 && account_splits_right(test_seam, blob) < 0) return false; } - for (test_index = index, blob = left_blob->next; - test_index < list_length; test_index++, blob = blob->next) { - test_seam = (SEAM *) array_value (seam_list, test_index); + for (test_index=index, blob=left_blob->next; + test_index < list_length; + test_index++, 
blob=blob->next) { + test_seam = (SEAM *) array_value(seam_list, test_index); if (test_index - test_seam->widthn >= index && test_index - test_seam->widthn == index && account_splits_left(test_seam, first_blob, blob) < 0) @@ -210,32 +242,32 @@ SEAMS insert_seam(SEAMS seam_list, int test_index; int list_length; - list_length = array_count (seam_list); - for (test_index = 0, blob = first_blob->next; - test_index < index; test_index++, blob = blob->next) { - test_seam = (SEAM *) array_value (seam_list, test_index); + list_length = array_count(seam_list); + for (test_index=0, blob=first_blob->next; + test_index < index; + test_index++, blob=blob->next) { + test_seam = (SEAM *) array_value(seam_list, test_index); if (test_index + test_seam->widthp >= index) { test_seam->widthp++; /*got in the way */ - } - else if (test_seam->widthp + test_index == index - 1) { + } else if (test_seam->widthp + test_index == index - 1) { test_seam->widthp = account_splits_right(test_seam, blob); if (test_seam->widthp < 0) { - cprintf ("Failed to find any right blob for a split!\n"); + cprintf("Failed to find any right blob for a split!\n"); print_seam("New dud seam", seam); print_seam("Failed seam", test_seam); } } } - for (test_index = index, blob = left_blob->next; - test_index < list_length; test_index++, blob = blob->next) { - test_seam = (SEAM *) array_value (seam_list, test_index); + for (test_index=index, blob=left_blob->next; + test_index < list_length; + test_index++, blob=blob->next) { + test_seam = (SEAM *) array_value(seam_list, test_index); if (test_index - test_seam->widthn < index) { test_seam->widthn++; /*got in the way */ - } - else if (test_index - test_seam->widthn == index) { + } else if (test_index - test_seam->widthn == index) { test_seam->widthn = account_splits_left(test_seam, first_blob, blob); if (test_seam->widthn < 0) { - cprintf ("Failed to find any left blob for a split!\n"); + cprintf("Failed to find any left blob for a split!\n"); print_seam("New dud seam", 
seam); print_seam("Failed seam", test_seam); } @@ -263,18 +295,17 @@ int account_splits_right(SEAM *seam, TBLOB *blob) { width = 0; do { if (!found_em[0]) - found_em[0] = find_split_in_blob (seam->split1, blob); + found_em[0] = find_split_in_blob(seam->split1, blob); if (!found_em[1]) - found_em[1] = find_split_in_blob (seam->split2, blob); + found_em[1] = find_split_in_blob(seam->split2, blob); if (!found_em[2]) - found_em[2] = find_split_in_blob (seam->split3, blob); + found_em[2] = find_split_in_blob(seam->split3, blob); if (found_em[0] && found_em[1] && found_em[2]) { return width; } width++; blob = blob->next; - } - while (blob != NULL); + } while (blob != NULL); return -1; } @@ -286,34 +317,38 @@ int account_splits_right(SEAM *seam, TBLOB *blob) { * in the blob list. */ int account_splits_left(SEAM *seam, TBLOB *blob, TBLOB *end_blob) { - static inT32 depth = 0; - static inT8 width; - static inT8 found_em[3]; + inT32 depth = 0; + inT8 width = 0; + inT8 found_em[3]; + account_splits_left_helper(seam, blob, end_blob, &depth, &width, found_em); + return width; +} +void account_splits_left_helper(SEAM *seam, TBLOB *blob, TBLOB *end_blob, + inT32 *depth, inT8 *width, inT8* found_em) { if (blob != end_blob) { - depth++; - account_splits_left (seam, blob->next, end_blob); - depth--; - } - else { + (*depth)++; + account_splits_left_helper(seam, blob->next, end_blob, + depth, width, found_em); + (*depth)--; + } else { found_em[0] = seam->split1 == NULL; found_em[1] = seam->split2 == NULL; found_em[2] = seam->split3 == NULL; - width = 0; + *width = 0; } if (!found_em[0]) - found_em[0] = find_split_in_blob (seam->split1, blob); + found_em[0] = find_split_in_blob(seam->split1, blob); if (!found_em[1]) - found_em[1] = find_split_in_blob (seam->split2, blob); + found_em[1] = find_split_in_blob(seam->split2, blob); if (!found_em[2]) - found_em[2] = find_split_in_blob (seam->split3, blob); + found_em[2] = find_split_in_blob(seam->split3, blob); if (!found_em[0] || 
!found_em[1] || !found_em[2]) { - width++; - if (depth == 0) { - width = -1; + (*width)++; + if (*depth == 0) { + *width = -1; } } - return width; } @@ -325,19 +360,13 @@ int account_splits_left(SEAM *seam, TBLOB *blob, TBLOB *end_blob) { bool find_split_in_blob(SPLIT *split, TBLOB *blob) { TESSLINE *outline; -#if 0 for (outline = blob->outlines; outline != NULL; outline = outline->next) - if (is_split_outline (outline, split)) - return TRUE; - return FALSE; -#endif - for (outline = blob->outlines; outline != NULL; outline = outline->next) - if (point_in_outline(split->point1, outline)) + if (outline->Contains(split->point1->pos)) break; if (outline == NULL) return FALSE; for (outline = blob->outlines; outline != NULL; outline = outline->next) - if (point_in_outline(split->point2, outline)) + if (outline->Contains(split->point2->pos)) return TRUE; return FALSE; } @@ -356,9 +385,9 @@ SEAM *join_two_seams(SEAM *seam1, SEAM *seam2) { assert(seam1 &&seam2); if (((seam1->split3 == NULL && seam2->split2 == NULL) || - (seam1->split2 == NULL && seam2->split3 == NULL) || - seam1->split1 == NULL || - seam2->split1 == NULL) && (!shared_split_points (seam1, seam2))) { + (seam1->split2 == NULL && seam2->split3 == NULL) || + seam1->split1 == NULL || seam2->split1 == NULL) && + (!shared_split_points(seam1, seam2))) { clone_seam(result, seam1); clone_seam(temp, seam2); combine_seams(result, temp); @@ -375,7 +404,7 @@ SEAM *join_two_seams(SEAM *seam1, SEAM *seam2) { * Initailization of this record is done by this routine. 
*/ SEAM *new_seam(PRIORITY priority, - int x_location, + const TPOINT& location, SPLIT *split1, SPLIT *split2, SPLIT *split3) { @@ -384,7 +413,7 @@ SEAM *new_seam(PRIORITY priority, seam = newseam (); seam->priority = priority; - seam->location = x_location; + seam->location = location; seam->widthp = 0; seam->widthn = 0; seam->split1 = split1; @@ -414,17 +443,16 @@ SEAMS new_seam_list() { void print_seam(const char *label, SEAM *seam) { if (seam) { cprintf(label); - cprintf (" %6.2f @ %5d, p=%d, n=%d ", - seam->priority, seam->location, seam->widthp, seam->widthn); - - print_split (seam->split1); + cprintf(" %6.2f @ (%d,%d), p=%d, n=%d ", + seam->priority, seam->location.x, seam->location.y, + seam->widthp, seam->widthn); + print_split(seam->split1); if (seam->split2) { - cprintf (", "); + cprintf(", "); print_split (seam->split2); - if (seam->split3) { - cprintf (", "); + cprintf(", "); print_split (seam->split3); } } @@ -444,12 +472,12 @@ void print_seams(const char *label, SEAMS seams) { char number[CHARS_PER_LINE]; if (seams) { - cprintf ("%s\n", label); + cprintf("%s\n", label); array_loop(seams, x) { - sprintf (number, "%2d: ", x); - print_seam (number, (SEAM *) array_value (seams, x)); + sprintf(number, "%2d: ", x); + print_seam(number, (SEAM *) array_value(seams, x)); } - cprintf ("\n"); + cprintf("\n"); } } @@ -467,18 +495,186 @@ int shared_split_points(SEAM *seam1, SEAM *seam2) { if (seam2->split1 == NULL) return (FALSE); - if (point_in_seam (seam1, seam2->split1)) + if (point_in_seam(seam1, seam2->split1)) return (TRUE); if (seam2->split2 == NULL) return (FALSE); - if (point_in_seam (seam1, seam2->split2)) + if (point_in_seam(seam1, seam2->split2)) return (TRUE); if (seam2->split3 == NULL) return (FALSE); - if (point_in_seam (seam1, seam2->split3)) + if (point_in_seam(seam1, seam2->split3)) return (TRUE); return (FALSE); } + +/********************************************************************** + * break_pieces + * + * Break up the blobs in this chain 
so that they are all independent. + * This operation should undo the affect of join_pieces. + **********************************************************************/ +void break_pieces(TBLOB *blobs, SEAMS seams, inT16 start, inT16 end) { + TESSLINE *outline = blobs->outlines; + TBLOB *next_blob; + inT16 x; + + for (x = start; x < end; x++) + reveal_seam ((SEAM *) array_value (seams, x)); + + next_blob = blobs->next; + + while (outline && next_blob) { + if (outline->next == next_blob->outlines) { + outline->next = NULL; + outline = next_blob->outlines; + next_blob = next_blob->next; + } + else { + outline = outline->next; + } + } +} + + +/********************************************************************** + * join_pieces + * + * Join a group of base level pieces into a single blob that can then + * be classified. + **********************************************************************/ +void join_pieces(TBLOB *piece_blobs, SEAMS seams, inT16 start, inT16 end) { + TBLOB *next_blob; + TBLOB *blob; + inT16 x; + TESSLINE *outline; + SEAM *seam; + + for (x = 0, blob = piece_blobs; x < start; x++) + blob = blob->next; + next_blob = blob->next; + outline = blob->outlines; + if (!outline) + return; + + while (x < end) { + seam = (SEAM *) array_value (seams, x); + if (x - seam->widthn >= start && x + seam->widthp < end) + hide_seam(seam); + while (outline->next) + outline = outline->next; + outline->next = next_blob->outlines; + next_blob = next_blob->next; + + x++; + } +} + + +/********************************************************************** + * hide_seam + * + * Change the edge points that are referenced by this seam to make + * them hidden edges. 
+ **********************************************************************/ +void hide_seam(SEAM *seam) { + if (seam == NULL || seam->split1 == NULL) + return; + hide_edge_pair (seam->split1->point1, seam->split1->point2); + + if (seam->split2 == NULL) + return; + hide_edge_pair (seam->split2->point1, seam->split2->point2); + + if (seam->split3 == NULL) + return; + hide_edge_pair (seam->split3->point1, seam->split3->point2); +} + + +/********************************************************************** + * hide_edge_pair + * + * Change the edge points that are referenced by this seam to make + * them hidden edges. + **********************************************************************/ +void hide_edge_pair(EDGEPT *pt1, EDGEPT *pt2) { + EDGEPT *edgept; + + edgept = pt1; + do { + edgept->Hide(); + edgept = edgept->next; + } + while (!exact_point (edgept, pt2) && edgept != pt1); + if (edgept == pt1) { + /* cprintf("Hid entire outline at (%d,%d)!!\n", + edgept->pos.x,edgept->pos.y); */ + } + edgept = pt2; + do { + edgept->Hide(); + edgept = edgept->next; + } + while (!exact_point (edgept, pt1) && edgept != pt2); + if (edgept == pt2) { + /* cprintf("Hid entire outline at (%d,%d)!!\n", + edgept->pos.x,edgept->pos.y); */ + } +} + + +/********************************************************************** + * reveal_seam + * + * Change the edge points that are referenced by this seam to make + * them hidden edges. 
+ **********************************************************************/ +void reveal_seam(SEAM *seam) { + if (seam == NULL || seam->split1 == NULL) + return; + reveal_edge_pair (seam->split1->point1, seam->split1->point2); + + if (seam->split2 == NULL) + return; + reveal_edge_pair (seam->split2->point1, seam->split2->point2); + + if (seam->split3 == NULL) + return; + reveal_edge_pair (seam->split3->point1, seam->split3->point2); +} + + +/********************************************************************** + * reveal_edge_pair + * + * Change the edge points that are referenced by this seam to make + * them hidden edges. + **********************************************************************/ +void reveal_edge_pair(EDGEPT *pt1, EDGEPT *pt2) { + EDGEPT *edgept; + + edgept = pt1; + do { + edgept->Reveal(); + edgept = edgept->next; + } + while (!exact_point (edgept, pt2) && edgept != pt1); + if (edgept == pt1) { + /* cprintf("Hid entire outline at (%d,%d)!!\n", + edgept->pos.x,edgept->pos.y); */ + } + edgept = pt2; + do { + edgept->Reveal(); + edgept = edgept->next; + } + while (!exact_point (edgept, pt1) && edgept != pt2); + if (edgept == pt2) { + /* cprintf("Hid entire outline at (%d,%d)!!\n", + edgept->pos.x,edgept->pos.y); */ + } +} diff --git a/wordrec/seam.h b/ccstruct/seam.h similarity index 72% rename from wordrec/seam.h rename to ccstruct/seam.h index e8793757e0..14ee8c9f55 100644 --- a/wordrec/seam.h +++ b/ccstruct/seam.h @@ -28,6 +28,7 @@ /*---------------------------------------------------------------------- I n c l u d e s ----------------------------------------------------------------------*/ +#include "blobs.h" #include "split.h" #include "tessarray.h" @@ -41,7 +42,7 @@ typedef struct seam_record PRIORITY priority; inT8 widthp; inT8 widthn; - inT16 location; + TPOINT location; SPLIT *split1; SPLIT *split2; SPLIT *split3; @@ -60,20 +61,20 @@ extern SEAM *newseam(); * Create a new seam record and copy the contents of this seam into it. 
*/ -#define clone_seam(dest,source) \ -if (source) { \ - (dest) = newseam (); \ - (dest)->location = (source)->location; \ - (dest)->widthp = (source)->widthp; \ - (dest)->widthn = (source)->widthn; \ - (dest)->priority = (source)->priority; \ - clone_split ((dest)->split1, (source)->split1); \ - clone_split ((dest)->split2, (source)->split2); \ - clone_split ((dest)->split3, (source)->split3); \ -} \ -else { \ - (dest) = (SEAM*) NULL; \ -} \ +#define clone_seam(dest,source) \ +if (source) { \ + (dest) = newseam (); \ + (dest)->location = (source)->location; \ + (dest)->widthp = (source)->widthp; \ + (dest)->widthn = (source)->widthn; \ + (dest)->priority = (source)->priority; \ + clone_split ((dest)->split1, (source)->split1); \ + clone_split ((dest)->split2, (source)->split2); \ + clone_split ((dest)->split3, (source)->split3); \ +} \ +else { \ + (dest) = (SEAM*) NULL; \ +} \ /** @@ -84,7 +85,7 @@ else { \ */ #define exact_point(p1,p2) \ - (! ((p1->pos.x - p2->pos.x) || (p1->pos.y - p2->pos.y))) + (! 
((p1->pos.x - p2->pos.x) || (p1->pos.y - p2->pos.y))) /*---------------------------------------------------------------------- F u n c t i o n s @@ -99,6 +100,8 @@ void combine_seams(SEAM *dest_seam, SEAM *source_seam); void delete_seam(void *arg); //SEAM *seam); +SEAMS start_seam_list(TBLOB *blobs); + void free_seam_list(SEAMS seam_list); bool test_insert_seam(SEAMS seam_list, @@ -116,12 +119,15 @@ int account_splits_right(SEAM *seam, TBLOB *blob); int account_splits_left(SEAM *seam, TBLOB *blob, TBLOB *end_blob); +void account_splits_left_helper(SEAM *seam, TBLOB *blob, TBLOB *end_blob, + inT32 *depth, inT8 *width, inT8 *found_em); + bool find_split_in_blob(SPLIT *split, TBLOB *blob); SEAM *join_two_seams(SEAM *seam1, SEAM *seam2); SEAM *new_seam(PRIORITY priority, - int x_location, + const TPOINT& location, SPLIT *split1, SPLIT *split2, SPLIT *split3); @@ -133,4 +139,17 @@ void print_seam(const char *label, SEAM *seam); void print_seams(const char *label, SEAMS seams); int shared_split_points(SEAM *seam1, SEAM *seam2); + +void break_pieces(TBLOB *blobs, SEAMS seams, inT16 start, inT16 end); + +void join_pieces(TBLOB *piece_blobs, SEAMS seams, inT16 start, inT16 end); + +void hide_seam(SEAM *seam); + +void hide_edge_pair(EDGEPT *pt1, EDGEPT *pt2); + +void reveal_seam(SEAM *seam); + +void reveal_edge_pair(EDGEPT *pt1, EDGEPT *pt2); + #endif diff --git a/wordrec/split.cpp b/ccstruct/split.cpp similarity index 94% rename from wordrec/split.cpp rename to ccstruct/split.cpp index 4fc4a5aaf5..3bdc9f211e 100644 --- a/wordrec/split.cpp +++ b/ccstruct/split.cpp @@ -27,7 +27,6 @@ ----------------------------------------------------------------------*/ #include "split.h" #include "structures.h" -#include "hideedge.h" #include "callcpp.h" #ifdef __UNIX__ @@ -39,9 +38,7 @@ ----------------------------------------------------------------------*/ BOOL_VAR(wordrec_display_splits, 0, "Display splits"); -#define SPLITBLOCK 100 /* Cells per block */ -makestructure (newsplit, 
free_split, printsplit, SPLIT, -freesplit, SPLITBLOCK, "SPLIT", splitcount); +makestructure(newsplit, free_split, SPLIT); /*---------------------------------------------------------------------- F u n c t i o n s @@ -68,7 +65,7 @@ void delete_split(SPLIT *split) { EDGEPT *make_edgept(int x, int y, EDGEPT *next, EDGEPT *prev) { EDGEPT *this_edgept; /* Create point */ - this_edgept = newedgept (); + this_edgept = new EDGEPT; this_edgept->pos.x = x; this_edgept->pos.y = y; /* Hook it up */ @@ -82,9 +79,6 @@ EDGEPT *make_edgept(int x, int y, EDGEPT *next, EDGEPT *prev) { this_edgept->prev->vec.x = x - this_edgept->prev->pos.x; this_edgept->prev->vec.y = y - this_edgept->prev->pos.y; - reveal_edge(this_edgept); - this_edgept->flags[1] = 0; - return (this_edgept); } @@ -159,8 +153,8 @@ void unsplit_outlines(EDGEPT *p1, EDGEPT *p2) { p1->next = tmp2->next; p2->next = tmp1->next; - oldedgept(tmp1); - oldedgept(tmp2); + delete tmp1; + delete tmp2; p1->vec.x = p1->next->pos.x - p1->pos.x; p1->vec.y = p1->next->pos.y - p1->pos.y; diff --git a/wordrec/split.h b/ccstruct/split.h similarity index 99% rename from wordrec/split.h rename to ccstruct/split.h index ae3a25d5d5..ad76725fd4 100644 --- a/wordrec/split.h +++ b/ccstruct/split.h @@ -28,7 +28,7 @@ /*---------------------------------------------------------------------- I n c l u d e s ----------------------------------------------------------------------*/ -#include "tessclas.h" +#include "blobs.h" #include "oldlist.h" /*---------------------------------------------------------------------- diff --git a/ccstruct/statistc.cpp b/ccstruct/statistc.cpp index b8ceac0d2c..1d765174aa 100644 --- a/ccstruct/statistc.cpp +++ b/ccstruct/statistc.cpp @@ -740,8 +740,6 @@ DLLSYM inT32 choose_nth_item( //fast median float *array, //array of items inT32 count //no of items ) { - static uinT16 seeds[3] = { SEED1, SEED2, SEED3 }; - //for nrand inT32 next_sample; //next one to do inT32 next_lesser; //space for new inT32 prev_greater; //last 
one saved @@ -764,11 +762,7 @@ DLLSYM inT32 choose_nth_item( //fast median index = 0; //ensure lergal else if (index >= count) index = count - 1; - #ifdef __UNIX__ - equal_count = (inT32) (nrand48 (seeds) % count); - #else - equal_count = (inT32) (rand () % count); - #endif + equal_count = (inT32) (rand() % count); pivot = array[equal_count]; //fill gap array[equal_count] = array[0]; @@ -823,8 +817,6 @@ size_t size, //element size //comparator int (*compar) (const void *, const void *) ) { - static uinT16 seeds[3] = { SEED1, SEED2, SEED3 }; - //for nrand int result; //of compar inT32 next_sample; //next one to do inT32 next_lesser; //space for new @@ -846,11 +838,7 @@ int (*compar) (const void *, const void *) index = 0; //ensure lergal else if (index >= count) index = count - 1; - #ifdef __UNIX__ - pivot = (inT32) (nrand48 (seeds) % count); - #else pivot = (inT32) (rand () % count); - #endif swap_entries (array, size, pivot, 0); next_lesser = 0; prev_greater = count; diff --git a/ccstruct/stepblob.cpp b/ccstruct/stepblob.cpp index fe6f996fcd..13450f8233 100644 --- a/ccstruct/stepblob.cpp +++ b/ccstruct/stepblob.cpp @@ -18,7 +18,8 @@ **********************************************************************/ #include "mfcpch.h" -#include "stepblob.h" +#include "stepblob.h" +#include "allheaders.h" // Include automatically generated configuration file if running autoconf. #ifdef HAVE_CONFIG_H @@ -166,6 +167,14 @@ C_BLOB::C_BLOB( //constructor } } +// Simpler constructor to build a blob from a single outline that has +// already been fully initialized. +C_BLOB::C_BLOB(C_OUTLINE* outline) { + C_OUTLINE_IT it(&outlines); + it.add_to_end(outline); +} + + // Build and return a fake blob containing a single fake outline with no // steps. 
C_BLOB* C_BLOB::FakeBlob(const TBOX& box) { @@ -320,6 +329,24 @@ void C_BLOB::rotate(const FCOORD& rotation) { RotateOutlineList(rotation, &outlines); } +static void render_outline_list(C_OUTLINE_LIST *list, + int left, int top, Pix* pix) { + C_OUTLINE_IT it(list); + for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { + C_OUTLINE* outline = it.data(); + outline->render(left, top, pix); + if (!outline->child()->empty()) + render_outline_list(outline->child(), left, top, pix); + } +} + +// Returns a Pix rendering of the blob. pixDestroy after use. +Pix* C_BLOB::render() { + TBOX box = bounding_box(); + Pix* pix = pixCreate(box.width(), box.height(), 1); + render_outline_list(&outlines, box.left(), box.top(), pix); + return pix; +} /********************************************************************** * C_BLOB::plot diff --git a/ccstruct/stepblob.h b/ccstruct/stepblob.h index a4dcaece0f..17b89df74b 100644 --- a/ccstruct/stepblob.h +++ b/ccstruct/stepblob.h @@ -23,12 +23,17 @@ #include "coutln.h" #include "rect.h" +struct Pix; + class C_BLOB:public ELIST_LINK { public: C_BLOB() { } explicit C_BLOB(C_OUTLINE_LIST *outline_list); + // Simpler constructor to build a blob from a single outline that has + // already been fully initialized. + explicit C_BLOB(C_OUTLINE* outline); // Build and return a fake blob containing a single fake outline with no // steps. @@ -48,6 +53,9 @@ class C_BLOB:public ELIST_LINK void move(const ICOORD vec); // repostion blob by vector void rotate(const FCOORD& rotation); // Rotate by given vector. + // Returns a Pix rendering of the blob. pixDestroy after use. 
+ Pix* render(); + void plot( //draw one ScrollView* window, //window to draw in ScrollView::Color blob_colour, //for outer bits diff --git a/ccstruct/vecfuncs.h b/ccstruct/vecfuncs.h index 844d036f21..4a21d7c5e2 100644 --- a/ccstruct/vecfuncs.h +++ b/ccstruct/vecfuncs.h @@ -25,8 +25,10 @@ #ifndef VECFUNCS_H #define VECFUNCS_H -#include "tessclas.h" #include +#include "blobs.h" + +class EDGEPT; /*---------------------------------------------------------------------- M a c r o s @@ -75,17 +77,4 @@ ----------------------------------------------------------------------*/ int direction(EDGEPT *point); -/* -#if defined(__STDC__) || defined(__cplusplus) || MAC_OR_DOS -# define _ARGS(s) s -#else -# define _ARGS(s) () -#endif*/ - -/* vecfuncs.c -int direction - _ARGS((EDGEPT *point)); - -#undef _ARGS -*/ #endif diff --git a/ccstruct/werd.cpp b/ccstruct/werd.cpp index c6cbe9c391..cd7fbf0bf2 100644 --- a/ccstruct/werd.cpp +++ b/ccstruct/werd.cpp @@ -1,8 +1,8 @@ /********************************************************************** * File: werd.cpp (Formerly word.c) * Description: Code for the WERD class. - * Author: Ray Smith - * Created: Tue Oct 08 14:32:12 BST 1991 + * Author: Ray Smith + * Created: Tue Oct 08 14:32:12 BST 1991 * * (C) Copyright 1991, Hewlett-Packard Ltd. ** Licensed under the Apache License, Version 2.0 (the "License"); @@ -18,63 +18,51 @@ **********************************************************************/ #include "mfcpch.h" -#include "blckerr.h" -#include "linlsq.h" -#include "werd.h" +#include "blckerr.h" +#include "helpers.h" +#include "linlsq.h" +#include "werd.h" // Include automatically generated configuration file if running autoconf. 
#ifdef HAVE_CONFIG_H #include "config_auto.h" #endif -#define FIRST_COLOUR ScrollView::RED //< first rainbow colour - -/// last rainbow colour -#define LAST_COLOUR ScrollView::AQUAMARINE -#define CHILD_COLOUR ScrollView::BROWN //< colour of children +#define FIRST_COLOUR ScrollView::RED //< first rainbow colour +#define LAST_COLOUR ScrollView::AQUAMARINE //< last rainbow colour +#define CHILD_COLOUR ScrollView::BROWN //< colour of children const ERRCODE CANT_SCALE_EDGESTEPS = -"Attempted to scale an edgestep format word"; - -#define EXTERN + "Attempted to scale an edgestep format word"; -EXTERN BOOL_VAR (bln_numericmode, 0, "Optimize for numbers"); -EXTERN INT_VAR (bln_x_height, 128, "Baseline Normalisation X-height"); -EXTERN INT_VAR (bln_baseline_offset, 64, "Baseline Norm. offset of baseline"); -EXTERN double_VAR (bln_blshift_maxshift, -1.0, -"Fraction of xh before shifting"); -EXTERN double_VAR (bln_blshift_xfraction, 0.75, -"Size fraction of xh before shifting"); +ELIST2IZE_S(WERD) -ELISTIZE_S (WERD) /** * WERD::WERD * * Constructor to build a WERD from a list of C_BLOBs. - * The C_BLOBs are not copied so the source list is emptied. + * blob_list The C_BLOBs (in word order) are not copied; + * we take its elements and put them in our lists. 
+ * blank_count blanks in front of the word + * text correct text, outlives this WERD */ -WERD::WERD ( //constructor -C_BLOB_LIST * blob_list, //< in word order -uinT8 blank_count, //< blanks in front -const char *text //< correct text -): -flags (0), -correct(text) { - C_BLOB_IT start_it = blob_list;//iterator - C_BLOB_IT end_it = blob_list; //another - //rejected blobs in wd +WERD::WERD(C_BLOB_LIST *blob_list, uinT8 blank_count, const char *text) + : blanks(blank_count), + flags(0), + script_id_(0), + correct(text) { + C_BLOB_IT start_it = blob_list; + C_BLOB_IT end_it = blob_list; C_BLOB_IT rej_cblob_it = &rej_cblobs; - C_OUTLINE_IT c_outline_it; //coutline iterator - BOOL8 blob_inverted; - BOOL8 reject_blob; + C_OUTLINE_IT c_outline_it; inT16 inverted_vote = 0; inT16 non_inverted_vote = 0; - while (!end_it.at_last ()) - end_it.forward (); //move to last - //move to our list - cblobs.assign_to_sublist (&start_it, &end_it); - blanks = blank_count; + // Move blob_list's elements into cblobs. + while (!end_it.at_last()) + end_it.forward(); + cblobs.assign_to_sublist(&start_it, &end_it); + /* Set white on black flag for the WERD, moving any duff blobs onto the rej_cblobs list. @@ -88,23 +76,23 @@ correct(text) { Walk the blobs again, moving any blob whose inversion flag does not agree with the concencus onto the reject list. 
*/ - start_it.set_to_list (&cblobs); - if (start_it.empty ()) + start_it.set_to_list(&cblobs); + if (start_it.empty()) return; - for (start_it.mark_cycle_pt (); - !start_it.cycled_list (); start_it.forward ()) { - c_outline_it.set_to_list (start_it.data ()->out_list ()); - blob_inverted = c_outline_it.data ()->flag (COUT_INVERSE); - reject_blob = FALSE; - for (c_outline_it.mark_cycle_pt (); - !c_outline_it.cycled_list () && !reject_blob; - c_outline_it.forward ()) { - reject_blob = - c_outline_it.data ()->flag (COUT_INVERSE) != blob_inverted; + for (start_it.mark_cycle_pt(); !start_it.cycled_list(); start_it.forward()) { + BOOL8 reject_blob = FALSE; + BOOL8 blob_inverted; + + c_outline_it.set_to_list(start_it.data()->out_list()); + blob_inverted = c_outline_it.data()->flag(COUT_INVERSE); + for (c_outline_it.mark_cycle_pt(); + !c_outline_it.cycled_list() && !reject_blob; + c_outline_it.forward()) { + reject_blob = c_outline_it.data()->flag(COUT_INVERSE) != blob_inverted; } - if (reject_blob) - rej_cblob_it.add_after_then_move (start_it.extract ()); - else { + if (reject_blob) { + rej_cblob_it.add_after_then_move(start_it.extract()); + } else { if (blob_inverted) inverted_vote++; else @@ -112,16 +100,15 @@ correct(text) { } } - flags.set_bit (W_INVERSE, (inverted_vote > non_inverted_vote)); + flags.set_bit(W_INVERSE, (inverted_vote > non_inverted_vote)); - start_it.set_to_list (&cblobs); - if (start_it.empty ()) + start_it.set_to_list(&cblobs); + if (start_it.empty()) return; - for (start_it.mark_cycle_pt (); - !start_it.cycled_list (); start_it.forward ()) { - c_outline_it.set_to_list (start_it.data ()->out_list ()); - if (c_outline_it.data ()->flag (COUT_INVERSE) != flags.bit (W_INVERSE)) - rej_cblob_it.add_after_then_move (start_it.extract ()); + for (start_it.mark_cycle_pt(); !start_it.cycled_list(); start_it.forward()) { + c_outline_it.set_to_list(start_it.data()->out_list()); + if (c_outline_it.data()->flag(COUT_INVERSE) != flags.bit(W_INVERSE)) + 
rej_cblob_it.add_after_then_move(start_it.extract()); } } @@ -133,13 +120,12 @@ correct(text) { * The BLOBs are not copied so the source list is emptied. */ -WERD::WERD ( //constructor -PBLOB_LIST * blob_list, //< in word order -uinT8 blank_count, //< blanks in front -const char *text //< correct text -): -flags (0), -correct(text) { +WERD::WERD(PBLOB_LIST *blob_list, //< In word order + uinT8 blank_count, //< Blanks in front + const char *text) //< Ccorrect text + : flags(0), + script_id_(0), + correct(text) { PBLOB_IT start_it = blob_list; //iterator PBLOB_IT end_it = blob_list; //another @@ -161,10 +147,11 @@ correct(text) { * The BLOBs are not copied so the source list is emptied. */ -WERD::WERD ( //constructor -PBLOB_LIST * blob_list, //< in word order -WERD * clone //< sorce of flags -):flags (clone->flags), correct (clone->correct) { +WERD::WERD(PBLOB_LIST * blob_list, //< In word order + WERD * clone) //< Source of flags + : flags(clone->flags), + script_id_(clone->script_id_), + correct(clone->correct) { PBLOB_IT start_it = blob_list; //iterator PBLOB_IT end_it = blob_list; //another @@ -184,12 +171,13 @@ WERD * clone //< sorce of flags * The C_BLOBs are not copied so the source list is emptied. */ -WERD::WERD ( //constructor -C_BLOB_LIST * blob_list, //< in word order -WERD * clone //< source of flags -):flags (clone->flags), correct (clone->correct) { - C_BLOB_IT start_it = blob_list;//iterator - C_BLOB_IT end_it = blob_list; //another +WERD::WERD(C_BLOB_LIST * blob_list, //< In word order + WERD * clone) //< Source of flags + : flags(clone->flags), + script_id_(clone->script_id_), + correct(clone->correct) { + C_BLOB_IT start_it = blob_list; // iterator + C_BLOB_IT end_it = blob_list; // another while (!end_it.at_last ()) end_it.forward (); //move to last @@ -199,6 +187,17 @@ WERD * clone //< source of flags // fprintf(stderr,"Wrong constructor!!!!\n"); } +// Construct a WERD from a single_blob and clone the flags from this. 
+// W_BOL and W_EOL flags are set according to the given values. +WERD* WERD::ConstructFromSingleBlob(bool bol, bool eol, C_BLOB* blob) { + C_BLOB_LIST temp_blobs; + C_BLOB_IT temp_it(&temp_blobs); + temp_it.add_after_then_move(blob); + WERD* blob_word = new WERD(&temp_blobs, this); + blob_word->set_flag(W_BOL, bol); + blob_word->set_flag(W_EOL, eol); + return blob_word; +} /** * WERD::poly_copy @@ -207,56 +206,36 @@ WERD * clone //< source of flags * The source WERD is untouched. */ -WERD *WERD::poly_copy( //make a poly copy - float xheight //< row height - ) { +WERD *WERD::poly_copy() { PBLOB *blob; //new blob WERD *result = new WERD; //output word C_BLOB_IT src_it = &cblobs; //iterator - // LARC_BLOB_IT larc_it=(LARC_BLOB_LIST*)(&cblobs); PBLOB_IT dest_it = (PBLOB_LIST *) (&result->cblobs); - //another if (flags.bit (W_POLYGON)) { *result = *this; //just copy it } else { result->flags = flags; + result->script_id_ = script_id_; result->correct = correct; //copy info result->dummy = dummy; if (!src_it.empty ()) { - // if (flags.bit(W_LINEARC)) - // { - // do - // { - // blob=new PBLOB; - // poly_linearc_outlines(larc_it.data()->out_list(), - // blob->out_list()); //convert outlines - // dest_it.add_after_then_move(blob); //add to dest list - // larc_it.forward(); - // } - // while (!larc_it.at_first()); - // } - // else - // { do { - blob = new PBLOB (src_it.data (), xheight); - //convert blob - //add to dest list + // convert blob and add to dest list + blob = new PBLOB (src_it.data ()); dest_it.add_after_then_move (blob); src_it.forward (); } while (!src_it.at_first ()); - // } } if (!rej_cblobs.empty ()) { - /* Polygonal approx of reject blobs */ + // Polygonal approx of reject blobs src_it.set_to_list (&rej_cblobs); dest_it = (PBLOB_LIST *) (&result->rej_cblobs); do { - //convert blob - blob = new PBLOB (src_it.data (), xheight); - //add to dest list + // convert blob and add to dest list + blob = new PBLOB (src_it.data ()); dest_it.add_after_then_move 
(blob); src_it.forward (); } @@ -283,29 +262,24 @@ WERD *WERD::poly_copy( //make a poly copy * row being marked as FUZZY space. */ -TBOX WERD::bounding_box() { //bounding box - TBOX box; //box being built - //rejected blobs in wd - C_BLOB_IT rej_cblob_it = &rej_cblobs; +TBOX WERD::bounding_box() { + TBOX box; // box being built + C_BLOB_IT rej_cblob_it = &rej_cblobs; // rejected blobs - for (rej_cblob_it.mark_cycle_pt (); - !rej_cblob_it.cycled_list (); rej_cblob_it.forward ()) { - box += rej_cblob_it.data ()->bounding_box (); + for (rej_cblob_it.mark_cycle_pt(); !rej_cblob_it.cycled_list(); + rej_cblob_it.forward()) { + box += rej_cblob_it.data()->bounding_box(); } - if (flags.bit (W_POLYGON)) { - //polygons - PBLOB_IT it = (PBLOB_LIST *) (&cblobs); - - for (it.mark_cycle_pt (); !it.cycled_list (); it.forward ()) { - box += it.data ()->bounding_box (); + if (flags.bit(W_POLYGON)) { // polygons + PBLOB_IT it = (PBLOB_LIST *)(&cblobs); + for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { + box += it.data()->bounding_box(); } - } - else { - C_BLOB_IT it = &cblobs; //blobs of WERD - - for (it.mark_cycle_pt (); !it.cycled_list (); it.forward ()) { - box += it.data ()->bounding_box (); + } else { + C_BLOB_IT it = &cblobs; // blobs of WERD + for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { + box += it.data()->bounding_box(); } } return box; @@ -319,27 +293,17 @@ TBOX WERD::bounding_box() { //bounding box * NOTE!! 
REJECT CBLOBS ARE NOT MOVED */ -void WERD::move( // reposition WERD - const ICOORD vec //< by vector - ) { - PBLOB_IT blob_it ((PBLOB_LIST *) & cblobs); - // blob iterator - // LARC_BLOB_IT lblob_it((LARC_BLOB_LIST*)&cblobs); +void WERD::move(const ICOORD vec) { + PBLOB_IT blob_it((PBLOB_LIST *)&cblobs); C_BLOB_IT cblob_it(&cblobs); // cblob iterator - if (flags.bit (W_POLYGON)) - for (blob_it.mark_cycle_pt (); - !blob_it.cycled_list (); blob_it.forward ()) - blob_it.data ()->move (vec); - // else if (flags.bit(W_LINEARC)) - // for( lblob_it.mark_cycle_pt(); - // !lblob_it.cycled_list(); - // lblob_it.forward() ) - // lblob_it.data()->move( vec ); - else - for (cblob_it.mark_cycle_pt (); - !cblob_it.cycled_list (); cblob_it.forward ()) - cblob_it.data ()->move (vec); + if (flags.bit(W_POLYGON)) { + for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) + blob_it.data()->move(vec); + } else { + for (cblob_it.mark_cycle_pt(); !cblob_it.cycled_list(); cblob_it.forward()) + cblob_it.data()->move(vec); + } } @@ -349,24 +313,15 @@ void WERD::move( // reposition WERD * Scale WERD by multiplier */ -void WERD::scale( // scale WERD - const float f //< by multiplier - ) { - PBLOB_IT blob_it ((PBLOB_LIST *) & cblobs); - // blob iterator - // LARC_BLOB_IT lblob_it((LARC_BLOB_LIST*)&cblobs); - - if (flags.bit (W_POLYGON)) - for (blob_it.mark_cycle_pt (); - !blob_it.cycled_list (); blob_it.forward ()) - blob_it.data ()->scale (f); - // else if (flags.bit(W_LINEARC)) - // for (lblob_it.mark_cycle_pt(); - // !lblob_it.cycled_list(); - // lblob_it.forward() ) - // lblob_it.data()->scale( f ); - else - CANT_SCALE_EDGESTEPS.error ("WERD::scale", ABORT, NULL); +void WERD::scale(const float f) { + PBLOB_IT blob_it((PBLOB_LIST *)&cblobs); + + if (flags.bit(W_POLYGON)) { + for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) + blob_it.data()->scale(f); + } else { + CANT_SCALE_EDGESTEPS.error("WERD::scale", ABORT, NULL); + } } @@ -376,22 +331,19 @@ 
void WERD::scale( // scale WERD * Join other word onto this one. Delete the old word. */ -void WERD::join_on( // join WERD - WERD *&other //< other word - ) { - PBLOB_IT blob_it ((PBLOB_LIST *) & cblobs); - // blob iterator - PBLOB_IT src_it ((PBLOB_LIST *) & other->cblobs); +void WERD::join_on(WERD* other) { + PBLOB_IT blob_it((PBLOB_LIST *)&cblobs); + PBLOB_IT src_it((PBLOB_LIST *)&other->cblobs); C_BLOB_IT rej_cblob_it(&rej_cblobs); - C_BLOB_IT src_rej_it (&other->rej_cblobs); + C_BLOB_IT src_rej_it(&other->rej_cblobs); - while (!src_it.empty ()) { - blob_it.add_to_end (src_it.extract ()); - src_it.forward (); + while (!src_it.empty()) { + blob_it.add_to_end(src_it.extract()); + src_it.forward(); } - while (!src_rej_it.empty ()) { - rej_cblob_it.add_to_end (src_rej_it.extract ()); - src_rej_it.forward (); + while (!src_rej_it.empty()) { + rej_cblob_it.add_to_end(src_rej_it.extract()); + src_rej_it.forward(); } } @@ -402,345 +354,205 @@ void WERD::join_on( // join WERD * Copy blobs from other word onto this one. 
*/ -void WERD::copy_on( //copy blobs - WERD *&other //< from other - ) { - if (flags.bit (W_POLYGON)) { - PBLOB_IT blob_it ((PBLOB_LIST *) & cblobs); +void WERD::copy_on(WERD* other) { + bool reversed = other->bounding_box().left() < bounding_box().left(); + if (flags.bit(W_POLYGON)) { + PBLOB_IT blob_it((PBLOB_LIST *) & cblobs); // blob iterator PBLOB_LIST blobs; blobs.deep_copy(reinterpret_cast(&other->cblobs), &PBLOB::deep_copy); - blob_it.move_to_last(); - blob_it.add_list_after(&blobs); + if (reversed) { + blob_it.add_list_before(&blobs); + } else { + blob_it.move_to_last(); + blob_it.add_list_after(&blobs); + } } else { C_BLOB_IT c_blob_it(&cblobs); C_BLOB_LIST c_blobs; c_blobs.deep_copy(&other->cblobs, &C_BLOB::deep_copy); - c_blob_it.move_to_last (); - c_blob_it.add_list_after (&c_blobs); + if (reversed) { + c_blob_it.add_list_before(&c_blobs); + } else { + c_blob_it.move_to_last(); + c_blob_it.add_list_after(&c_blobs); + } } - if (!other->rej_cblobs.empty ()) { + if (!other->rej_cblobs.empty()) { C_BLOB_IT rej_c_blob_it(&rej_cblobs); C_BLOB_LIST new_rej_c_blobs; new_rej_c_blobs.deep_copy(&other->rej_cblobs, &C_BLOB::deep_copy); - rej_c_blob_it.move_to_last (); - rej_c_blob_it.add_list_after (&new_rej_c_blobs); + if (reversed) { + rej_c_blob_it.add_list_before(&new_rej_c_blobs); + } else { + rej_c_blob_it.move_to_last(); + rej_c_blob_it.add_list_after(&new_rej_c_blobs); + } } } /** - * WERD::baseline_normalise + * WERD::baseline_normalize * - * Baseline Normalise the word in Tesseract style. (I.e origin at centre of + * Baseline Normalize the word in Tesseract style. (I.e origin at centre of * word at bottom. 
x-height region scaled to region y = - * (bln_baseline_offset)..(bln_baseline_offset + bln_x_height) + * (kBlnBaselineOffset)..(kBlnBaselineOffset + kBlnXHeight) * - usually 64..192) */ -void WERD::baseline_normalise( // Tess style BL Norm - ROW *row, - DENORM *denorm //< antidote - ) { - baseline_normalise_x (row, row->x_height (), denorm); - //Use standard x ht +void WERD::baseline_normalize(ROW *row, DENORM *denorm, bool numeric_mode) { + baseline_normalize_x(row, row->x_height(), denorm, numeric_mode); } +/********************************************************************** + * Helper functions for WERD::baseline_normalize_x() + **********************************************************************/ + /** - * WERD::baseline_normalise_x + * Center a sequence of blobs of numbers [0-9] about x=0, with + * y = kBlnBaselineOffset .. kBlnBaselineOffset + (4/3) * kBlnXHeight + * (usually 64..170) + * + * Arguments: + * word_box - bounding box for the word in original coordinates. + * blobs - blobs to transform to normalized coordinates. + * antidote - this is the preliminary antidote (with the correct + * origin, default scale, and row). + * update_antidote - we should update antidote's segment information. + * Globals: + * kBlnXHeight * - * Baseline Normalise the word in Tesseract style. (I.e origin at centre of + **/ +static void baseline_normalize_numerals( + const TBOX &word_box, PBLOB_LIST *blobs, DENORM *antidote, + BOOL8 update_antidote) { + int segments = 0; + PBLOB_IT blob_it(blobs); + DENORM_SEG *segs = new DENORM_SEG[blob_it.length()]; + + for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) { + PBLOB *blob = blob_it.data(); + TBOX blob_box = blob->bounding_box(); + blob->move(FCOORD(-antidote->origin(), -blob_box.bottom())); + // Constrain the scale factor as target numbers should be either + // cap height already or xheight. 
+ float factor = ClipToRange( + kBlnXHeight * 4.0f / (3 * blob_box.height()), + antidote->scale(), antidote->scale() * 1.5f); + blob->scale(factor); + blob->move(FCOORD(0.0, kBlnBaselineOffset)); + segs[segments].xstart = blob->bounding_box().left(); + segs[segments].ycoord = blob_box.bottom(); + segs[segments++].scale_factor = factor; + } + if (update_antidote) { + antidote->set_segments(segs, segments); + } + delete [] segs; +} + +/** + * Center a sequence of textual blobs about x=0, + * using antidote for y positioning, scaling and row baseline information. + **/ +static void baseline_normalize_text(const DENORM &antidote, PBLOB_LIST *blobs) { + PBLOB_IT blob_it(blobs); + + for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) { + PBLOB *blob = blob_it.data(); + TBOX blob_box = blob->bounding_box(); + float blob_x_center = (blob_box.left() + blob_box.right()) / 2.0; + blob->move(FCOORD(-antidote.origin(), + -antidote.yshift_at_orig_x(blob_x_center))); + blob->scale(antidote.scale()); + blob->move(FCOORD(0.0, kBlnBaselineOffset)); + } +} + +/** + * WERD::baseline_normalize_x + * + * Baseline Normalize the word in Tesseract style. (I.e origin at centre of * word at bottom. 
x-height region scaled to region y = - * (bln_baseline_offset)..(bln_baseline_offset + bln_x_height) + * (kBlnBaselineOffset)..(kBlnBaselineOffset + kBlnXHeight) * - usually 64..192) - * USE A SPECIFIED X-HEIGHT - NOT NECESSARILY THE ONE IN row + * + * Arguments: + * row - row information (mainly baseline info) + * x_height - the x height to assume in the source coordinates + * (not necessarily the one in row) + * denorm - if non NULL, where to return the "undo" information */ -void WERD::baseline_normalise_x( // Tess style BL Norm - ROW *row, - float x_height, //< non standard value - DENORM *denorm //< antidote - ) { - BOOL8 using_row; //as baseline - float blob_x_centre; //middle of blob - float blob_offset; //bottom miss - float top_offset; //top miss - float blob_x_height; //xh for this blob - inT16 segments; //no of segments - inT16 segment; //current segment - DENORM_SEG *segs; //array of segments - float mean_x; //mean xheight - inT32 x_count; //no of xs - TBOX word_box = bounding_box ();//word bounding box - TBOX blob_box; //blob bounding box - PBLOB_IT blob_it ((PBLOB_LIST *) & cblobs); - // blob iterator - PBLOB *blob; - LLSQ line; //fitted line - double line_m, line_c; //fitted line - //inverse norm - DENORM antidote (word_box.left () + - - (word_box.right () - word_box.left ()) / 2.0, - bln_x_height / x_height, row); - - if (!flags.bit (W_POLYGON)) { - WRONG_WORD.error ("WERD::baseline_normalise", ABORT, - "Need to poly approx"); +void WERD::baseline_normalize_x(ROW *row, float x_height, + DENORM *denorm, bool numeric_mode) { + TBOX word_box = bounding_box(); + DENORM antidote((word_box.left() + word_box.right()) / 2.0, + kBlnXHeight / x_height, row); + if (row == NULL) { + antidote = DENORM(antidote.origin(), antidote.scale(), 0.0, + word_box.bottom(), 0, NULL, false, NULL); } - if (flags.bit (W_NORMALIZED)) { - WRONG_WORD.error ("WERD::baseline_normalise", ABORT, - "Baseline unnormalised"); - } - - if (bln_numericmode) { - segs = new 
DENORM_SEG[blob_it.length ()]; - segments = 0; - float factor; // For scaling to baseline normalised size. - for (blob_it.mark_cycle_pt (); !blob_it.cycled_list (); - blob_it.forward ()) { - blob = blob_it.data (); - blob_box = blob->bounding_box (); - blob->move (FCOORD (-antidote.origin (), - -blob_box.bottom ())); - factor = bln_x_height * 4.0f / (3 * blob_box.height ()); - // Constrain the scale factor as target numbers should be either - // cap height already or xheight. - if (factor < antidote.scale()) - factor = antidote.scale(); - else if (factor > antidote.scale() * 1.5f) - factor = antidote.scale() * 1.5f; - blob->scale (factor); - blob->move (FCOORD (0.0, bln_baseline_offset)); - segs[segments].xstart = blob->bounding_box().left(); - segs[segments].ycoord = blob_box.bottom(); - segs[segments++].scale_factor = factor; - } - antidote = DENORM (antidote.origin (), antidote.scale (), - 0.0f, 0.0f, segments, segs, true, row); - delete [] segs; - - //Repeat for rej blobs - blob_it.set_to_list ((PBLOB_LIST *) & rej_cblobs); - for (blob_it.mark_cycle_pt (); !blob_it.cycled_list (); - blob_it.forward ()) { - blob = blob_it.data (); - blob_box = blob->bounding_box (); - blob->move (FCOORD (-antidote.origin (), - -blob_box.bottom ())); - blob->scale (bln_x_height * 4.0f / (3 * blob_box.height ())); - blob->move (FCOORD (0.0, bln_baseline_offset)); - } + if (!flags.bit(W_POLYGON)) { + WRONG_WORD.error("WERD::baseline_normalize", ABORT, + "Need to poly approx"); } - else if (bln_blshift_maxshift < 0) { - for (blob_it.mark_cycle_pt (); !blob_it.cycled_list (); - blob_it.forward ()) { - blob = blob_it.data (); - blob_box = blob->bounding_box (); - blob_x_centre = blob_box.left () + - (blob_box.right () - blob_box.left ()) / 2.0; - blob->move (FCOORD (-antidote.origin (), - -(row->base_line (blob_x_centre)))); - blob->scale (antidote.scale ()); - blob->move (FCOORD (0.0, bln_baseline_offset)); - } - - //Repeat for rej blobs - blob_it.set_to_list ((PBLOB_LIST *) & 
rej_cblobs); - for (blob_it.mark_cycle_pt (); !blob_it.cycled_list (); - blob_it.forward ()) { - blob = blob_it.data (); - blob_box = blob->bounding_box (); - blob_x_centre = blob_box.left () + - (blob_box.right () - blob_box.left ()) / 2.0; - blob->move (FCOORD (-antidote.origin (), - -(row->base_line (blob_x_centre)))); - blob->scale (antidote.scale ()); - blob->move (FCOORD (0.0, bln_baseline_offset)); - } - + if (flags.bit(W_NORMALIZED)) { + WRONG_WORD.error("WERD::baseline_normalize", ABORT, + "Baseline unnormalized"); } - else { - mean_x = x_height; - x_count = 1; - segs = new DENORM_SEG[blob_it.length ()]; - segments = 0; - for (blob_it.mark_cycle_pt (); !blob_it.cycled_list (); - blob_it.forward ()) { - blob = blob_it.data (); - blob_box = blob->bounding_box (); - if (blob_box.height () > bln_blshift_xfraction * x_height) { - blob_x_centre = blob_box.left () + - (blob_box.right () - blob_box.left ()) / 2.0; - blob_offset = - blob_box.bottom () - row->base_line (blob_x_centre); - top_offset = blob_offset + blob_box.height () - x_height - 1; - blob_x_height = top_offset + x_height; - if (top_offset < 0) - top_offset = -top_offset; - if (blob_offset < 0) - blob_offset = -blob_offset; - if (blob_offset < bln_blshift_maxshift * x_height) { - segs[segments].ycoord = blob_box.bottom (); - line.add (blob_x_centre, blob_box.bottom ()); - if (top_offset < bln_blshift_maxshift * x_height) { - segs[segments].scale_factor = blob_box.height () - 1.0f; - x_count++; - } - else - segs[segments].scale_factor = 0.0f; - //fix it later - } - else { - //not a goer - segs[segments].ycoord = -MAX_INT32; - if (top_offset < bln_blshift_maxshift * x_height) { - segs[segments].scale_factor = blob_x_height; - x_count++; - } - else - segs[segments].scale_factor = 0.0f; - //fix it later - } - } - else { - segs[segments].scale_factor = 0.0f; - segs[segments].ycoord = -MAX_INT32; - } - segs[segments].xstart = blob_box.left (); - segments++; - } - using_row = line.count () <= 1; - if 
(!using_row) { - line_m = line.m (); - line_c = line.c (line_m); - } - else - line_m = line_c = 0; - segments = 0; - for (blob_it.mark_cycle_pt (); !blob_it.cycled_list (); - blob_it.forward ()) { - blob = blob_it.data (); - blob_box = blob->bounding_box (); - blob_x_centre = blob_box.left () + - (blob_box.right () - blob_box.left ()) / 2.0; - if (segs[segments].ycoord == -MAX_INT32 - && segs[segments].scale_factor != 0 && !using_row) { - blob_offset = line_m * blob_x_centre + line_c; - segs[segments].scale_factor = blob_box.top () - blob_offset; - } - if (segs[segments].scale_factor != 0) - mean_x += segs[segments].scale_factor; - segments++; - } - mean_x /= x_count; - // printf("mean x=%g, count=%d, line_m=%g, line_c=%g\n", - // mean_x,x_count,line_m,line_c); - segments = 0; - for (blob_it.mark_cycle_pt (); !blob_it.cycled_list (); - blob_it.forward ()) { - blob = blob_it.data (); - blob_box = blob->bounding_box (); - blob_x_centre = blob_box.left () + - (blob_box.right () - blob_box.left ()) / 2.0; - if (segs[segments].ycoord != -MAX_INT32) - blob_offset = (float) segs[segments].ycoord; - else if (using_row) - blob_offset = row->base_line (blob_x_centre); - else - blob_offset = line_m * blob_x_centre + line_c; - if (segs[segments].scale_factor == 0) - segs[segments].scale_factor = mean_x; - segs[segments].scale_factor = - bln_x_height / segs[segments].scale_factor; - // printf("Blob sf=%g, top=%d, bot=%d, base=%g\n", - // segs[segments].scale_factor,blob_box.top(), - // blob_box.bottom(),blob_offset); - blob->move (FCOORD (-antidote.origin (), -blob_offset)); - blob-> - scale (FCOORD (antidote.scale (), segs[segments].scale_factor)); - blob->move (FCOORD (0.0, bln_baseline_offset)); - segments++; - } - //Repeat for rej blobs - blob_it.set_to_list ((PBLOB_LIST *) & rej_cblobs); - segment = 0; - for (blob_it.mark_cycle_pt (); !blob_it.cycled_list (); - blob_it.forward ()) { - blob = blob_it.data (); - blob_box = blob->bounding_box (); - blob_x_centre = 
blob_box.left () + - (blob_box.right () - blob_box.left ()) / 2.0; - while (segment < segments - 1 - && segs[segment + 1].xstart <= blob_x_centre) - segment++; - if (segs[segment].ycoord != -MAX_INT32) - blob_offset = (float) segs[segment].ycoord; - else if (using_row) - blob_offset = row->base_line (blob_x_centre); - else - blob_offset = line_m * blob_x_centre + line_c; - blob->move (FCOORD (-antidote.origin (), -blob_offset)); - blob-> - scale (FCOORD (antidote.scale (), segs[segment].scale_factor)); - blob->move (FCOORD (0.0, bln_baseline_offset)); - } - if (line.count () > 0 || x_count > 1) - antidote = DENORM (antidote.origin (), antidote.scale (), - line_m, line_c, segments, segs, using_row, row); - delete[]segs; + if (numeric_mode) { + baseline_normalize_numerals(word_box, (PBLOB_LIST *)&cblobs, &antidote, + TRUE); + baseline_normalize_numerals(word_box, (PBLOB_LIST *)&rej_cblobs, &antidote, + FALSE); + } else { + baseline_normalize_text(antidote, (PBLOB_LIST *)&cblobs); + baseline_normalize_text(antidote, (PBLOB_LIST *)&rej_cblobs); } if (denorm != NULL) *denorm = antidote; - //it's normalised - flags.set_bit (W_NORMALIZED, TRUE); + + flags.set_bit(W_NORMALIZED, TRUE); } /** - * WERD::baseline_denormalise + * WERD::baseline_denormalize * - * Baseline DeNormalise the word in Tesseract style. (I.e origin at centre of - * word at bottom. x-height region scaled to region y = - * (bln_baseline_offset)..(bln_baseline_offset + bln_x_height) - * - usually 64..192) + * Return a normalized word to its original coordinates. 
*/ -void WERD::baseline_denormalise( // Tess style BL Norm - const DENORM *denorm //< antidote - ) { - PBLOB_IT blob_it ((PBLOB_LIST *) & cblobs); - // blob iterator +void WERD::baseline_denormalize(const DENORM *antidote) { + PBLOB_IT blob_it((PBLOB_LIST *)&cblobs); PBLOB *blob; - if (!flags.bit (W_NORMALIZED)) { - WRONG_WORD.error ("WERD::baseline_denormalise", ABORT, - "Baseline normalised"); + if (!flags.bit(W_NORMALIZED)) { + WRONG_WORD.error("WERD::baseline_denormalize", ABORT, + "Baseline normalized"); } - for (blob_it.mark_cycle_pt (); !blob_it.cycled_list (); blob_it.forward ()) { - blob = blob_it.data (); - //denormalise it - blob->baseline_denormalise (denorm); + for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) { + blob = blob_it.data(); + blob->baseline_denormalise(antidote); } - //Repeat for rej blobs - blob_it.set_to_list ((PBLOB_LIST *) & rej_cblobs); - for (blob_it.mark_cycle_pt (); !blob_it.cycled_list (); blob_it.forward ()) { - blob = blob_it.data (); - //denormalise it - blob->baseline_denormalise (denorm); + // Repeat for rejected blobs + blob_it.set_to_list((PBLOB_LIST *)&rej_cblobs); + for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) { + blob = blob_it.data(); + blob->baseline_denormalise(antidote); } - //it's not normalised - flags.set_bit (W_NORMALIZED, FALSE); + flags.set_bit(W_NORMALIZED, FALSE); } @@ -750,31 +562,28 @@ void WERD::baseline_denormalise( // Tess style BL Norm * Display members */ -void WERD::print( //print - FILE * //< file to print on - ) { - tprintf ("Blanks= %d\n", blanks); - bounding_box ().print (); - tprintf ("Flags = %d = 0%o\n", flags.val, flags.val); - tprintf (" W_SEGMENTED = %s\n", - flags.bit (W_SEGMENTED) ? "TRUE" : "FALSE "); - tprintf (" W_ITALIC = %s\n", flags.bit (W_ITALIC) ? "TRUE" : "FALSE "); - tprintf (" W_BOL = %s\n", flags.bit (W_BOL) ? "TRUE" : "FALSE "); - tprintf (" W_EOL = %s\n", flags.bit (W_EOL) ? 
"TRUE" : "FALSE "); - tprintf (" W_NORMALIZED = %s\n", - flags.bit (W_NORMALIZED) ? "TRUE" : "FALSE "); - tprintf (" W_POLYGON = %s\n", flags.bit (W_POLYGON) ? "TRUE" : "FALSE "); - tprintf (" W_LINEARC = %s\n", flags.bit (W_LINEARC) ? "TRUE" : "FALSE "); - tprintf (" W_DONT_CHOP = %s\n", - flags.bit (W_DONT_CHOP) ? "TRUE" : "FALSE "); - tprintf (" W_REP_CHAR = %s\n", - flags.bit (W_REP_CHAR) ? "TRUE" : "FALSE "); - tprintf (" W_FUZZY_SP = %s\n", - flags.bit (W_FUZZY_SP) ? "TRUE" : "FALSE "); - tprintf (" W_FUZZY_NON = %s\n", - flags.bit (W_FUZZY_NON) ? "TRUE" : "FALSE "); - tprintf ("Correct= %s\n", correct.string ()); - tprintf ("Rejected cblob count = %d\n", rej_cblobs.length ()); +void WERD::print() { + tprintf("Blanks= %d\n", blanks); + bounding_box().print(); + tprintf("Flags = %d = 0%o\n", flags.val, flags.val); + tprintf(" W_SEGMENTED = %s\n", flags.bit(W_SEGMENTED) ? "TRUE" : "FALSE "); + tprintf(" W_ITALIC = %s\n", flags.bit(W_ITALIC) ? "TRUE" : "FALSE "); + tprintf(" W_BOL = %s\n", flags.bit(W_BOL) ? "TRUE" : "FALSE "); + tprintf(" W_EOL = %s\n", flags.bit(W_EOL) ? "TRUE" : "FALSE "); + tprintf(" W_NORMALIZED = %s\n", + flags.bit(W_NORMALIZED) ? "TRUE" : "FALSE "); + tprintf(" W_POLYGON = %s\n", flags.bit(W_POLYGON) ? "TRUE" : "FALSE "); + tprintf(" W_SCRIPT_HAS_XHEIGHT = %s\n", + flags.bit(W_SCRIPT_HAS_XHEIGHT) ? "TRUE" : "FALSE "); + tprintf(" W_SCRIPT_IS_LATIN = %s\n", + flags.bit(W_SCRIPT_IS_LATIN) ? "TRUE" : "FALSE "); + tprintf(" W_DONT_CHOP = %s\n", flags.bit(W_DONT_CHOP) ? "TRUE" : "FALSE "); + tprintf(" W_REP_CHAR = %s\n", flags.bit(W_REP_CHAR) ? "TRUE" : "FALSE "); + tprintf(" W_FUZZY_SP = %s\n", flags.bit(W_FUZZY_SP) ? "TRUE" : "FALSE "); + tprintf(" W_FUZZY_NON = %s\n", flags.bit(W_FUZZY_NON) ? 
"TRUE" : "FALSE "); + tprintf("Correct= %s\n", correct.string()); + tprintf("Rejected cblob count = %d\n", rej_cblobs.length()); + tprintf("Script = %d\n", script_id_); } @@ -785,86 +594,55 @@ void WERD::print( //print */ #ifndef GRAPHICS_DISABLED -void WERD::plot( //draw it - ScrollView* window, //window to draw in - ScrollView::Color colour, //colour to draw in - BOOL8 solid //draw larcs solid - ) { - if (flags.bit (W_POLYGON)) { - //polygons - PBLOB_IT it = (PBLOB_LIST *) (&cblobs); - - for (it.mark_cycle_pt (); !it.cycled_list (); it.forward ()) { - it.data ()->plot (window, colour, colour); +void WERD::plot(ScrollView *window, ScrollView::Color colour) { + if (flags.bit(W_POLYGON)) { // polygon blobs + PBLOB_IT it = (PBLOB_LIST *)(&cblobs); + for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { + it.data()->plot(window, colour, colour); } - } - // else if (flags.bit(W_LINEARC)) - // { - // LARC_BLOB_IT it=(LARC_BLOB_LIST*)(&cblobs); - - // for ( it.mark_cycle_pt(); !it.cycled_list(); it.forward() ) - // { - // it.data()->plot(window,solid,colour,solid ? BLACK : colour); - // } - // } - else { - C_BLOB_IT it = &cblobs; //blobs of WERD - - for (it.mark_cycle_pt (); !it.cycled_list (); it.forward ()) { - it.data ()->plot (window, colour, colour); + } else { // chain code blobs + C_BLOB_IT it = &cblobs; + for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { + it.data()->plot(window, colour, colour); } } - plot_rej_blobs(window, solid); + plot_rej_blobs(window); } #endif +#ifndef GRAPHICS_DISABLED + +// Get the next color in the (looping) rainbow. +ScrollView::Color WERD::NextColor(ScrollView::Color colour) { + ScrollView::Color next = static_cast(colour + 1); + if (next >= LAST_COLOUR || next < FIRST_COLOUR) + next = FIRST_COLOUR; + return next; +} + /** * WERD::plot * - * Draw the WERD in rainbow colours. + * Draw the WERD in rainbow colours in window. 
*/ -#ifndef GRAPHICS_DISABLED -void WERD::plot( //draw it - ScrollView* window, //< window to draw in - BOOL8 solid //< draw larcs solid - ) { - ScrollView::Color colour = FIRST_COLOUR; //current colour - if (flags.bit (W_POLYGON)) { - //polygons - PBLOB_IT it = (PBLOB_LIST *) (&cblobs); - - for (it.mark_cycle_pt (); !it.cycled_list (); it.forward ()) { - it.data ()->plot (window, colour, CHILD_COLOUR); - colour = (ScrollView::Color) (colour + 1); - if (colour == LAST_COLOUR) - colour = FIRST_COLOUR; //cycle round +void WERD::plot(ScrollView* window) { + ScrollView::Color colour = FIRST_COLOUR; + if (flags.bit(W_POLYGON)) { + PBLOB_IT it = (PBLOB_LIST *)(&cblobs); + for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { + it.data()->plot(window, colour, CHILD_COLOUR); + colour = NextColor(colour); } - } - // else if (flags.bit(W_LINEARC)) - // { - // LARC_BLOB_IT it=(LARC_BLOB_LIST*)(&cblobs); - - // for ( it.mark_cycle_pt(); !it.cycled_list(); it.forward() ) - // { - // it.data()->plot(window,solid,colour,solid ? 
BLACK : CHILD_COLOUR); - // colour=(COLOUR)(colour+1); - // if (colour==LAST_COLOUR) - // colour=FIRST_COLOUR; - // } - // } - else { - C_BLOB_IT it = &cblobs; //blobs of WERD - - for (it.mark_cycle_pt (); !it.cycled_list (); it.forward ()) { - it.data ()->plot (window, colour, CHILD_COLOUR); - colour = (ScrollView::Color) (colour + 1); - if (colour == LAST_COLOUR) - colour = FIRST_COLOUR; //cycle round + } else { + C_BLOB_IT it = &cblobs; + for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { + it.data()->plot(window, colour, CHILD_COLOUR); + colour = NextColor(colour); } } - plot_rej_blobs(window, solid); + plot_rej_blobs(window); } #endif @@ -872,26 +650,20 @@ void WERD::plot( //draw it /** * WERD::plot_rej_blobs * - * Draw the WERD rejected blobs - ALWAYS GREY + * Draw the WERD rejected blobs in window - ALWAYS GREY */ #ifndef GRAPHICS_DISABLED -void WERD::plot_rej_blobs( //draw it - ScrollView* window, //< window to draw in - BOOL8 solid //< draw larcs solid - ) { - if (flags.bit (W_POLYGON)) { - PBLOB_IT it = (PBLOB_LIST *) (&rej_cblobs); - //polygons - - for (it.mark_cycle_pt (); !it.cycled_list (); it.forward ()) { - it.data ()->plot (window, ScrollView::GREY, ScrollView::GREY); +void WERD::plot_rej_blobs(ScrollView *window) { + if (flags.bit(W_POLYGON)) { + PBLOB_IT it = (PBLOB_LIST *)(&rej_cblobs); + for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { + it.data()->plot(window, ScrollView::GREY, ScrollView::GREY); } } else { - C_BLOB_IT it = &rej_cblobs; //blobs of WERD - - for (it.mark_cycle_pt (); !it.cycled_list (); it.forward ()) { - it.data ()->plot (window, ScrollView::GREY, ScrollView::GREY); + C_BLOB_IT it = &rej_cblobs; + for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { + it.data()->plot(window, ScrollView::GREY, ScrollView::GREY); } } } @@ -904,7 +676,7 @@ void WERD::plot_rej_blobs( //draw it * Make a shallow copy of a word */ -WERD *WERD::shallow_copy() { //shallow copy +WERD *WERD::shallow_copy() { WERD *new_word = new 
WERD; new_word->blanks = blanks; @@ -921,15 +693,14 @@ WERD *WERD::shallow_copy() { //shallow copy * Assign a word, DEEP copying the blob list */ -WERD & WERD::operator= ( //assign words -const WERD & source //from this -) { - this->ELIST_LINK::operator= (source); +WERD & WERD::operator= (const WERD & source) { + this->ELIST2_LINK::operator= (source); blanks = source.blanks; flags = source.flags; + script_id_ = source.script_id_; dummy = source.dummy; correct = source.correct; - if (flags.bit (W_POLYGON)) { + if (flags.bit(W_POLYGON)) { if (!cblobs.empty()) reinterpret_cast(&cblobs)->clear(); reinterpret_cast(&cblobs)->deep_copy( @@ -941,12 +712,12 @@ const WERD & source //from this reinterpret_cast(&source.rej_cblobs), &PBLOB::deep_copy); } else { - if (!cblobs.empty ()) - cblobs.clear (); + if (!cblobs.empty()) + cblobs.clear(); cblobs.deep_copy(&source.cblobs, &C_BLOB::deep_copy); - if (!rej_cblobs.empty ()) - rej_cblobs.clear (); + if (!rej_cblobs.empty()) + rej_cblobs.clear(); rej_cblobs.deep_copy(&source.rej_cblobs, &C_BLOB::deep_copy); } return *this; @@ -960,14 +731,107 @@ const WERD & source //from this * order of left edge. */ -int word_comparator( //sort blobs - const void *word1p, //< ptr to ptr to word1 - const void *word2p //< ptr to ptr to word2 - ) { - WERD * - word1 = *(WERD **) word1p; - WERD * - word2 = *(WERD **) word2p; +int word_comparator(const void *word1p, const void *word2p) { + WERD *word1 = *(WERD **)word1p; + WERD *word2 = *(WERD **)word2p; + return word1->bounding_box().left() - word2->bounding_box().left(); +} - return word1->bounding_box ().left () - word2->bounding_box ().left (); +/** + * WERD::ConstructWerdWithNewBlobs() + * + * This method returns a new werd constructed using the blobs in the input + * all_blobs list, which correspond to the blobs in this werd object. The + * blobs used to construct the new word are consumed and removed from the + * input all_blobs list. + * Returns NULL if the word couldn't be constructed. 
+ * Returns original blobs for which no matches were found in the output list + * orphan_blobs (appends). + */ + +WERD* WERD::ConstructWerdWithNewBlobs(C_BLOB_LIST* all_blobs, + C_BLOB_LIST* orphan_blobs) { + C_BLOB_LIST current_blob_list; + C_BLOB_IT werd_blobs_it(¤t_blob_list); + // Add the word's c_blobs. + werd_blobs_it.add_list_after(cblob_list()); + + // New blob list. These contain the blobs which will form the new word. + C_BLOB_LIST new_werd_blobs; + C_BLOB_IT new_blobs_it(&new_werd_blobs); + + // not_found_blobs contains the list of current word's blobs for which a + // corresponding blob wasn't found in the input all_blobs list. + C_BLOB_LIST not_found_blobs; + C_BLOB_IT not_found_it(¬_found_blobs); + not_found_it.move_to_last(); + + werd_blobs_it.move_to_first(); + for (werd_blobs_it.mark_cycle_pt(); !werd_blobs_it.cycled_list(); + werd_blobs_it.forward()) { + C_BLOB* werd_blob = werd_blobs_it.extract(); + TBOX werd_blob_box = werd_blob->bounding_box(); + bool found = false; + // Now find the corresponding blob for this blob in the all_blobs + // list. For now, follow the inefficient method of pairwise + // comparisons. Ideally, one can pre-bucket the blobs by row. + C_BLOB_IT all_blobs_it(all_blobs); + for (all_blobs_it.mark_cycle_pt(); !all_blobs_it.cycled_list(); + all_blobs_it.forward()) { + C_BLOB* a_blob = all_blobs_it.data(); + // Compute the overlap of the two blobs. If major, a_blob should + // be added to the new blobs list. + TBOX a_blob_box = a_blob->bounding_box(); + if (a_blob_box.null_box()) { + tprintf("Bounding box couldn't be ascertained\n"); + } + if (werd_blob_box.contains(a_blob_box) || + werd_blob_box.major_overlap(a_blob_box)) { + // Old blobs are from minimal splits, therefore are expected to be + // bigger. The new small blobs should cover a significant portion. + // This is it. 
+ all_blobs_it.extract(); + new_blobs_it.add_after_then_move(a_blob); + found = true; + } + } + if (!found) { + not_found_it.add_after_then_move(werd_blob); + } + } + // Iterate over all not found blobs. Some of them may be due to + // under-segmentation (which is OK, since the corresponding blob is already + // in the list in that case. + not_found_it.move_to_first(); + for (not_found_it.mark_cycle_pt(); !not_found_it.cycled_list(); + not_found_it.forward()) { + C_BLOB* not_found = not_found_it.data(); + TBOX not_found_box = not_found->bounding_box(); + bool found = false; + C_BLOB_IT existing_blobs_it(new_blobs_it); + for (existing_blobs_it.mark_cycle_pt(); !existing_blobs_it.cycled_list(); + existing_blobs_it.forward()) { + C_BLOB* a_blob = existing_blobs_it.data(); + TBOX a_blob_box = a_blob->bounding_box(); + if ((not_found_box.major_overlap(a_blob_box) || + a_blob_box.major_overlap(not_found_box)) && + not_found_box.y_overlap(a_blob_box) > 0.8) { + // Already taken care of. + found = true; + not_found_it.extract(); + } + } + } + if (orphan_blobs) { + C_BLOB_IT orphan_blobs_it(orphan_blobs); + orphan_blobs_it.move_to_last(); + orphan_blobs_it.add_list_after(¬_found_blobs); + } + + // New blobs are ready. Create a new werd object with these. + WERD* new_werd = NULL; + if (!new_werd_blobs.empty()) { + new_werd = new WERD(&new_werd_blobs, this); + } + return new_werd; } diff --git a/ccstruct/werd.h b/ccstruct/werd.h index eff12267a8..47b3248f56 100644 --- a/ccstruct/werd.h +++ b/ccstruct/werd.h @@ -20,13 +20,12 @@ #ifndef WERD_H #define WERD_H -#include "varable.h" +#include "params.h" #include "bits16.h" #include "strngs.h" #include "blckerr.h" #include "stepblob.h" #include "polyblob.h" -//#include "larcblob.h" enum WERD_FLAGS { @@ -37,7 +36,8 @@ enum WERD_FLAGS W_EOL, //< end of line W_NORMALIZED, //< flags W_POLYGON, //< approximation - W_LINEARC, //< linearc approx + W_SCRIPT_HAS_XHEIGHT, //< x-height concept makes sense. 
+ W_SCRIPT_IS_LATIN, //< Special case latin for y. splitting. W_DONT_CHOP, //< fixed pitch chopped W_REP_CHAR, //< repeated character W_FUZZY_SP, //< fuzzy space @@ -57,221 +57,185 @@ enum DISPLAY_FLAGS class ROW; //forward decl -class WERD:public ELIST_LINK -{ +class WERD : public ELIST2_LINK { public: - WERD() { - } //empty constructor - WERD( //constructor - C_BLOB_LIST *blob_list, //blobs in word - uinT8 blanks, //blanks in front - const char *text); //correct text - WERD( //constructor - PBLOB_LIST *blob_list, //blobs in word - uinT8 blanks, //blanks in front - const char *text); //correct text - WERD( //constructor - PBLOB_LIST *blob_list, //blobs in word - WERD *clone); //use these flags etc. - WERD( //constructor - C_BLOB_LIST *blob_list, //blobs in word - WERD *clone); //use these flags etc. - ~WERD () { //destructor - if (flags.bit (W_POLYGON)) { - //use right destructor - ((PBLOB_LIST *) & cblobs)->clear (); - //use right destructor - ((PBLOB_LIST *) & rej_cblobs)->clear (); + WERD() {} + // WERD constructed with: + // blob_list - blobs of the word (we take this list's contents) + // blanks - number of blanks before the word + // text - correct text (outlives WERD) + WERD(C_BLOB_LIST *blob_list, uinT8 blanks, const char *text); + WERD(PBLOB_LIST *blob_list, uinT8 blanks, const char *text); + + // WERD constructed from: + // blob_list - blobs in the word + // clone - werd to clone flags, etc from. + WERD(PBLOB_LIST *blob_list, WERD *clone); + WERD(C_BLOB_LIST *blob_list, WERD *clone); + + // Construct a WERD from a single_blob and clone the flags from this. + // W_BOL and W_EOL flags are set according to the given values. 
+ WERD* ConstructFromSingleBlob(bool bol, bool eol, C_BLOB* blob); + + ~WERD() { + if (flags.bit(W_POLYGON)) { + // use right destructor for PBLOBs + ((PBLOB_LIST *) &cblobs)->clear(); + ((PBLOB_LIST *) &rej_cblobs)->clear(); } - // else if (flags.bit(W_LINEARC)) - // ((LARC_BLOB_LIST*)&cblobs)->clear(); //use right destructor } - WERD *poly_copy( //make copy as poly - float xheight); //row xheight - WERD *larc_copy( //make copy as larc - float xheight); //row xheight + // assignment + WERD & operator= (const WERD &source); + + // This method returns a new werd constructed using the blobs in the input + // all_blobs list, which correspond to the blobs in this werd object. The + // blobs used to construct the new word are consumed and removed from the + // input all_blobs list. + // Returns NULL if the word couldn't be constructed. + // Returns original blobs for which no matches were found in the output list + // orphan_blobs (appends). + WERD *ConstructWerdWithNewBlobs(C_BLOB_LIST *all_blobs, + C_BLOB_LIST *orphan_blobs); + + WERD *poly_copy(); // make a copy - //get DUFF compact blobs - C_BLOB_LIST *rej_cblob_list() { - if (flags.bit (W_POLYGON)) - WRONG_WORD.error ("WERD::rej_cblob_list", ABORT, NULL); + // Accessors for reject / DUFF blobs in various formats + C_BLOB_LIST *rej_cblob_list() { // compact format + if (flags.bit(W_POLYGON)) + WRONG_WORD.error("WERD::rej_cblob_list", ABORT, NULL); return &rej_cblobs; } - - //get DUFF poly blobs - PBLOB_LIST *rej_blob_list() { - if (!flags.bit (W_POLYGON)) - WRONG_WORD.error ("WERD::rej_blob_list", ABORT, NULL); - return (PBLOB_LIST *) (&rej_cblobs); + PBLOB_LIST *rej_blob_list() { // poly format + if (!flags.bit(W_POLYGON)) + WRONG_WORD.error("WERD::rej_blob_list", ABORT, NULL); + return (PBLOB_LIST *)(&rej_cblobs); } - C_BLOB_LIST *cblob_list() { //get compact blobs - if (flags.bit (W_POLYGON) || flags.bit (W_LINEARC)) - WRONG_WORD.error ("WERD::cblob_list", ABORT, NULL); + // Accessors for good blobs in various 
formats. + C_BLOB_LIST *cblob_list() { // get compact blobs + if (flags.bit(W_POLYGON)) + WRONG_WORD.error("WERD::cblob_list", ABORT, NULL); return &cblobs; } - PBLOB_LIST *blob_list() { //get poly blobs - if (!flags.bit (W_POLYGON)) - WRONG_WORD.error ("WERD::blob_list", ABORT, NULL); - //make it right type - return (PBLOB_LIST *) (&cblobs); + PBLOB_LIST *blob_list() { // get poly blobs + if (!flags.bit(W_POLYGON)) + WRONG_WORD.error("WERD::blob_list", ABORT, NULL); + return (PBLOB_LIST *)(&cblobs); } - // LARC_BLOB_LIST *larc_blob_list() //get poly blobs - // { - // if (!flags.bit(W_LINEARC)) - // WRONG_WORD.error("WERD::larc_blob_list",ABORT,NULL); - // return (LARC_BLOB_LIST*)(&cblobs); //make it right type - // } - PBLOB_LIST *gblob_list() { //get generic blobs - //make it right type - return (PBLOB_LIST *) (&cblobs); + PBLOB_LIST *gblob_list() { // get generic blobs + return (PBLOB_LIST *)(&cblobs); } - const char *text() const { //correct text - return correct.string (); - } - uinT8 space() { //access function + uinT8 space() { // access function return blanks; } - void set_blanks( //set blanks - uinT8 new_blanks) { + void set_blanks(uinT8 new_blanks) { blanks = new_blanks; } - - void set_text( //replace correct text - const char *new_text) { //with this - correct = new_text; + int script_id() const { + return script_id_; + } + void set_script_id(int id) { + script_id_ = id; } - TBOX bounding_box(); //compute bounding box + TBOX bounding_box(); // compute bounding box - BOOL8 flag( //test flag - WERD_FLAGS mask) const { //flag to test - return flags.bit (mask); - } - void set_flag( //set flag value - WERD_FLAGS mask, //flag to test - BOOL8 value) { //value to set - flags.set_bit (mask, value); - } + const char *text() const { return correct.string(); } + void set_text(const char *new_text) { correct = new_text; } - BOOL8 display_flag( //test display flag - uinT8 flag) const { //flag to test - return disp_flags.bit (flag); - } + BOOL8 flag(WERD_FLAGS mask) 
const { return flags.bit(mask); } + void set_flag(WERD_FLAGS mask, BOOL8 value) { flags.set_bit(mask, value); } - void set_display_flag( //set display flag - uinT8 flag, //flag to set - BOOL8 value) { //value to set - disp_flags.set_bit (flag, value); + BOOL8 display_flag(uinT8 flag) const { return disp_flags.bit(flag); } + void set_display_flag(uinT8 flag, BOOL8 value) { + disp_flags.set_bit(flag, value); } - WERD *shallow_copy(); //shallow copy word - - void move( // reposition word - const ICOORD vec); // by vector + WERD *shallow_copy(); // shallow copy word - void scale( // scale word - const float vec); // by multiplier + // reposition word by vector + void move(const ICOORD vec); - void join_on( //append word - WERD *&other); //Deleting other + // scale word by multiplier + void scale(const float f); - void copy_on( //copy blobs - WERD *&other); //from other + // join other's blobs onto this werd, emptying out other. + void join_on(WERD* other); - void baseline_normalise ( // Tess style BL Norm - //optional antidote - ROW * row, DENORM * denorm = NULL); + // copy other's blobs onto this word, leaving other intact. + void copy_on(WERD* other); - void baseline_normalise_x ( //Use non standard xht - ROW * row, float x_height, //Weird value to use - DENORM * denorm = NULL); //optional antidote + // Normalize a word to tesseract coordinates, (x centered at 0, y between + // (bln_baseline_offset)..(bln_baseline_offset + bln_x_height) + // - usually 64..192) + // Optionally return an antidote (denorm) to undo this normalization. + // If xheight is given, we use that instead of row's xheight. 
+ void baseline_normalize(ROW *row, DENORM *denorm, bool numeric_mode); + void baseline_normalize_x(ROW *row, float x_height, + DENORM *denorm, bool numeric_mode); - void baseline_denormalise( //un-normalise - const DENORM *denorm); + // return word to original coordinates + void baseline_denormalize(const DENORM *antidote); - void print( //print - FILE *fp); //file to print on + // tprintf word metadata (but not blob innards) + void print(); - void plot ( //draw one - ScrollView* window, //window to draw in - //uniform colour - ScrollView::Color colour, BOOL8 solid = FALSE); + // plot word on window in a uniform colour + void plot(ScrollView *window, ScrollView::Color colour); - void plot ( //draw one - //in rainbow colours - ScrollView* window, BOOL8 solid = FALSE); + // Get the next color in the (looping) rainbow. + static ScrollView::Color NextColor(ScrollView::Color colour); - void plot_rej_blobs ( //draw one - //in rainbow colours - ScrollView* window, BOOL8 solid = FALSE); + // plot word on window in a rainbow of colours + void plot(ScrollView *window); - WERD & operator= ( //assign words - const WERD & source); //from this + // plot rejected blobs in a rainbow of colours + void plot_rej_blobs(ScrollView *window); - void prep_serialise() { //set ptrs to counts - correct.prep_serialise (); - if (flags.bit (W_POLYGON)) - ((PBLOB_LIST *) (&cblobs))->prep_serialise (); - // else if (flags.bit(W_LINEARC)) - // ((LARC_BLOB_LIST*)(&cblobs))->prep_serialise(); + void prep_serialise() { // set ptrs to counts + correct.prep_serialise(); + if (flags.bit(W_POLYGON)) + ((PBLOB_LIST *)(&cblobs))->prep_serialise(); else - cblobs.prep_serialise (); - rej_cblobs.prep_serialise (); + cblobs.prep_serialise(); + rej_cblobs.prep_serialise(); } - void dump( //write external bits - FILE *f) { - correct.dump (f); - if (flags.bit (W_POLYGON)) - ((PBLOB_LIST *) (&cblobs))->dump (f); - // else if (flags.bit(W_LINEARC)) - // ((LARC_BLOB_LIST*)(&cblobs))->dump( f ); + // write external 
bits + void dump(FILE *f) { + correct.dump(f); + if (flags.bit(W_POLYGON)) + ((PBLOB_LIST *)(&cblobs))->dump(f); else - cblobs.dump (f); - rej_cblobs.dump (f); + cblobs.dump(f); + rej_cblobs.dump(f); } - void de_dump( //read external bits - FILE *f) { - correct.de_dump (f); - if (flags.bit (W_POLYGON)) - ((PBLOB_LIST *) (&cblobs))->de_dump (f); - // else if (flags.bit(W_LINEARC)) - // ((LARC_BLOB_LIST*)(&cblobs))->de_dump( f ); + // read external bits + void de_dump(FILE *f) { + correct.de_dump(f); + if (flags.bit(W_POLYGON)) + ((PBLOB_LIST *)(&cblobs))->de_dump(f); else - cblobs.de_dump (f); - rej_cblobs.de_dump (f); + cblobs.de_dump(f); + rej_cblobs.de_dump(f); } make_serialise (WERD) private: - uinT8 blanks; //no of blanks - uinT8 dummy; //padding - BITS16 flags; //flags about word - BITS16 disp_flags; //display flags - inT16 dummy2; //padding - STRING correct; //correct text - C_BLOB_LIST cblobs; //compacted blobs - C_BLOB_LIST rej_cblobs; //DUFF blobs + uinT8 blanks; // no of blanks + uinT8 dummy; // padding + BITS16 flags; // flags about word + BITS16 disp_flags; // display flags + inT16 script_id_; // From unicharset. + STRING correct; // correct text + C_BLOB_LIST cblobs; // compacted blobs + C_BLOB_LIST rej_cblobs; // DUFF blobs }; -ELISTIZEH_S (WERD) -#include "ocrrow.h" //placed here due to -extern BOOL_VAR_H (bln_numericmode, 0, "Optimize for numbers"); -extern INT_VAR_H (bln_x_height, 128, "Baseline Normalisation X-height"); -extern INT_VAR_H (bln_baseline_offset, 64, -"Baseline Norm. 
offset of baseline"); -//void poly_linearc_outlines( //do list of outlines -//LARC_OUTLINE_LIST *srclist, //list to convert -//OUTLINE_LIST *destlist //desstination list -//); -//OUTLINE *poly_larcline( //draw it -//LARC_OUTLINE *srcline //one to approximate -//); -int word_comparator( //sort blobs - const void *word1p, //ptr to ptr to word1 - const void *word2p //ptr to ptr to word2 - ); +ELIST2IZEH_S (WERD) +#include "ocrrow.h" // placed here due to +// compare words by increasing order of left edge, suitable for qsort(3) +int word_comparator(const void *word1p, const void *word2p); #endif diff --git a/ccutil/Makefile.am b/ccutil/Makefile.am index 0d4c289062..31cde300b3 100644 --- a/ccutil/Makefile.am +++ b/ccutil/Makefile.am @@ -1,21 +1,21 @@ SUBDIRS = AM_CXXFLAGS = -DTESSDATA_PREFIX=@datadir@/ -EXTRA_DIST = ccutil.vcproj mfcpch.cpp scanutils.cpp scanutils.h +EXTRA_DIST = mfcpch.cpp scanutils.cpp scanutils.h include_HEADERS = \ ambigs.h basedir.h bits16.h boxread.h \ - callback.h ccutil.h clst.h \ + tesscallback.h ccutil.h clst.h \ debugwin.h elst2.h elst.h errcode.h \ fileerr.h genericvector.h globaloc.h \ hashfn.h helpers.h host.h hosthplb.h lsterr.h \ - mainblk.h memblk.h memry.h memryerr.h mfcpch.h \ + memblk.h memry.h memryerr.h mfcpch.h \ ndminx.h notdll.h nwmain.h \ - ocrclass.h ocrshell.h platform.h qrsequence.h \ - secname.h serialis.h stderr.h strngs.h scanutils.h \ - tessclas.h tessdatamanager.h tessopt.h tordvars.h tprintf.h \ + ocrclass.h platform.h qrsequence.h \ + secname.h serialis.h sorthelper.h stderr.h strngs.h \ + tessdatamanager.h tprintf.h \ unichar.h unicharmap.h unicharset.h unicity_table.h \ - varable.h + params.h lib_LTLIBRARIES = libtesseract_ccutil.la libtesseract_ccutil_la_SOURCES = \ @@ -23,10 +23,10 @@ libtesseract_ccutil_la_SOURCES = \ ccutil.cpp clst.cpp debugwin.cpp \ elst2.cpp elst.cpp errcode.cpp \ globaloc.cpp hashfn.cpp \ - mainblk.cpp memblk.cpp memry.cpp ocrshell.cpp \ - serialis.cpp strngs.cpp scanutils.cpp\ - 
tessdatamanager.cpp tessopt.cpp tordvars.cpp tprintf.cpp \ + mainblk.cpp memblk.cpp memry.cpp \ + serialis.cpp strngs.cpp \ + tessdatamanager.cpp tprintf.cpp \ unichar.cpp unicharmap.cpp unicharset.cpp \ - varable.cpp + params.cpp libtesseract_ccutil_la_LDFLAGS = -version-info $(GENERIC_LIBRARY_VERSION) diff --git a/ccutil/Makefile.in b/ccutil/Makefile.in index 16a4858278..24da53b9d9 100644 --- a/ccutil/Makefile.in +++ b/ccutil/Makefile.in @@ -74,9 +74,8 @@ libtesseract_ccutil_la_LIBADD = am_libtesseract_ccutil_la_OBJECTS = ambigs.lo basedir.lo bits16.lo \ boxread.lo ccutil.lo clst.lo debugwin.lo elst2.lo elst.lo \ errcode.lo globaloc.lo hashfn.lo mainblk.lo memblk.lo memry.lo \ - ocrshell.lo serialis.lo strngs.lo scanutils.lo \ - tessdatamanager.lo tessopt.lo tordvars.lo tprintf.lo \ - unichar.lo unicharmap.lo unicharset.lo varable.lo + serialis.lo strngs.lo tessdatamanager.lo tprintf.lo unichar.lo \ + unicharmap.lo unicharset.lo params.lo libtesseract_ccutil_la_OBJECTS = $(am_libtesseract_ccutil_la_OBJECTS) libtesseract_ccutil_la_LINK = $(LIBTOOL) --tag=CXX $(AM_LIBTOOLFLAGS) \ $(LIBTOOLFLAGS) --mode=link $(CXXLD) $(AM_CXXFLAGS) \ @@ -249,7 +248,6 @@ libdir = @libdir@ libexecdir = @libexecdir@ localedir = @localedir@ localstatedir = @localstatedir@ -lt_ECHO = @lt_ECHO@ mandir = @mandir@ mkdir_p = @mkdir_p@ oldincludedir = @oldincludedir@ @@ -267,20 +265,20 @@ top_builddir = @top_builddir@ top_srcdir = @top_srcdir@ SUBDIRS = AM_CXXFLAGS = -DTESSDATA_PREFIX=@datadir@/ -EXTRA_DIST = ccutil.vcproj mfcpch.cpp scanutils.cpp scanutils.h +EXTRA_DIST = mfcpch.cpp scanutils.cpp scanutils.h include_HEADERS = \ ambigs.h basedir.h bits16.h boxread.h \ - callback.h ccutil.h clst.h \ + tesscallback.h ccutil.h clst.h \ debugwin.h elst2.h elst.h errcode.h \ fileerr.h genericvector.h globaloc.h \ hashfn.h helpers.h host.h hosthplb.h lsterr.h \ - mainblk.h memblk.h memry.h memryerr.h mfcpch.h \ + memblk.h memry.h memryerr.h mfcpch.h \ ndminx.h notdll.h nwmain.h \ - ocrclass.h 
ocrshell.h platform.h qrsequence.h \ - secname.h serialis.h stderr.h strngs.h scanutils.h \ - tessclas.h tessdatamanager.h tessopt.h tordvars.h tprintf.h \ + ocrclass.h platform.h qrsequence.h \ + secname.h serialis.h sorthelper.h stderr.h strngs.h \ + tessdatamanager.h tprintf.h \ unichar.h unicharmap.h unicharset.h unicity_table.h \ - varable.h + params.h lib_LTLIBRARIES = libtesseract_ccutil.la libtesseract_ccutil_la_SOURCES = \ @@ -288,11 +286,11 @@ libtesseract_ccutil_la_SOURCES = \ ccutil.cpp clst.cpp debugwin.cpp \ elst2.cpp elst.cpp errcode.cpp \ globaloc.cpp hashfn.cpp \ - mainblk.cpp memblk.cpp memry.cpp ocrshell.cpp \ - serialis.cpp strngs.cpp scanutils.cpp\ - tessdatamanager.cpp tessopt.cpp tordvars.cpp tprintf.cpp \ + mainblk.cpp memblk.cpp memry.cpp \ + serialis.cpp strngs.cpp \ + tessdatamanager.cpp tprintf.cpp \ unichar.cpp unicharmap.cpp unicharset.cpp \ - varable.cpp + params.cpp libtesseract_ccutil_la_LDFLAGS = -version-info $(GENERIC_LIBRARY_VERSION) all: all-recursive @@ -384,18 +382,14 @@ distclean-compile: @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mainblk.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/memblk.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/memry.Plo@am__quote@ -@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ocrshell.Plo@am__quote@ -@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/scanutils.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/params.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/serialis.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/strngs.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/tessdatamanager.Plo@am__quote@ -@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/tessopt.Plo@am__quote@ -@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/tordvars.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/tprintf.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/unichar.Plo@am__quote@ 
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/unicharmap.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/unicharset.Plo@am__quote@ -@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/varable.Plo@am__quote@ .cpp.o: @am__fastdepCXX_TRUE@ $(CXXCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $< diff --git a/ccutil/ambigs.cpp b/ccutil/ambigs.cpp index 637974b70c..c5f93308a0 100644 --- a/ccutil/ambigs.cpp +++ b/ccutil/ambigs.cpp @@ -21,9 +21,9 @@ #include "ambigs.h" #include "helpers.h" -INT_VAR(global_ambigs_debug_level, 0, "Debug level for unichar ambiguities"); -BOOL_VAR(use_definite_ambigs_for_classifier, 0, - "Use definite ambiguities when running character classifier"); +#ifdef WIN32 +#define strtok_r strtok_s +#endif namespace tesseract { @@ -37,15 +37,23 @@ AmbigSpec::AmbigSpec() { ELISTIZE(AmbigSpec); -void UnicharAmbigs::LoadUnicharAmbigs(FILE *AmbigFile, inT64 end_offset, +void UnicharAmbigs::LoadUnicharAmbigs(FILE *AmbigFile, + inT64 end_offset, + int debug_level, + bool use_ambigs_for_adaption, UNICHARSET *unicharset) { - int i; + int i, j; + UnicharIdVector *adaption_ambigs_entry; for (i = 0; i < unicharset->size(); ++i) { replace_ambigs_.push_back(NULL); dang_ambigs_.push_back(NULL); one_to_one_definite_ambigs_.push_back(NULL); + if (use_ambigs_for_adaption) { + ambigs_for_adaption_.push_back(NULL); + reverse_ambigs_for_adaption_.push_back(NULL); + } } - if (global_ambigs_debug_level) tprintf("Reading ambiguities\n"); + if (debug_level) tprintf("Reading ambiguities\n"); int TestAmbigPartSize; int ReplacementAmbigPartSize; @@ -75,10 +83,10 @@ void UnicharAmbigs::LoadUnicharAmbigs(FILE *AmbigFile, inT64 end_offset, while ((end_offset < 0 || ftell(AmbigFile) < end_offset) && fgets(buffer, kBufferSize, AmbigFile) != NULL) { chomp_string(buffer); - if (global_ambigs_debug_level > 2) tprintf("read line %s\n", buffer); + if (debug_level > 2) tprintf("read line %s\n", buffer); ++line_num; - if (!ParseAmbiguityLine(line_num, version, 
*unicharset, buffer, - &TestAmbigPartSize, TestUnicharIds, + if (!ParseAmbiguityLine(line_num, version, debug_level, *unicharset, + buffer, &TestAmbigPartSize, TestUnicharIds, &ReplacementAmbigPartSize, ReplacementString, &type)) continue; // Construct AmbigSpec and add it to the appropriate AmbigSpec_LIST. @@ -89,7 +97,7 @@ void UnicharAmbigs::LoadUnicharAmbigs(FILE *AmbigFile, inT64 end_offset, ambig_spec, unicharset); // Update one_to_one_definite_ambigs_. - if (use_definite_ambigs_for_classifier && TestAmbigPartSize == 1 && + if (TestAmbigPartSize == 1 && ReplacementAmbigPartSize == 1 && type == DEFINITE_AMBIG) { if (one_to_one_definite_ambigs_[TestUnicharIds[0]] == NULL) { one_to_one_definite_ambigs_[TestUnicharIds[0]] = new UnicharIdVector(); @@ -97,10 +105,56 @@ void UnicharAmbigs::LoadUnicharAmbigs(FILE *AmbigFile, inT64 end_offset, one_to_one_definite_ambigs_[TestUnicharIds[0]]->push_back( ambig_spec->correct_ngram_id); } + // Update ambigs_for_adaption_. + if (use_ambigs_for_adaption) { + for (i = 0; i < TestAmbigPartSize; ++i) { + if (ambigs_for_adaption_[TestUnicharIds[i]] == NULL) { + ambigs_for_adaption_[TestUnicharIds[i]] = new UnicharIdVector(); + } + adaption_ambigs_entry = ambigs_for_adaption_[TestUnicharIds[i]]; + const char *tmp_ptr = ReplacementString; + const char *tmp_ptr_end = ReplacementString + strlen(ReplacementString); + int step = unicharset->step(tmp_ptr); + while (step > 0) { + UNICHAR_ID id_to_insert = unicharset->unichar_to_id(tmp_ptr, step); + ASSERT_HOST(id_to_insert != INVALID_UNICHAR_ID); + // Add the new unichar id to adaption_ambigs_entry (only if the + // vector does not already contain it) keeping it in sorted order. 
+ for (j = 0; j < adaption_ambigs_entry->size() && + (*adaption_ambigs_entry)[j] > id_to_insert; ++j); + if (j < adaption_ambigs_entry->size()) { + if ((*adaption_ambigs_entry)[j] != id_to_insert) { + adaption_ambigs_entry->insert(id_to_insert, j); + } + } else { + adaption_ambigs_entry->push_back(id_to_insert); + } + // Update tmp_ptr and step. + tmp_ptr += step; + step = tmp_ptr < tmp_ptr_end ? unicharset->step(tmp_ptr) : 0; + } + } + } } delete[] buffer; + + // Fill in reverse_ambigs_for_adaption from ambigs_for_adaption vector. + if (use_ambigs_for_adaption) { + for (i = 0; i < ambigs_for_adaption_.size(); ++i) { + adaption_ambigs_entry = ambigs_for_adaption_[i]; + if (adaption_ambigs_entry == NULL) continue; + for (j = 0; j < adaption_ambigs_entry->size(); ++j) { + UNICHAR_ID ambig_id = (*adaption_ambigs_entry)[j]; + if (reverse_ambigs_for_adaption_[ambig_id] == NULL) { + reverse_ambigs_for_adaption_[ambig_id] = new UnicharIdVector(); + } + reverse_ambigs_for_adaption_[ambig_id]->push_back(i); + } + } + } + // Print what was read from the input file. - if (global_ambigs_debug_level > 2) { + if (debug_level > 1) { for (int tbl = 0; tbl < 2; ++tbl) { const UnicharAmbigsVector &print_table = (tbl == 0) ? replace_ambigs_ : dang_ambigs_; @@ -122,11 +176,30 @@ void UnicharAmbigs::LoadUnicharAmbigs(FILE *AmbigFile, inT64 end_offset, } } } + if (use_ambigs_for_adaption) { + for (int vec_id = 0; vec_id < 2; ++vec_id) { + const GenericVector &vec = (vec_id == 0) ? + ambigs_for_adaption_ : reverse_ambigs_for_adaption_; + for (i = 0; i < vec.size(); ++i) { + adaption_ambigs_entry = vec[i]; + if (adaption_ambigs_entry != NULL) { + tprintf("%sAmbigs for adaption for %s:\n", + (vec_id == 0) ? 
"" : "Reverse ", + unicharset->debug_str(i).string()); + for (j = 0; j < adaption_ambigs_entry->size(); ++j) { + tprintf("%s ", unicharset->debug_str( + (*adaption_ambigs_entry)[j]).string()); + } + tprintf("\n"); + } + } + } + } } } bool UnicharAmbigs::ParseAmbiguityLine( - int line_num, int version, const UNICHARSET &unicharset, + int line_num, int version, int debug_level, const UNICHARSET &unicharset, char *buffer, int *TestAmbigPartSize, UNICHAR_ID *TestUnicharIds, int *ReplacementAmbigPartSize, char *ReplacementString, int *type) { int i; @@ -134,7 +207,7 @@ bool UnicharAmbigs::ParseAmbiguityLine( char *next_token; if (!(token = strtok_r(buffer, kAmbigDelimiters, &next_token)) || !sscanf(token, "%d", TestAmbigPartSize) || TestAmbigPartSize <= 0) { - if (global_ambigs_debug_level) tprintf(kIllegalMsg, line_num); + if (debug_level) tprintf(kIllegalMsg, line_num); return false; } if (*TestAmbigPartSize > MAX_AMBIG_SIZE) { @@ -144,7 +217,7 @@ bool UnicharAmbigs::ParseAmbiguityLine( for (i = 0; i < *TestAmbigPartSize; ++i) { if (!(token = strtok_r(NULL, kAmbigDelimiters, &next_token))) break; if (!unicharset.contains_unichar(token)) { - if (global_ambigs_debug_level) tprintf(kIllegalUnicharMsg, token); + if (debug_level) tprintf(kIllegalUnicharMsg, token); break; } TestUnicharIds[i] = unicharset.unichar_to_id(token); @@ -155,7 +228,7 @@ bool UnicharAmbigs::ParseAmbiguityLine( !(token = strtok_r(NULL, kAmbigDelimiters, &next_token)) || !sscanf(token, "%d", ReplacementAmbigPartSize) || *ReplacementAmbigPartSize <= 0) { - if (global_ambigs_debug_level) tprintf(kIllegalMsg, line_num); + if (debug_level) tprintf(kIllegalMsg, line_num); return false; } if (*ReplacementAmbigPartSize > MAX_AMBIG_SIZE) { @@ -167,12 +240,12 @@ bool UnicharAmbigs::ParseAmbiguityLine( if (!(token = strtok_r(NULL, kAmbigDelimiters, &next_token))) break; strcat(ReplacementString, token); if (!unicharset.contains_unichar(token)) { - if (global_ambigs_debug_level) tprintf(kIllegalUnicharMsg, 
token); + if (debug_level) tprintf(kIllegalUnicharMsg, token); break; } } if (i != *ReplacementAmbigPartSize) { - if (global_ambigs_debug_level) tprintf(kIllegalMsg, line_num); + if (debug_level) tprintf(kIllegalMsg, line_num); return false; } if (version > 0) { @@ -187,7 +260,7 @@ bool UnicharAmbigs::ParseAmbiguityLine( // has limited support for ngram unichar (e.g. dawg permuter). if (!(token = strtok_r(NULL, kAmbigDelimiters, &next_token)) || !sscanf(token, "%d", type)) { - if (global_ambigs_debug_level) tprintf(kIllegalMsg, line_num); + if (debug_level) tprintf(kIllegalMsg, line_num); return false; } } @@ -226,7 +299,7 @@ void UnicharAmbigs::InsertIntoTable( if (ReplacementAmbigPartSize > 1) { unicharset->set_isngram(ambig_spec->correct_ngram_id, true); } - // Add the corresponding fragments of the correct ngram to unicharset. + // Add the corresponding fragments of the wrong ngram to unicharset. int i; for (i = 0; i < TestAmbigPartSize; ++i) { UNICHAR_ID unichar_id; @@ -248,7 +321,7 @@ void UnicharAmbigs::InsertIntoTable( table[TestUnicharIds[0]] = new AmbigSpec_LIST(); } table[TestUnicharIds[0]]->add_sorted( - AmbigSpec::compare_ambig_specs, ambig_spec); + AmbigSpec::compare_ambig_specs, false, ambig_spec); } } // namespace tesseract diff --git a/ccutil/ambigs.h b/ccutil/ambigs.h index e3bde2fd99..a72e751633 100644 --- a/ccutil/ambigs.h +++ b/ccutil/ambigs.h @@ -29,13 +29,10 @@ #define MAX_AMBIG_SIZE 10 -extern INT_VAR_H(global_ambigs_debug_level, 0, - "Debug level for unichar ambiguities"); -extern BOOL_VAR_H(use_definite_ambigs_for_classifier, 0, - "Use definite ambiguities when running character classifier"); - namespace tesseract { +typedef GenericVector UnicharIdVector; + static const int kUnigramAmbigsBufferSize = 1000; static const char kAmbigNgramSeparator[] = { ' ', '\0' }; static const char kAmbigDelimiters[] = "\t "; @@ -75,6 +72,15 @@ class UnicharIdArrayUtils { return *ptr1 == INVALID_UNICHAR_ID ? -1 : 1; } + // Look uid in the vector of uids. 
If found, the index of the matched + // element is returned. Otherwise, it returns -1. + static inline int find_in(const UnicharIdVector& uid_vec, + const UNICHAR_ID uid) { + for (int i = 0; i < uid_vec.size(); ++i) + if (uid_vec[i] == uid) return i; + return -1; + } + // Copies UNICHAR_IDs from dst to src. Returns the number of ids copied. // The function assumes that the arrays are terminated by INVALID_UNICHAR_ID // and that dst has enough space for all the elements from src. @@ -131,7 +137,6 @@ ELISTIZEH(AmbigSpec); // AMBIG_TABLE[i] stores a set of ambiguities whose // wrong ngram starts with unichar id i. typedef GenericVector UnicharAmbigsVector; -typedef GenericVector UnicharIdVector; class UnicharAmbigs { public: @@ -155,18 +160,39 @@ class UnicharAmbigs { // one_to_one_definite_ambigs_. This vector is also indexed by the class id // of the wrong part of the ambiguity and each entry contains a vector of // unichar ids that are ambiguous to it. - void LoadUnicharAmbigs(FILE *ambigs_file, inT64 end_offset, - UNICHARSET *unicharset); + void LoadUnicharAmbigs(FILE *ambigs_file, inT64 end_offset, int debug_level, + bool use_ambigs_for_adaption, UNICHARSET *unicharset); - // Return definite 1-1 ambigs. - const UnicharIdVector *OneToOneDefiniteAmbigs(UNICHAR_ID unichar_id) const { + // Returns definite 1-1 ambigs for the given unichar id. + inline const UnicharIdVector *OneToOneDefiniteAmbigs( + UNICHAR_ID unichar_id) const { if (one_to_one_definite_ambigs_.empty()) return NULL; return one_to_one_definite_ambigs_[unichar_id]; } + // Returns a pointer to the vector with all unichar ids that appear in the + // 'correct' part of the ambiguity pair when the given unichar id appears + // in the 'wrong' part of the ambiguity. E.g. if DangAmbigs file consist of + // m->rn,rn->m,m->iii, UnicharAmbigsForAdaption() called with unichar id of + // m will return a pointer to a vector with unichar ids of r,n,i. 
+ inline const UnicharIdVector *AmbigsForAdaption( + UNICHAR_ID unichar_id) const { + if (ambigs_for_adaption_.empty()) return NULL; + return ambigs_for_adaption_[unichar_id]; + } + + // Similar to the above, but return the vector of unichar ids for which + // the given unichar_id is an ambiguity (appears in the 'wrong' part of + // some ambiguity pair). + inline const UnicharIdVector *ReverseAmbigsForAdaption( + UNICHAR_ID unichar_id) const { + if (reverse_ambigs_for_adaption_.empty()) return NULL; + return reverse_ambigs_for_adaption_[unichar_id]; + } + private: - bool ParseAmbiguityLine(int line_num, int version, + bool ParseAmbiguityLine(int line_num, int version, int debug_level, const UNICHARSET &unicharset, char *buffer, int *TestAmbigPartSize, UNICHAR_ID *TestUnicharIds, int *ReplacementAmbigPartSize, @@ -179,6 +205,8 @@ class UnicharAmbigs { UnicharAmbigsVector dang_ambigs_; UnicharAmbigsVector replace_ambigs_; GenericVector one_to_one_definite_ambigs_; + GenericVector ambigs_for_adaption_; + GenericVector reverse_ambigs_for_adaption_; }; } // namespace tesseract diff --git a/ccutil/basedir.cpp b/ccutil/basedir.cpp index 9acfcd5eec..9f07803095 100644 --- a/ccutil/basedir.cpp +++ b/ccutil/basedir.cpp @@ -22,17 +22,14 @@ #ifdef __UNIX__ #include #include +#else +#include #endif #include #include "basedir.h" -#include "varable.h" +#include "params.h" #include "notdll.h" //must be last include -#ifdef __MSW32__ -STRING_VAR(tessedit_module_name, "tessdll.dll", - "Module colocated with tessdata dir"); -#endif - /********************************************************************** * getpath * @@ -42,6 +39,7 @@ STRING_VAR(tessedit_module_name, "tessdll.dll", DLLSYM inT8 getpath( //get dir name of code const char *code, //executable to locate + const STRING &dll_module_name, STRING &path //output path name ) { char directory[MAX_PATH]; //main directory @@ -96,7 +94,7 @@ DLLSYM inT8 getpath( //get dir name of code // Attempt to get the path of the most relevant 
module. If the dll // is being used, this will be the dll. Otherwise GetModuleHandle will // return NULL and default to the path of the executable. - if (GetModuleFileName(GetModuleHandle(tessedit_module_name.string()), + if (GetModuleFileName(GetModuleHandle(dll_module_name.string()), directory, MAX_PATH - 1) == 0) { return -1; } diff --git a/ccutil/basedir.h b/ccutil/basedir.h index 1c8d61c7b9..856e9e6e77 100644 --- a/ccutil/basedir.h +++ b/ccutil/basedir.h @@ -27,6 +27,7 @@ DLLSYM inT8 getpath( //get dir name of code const char *code, //executable to locate + const STRING &dll_module_name, STRING &path //output path name ); #endif diff --git a/ccutil/boxread.cpp b/ccutil/boxread.cpp index c1f9ae845d..fd4b479c94 100644 --- a/ccutil/boxread.cpp +++ b/ccutil/boxread.cpp @@ -23,6 +23,26 @@ #include "unichar.h" #include "tprintf.h" +// Special char code used to identify multi-blob labels. +static const char* kMultiBlobLabelCode = "WordStr"; + +// Open the boxfile based on the given image filename. +FILE* OpenBoxFile(const STRING& fname) { + STRING filename = fname; + const char *lastdot = strrchr(filename.string(), '.'); + if (lastdot != NULL) + filename[lastdot - filename.string()] = '\0'; + + filename += ".box"; + FILE* box_file = NULL; + if (!(box_file = fopen(filename.string(), "r"))) { + CANTOPENFILE.error("read_next_box", TESSEXIT, + "Cant open box file %s", + filename.string()); + } + return box_file; +} + // Box files are used ONLY DURING TRAINING, but by both processes of // creating tr files with tesseract, and unicharset_extractor. // read_next_box factors out the code to interpret a line of a box @@ -33,26 +53,26 @@ // space or tab between fields. // utf8_str must be at least kBoxReadBufSize in length. // If there are page numbers in the file, it reads them all. 
-bool read_next_box(FILE* box_file, char* utf8_str, +bool read_next_box(int *line_number, FILE* box_file, char* utf8_str, int* x_min, int* y_min, int* x_max, int* y_max) { - return read_next_box(-1, box_file, utf8_str, + return read_next_box(-1, line_number, box_file, utf8_str, x_min, y_min, x_max, y_max); } // As read_next_box above, but get a specific page number. (0-based) // Use -1 to read any page number. Files without page number all // read as if they are page 0. -bool read_next_box(int target_page, FILE* box_file, char* utf8_str, +bool read_next_box(int target_page, int *line_number, + FILE* box_file, char* utf8_str, int* x_min, int* y_min, int* x_max, int* y_max) { - static int line = 0; int count = 0; int page = 0; - char buff[kBoxReadBufSize]; //boxfile read buffer + char buff[kBoxReadBufSize]; // boxfile read buffer char uch[kBoxReadBufSize]; char *buffptr = buff; while (fgets(buff, sizeof(buff) - 1, box_file)) { - line++; + (*line_number)++; buffptr = buff; const unsigned char *ubuf = reinterpret_cast(buffptr); @@ -63,6 +83,9 @@ bool read_next_box(int target_page, FILE* box_file, char* utf8_str, buffptr++; if (*buffptr != '\0') { // Read the unichar without messing up on Tibetan. + // According to issue 253 the utf-8 surrogates 85 and A0 are treated + // as whitespace by sscanf, so it is more reliable to just find + // ascii space and tab. int uch_len = 0; while (*buffptr != '\0' && *buffptr != ' ' && *buffptr != '\t') uch[uch_len++] = *buffptr++; @@ -76,40 +99,40 @@ bool read_next_box(int target_page, FILE* box_file, char* utf8_str, page = 0; count = sscanf(buffptr, "%d %d %d %d", x_min, y_min, x_max, y_max); } else { - tprintf("Box file format error on line %i; ignored\n", line); + tprintf("Box file format error on line %i; ignored\n", *line_number); continue; } } if (target_page >= 0 && target_page != page) continue; // Not on the appropriate page. - if (count >= 4) { - // Validate UTF8 by making unichars with it. 
- int used = 0; - while (used < uch_len) { - UNICHAR ch(uch + used, uch_len - used); - int new_used = ch.utf8_len(); - if (new_used == 0) { - tprintf("Bad UTF-8 str %s starts with 0x%02x at line %d, col %d\n", - uch + used, uch[used], line, used + 1); - count = 0; - break; - } - used += new_used; - } - if (uch_len > UNICHAR_LEN) { - tprintf("utf-8 string too long at line %d\n", line); + // Test for long space-delimited string label. + if (strcmp(uch, kMultiBlobLabelCode) == 0 && + (buffptr = strchr(buffptr, '#')) != NULL) { + strcpy(uch, buffptr + 1); + chomp_string(uch); + uch_len = strlen(uch); + } + // Validate UTF8 by making unichars with it. + int used = 0; + while (used < uch_len) { + UNICHAR ch(uch + used, uch_len - used); + int new_used = ch.utf8_len(); + if (new_used == 0) { + tprintf("Bad UTF-8 str %s starts with 0x%02x at line %d, col %d\n", + uch + used, uch[used], *line_number, used + 1); count = 0; + break; } + used += new_used; } - if ((count < 5 && target_page > 0) || (count < 4 && target_page <= 0)) { - tprintf("Box file format error on line %i ignored\n", line); + if (count < 4 || used == 0) { + tprintf("Box file format error on line %i; ignored\n", *line_number); } else { - strcpy(utf8_str, uch); + strncpy(utf8_str, uch, kBoxReadBufSize); return true; // Successfully read a box. } } } fclose(box_file); - line = 0; - return false; //EOF + return false; // EOF } diff --git a/ccutil/boxread.h b/ccutil/boxread.h index f00cbd13a5..a326df5abc 100644 --- a/ccutil/boxread.h +++ b/ccutil/boxread.h @@ -21,9 +21,13 @@ #define TESSERACT_CCUTIL_BOXREAD_H__ #include +#include "strngs.h" // Size of buffer used to read a line from a box file. -const int kBoxReadBufSize = 256; +const int kBoxReadBufSize = 1024; + +// Open the boxfile based on the given image filename. +FILE* OpenBoxFile(const STRING& fname); // read_next_box factors out the code to interpret a line of a box // file so that applybox and unicharset_extractor interpret the same way. 
@@ -33,12 +37,12 @@ const int kBoxReadBufSize = 256; // space or tab between fields. // utf8_str must be at least kBoxReadBufSize in length. // If there are page numbers in the file, it reads them all. -bool read_next_box(FILE* box_file, char* utf8_str, +bool read_next_box(int *line_number, FILE* box_file, char* utf8_str, int* x_min, int* y_min, int* x_max, int* y_max); // As read_next_box above, but get a specific page number. (0-based) // Use -1 to read any page number. Files without page number all // read as if they are page 0. -bool read_next_box(int page, FILE* box_file, char* utf8_str, +bool read_next_box(int page, int *line_number, FILE* box_file, char* utf8_str, int* x_min, int* y_min, int* x_max, int* y_max); #endif // TESSERACT_CCUTIL_BOXREAD_H__ diff --git a/ccutil/ccutil.cpp b/ccutil/ccutil.cpp index 7b064ffc1d..4d5e37c08a 100644 --- a/ccutil/ccutil.cpp +++ b/ccutil/ccutil.cpp @@ -4,15 +4,20 @@ #include "ccutil.h" namespace tesseract { -CCUtil::CCUtil() - : //// mainblk.* ///////////////////////////////////////////////////// - BOOL_MEMBER(m_print_variables, FALSE, - "Print initial values of all variables"), - STRING_MEMBER(m_data_sub_dir, - "tessdata/", "Directory for data files") - //////////////////////////////////////////////////////////////////// - { - +CCUtil::CCUtil() : + params_(), + STRING_INIT_MEMBER(m_data_sub_dir, + "tessdata/", "Directory for data files", ¶ms_), +#ifdef __MSW32__ + STRING_INIT_MEMBER(tessedit_module_name, "tessdll.dll", + "Module colocated with tessdata dir", ¶ms_), +#endif + INT_INIT_MEMBER(ambigs_debug_level, 0, "Debug level for unichar ambiguities", + ¶ms_), + BOOL_INIT_MEMBER(use_definite_ambigs_for_classifier, 0, "Use definite" + " ambiguities when running character classifier", ¶ms_), + BOOL_INIT_MEMBER(use_ambigs_for_adaption, 0, "Use ambigs for deciding" + " whether to adapt to a character", ¶ms_) { } CCUtil::~CCUtil() { @@ -43,6 +48,5 @@ void CCUtilMutex::Unlock() { #endif } - -CCUtilMutex tprintfMutex; 
+CCUtilMutex tprintfMutex; // should remain global } // namespace tesseract diff --git a/ccutil/ccutil.h b/ccutil/ccutil.h index 731d419e8c..55744ca23f 100644 --- a/ccutil/ccutil.h +++ b/ccutil/ccutil.h @@ -23,7 +23,7 @@ #include "errcode.h" #include "strngs.h" #include "tessdatamanager.h" -#include "varable.h" +#include "params.h" #include "unicharset.h" #ifdef WIN32 @@ -54,20 +54,18 @@ class CCUtilMutex { class CCUtil { public: CCUtil(); - ~CCUtil(); + virtual ~CCUtil(); public: + // Read the arguments and set up the data path. void main_setup( const char *argv0, // program name const char *basename // name of image ); - public: + ParamsVectors *params() { return ¶ms_; } + STRING datadir; // dir for data files STRING imagebasename; // name of image - - BOOL_VAR_H (m_print_variables, FALSE, - "Print initial values of all variables"); - STRING_VAR_H (m_data_sub_dir, "tessdata/", "Directory for data files"); STRING lang; STRING language_data_path_prefix; TessdataManager tessdata_manager; @@ -75,9 +73,27 @@ class CCUtil { UnicharAmbigs unichar_ambigs; STRING imagefile; // image file name STRING directory; // main directory + + private: + ParamsVectors params_; + + public: + // Member parameters. + // These have to be declared and initialized after params_ member, since + // params_ should be initialized before parameters are added to it. 
+ STRING_VAR_H(m_data_sub_dir, "tessdata/", "Directory for data files"); + #ifdef __MSW32__ + STRING_VAR_H(tessedit_module_name, "tessdll.dll", + "Module colocated with tessdata dir"); + #endif + INT_VAR_H(ambigs_debug_level, 0, "Debug level for unichar ambiguities"); + BOOL_VAR_H(use_definite_ambigs_for_classifier, 0, + "Use definite ambiguities when running character classifier"); + BOOL_VAR_H(use_ambigs_for_adaption, 0, + "Use ambigs for deciding whether to adapt to a character"); }; -extern CCUtilMutex tprintfMutex; +extern CCUtilMutex tprintfMutex; // should remain global } // namespace tesseract #endif // TESSERACT_CCUTIL_CCUTIL_H__ diff --git a/ccutil/clst.cpp b/ccutil/clst.cpp index 3b7590f3bb..48a801bcf7 100644 --- a/ccutil/clst.cpp +++ b/ccutil/clst.cpp @@ -160,8 +160,8 @@ void CLIST::assign_to_sublist( //to this list * Return count of elements on list **********************************************************************/ -inT32 CLIST::length() { //count elements - CLIST_ITERATOR it(this); +inT32 CLIST::length() const { //count elements + CLIST_ITERATOR it(const_cast(this)); inT32 count = 0; #ifndef NDEBUG @@ -169,7 +169,7 @@ inT32 CLIST::length() { //count elements NULL_OBJECT.error ("CLIST::length", ABORT, NULL); #endif - for (it.mark_cycle_pt (); !it.cycled_list (); it.forward ()) + for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) count++; return count; } @@ -225,7 +225,8 @@ const void *, const void *)) { // indirection. Time is O(1) to add to beginning or end. // Time is linear to add pre-sorted items to an empty list. // If unique, then don't add duplicate entries. -void CLIST::add_sorted(int comparator(const void*, const void*), +// Returns true if the element was added to the list. +bool CLIST::add_sorted(int comparator(const void*, const void*), bool unique, void* new_data) { // Check for adding at the end. 
if (last == NULL || comparator(&last->data, &new_data) < 0) { @@ -238,13 +239,14 @@ void CLIST::add_sorted(int comparator(const void*, const void*), last->next = new_element; } last = new_element; + return true; } else if (!unique || last->data != new_data) { // Need to use an iterator. CLIST_ITERATOR it(this); for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { void* data = it.data(); if (data == new_data && unique) - return; + return false; if (comparator(&data, &new_data) > 0) break; } @@ -252,6 +254,37 @@ void CLIST::add_sorted(int comparator(const void*, const void*), it.add_to_end(new_data); else it.add_before_then_move(new_data); + return true; + } + return false; +} + +// Assuming that the minuend and subtrahend are already sorted with +// the same comparison function, shallow clears this and then copies +// the set difference minuend - subtrahend to this, being the elements +// of minuend that do not compare equal to anything in subtrahend. +// If unique is true, any duplicates in minuend are also eliminated. +void CLIST::set_subtract(int comparator(const void*, const void*), + bool unique, + CLIST* minuend, CLIST* subtrahend) { + shallow_clear(); + CLIST_ITERATOR m_it(minuend); + CLIST_ITERATOR s_it(subtrahend); + // Since both lists are sorted, finding the subtras that are not + // minus is a case of a parallel iteration. 
+ for (m_it.mark_cycle_pt(); !m_it.cycled_list(); m_it.forward()) { + void* minu = m_it.data(); + void* subtra = NULL; + if (!s_it.empty()) { + subtra = s_it.data(); + while (!s_it.at_last() && + comparator(&subtra, &minu) < 0) { + s_it.forward(); + subtra = s_it.data(); + } + } + if (subtra == NULL || comparator(&subtra, &minu) != 0) + add_sorted(comparator, unique, minu); } } diff --git a/ccutil/clst.h b/ccutil/clst.h index 33697e7550..8b65126f79 100644 --- a/ccutil/clst.h +++ b/ccutil/clst.h @@ -96,12 +96,12 @@ class DLLSYM CLIST void shallow_clear(); //clear list but dont //delete data elements - bool empty() { //is list empty? + bool empty() const { //is list empty? return !last; } - bool singleton() { - return last != NULL ? (last == last->next) : FALSE; + bool singleton() const { + return last != NULL ? (last == last->next) : false; } void shallow_copy( //dangerous!! @@ -117,7 +117,7 @@ class DLLSYM CLIST CLIST_ITERATOR *start_it, //from list start CLIST_ITERATOR *end_it); //from list end - inT32 length(); //# elements in list + inT32 length() const; //# elements in list void sort ( //sort elements int comparator ( //comparison routine @@ -129,9 +129,18 @@ class DLLSYM CLIST // indirection. Time is O(1) to add to beginning or end. // Time is linear to add pre-sorted items to an empty list. // If unique, then don't add duplicate entries. - void add_sorted(int comparator(const void*, const void*), + // Returns true if the element was added to the list. + bool add_sorted(int comparator(const void*, const void*), bool unique, void* new_data); + // Assuming that the minuend and subtrahend are already sorted with + // the same comparison function, shallow clears this and then copies + // the set difference minuend - subtrahend to this, being the elements + // of minuend that do not compare equal to anything in subtrahend. + // If unique is true, any duplicates in minuend are also eliminated. 
+ void set_subtract(int comparator(const void*, const void*), bool unique, + CLIST* minuend, CLIST* subtrahend); + void internal_dump ( //serialise each elem FILE * f, //to this file void element_serialiser ( //using this function @@ -165,13 +174,13 @@ class DLLSYM CLIST_ITERATOR CLIST_LINK *prev; //prev element CLIST_LINK *current; //current element CLIST_LINK *next; //next element - bool ex_current_was_last; //current extracted + BOOL8 ex_current_was_last; //current extracted //was end of list - bool ex_current_was_cycle_pt; //current extracted + BOOL8 ex_current_was_cycle_pt; //current extracted //was cycle point CLIST_LINK *cycle_pt; //point we are cycling //the list to. - bool started_cycling; //Have we moved off + BOOL8 started_cycling; //Have we moved off //the start? CLIST_LINK *extract_sublist( //from this current... @@ -229,7 +238,7 @@ class DLLSYM CLIST_ITERATOR void mark_cycle_pt(); //remember current - bool empty() { //is list empty? + BOOL8 empty() { //is list empty? #ifndef NDEBUG if (!list) NO_LIST.error ("CLIST_ITERATOR::empty", ABORT, NULL); @@ -237,15 +246,15 @@ class DLLSYM CLIST_ITERATOR return list->empty (); } - bool current_extracted() { //current extracted? + BOOL8 current_extracted() { //current extracted? return !current; } - bool at_first(); //Current is first? + BOOL8 at_first(); //Current is first? - bool at_last(); //Current is last? + BOOL8 at_last(); //Current is last? - bool cycled_list(); //Completed a cycle? + BOOL8 cycled_list(); //Completed a cycle? 
void add_to_end( //add at end & void *new_data); //dont move @@ -695,7 +704,7 @@ inline void CLIST_ITERATOR::mark_cycle_pt() { * **********************************************************************/ -inline bool CLIST_ITERATOR::at_first() { +inline BOOL8 CLIST_ITERATOR::at_first() { #ifndef NDEBUG if (!this) NULL_OBJECT.error ("CLIST_ITERATOR::at_first", ABORT, NULL); @@ -717,7 +726,7 @@ inline bool CLIST_ITERATOR::at_first() { * **********************************************************************/ -inline bool CLIST_ITERATOR::at_last() { +inline BOOL8 CLIST_ITERATOR::at_last() { #ifndef NDEBUG if (!this) NULL_OBJECT.error ("CLIST_ITERATOR::at_last", ABORT, NULL); @@ -739,7 +748,7 @@ inline bool CLIST_ITERATOR::at_last() { * **********************************************************************/ -inline bool CLIST_ITERATOR::cycled_list() { +inline BOOL8 CLIST_ITERATOR::cycled_list() { #ifndef NDEBUG if (!this) NULL_OBJECT.error ("CLIST_ITERATOR::cycled_list", ABORT, NULL); diff --git a/ccutil/debugwin.cpp b/ccutil/debugwin.cpp index 8efbff0af2..ae073f5409 100644 --- a/ccutil/debugwin.cpp +++ b/ccutil/debugwin.cpp @@ -23,12 +23,14 @@ #include "mfcpch.h" //precompiled headers #include #include "debugwin.h" +#include "params.h" // Include automatically generated configuration file if running autoconf. #ifdef HAVE_CONFIG_H #include "config_auto.h" #endif +// Should remain a global parameter, since this is only used for debug editor. DLLSYM INT_VAR (debug_lines, 256, "Number of lines in debug window"); #ifndef GRAPHICS_DISABLED @@ -41,12 +43,14 @@ DLLSYM INT_VAR (debug_lines, 256, "Number of lines in debug window"); #define scrl_SCROLLER 101 #define text_FLOWED 100 +// Should remain a global variable, since this is only used for debug editor. 
static LCommander *pCommander = NULL; #endif //NT implementation #if defined(__MSW32__) && !defined(_CONSOLE) +#include #define ID_DEBUG_MSG 32779 /********************************************************************** @@ -407,80 +411,7 @@ DEBUG_WIN::~DEBUG_WIN ( void DEBUG_WIN::dprintf ( //debug printf const char *format, ... //special message -) { - #if 0 - LTextEdit *pTextEdit; - va_list args; //variable args - static char msg[1024]; - - inT32 i; - inT32 OriginalLength; - inT32 NewLength; - TEHandle hTextEdit; - char *pTempBuffer; - CharsHandle hChar; - char *pOriginalText; - inT32 StringLength; - - pTextEdit = (LTextEdit *) pWindow->FindPaneByID (text_FLOWED); - if (pTextEdit == NULL) - DebugStr ("\pwhoops"); - - // get a C String from the format and args passed in - - va_start(args, format); //variable list - vsprintf(msg, format, args); //Format into msg - va_end(args); - - StringLength = strlen (msg); - - // get the handle for the text - - hTextEdit = pTextEdit->GetMacTEH (); - if (hTextEdit == NULL) - DebugStr ("\pDEBUG_WIN,WriteCharsToConsole()"); - - // get a pointer to the characters and the length of the character stream - - hChar = TEGetText (hTextEdit); - if (hChar == NULL) - DebugStr ("\pDEBUG_WIN,WriteCharsToConsole()"); - - pOriginalText = *hChar; // get pointer to existing text - - // get the length of the original data - OriginalLength = (*hTextEdit)->teLength; - - // setup a temporary buffer for the new text - - NewLength = OriginalLength + StringLength; - - pTempBuffer = NewPtr (NewLength); - if (pTempBuffer == NULL) - DebugStr ("\pDEBUG_WIN,WriteCharsToConsole()"); - - // copy the original data into the new buffer - - for (i = 0; i < OriginalLength; i++) - pTempBuffer[i] = pOriginalText[i]; - - // append the new data onto the end of the original buffer - - for (i = 0; i < StringLength; i++) { - if (msg[i] == '\n') - pTempBuffer[i + OriginalLength] = '\r'; - else - pTempBuffer[i + OriginalLength] = msg[i]; - } - - // put the new text into the 
text edit item - - TESetText(pTempBuffer, NewLength, hTextEdit); - - // clean up - - DisposePtr(pTempBuffer); - #endif +) { } #endif //Mac Implmentation @@ -504,4 +435,3 @@ void await_destruction() { #endif - diff --git a/ccutil/debugwin.h b/ccutil/debugwin.h index 58e74c660f..6e3bad6929 100644 --- a/ccutil/debugwin.h +++ b/ccutil/debugwin.h @@ -21,7 +21,7 @@ #define DEBUGWIN_H #include "host.h" -#include "varable.h" +#include "params.h" #ifdef __MAC__ #include @@ -35,7 +35,7 @@ #define DEBUG_WIN_XSIZE 700 //default size #define DEBUG_WIN_YSIZE 300 //default size -//number of lines in the scrollable area of the window +// Should remain a global parameter, since this is only used for debug editor. extern DLLSYM INT_VAR_H (debug_lines, 256, "Number of lines in debug window"); //the API for the debug window is simple, see below. diff --git a/ccutil/elst.cpp b/ccutil/elst.cpp index 275a8fb9d2..24e9d4ffc5 100644 --- a/ccutil/elst.cpp +++ b/ccutil/elst.cpp @@ -124,8 +124,8 @@ void ELIST::assign_to_sublist( //to this list * Return count of elements on list **********************************************************************/ -inT32 ELIST::length() { //count elements - ELIST_ITERATOR it(this); +inT32 ELIST::length() const { // count elements + ELIST_ITERATOR it(const_cast(this)); inT32 count = 0; #ifndef NDEBUG @@ -190,8 +190,14 @@ const void *, const void *)) { // Comparision function is the same as used by sort, i.e. uses double // indirection. Time is O(1) to add to beginning or end. // Time is linear to add pre-sorted items to an empty list. -void ELIST::add_sorted(int comparator(const void*, const void*), - ELIST_LINK* new_link) { +// If unique is set to true and comparator() returns 0 (an entry with the +// same information as the one contained in new_link is already in the +// list) - new_link is not added to the list and the function returns the +// pointer to the identical entry that already exists in the list +// (otherwise the function returns new_link). 
+ELIST_LINK *ELIST::add_sorted_and_find( + int comparator(const void*, const void*), + bool unique, ELIST_LINK* new_link) { // Check for adding at the end. if (last == NULL || comparator(&last, &new_link) < 0) { if (last == NULL) { @@ -206,14 +212,19 @@ void ELIST::add_sorted(int comparator(const void*, const void*), ELIST_ITERATOR it(this); for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { ELIST_LINK* link = it.data(); - if (comparator(&link, &new_link) > 0) + int compare = comparator(&link, &new_link); + if (compare > 0) { break; + } else if (unique && compare == 0) { + return link; + } } if (it.cycled_list()) it.add_to_end(new_link); else it.add_before_then_move(new_link); } + return new_link; } /*********************************************************************** diff --git a/ccutil/elst.h b/ccutil/elst.h index 41fea3089b..e07e40f643 100644 --- a/ccutil/elst.h +++ b/ccutil/elst.h @@ -137,20 +137,16 @@ class DLLSYM ELIST last = NULL; } - virtual ~ELIST() { - // Empty - } - void internal_clear ( //destroy all links //ptr to zapper functn void (*zapper) (ELIST_LINK *)); - bool empty() { //is list empty? + bool empty() const { //is list empty? return !last; } - bool singleton() { - return last ? (last == last->next) : FALSE; + bool singleton() const { + return last ? (last == last->next) : false; } void shallow_copy( //dangerous!! @@ -166,7 +162,7 @@ class DLLSYM ELIST ELIST_ITERATOR *start_it, //from list start ELIST_ITERATOR *end_it); //from list end - inT32 length(); //# elements in list + inT32 length() const; // # elements in list void sort ( //sort elements int comparator ( //comparison routine @@ -177,8 +173,20 @@ class DLLSYM ELIST // Comparision function is the same as used by sort, i.e. uses double // indirection. Time is O(1) to add to beginning or end. // Time is linear to add pre-sorted items to an empty list. 
- void add_sorted(int comparator(const void*, const void*), - ELIST_LINK* new_link); + // If unique is set to true and comparator() returns 0 (an entry with the + // same information as the one contained in new_link is already in the + // list) - new_link is not added to the list and the function returns the + // pointer to the identical entry that already exists in the list + // (otherwise the function returns new_link). + ELIST_LINK *add_sorted_and_find(int comparator(const void*, const void*), + bool unique, ELIST_LINK* new_link); + + // Same as above, but returns true if the new entry was inserted, false + // if the identical entry already existed in the list. + bool add_sorted(int comparator(const void*, const void*), + bool unique, ELIST_LINK* new_link) { + return (add_sorted_and_find(comparator, unique, new_link) == new_link); + } void internal_dump ( //serialise each elem FILE * f, //to this file diff --git a/ccutil/elst2.cpp b/ccutil/elst2.cpp index 11063fb4da..1820d923f2 100644 --- a/ccutil/elst2.cpp +++ b/ccutil/elst2.cpp @@ -100,8 +100,8 @@ void ELIST2::assign_to_sublist( //to this list * Return count of elements on list **********************************************************************/ -inT32 ELIST2::length() { //count elements - ELIST2_ITERATOR it(this); +inT32 ELIST2::length() const { // count elements + ELIST2_ITERATOR it(const_cast(this)); inT32 count = 0; #ifndef NDEBUG diff --git a/ccutil/elst2.h b/ccutil/elst2.h index 42daa8b398..4ee7e6d7f6 100644 --- a/ccutil/elst2.h +++ b/ccutil/elst2.h @@ -110,12 +110,12 @@ class DLLSYM ELIST2 void (*zapper) (ELIST2_LINK *)); //ptr to zapper functn - bool empty() { //is list empty? + bool empty() const { //is list empty? return !last; } - bool singleton() { - return last ? (last == last->next) : FALSE; + bool singleton() const { + return last ? (last == last->next) : false; } void shallow_copy( //dangerous!! 
@@ -131,7 +131,7 @@ class DLLSYM ELIST2 ELIST2_ITERATOR *start_it, //from list start ELIST2_ITERATOR *end_it); //from list end - inT32 length(); //# elements in list + inT32 length() const; // # elements in list void sort ( //sort elements int comparator ( //comparison routine @@ -179,13 +179,13 @@ class DLLSYM ELIST2_ITERATOR ELIST2_LINK *prev; //prev element ELIST2_LINK *current; //current element ELIST2_LINK *next; //next element - bool ex_current_was_last; //current extracted + BOOL8 ex_current_was_last; //current extracted //was end of list - bool ex_current_was_cycle_pt; //current extracted + BOOL8 ex_current_was_cycle_pt; //current extracted //was cycle point ELIST2_LINK *cycle_pt; //point we are cycling //the list to. - bool started_cycling; //Have we moved off + BOOL8 started_cycling; //Have we moved off //the start? ELIST2_LINK *extract_sublist( //from this current... @@ -246,7 +246,7 @@ class DLLSYM ELIST2_ITERATOR void mark_cycle_pt(); //remember current - bool empty() { //is list empty? + BOOL8 empty() { //is list empty? #ifndef NDEBUG if (!list) NO_LIST.error ("ELIST2_ITERATOR::empty", ABORT, NULL); @@ -254,15 +254,15 @@ class DLLSYM ELIST2_ITERATOR return list->empty (); } - bool current_extracted() { //current extracted? + BOOL8 current_extracted() { //current extracted? return !current; } - bool at_first(); //Current is first? + BOOL8 at_first(); //Current is first? - bool at_last(); //Current is last? + BOOL8 at_last(); //Current is last? - bool cycled_list(); //Completed a cycle? + BOOL8 cycled_list(); //Completed a cycle? 
void add_to_end( //add at end & ELIST2_LINK *new_link); //dont move @@ -750,7 +750,7 @@ inline void ELIST2_ITERATOR::mark_cycle_pt() { * **********************************************************************/ -inline bool ELIST2_ITERATOR::at_first() { +inline BOOL8 ELIST2_ITERATOR::at_first() { #ifndef NDEBUG if (!this) NULL_OBJECT.error ("ELIST2_ITERATOR::at_first", ABORT, NULL); @@ -772,7 +772,7 @@ inline bool ELIST2_ITERATOR::at_first() { * **********************************************************************/ -inline bool ELIST2_ITERATOR::at_last() { +inline BOOL8 ELIST2_ITERATOR::at_last() { #ifndef NDEBUG if (!this) NULL_OBJECT.error ("ELIST2_ITERATOR::at_last", ABORT, NULL); @@ -794,7 +794,7 @@ inline bool ELIST2_ITERATOR::at_last() { * **********************************************************************/ -inline bool ELIST2_ITERATOR::cycled_list() { +inline BOOL8 ELIST2_ITERATOR::cycled_list() { #ifndef NDEBUG if (!this) NULL_OBJECT.error ("ELIST2_ITERATOR::cycled_list", ABORT, NULL); diff --git a/ccutil/errcode.cpp b/ccutil/errcode.cpp index b2fd96c6e3..be27543106 100644 --- a/ccutil/errcode.cpp +++ b/ccutil/errcode.cpp @@ -92,7 +92,7 @@ const char *format, ... //special message case DBG: case TESSLOG: return; //report only - case EXIT: + case TESSEXIT: //err_exit(); case ABORT: // Create a deliberate segv as the stack trace is more useful that way. 
diff --git a/ccutil/errcode.h b/ccutil/errcode.h index 5d5602fd38..1eff6c2d64 100644 --- a/ccutil/errcode.h +++ b/ccutil/errcode.h @@ -24,8 +24,8 @@ /*Control parameters for error()*/ #define DBG -1 /*log without alert */ -#define TESSLOG 0 /*alert user */ -#define EXIT 1 /*exit after erro */ +#define TESSLOG 0 /*alert user */ +#define TESSEXIT 1 /*exit after erro */ #define ABORT 2 /*abort after error */ /* Explicit Error Abort codes */ diff --git a/ccutil/genericvector.h b/ccutil/genericvector.h index e567ee11cc..7ee7275bda 100644 --- a/ccutil/genericvector.h +++ b/ccutil/genericvector.h @@ -21,8 +21,9 @@ #define TESSERACT_CCUTIL_GENERICVECTOR_H_ #include +#include -#include "callback.h" +#include "tesscallback.h" #include "errcode.h" #include "helpers.h" @@ -47,6 +48,9 @@ class GenericVector { // Double the size of the internal array. void double_the_size(); + // Resizes to size and sets all values to t. + void init_to_size(int size, T t); + // Return the size used. int size() const { return size_used_; @@ -80,6 +84,10 @@ class GenericVector { int push_back(T object); void operator+=(T t); + // Push an element in the front of the array + // Note: This function is O(n) + int push_front(T object); + // Set the value at the given index void set(T t, int index); @@ -90,17 +98,24 @@ class GenericVector { // shifts the remaining elements to the left. void remove(int index); + // Truncates the array to the given size by removing the end. + // If the current size is less, the array is not expanded. + void truncate(int size) { + if (size < size_used_) + size_used_ = size; + } + // Add a callback to be called to delete the elements when the array took // their ownership. - void set_clear_callback(Callback1* cb); + void set_clear_callback(TessCallback1* cb); // Add a callback to be called to compare the elements when needed (contains, // get_id, ...) 
- void set_compare_callback(ResultCallback2* cb); + void set_compare_callback(TessResultCallback2* cb); // Clear the array, calling the clear callback function if any. - // All the owned Callbacks are also deleted. - // If you don't want the Callbacks to be deleted, before calling clear, set + // All the owned callbacks are also deleted. + // If you don't want the callbacks to be deleted, before calling clear, set // the callback to NULL. virtual void clear(); @@ -113,13 +128,13 @@ class GenericVector { void move(GenericVector* from); // Read/Write the array to a file. This does _NOT_ read/write the callbacks. - // The Callback given must be permanent since they will be called more than + // The callback given must be permanent since they will be called more than // once. The given callback will be deleted at the end. // If the callbacks are NULL, then the data is simply read/written using // fread (and swapping)/fwrite. // Returns false on error or if the callback returns false. - bool write(FILE* f, ResultCallback2* cb) const; - bool read(FILE* f, ResultCallback3* cb, bool swap); + bool write(FILE* f, TessResultCallback2* cb) const; + bool read(FILE* f, TessResultCallback3* cb, bool swap); // Allocates a new array of double the current_size, copies over the // information from data to the new location, deletes data and returns @@ -133,6 +148,56 @@ class GenericVector { return data_new; } + // Sorts the members of this vector using the less than comparator (cmp_lt), + // which compares the values. Useful for GenericVectors to primitive types. + // Will not work so great for pointers (unless you just want to sort some + // pointers). You need to provide a specialization to sort_cmp to use + // your type. + void sort(); + + // Sort the array into the order defined by the qsort function comparator. + // The comparator function is as defined by qsort, ie. 
it receives pointers + // to two Ts and returns negative if the first element is to appear earlier + // in the result and positive if it is to appear later, with 0 for equal. + void sort(int (*comparator)(const void*, const void*)) { + qsort(data_, size_used_, sizeof(*data_), comparator); + } + + // Compact the vector by deleting elements using operator!= on basic types. + // The vector must be sorted. + void compact_sorted() { + if (size_used_ == 0) + return; + + // First element is in no matter what, hence the i = 1. + int last_write = 0; + for (int i = 1; i < size_used_; ++i) { + // Finds next unique item and writes it. + if (data_[last_write] != data_[i]) + data_[++last_write] = data_[i]; + } + // last_write is the index of a valid data cell, so add 1. + size_used_ = last_write + 1; + } + + // Compact the vector by deleting elements for which delete_cb returns + // true. delete_cb is a permanent callback and will be deleted. + void compact(TessResultCallback1* delete_cb) { + int new_size = 0; + int old_index = 0; + // Until the callback returns true, the elements stay the same. + while (old_index < size_used_ && !delete_cb->Run(old_index++)) + ++new_size; + // Now just copy anything else that gets false from delete_cb. + for (; old_index < size_used_; ++old_index) { + if (!delete_cb->Run(old_index)) { + data_[new_size++] = data_[old_index]; + } + } + size_used_ = new_size; + delete delete_cb; + } + protected: // Init the object, allocating size memory. 
@@ -145,9 +210,9 @@ class GenericVector { inT32 size_used_; inT32 size_reserved_; T* data_; - Callback1* clear_cb_; + TessCallback1* clear_cb_; // Mutable because Run method is not const - mutable ResultCallback2* compare_cb_; + mutable TessResultCallback2* compare_cb_; }; namespace tesseract { @@ -157,6 +222,23 @@ bool cmp_eq(T const & t1, T const & t2) { return t1 == t2; } +// Used by sort() +// return < 0 if t1 < t2 +// return 0 if t1 == t2 +// return > 0 if t1 > t2 +template +int sort_cmp(const void* t1, const void* t2) { + const T* a = static_cast (t1); + const T* b = static_cast (t2); + if (*a < *b) { + return -1; + } else if (*b < *a) { + return 1; + } else { + return 0; + } +} + } // namespace tesseract // A useful vector that uses operator== to do comparisons. @@ -165,11 +247,11 @@ class GenericVectorEqEq : public GenericVector { public: GenericVectorEqEq() { GenericVector::set_compare_callback( - NewPermanentCallback(tesseract::cmp_eq)); + NewPermanentTessCallback(tesseract::cmp_eq)); } GenericVectorEqEq(int size) : GenericVector(size) { GenericVector::set_compare_callback( - NewPermanentCallback(tesseract::cmp_eq)); + NewPermanentTessCallback(tesseract::cmp_eq)); } }; @@ -192,7 +274,7 @@ GenericVector::~GenericVector() { // copied. template void GenericVector::reserve(int size) { - if (size_reserved_ > size || size <= 0) + if (size_reserved_ >= size || size <= 0) return; T* new_array = new T[size]; for (int i = 0; i < size_used_; ++i) @@ -212,6 +294,14 @@ void GenericVector::double_the_size() { } } +// Resizes to size and sets all values to t. +template +void GenericVector::init_to_size(int size, T t) { + reserve(size); + size_used_ = size; + for (int i = 0; i < size; ++i) + data_[i] = t; +} // Return the object from an index. 
@@ -293,6 +383,18 @@ int GenericVector::push_back(T object) { return index; } +// Add an element in the array (front) +template +int GenericVector::push_front(T object) { + if (size_used_ == size_reserved_) + double_the_size(); + for (int i = size_used_; i > 0; --i) + data_[i] = data_[i-1]; + data_[0] = object; + ++size_used_; + return 0; +} + template void GenericVector::operator+=(T t) { push_back(t); @@ -300,6 +402,7 @@ void GenericVector::operator+=(T t) { template GenericVector &GenericVector::operator+=(const GenericVector& other) { + this->reserve(size_used_ + other.size_used_); for (int i = 0; i < other.size(); ++i) { this->operator+=(other.data_[i]); } @@ -316,14 +419,14 @@ GenericVector &GenericVector::operator=(const GenericVector& other) { // Add a callback to be called to delete the elements when the array took // their ownership. template -void GenericVector::set_clear_callback(Callback1* cb) { +void GenericVector::set_clear_callback(TessCallback1* cb) { clear_cb_ = cb; } // Add a callback to be called to delete the elements when the array took // their ownership. 
template -void GenericVector::set_compare_callback(ResultCallback2* cb) { +void GenericVector::set_compare_callback(TessResultCallback2* cb) { compare_cb_ = cb; } @@ -360,7 +463,7 @@ void GenericVector::delete_data_pointers() { template bool GenericVector::write( - FILE* f, ResultCallback2* cb) const { + FILE* f, TessResultCallback2* cb) const { if (fwrite(&size_reserved_, sizeof(size_reserved_), 1, f) != 1) return false; if (fwrite(&size_used_, sizeof(size_used_), 1, f) != 1) return false; if (cb != NULL) { @@ -379,7 +482,7 @@ bool GenericVector::write( template bool GenericVector::read(FILE* f, - ResultCallback3* cb, + TessResultCallback3* cb, bool swap) { uinT32 reserved; if (fread(&reserved, sizeof(reserved), 1, f) != 1) return false; @@ -422,4 +525,9 @@ void GenericVector::move(GenericVector* from) { from->size_reserved_ = 0; } +template +void GenericVector::sort() { + sort(&tesseract::sort_cmp); +} + #endif // TESSERACT_CCUTIL_GENERICVECTOR_H_ diff --git a/ccutil/globaloc.cpp b/ccutil/globaloc.cpp index 6601902ada..ab6f3828fb 100644 --- a/ccutil/globaloc.cpp +++ b/ccutil/globaloc.cpp @@ -75,9 +75,9 @@ void err_exit() { } -void signal_termination_handler( //The real signal - int sig) { - tprintf ("Signal_termination_handler called with signal %d\n", sig); +void signal_termination_handler(int sig) { + const ERRCODE SIGNAL_HANDLER_ERR = "Signal_termination_handler called"; + SIGNAL_HANDLER_ERR.error("signal_termination_handler", ABORT, "Code %d", sig); switch (sig) { case SIGABRT: signal_exit (-1); //use abort code diff --git a/ccutil/helpers.h b/ccutil/helpers.h index a26c5b4aac..b28a9e5777 100644 --- a/ccutil/helpers.h +++ b/ccutil/helpers.h @@ -31,7 +31,7 @@ // Remove newline (if any) at the end of the string. 
inline void chomp_string(char *str) { int last_index = strlen(str) - 1; - if (str[last_index] == '\n') { + if (last_index >= 0 && str[last_index] == '\n') { str[last_index] = '\0'; } } diff --git a/ccutil/mainblk.cpp b/ccutil/mainblk.cpp index bdf2e6b39e..1d52db34e9 100644 --- a/ccutil/mainblk.cpp +++ b/ccutil/mainblk.cpp @@ -22,25 +22,16 @@ #ifdef __UNIX__ #include #include +#else +#include #endif #include #include "basedir.h" -#include "mainblk.h" #include "ccutil.h" #define VARDIR "configs/" /*variables files */ #define EXTERN -/* -EXTERN DLLSYM STRING datadir; //dir for data files - //name of image -EXTERN DLLSYM STRING imagebasename; -EXTERN BOOL_VAR (m_print_variables, FALSE, -"Print initial values of all variables"); -EXTERN STRING_VAR (m_data_sub_dir, "tessdata/", "Directory for data files"); -EXTERN INT_VAR (memgrab_size, 0, "Preallocation size for batch use");*/ - - const ERRCODE NO_PATH = "Warning:explicit path for executable will not be used for configs"; static const ERRCODE USAGE = "Usage"; @@ -57,6 +48,10 @@ void CCUtil::main_setup( /*main demo program */ const char *basename //name of image ) { imagebasename = basename; /*name of image */ + STRING dll_module_name; + #ifdef __MSW32__ + dll_module_name = tessedit_module_name; + #endif // TESSDATA_PREFIX Environment variable overrules everything. // Compiled in -DTESSDATA_PREFIX is next. @@ -71,7 +66,7 @@ void CCUtil::main_setup( /*main demo program */ #undef _STR #else if (argv0 != NULL) { - if (getpath(argv0, datadir) < 0) + if (getpath(argv0, dll_module_name, datadir) < 0) #ifdef __UNIX__ CANTOPENFILE.error("main", ABORT, "%s to get path", argv0); #else diff --git a/ccutil/mainblk.h b/ccutil/mainblk.h deleted file mode 100644 index d4a4cf6ac8..0000000000 --- a/ccutil/mainblk.h +++ /dev/null @@ -1,40 +0,0 @@ -/********************************************************************** - * File: mainblk.h (Formerly main.h) - * Description: Function to call from main() to setup. 
- * Author: Ray Smith - * Created: Tue Oct 22 11:09:40 BST 1991 - * - * (C) Copyright 1991, Hewlett-Packard Ltd. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. - * - **********************************************************************/ - -#ifndef MAINBLK_H -#define MAINBLK_H - -#include "varable.h" -#include "notdll.h" - -extern DLLSYM STRING datadir; //dir for data files - //name of image -extern DLLSYM STRING imagebasename; -extern BOOL_VAR_H(m_print_variables, FALSE, - "Print initial values of all variables"); -extern STRING_VAR_H(m_data_sub_dir, "data/", "Directory for data files"); -extern INT_VAR_H(memgrab_size, 13000000, "Preallocation size for batch use"); -// > ccutil.h -//void main_setup( /*main demo program */ -// const char *argv0, //program name -// const char *basename, //name of image -// int argc, /*argument count */ -// const char *const *argv /*arguments */ -// ); -#endif diff --git a/ccutil/memblk.h b/ccutil/memblk.h index 24f3f6b44b..f94173ada0 100644 --- a/ccutil/memblk.h +++ b/ccutil/memblk.h @@ -20,7 +20,7 @@ #ifndef MEMBLK_H #define MEMBLK_H -#include "varable.h" +#include "params.h" #define MAXBLOCKS 16 /*max allowed to grab */ #define MAX_STRUCTS 20 //no of units maintained diff --git a/ccutil/nwmain.h b/ccutil/nwmain.h index fe685ff19d..409cd6e008 100644 --- a/ccutil/nwmain.h +++ b/ccutil/nwmain.h @@ -21,7 +21,7 @@ #define RUNMAIN_H #include "host.h" -#include "varable.h" +#include "params.h" #include "notdll.h" //must be last 
include #define DECLARE_MAIN(ARGC,ARGV)\ diff --git a/ccutil/ocrclass.h b/ccutil/ocrclass.h index cd70c02234..3bc8acf643 100644 --- a/ccutil/ocrclass.h +++ b/ccutil/ocrclass.h @@ -27,10 +27,13 @@ #ifndef OCRCLASS_H #define OCRCLASS_H -#include #ifdef __MSW32__ #include +#include "gettimeofday.h" +#else +#include #endif +#include #include "host.h" /*Maximum lengths of various strings*/ @@ -292,54 +295,46 @@ typedef struct /*single character */ **********************************************************************/ typedef bool (*CANCEL_FUNC)(void* cancel_this, int words); -typedef struct ETEXT_STRUCT /*output header */ +class ETEXT_DESC /*output header */ { + public: inT16 count; /*chars in this buffer(0) */ inT16 progress; /*percent complete increasing (0-100) */ inT8 more_to_come; /*true if not last */ - inT8 ocr_alive; /*ocr sets to 1, HP 0 */ + volatile inT8 ocr_alive; /*ocr sets to 1, HP 0 */ inT8 err_code; /*for errcode use */ CANCEL_FUNC cancel; /*returns true to cancel */ void* cancel_this; /*this or other data for cancel*/ - clock_t end_time; /*time to stop if not 0*/ + struct timeval end_time; /*time to stop. expected to be set only by call + to set_deadline_msecs()*/ EANYCODE_CHAR text[1]; /*character data */ -} ETEXT_DESC; /*output header */ -#ifdef __MSW32__ -/********************************************************************** - * ESHM_INFO - * This data structure is used internally to the API to hold the handles - * to the operating system tools used for interprocess communications. - * API users do not access this structure directly. 
- **********************************************************************/ -typedef struct /*shared mem info */ -{ - HANDLE shm_hand; /*handle to shm */ - HANDLE mutex; /*alive check */ - HANDLE ocr_sem; /*ocr semaphore */ - HANDLE hp_sem; /*hp semaphore */ - void *shm_mem; /*shared memory */ - inT32 shm_size; /*size of shm */ -} ESHM_INFO; /*shared mem info */ -#elif defined (__MAC__) -typedef struct /*shared mem info */ -{ - Boolean mutex; /*alive check */ - Boolean ocr_sem; /*ocr semaphore */ - Boolean hp_sem; /*hp semaphore */ - void *shm_mem; /*shared memory */ - inT32 shm_size; /*size of shm */ - inT16 language; + ETEXT_DESC() : count(0), progress(0), more_to_come(0), ocr_alive(0), + err_code(0), cancel(NULL), cancel_this(NULL) { + end_time.tv_sec = 0; + end_time.tv_usec = 0; + } + + // Sets the end time to be deadline_msecs milliseconds from now. + void set_deadline_msecs(inT32 deadline_msecs) { + gettimeofday(&end_time, NULL); + inT32 deadline_secs = deadline_msecs / 1000; + end_time.tv_sec += deadline_secs; + end_time.tv_usec += (deadline_msecs - deadline_secs * 1000) * 1000; + if (end_time.tv_usec > 1000000) { + end_time.tv_usec -= 1000000; + ++end_time.tv_sec; + } + } + + // Returns false if we've not passed the end_time, or have not set a deadline. 
+ bool deadline_exceeded() const { + if (end_time.tv_sec == 0 && end_time.tv_usec == 0) return false; + struct timeval now; + gettimeofday(&now, NULL); + return (now.tv_sec > end_time.tv_sec || (now.tv_sec == end_time.tv_sec && + now.tv_usec > end_time.tv_usec)); + } +}; - // Process management information follows: - ProcessSerialNumber IPEProcess; - ProcessSerialNumber OCRProcess; -} ESHM_INFO; -#elif defined (__UNIX__) -typedef struct /*shared mem info */ -{ - void *shm_mem; /*shared memory */ - inT32 shm_size; /*size of shm */ -} ESHM_INFO; -#endif #endif diff --git a/ccutil/ocrshell.cpp b/ccutil/ocrshell.cpp deleted file mode 100644 index 6b371d8856..0000000000 --- a/ccutil/ocrshell.cpp +++ /dev/null @@ -1,772 +0,0 @@ -/********************************************************************** - * File: ocrshell.cpp - * Description: Code for the OCR side of the OCR API. - * Author: Hewlett-Packard Co - * - * (C) Copyright 1996, Hewlett-Packard Co. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. - * - **********************************************************************/ - -/********************************************************************** - * This file contains code for the OCR side of the HP OCR interface. - * The code is designed to be used with either an ANSI C or C++ compiler. - * The structures are designed to allow them to be used with any - * structure alignment upto 8. 
- **********************************************************************/ - -#include "mfcpch.h" -#include "ocrshell.h" -#include "tprintf.h" -#include - -#define EXTERN - -#ifdef __UNIX__ -EXTERN ESHM_INFO shm; /*info on shm */ -#define TICKS 1 -#endif - -#ifdef __MSW32__ -EXTERN ESHM_INFO shm; /*info on shm */ -#define TICKS 1000 -#endif - -#ifdef __MAC__ - -#if defined(__CFM68K__) && !defined(__USING_STATIC_LIBS__) -#pragma import on -#endif - -extern volatile ESHM_INFO shm; /*info on shm */ -extern unsigned short WaitForSingleObject( /*"C" */ - volatile Boolean &semaphore, - unsigned long timeout); -extern unsigned short ReleaseSemaphore( /*"C" */ - volatile Boolean &semaphore); -#if defined(__CFM68K__) && !defined(__USING_STATIC_LIBS__) -#pragma import reset -#endif -#define WAIT_OBJECT_0 1 -#define TICKS 60 -#endif - -typedef enum { - OCS_UNINIT, /*uninitialized */ - OCS_SETUP_SHM, /*shm setup done */ - OCS_SETUP_INFO, /*startinfo sent */ - OCS_READING_STRIPS, /*read first but more to come */ - OCS_READ_STRIPS, /*read all but no monitor yet */ - OCS_RECOGNIZING, /*OCR incomplete */ - OCS_SENDING_TEXT, /*sent buffer but more to come */ - OCS_DEAD /*disconnected */ -} OCR_STATE; - -/* forward declarations - not in .h file as not needed outside this file*/ -inT16 ocr_internal_shutdown(); /*closedown */ -inT16 wait_for_mutex(); /*wait for HP to be ready */ -inT16 wait_for_hp( /*wait for semaphore */ - inT32 timeout /*in seconds */ - ); -inT16 release_mutex(); /*release mutex */ -inT16 release_ocr(); /*release semaphore */ - -static inT32 font_count = 0; /*number of fonts */ -static inT16 lines_read = 0; /*no read in this image */ - /*current state */ -static OCR_STATE ocr_state = OCS_UNINIT; - -#ifdef __MAC__ -pascal short TerminateOCR(AppleEvent *theEvent, - AppleEvent *theReply, - long refCon) { - ocr_internal_shutdown(); - ExitToShell(); - -} -#endif - -/********************************************************************** - * ocr_open_shm - * - * Attempt to 
connect to the shared memory segment and semaphores used - * in talking to the OCR engine. Called from OCR engine. - * The parameters are the command line arguments in order. - **********************************************************************/ -#ifdef __MAC__ -inT16 -ocr_open_shm (uinT16 * lang) -#else -inT16 -ocr_open_shm ( /*open the shm */ -const char *shm_h, /*handle of shm */ -const char *shm_size, /*size of shm segment */ -const char *mutex_h, /*hp mutex */ -const char *ocr_h, /*ocr semaphore */ -const char *hp_h, /*hp semaphore */ -const char *lang_str, /*language */ -uinT16 * lang /*required language */ -) -#endif -{ - font_count = 0; /*no fonts yet */ - #ifdef __MAC__ - if (shm.OCRProcess.lowLongOfPSN && shm.OCRProcess.highLongOfPSN) - return HPERR; - *lang = shm.language; - GetCurrentProcess (&shm.OCRProcess); - if (WakeUpProcess (&shm.IPEProcess)) - ExitToShell(); - AEInstallEventHandler (kCoreEventClass, kAEQuitApplication, - (AEEventHandlerUPP) TerminateOCR, 0, FALSE); - #else - if (lang != NULL) - /*get language */ - *lang = (uinT16) strtol (lang_str, NULL, 10); - #endif - if (ocr_state != OCS_UNINIT) { - ocr_error(OCR_ERR_BAD_STATE); - return OCR_API_BAD_STATE; /*incorrect state */ - } - #ifdef __MSW32__ - shm.shm_size = strtol (shm_size, NULL, 10); - /*convert to handle */ - shm.shm_hand = (HANDLE) strtol (shm_h, NULL, 10); - shm.shm_mem = MapViewOfFile (shm.shm_hand, FILE_MAP_WRITE, 0, 0, 0); - if (shm.shm_mem == NULL) - return HPERR; /*failed */ - /*convert to handle */ - shm.mutex = (HANDLE) strtol (mutex_h, NULL, 10); - /*convert to handle */ - shm.ocr_sem = (HANDLE) strtol (ocr_h, NULL, 10); - /*convert to handle */ - shm.hp_sem = (HANDLE) strtol (hp_h, NULL, 10); - #endif - - ocr_state = OCS_SETUP_SHM; /*record state */ - return OKAY; - -} - - -/********************************************************************** - * ocr_error - * - * Inform the HP side of an error. - * The OCR engine should do any cleanup of its own and exit aferwards. 
- * Uses the current state to determine how to send it and cleanup. - **********************************************************************/ - -void ocr_error( /*send an error code */ - OCR_ERR_CODE code /*error code */ - ) { - ESTRIP_DESC *strip = (ESTRIP_DESC *) shm.shm_mem; - /*strip info */ - ETEXT_DESC *monitor = (ETEXT_DESC *) shm.shm_mem; - /*progress monitor */ - - switch (ocr_state) { - case OCS_UNINIT: /*uninitialized */ - case OCS_DEAD: /*uninitialized */ - return; /*can't do anything else */ - case OCS_SETUP_SHM: /*shm setup done */ - if (font_count < 1) - font_count = 1; - ocr_setup_startinfo_ansi (-code, LANGE_NONE, "", ""); - /*report error */ - break; - case OCS_SETUP_INFO: /*startinfo sent */ - if (ocr_get_first_image_strip () == NULL) - break; /*disconnected */ - case OCS_READING_STRIPS: /*read first but more to come */ - strip->x_size = -code; /*report error */ - release_ocr(); /*send ack */ - release_mutex(); - break; - case OCS_READ_STRIPS: /*read all but no monitor yet */ - monitor->count = 0; /*chars in this buffer(-1) */ - monitor->progress = 0; /*percent complete increasing (0-100) */ - /*text not complete */ - monitor->more_to_come = FALSE; - monitor->ocr_alive = TRUE; /*ocr sets to 1, hp 0 */ - monitor->err_code = -code; /*report error */ - monitor->cancel = FALSE; /*0=continue, 1=cancel */ - release_ocr(); /*send ack */ - break; - case OCS_RECOGNIZING: /*OCR incomplete */ - case OCS_SENDING_TEXT: /*sent buffer but more to come */ - monitor->err_code = -code; /*report error */ - release_ocr(); /*send ack */ - } - ocr_internal_shutdown(); /*get ready for exit */ -} - - -/********************************************************************** - * ocr_append_fontinfo - * - * Initialize one of the font descriptors. 
- **********************************************************************/ - -inT16 ocr_append_fontinfo( /*put info into shm */ - uinT16 language, /*default language */ - uinT8 font_family, /*serif/not, fixed/not */ - uinT8 char_set, /*character set standard */ - uinT8 pitch, /*fixed or prop */ - const char *name /*plain ascii name */ - ) { - EOCR_DESC *desc; /*ocr engine info */ - int index; /*char index */ - inT32 font_index; /*which font */ - - if (ocr_state != OCS_SETUP_SHM) { - ocr_error(OCR_ERR_BAD_STATE); - return OCR_API_BAD_STATE; /*incorrect state */ - } - - /*turn to right type */ - desc = (EOCR_DESC *) shm.shm_mem; - if (font_count > - (inT32) ((shm.shm_size - sizeof (EOCR_DESC)) / sizeof (EFONT_DESC))) - return OCR_API_NO_MEM; /*insufficient space */ - font_index = font_count++; /*add a font */ - /*setup structure */ - desc->fonts[font_index].language = language; - /*setup structure */ - desc->fonts[font_index].font_family = font_family; - /*setup structure */ - desc->fonts[font_index].char_set = char_set; - /*setup structure */ - desc->fonts[font_index].pitch = pitch; - if (name != NULL) { - for (index = 0; index < MAX_FONT_NAME && name[index] != 0; index++) - desc->fonts[font_index].name[index] = name[index]; - } - else - index = 0; - desc->fonts[font_index].name[index] = 0; - return OKAY; -} - - -/********************************************************************** - * ocr_setup_startinfo - * - * Setup the info on the OCR engine. Uses 16 bit chars to name the - * engine. 
- **********************************************************************/ - -inT16 ocr_setup_startinfo( /*put info into shm */ - inT32 protocol, /*interface version */ - uinT16 language, /*default language */ - const uinT16 *name, /*name of engine */ - const uinT16 *version /*version of engine */ - ) { - EOCR_DESC *desc; /*ocr engine info */ - int index; /*char index */ - inT16 result; /*from open */ - - if (ocr_state != OCS_SETUP_SHM || font_count < 1) { - ocr_error(OCR_ERR_BAD_STATE); - return OCR_API_BAD_STATE; /*incorrect state */ - } - - /*turn to right type */ - desc = (EOCR_DESC *) shm.shm_mem; - desc->protocol = protocol; /*setup structure */ - desc->font_count = font_count; - desc->language = language; - for (index = 0; index < MAX_OCR_NAME && name[index] != 0; index++) - desc->name[index] = name[index]; - desc->name[index] = 0; - for (index = 0; index < MAX_OCR_VERSION && version[index] != 0; index++) - desc->version[index] = version[index]; - desc->version[index] = 0; - - result = release_ocr (); - if (result != OKAY) - return result; - ocr_state = OCS_SETUP_INFO; /*record state */ - return OKAY; -} - - -/********************************************************************** - * ocr_setup_startinfo_ansi - * - * Setup the info on the OCR engine. Uses 8 bit chars to name the - * engine. 
- **********************************************************************/ - -inT16 ocr_setup_startinfo_ansi( /*put info into shm */ - uinT32 protocol, /*interface version */ - uinT16 language, /*default language */ - const char *name, /*name of engine */ - const char *version /*version of engine */ - ) { - EOCR_DESC *desc; /*ocr engine info */ - int index; /*char index */ - inT16 result; /*from open */ - - if (ocr_state != OCS_SETUP_SHM || font_count < 1) { - ocr_error(OCR_ERR_BAD_STATE); - return OCR_API_BAD_STATE; /*incorrect state */ - } - - /*turn to right type */ - desc = (EOCR_DESC *) shm.shm_mem; - desc->protocol = protocol; /*setup structure */ - desc->font_count = font_count; - desc->language = language; - for (index = 0; index < MAX_OCR_NAME && name[index] != 0; index++) - desc->name[index] = name[index]; - desc->name[index] = 0; - for (index = 0; index < MAX_OCR_VERSION && version[index] != 0; index++) - desc->version[index] = version[index]; - desc->version[index] = 0; - - result = release_ocr (); - if (result != OKAY) - return result; - ocr_state = OCS_SETUP_INFO; /*record state */ - return OKAY; -} - - -/********************************************************************** - * ocr_get_first_image_strip - * - * Wait for the master to send the first image strip and return a - * pointer to it. The result is NULL if it is time to exit. 
- **********************************************************************/ - -ESTRIP_DESC *ocr_get_first_image_strip() { /*get image strip */ - ESTRIP_DESC *strip; /*strip info */ - inT16 result; /*of wait/release */ - - if (ocr_state != OCS_SETUP_INFO) { - tprintf ("Bad state reading strip"); - ocr_error(OCR_ERR_BAD_STATE); - return NULL; /*incorrect state */ - } - - /*strip info */ - strip = (ESTRIP_DESC *) shm.shm_mem; - lines_read = 0; - - result = wait_for_mutex (); - if (result != OKAY) { - tprintf ("Mutax wait failed reading strip"); - return NULL; /*HP dead */ - } - result = release_mutex (); - if (result != OKAY) { - tprintf ("Mutax release failed reading strip"); - return NULL; /*HP dead */ - } - result = wait_for_hp (READIM_TIMEOUT); - if (result != OKAY) { - tprintf ("Wait for HP failed reading strip"); - return NULL; /*HP dead */ - } - lines_read = strip->strip_size;/*lines read so far */ - if (lines_read < strip->y_size) - /*record state */ - ocr_state = OCS_READING_STRIPS; - else - ocr_state = OCS_READ_STRIPS; - if (strip->x_size == 0 || strip->y_size == 0) - return NULL; /*end of job */ - - return strip; -} - - -/********************************************************************** - * ocr_get_next_image_strip - * - * Wait for the master to send the next image strip and return a - * pointer to it. The result is NULL if it is time to exit. 
- **********************************************************************/ - -ESTRIP_DESC *ocr_get_next_image_strip() { /*get image strip */ - ESTRIP_DESC *strip; /*strip info */ - inT16 result; /*of wait/release */ - - if (ocr_state != OCS_READING_STRIPS) { - ocr_error(OCR_ERR_BAD_STATE); - return NULL; /*incorrect state */ - } - - /*strip info */ - strip = (ESTRIP_DESC *) shm.shm_mem; - result = release_ocr (); - if (result != OKAY) - return NULL; /*HP dead */ - result = wait_for_hp (READIM_TIMEOUT); - if (result != OKAY) - return NULL; /*HP dead */ - /*lines read so far */ - lines_read += strip->strip_size; - if (lines_read < strip->y_size) - /*record state */ - ocr_state = OCS_READING_STRIPS; - else - ocr_state = OCS_READ_STRIPS; - - return strip; -} - - -/********************************************************************** - * ocr_setup_monitor - * - * Setup the progress monitor. Call before starting the recognize task. - **********************************************************************/ - -ETEXT_DESC *ocr_setup_monitor() { /*setup monitor */ - ETEXT_DESC *monitor; /*progress monitor */ - - /*text info */ - monitor = (ETEXT_DESC *) shm.shm_mem; - monitor->count = 0; /*chars in this buffer(-1) */ - monitor->progress = 0; /*percent complete increasing (0-100) */ - monitor->more_to_come = TRUE; /*text not complete */ - monitor->ocr_alive = TRUE; /*ocr sets to 1, hp 0 */ - monitor->err_code = 0; /*used by ocr_error */ - monitor->cancel = FALSE; /*0=continue, 1=cancel */ - - -//by jetsoft -//the sem functions are old and were meant for an hp product - // if (release_ocr () != OKAY) - // return NULL; /*release failed */ - - ocr_state = OCS_RECOGNIZING; /*record state */ - return monitor; -} - - -/********************************************************************** - * ocr_char_space - * - * Return the number of chars that can be fitted into the buffer. 
- **********************************************************************/ - -inT32 ocr_char_space() { /*put char into shm */ - ETEXT_DESC *buf; /*text buffer */ - int result; - - /*progress info */ - buf = (ETEXT_DESC *) shm.shm_mem; - if (buf == NULL) - return 0; - - result = - (shm.shm_size - sizeof (ETEXT_DESC)) / sizeof (EANYCODE_CHAR) - - buf->count + 1; - - // while (buf->hp_alive==-1) - // Sleep(50); /*wait for HP*/ - - return result; -} - - -/********************************************************************** - * ocr_append_char - * - * Add a character to the output. Returns OKAY if successful, OCR_API_NO_MEM - * if there was insufficient room in the buffer. - **********************************************************************/ - -inT16 ocr_append_char( /*put char into shm */ - uinT16 char_code, /*character itself */ - inT16 left, /*of char (-1) */ - inT16 right, /*of char (-1) */ - inT16 top, /*of char (-1) */ - inT16 bottom, /*of char (-1) */ - inT16 font_index, /*what font (-1) */ - uinT8 confidence, /*0=perfect, 100=reject (0/100) */ - uinT8 point_size, /*of char, 72=i inch, (10) */ - inT8 blanks, /*no of spaces before this char (1) */ - uinT8 enhancement, /*char enhancement (0) */ - OCR_CHAR_DIRECTION text_dir, /*rendering direction (OCR_CDIR_RIGHT_LEFT) */ - OCR_LINE_DIRECTION line_dir, /*line rendering direction (OCR_LDIR_DOWN_RIGHT) */ - OCR_NEWLINE_TYPE nl_type /*type of newline (if any) (OCR_NL_NONE) */ - ) { - ETEXT_DESC *buf; /*text buffer */ - int index; /*char index */ - inT16 result; /*of callback */ - - if (ocr_state != OCS_RECOGNIZING && ocr_state != OCS_SENDING_TEXT) { - ocr_error(OCR_ERR_BAD_STATE); - return OCR_API_BAD_STATE; /*incorrect state */ - } - - if (char_code == ' ' || char_code == '\n' || char_code == '\r' - || char_code == '\t') - return OCR_API_BAD_CHAR; /*illegal char */ - - /*progress info */ - buf = (ETEXT_DESC *) shm.shm_mem; - - result = - (shm.shm_size - sizeof (ETEXT_DESC)) / sizeof (EANYCODE_CHAR) - - 
buf->count; - if (result < 1) - return OCR_API_NO_MEM; /*insufficient room */ - - index = buf->count++; /*count of chars */ - /*setup structure */ - buf->text[index].char_code = char_code; - buf->text[index].left = left; /*setup structure */ - buf->text[index].right = right;/*setup structure */ - buf->text[index].top = top; /*setup structure */ - /*setup structure */ - buf->text[index].bottom = bottom; - /*setup structure */ - buf->text[index].font_index = font_index; - /*setup structure */ - buf->text[index].confidence = confidence; - /*setup structure */ - buf->text[index].point_size = point_size; - /*setup structure */ - buf->text[index].blanks = blanks; - if (nl_type == OCR_NL_NONE) { - if (text_dir == OCR_CDIR_TOP_BOTTOM || text_dir == OCR_CDIR_BOTTOM_TOP) - buf->text[index].formatting = (text_dir << 5) | 128; - /*setup structure */ - else - /*setup structure */ - buf->text[index].formatting = text_dir << 5; - } - else { - buf->text[index].formatting = (nl_type << 6) | (line_dir << 5); - /*setup structure */ - } - buf->text[index].formatting |= enhancement & (~EUC_FORMAT_MASK); - return OKAY; -} - - -/********************************************************************** - * ocr_send_text - * - * Send the text to the host and wait for the ack. - * Use this function after a sequence of ocr_append_char calls to - * actually sent the text to the master process. - * Set more to come TRUE if there is more text in this page, FALSE - * if the OCR engine is now ready to receive another image. 
- **********************************************************************/ - -inT16 ocr_send_text( /*send shm */ - BOOL8 more_to_come /*any text left */ - ) { - ETEXT_DESC *buf; /*text buffer */ - - if (ocr_state != OCS_RECOGNIZING && ocr_state != OCS_SENDING_TEXT) { - ocr_error(OCR_ERR_BAD_STATE); - return OCR_API_BAD_STATE; /*incorrect state */ - } - - /*progress info */ - buf = (ETEXT_DESC *) shm.shm_mem; - - /*setup structure */ - buf->more_to_come = more_to_come; - if (more_to_come) { - if ((buf->text[buf->count - 1].formatting >> 6) != OCR_NL_NEWLINE - && (buf->text[buf->count - 1].formatting >> 6) != OCR_NL_NEWPARA) { - /*force line end */ - buf->text[buf->count - 1].formatting &= 63; - buf->text[buf->count - 1].formatting |= OCR_NL_NEWLINE << 6; - } - } - else { - if (buf->count < 1) - ocr_append_char ('~', -1, -1, -1, -1, 0, 100, 10, 0, - 0, OCR_CDIR_RIGHT_LEFT, OCR_LDIR_DOWN_RIGHT, - OCR_NL_NEWPARA); - /*dummy character */ - else if ((buf->text[buf->count - 1].formatting >> 6) != OCR_NL_NEWPARA) { - /*force para end */ - buf->text[buf->count - 1].formatting &= 63; - buf->text[buf->count - 1].formatting |= OCR_NL_NEWPARA << 6; - } - } - - if (release_ocr () != OKAY) - return HPERR; /*release failed */ - if (wait_for_hp (READTEXT_TIMEOUT) != OKAY) - return HPERR; - if (more_to_come) { - buf->count = 0; /*setup structure */ - ocr_state = OCS_SENDING_TEXT;/*record state */ - } - else - ocr_state = OCS_SETUP_INFO; /*record state */ - return OKAY; -} - - -/********************************************************************** - * ocr_shutdown - * - * Closedown communications with the HP side and free up handles. 
- **********************************************************************/ - -inT16 ocr_shutdown() { /*closedown */ - #ifdef __MAC__ - shm.OCRProcess.lowLongOfPSN = kNoProcess; - shm.OCRProcess.highLongOfPSN = 0; - #endif - ocr_error(OCR_ERR_CLEAN_EXIT); /*signal exit */ - - return OKAY; -} - - -/********************************************************************** - * ocr_internal_shutdown - * - * Free up handles or whatever to clean up without attempting to communicate. - **********************************************************************/ - -inT16 ocr_internal_shutdown() { /*closedown */ - ocr_state = OCS_DEAD; /*record state */ - #ifdef __MSW32__ - if (shm.shm_mem != NULL) { - UnmapViewOfFile (shm.shm_mem); - CloseHandle (shm.shm_hand); /*no longer used */ - CloseHandle (shm.mutex); /*release handles */ - CloseHandle (shm.ocr_sem); - CloseHandle (shm.hp_sem); - shm.shm_mem = NULL; - } - #elif defined (__MAC__) - shm.OCRProcess.lowLongOfPSN = kNoProcess; - shm.OCRProcess.highLongOfPSN = 0; - #endif - return OKAY; -} - - -/********************************************************************** - * wait_for_mutex - * - * Wait for the HP side to release its mutex. - * The return value is HPERR if the HP side has terminated. - **********************************************************************/ - -inT16 wait_for_mutex() { /*wait for HP to be ready */ - inT16 result = HPERR; /*return code */ - #if defined (__MSW32__) || defined (__MAC__) - result = WaitForSingleObject (shm.mutex, (unsigned long) -1) - /*wait for thread to move */ - /*bad if timeout */ - == WAIT_OBJECT_0 ? OKAY : HPERR; - #endif - if (result != OKAY) - ocr_internal_shutdown(); - return result; -} - - -/********************************************************************** - * wait_for_hp - * - * Wait for the HP side to release its semaphore. - * The return value is HPERR if the timeout (in seconds) elapsed. 
- **********************************************************************/ - -inT16 wait_for_hp( /*wait for semaphore */ - inT32 timeout /*in seconds */ - ) { - inT16 result = HPERR; /*return code */ - #if defined (__MSW32__) || defined (__MAC__) - /*wait for thread to move */ - result = WaitForSingleObject (shm.hp_sem, timeout * TICKS) - /*bad if timeout */ - == WAIT_OBJECT_0 ? OKAY : HPERR; - #endif - if (result != OKAY) - ocr_internal_shutdown(); - return result; -} - - -/********************************************************************** - * release_mutex - * - * Release the HP mutex. - * The return value is OKAY if the call succeeds. - **********************************************************************/ - -inT16 release_mutex() { /*release mutex */ - inT16 result = HPERR; /*return code */ - #ifdef __MSW32__ - /*release it */ - result = ReleaseMutex (shm.mutex) ? OKAY : HPERR; - #elif defined (__MAC__) - /*release it */ - result = ReleaseSemaphore (shm.mutex) ? OKAY : HPERR; - #endif - if (result != OKAY) - ocr_internal_shutdown(); - return result; -} - - -/********************************************************************** - * release_ocr - * - * Release the OCR semaphore. - * The return value is OKAY if the call succeeds. - **********************************************************************/ - -inT16 release_ocr() { /*release semaphore */ - inT32 timeout; //time allowed - - timeout = RELEASE_TIMEOUT * TICKS; - #ifdef __MSW32__ - -//jetsoft -// this stuff is old and no longer applies - - return OKAY; -// - - BOOL result = 0; //of release - do { - //release it - result = ReleaseSemaphore (shm.ocr_sem, 1, NULL); - if (result == FALSE) { - timeout -= 50; - Sleep (50); - } - } - while (result == FALSE && timeout > 0); - if (!result) - ocr_internal_shutdown(); - return OKAY; - #elif defined (__MAC__) - inT16 result = HPERR; /*return code */ - /*release it */ - result = ReleaseSemaphore (shm.ocr_sem) ? 
OKAY : HPERR; - - if (result != OKAY) - ocr_internal_shutdown(); - return result; - #elif defined (__UNIX__) - return 0; - #endif -} diff --git a/ccutil/ocrshell.h b/ccutil/ocrshell.h deleted file mode 100644 index 771c3e5097..0000000000 --- a/ccutil/ocrshell.h +++ /dev/null @@ -1,191 +0,0 @@ -/********************************************************************** - * File: ocrshell.h - * Description: Code for the OCR side of the OCR API. - * Author: Hewlett-Packard Co - * - * (C) Copyright 1996, Hewlett-Packard Co. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. - * - **********************************************************************/ - -#ifndef OCRSHELL_H -#define OCRSHELL_H - -/********************************************************************** - * This file contains code for the OCR side of the HP OCR interface. - * The code is designed to be used with either an ANSI C or C++ compiler. - * The structures are designed to allow them to be used with any - * structure alignment upto 8. - **********************************************************************/ - -#include "ocrclass.h" - -#define EUC_FORMAT_MASK 0xe0 - -/********************************************************************** - * ocr_open_shm - * - * Attempt to connect to the shared memory segment and semaphores used - * in talking to the OCR engine. Called from OCR engine. - * The parameters are the command line arguments in order. 
- * The final parameter is a return value indicating the user-requested - * language. The value will be LANGE_NONE if the user wishes to use - * the default. - **********************************************************************/ -#ifdef __MAC__ -inT16 ocr_open_shm(uinT16 *lang); -#else -inT16 ocr_open_shm( /*open the shm */ - const char *shm_h, /*handle of shm */ - const char *shm_size, /*size of shm segment */ - const char *mutex_h, /*hp mutex */ - const char *ocr_h, /*ocr semaphore */ - const char *hp_h, /*hp semaphore */ - const char *lang_str, /*language */ - uinT16 *lang /*required language */ - ); -#endif - -/********************************************************************** - * ocr_append_fontinfo - * - * Initialize one of the font descriptors. - **********************************************************************/ - -inT16 ocr_append_fontinfo( /*put info into shm */ - uinT16 language, /*default language */ - uinT8 font_family, /*serif/not, fixed/not */ - uinT8 char_set, /*character set standard */ - uinT8 pitch, /*fixed or prop */ - const char *name /*plain ascii name */ - ); - -/********************************************************************** - * ocr_setup_startinfo - * - * Setup the info on the OCR engine. Uses 16 bit chars to name the - * engine. - **********************************************************************/ - -inT16 ocr_setup_startinfo( /*put info into shm */ - uinT32 protocol, /*interface version */ - uinT16 language, /*default language */ - const uinT16 *name, /*name of engine */ - const uinT16 *version /*version of engine */ - ); - -/********************************************************************** - * ocr_setup_startinfo_ansi - * - * Setup the info on the OCR engine. Uses 8 bit chars to name the - * engine. 
- **********************************************************************/ - -inT16 ocr_setup_startinfo_ansi( /*put info into shm */ - uinT32 protocol, /*interface version */ - uinT16 language, /*default language */ - const char *name, /*name of engine */ - const char *version /*version of engine */ - ); - -/********************************************************************** - * ocr_get_first_image_strip - * - * Wait for the master to send the first image strip and return a - * pointer to it. The result is NULL if it is time to exit. - **********************************************************************/ - - /*get image strip */ -ESTRIP_DESC *ocr_get_first_image_strip(); - -/********************************************************************** - * ocr_get_next_image_strip - * - * Wait for the master to send the next image strip and return a - * pointer to it. The result is NULL if it is time to exit. - **********************************************************************/ - - /*get image strip */ -ESTRIP_DESC *ocr_get_next_image_strip(); - -/********************************************************************** - * ocr_setup_monitor - * - * Setup the progress monitor. Call before starting the recognize task. - **********************************************************************/ - -ETEXT_DESC *ocr_setup_monitor(); /*setup monitor */ - -/********************************************************************** - * ocr_char_space - * - * Return the number of chars that can be fitted into the buffer. - **********************************************************************/ - -inT32 ocr_char_space(); /*put char into shm */ - -/********************************************************************** - * ocr_append_char - * - * Add a character to the output. Returns OKAY if successful, HPERR - * if there was insufficient room in the buffer. 
- **********************************************************************/ - -inT16 ocr_append_char( /*put char into shm */ - uinT16 char_code, /*character itself */ - inT16 left, /*of char (-1) */ - inT16 right, /*of char (-1) */ - inT16 top, /*of char (-1) */ - inT16 bottom, /*of char (-1) */ - inT16 font_index, /*what font (-1) */ - uinT8 confidence, /*0=perfect, 100=reject (0/100) */ - uinT8 point_size, /*of char, 72=i inch, (10) */ - inT8 blanks, /*no of spaces before this char (1) */ - uinT8 enhancement, /*char enhancement (0) */ - OCR_CHAR_DIRECTION text_dir, /*rendering direction (OCR_CDIR_RIGHT_LEFT) */ - OCR_LINE_DIRECTION line_dir, /*line rendering direction (OCR_LDIR_DOWN_RIGHT) */ - OCR_NEWLINE_TYPE nl_type /*type of newline (if any) (OCR_NL_NONE) */ - ); - -/********************************************************************** - * ocr_send_text - * - * Send the text to the host and wait for the ack. - * Use this function after a sequence of ocr_append_text calls to - * actually sent the text to the master process. - * Set more to come TRUE if there is more text in this page, FALSE - * if the OCR engine is now ready to receive another image. - **********************************************************************/ - -inT16 ocr_send_text( /*send shm */ - BOOL8 more_to_come /*any text left */ - ); - -/********************************************************************** - * ocr_shutdown - * - * Closedown communications with the HP side and free up handles. - **********************************************************************/ - -inT16 ocr_shutdown(); /*closedown */ - -/********************************************************************** - * ocr_error - * - * Inform the HP side of an error. - * The OCR engine should do any cleanup of its own and exit aferwards. - * Uses the current state to determine how to send it and cleanup. 
- **********************************************************************/ - -void ocr_error( /*send an error code */ - OCR_ERR_CODE code /*error code */ - ); -#endif diff --git a/ccutil/params.cpp b/ccutil/params.cpp new file mode 100644 index 0000000000..71e2e1ab9e --- /dev/null +++ b/ccutil/params.cpp @@ -0,0 +1,171 @@ +/********************************************************************** + * File: params.cpp + * Description: Initialization and setting of Tesseract parameters. + * Author: Ray Smith + * Created: Fri Feb 22 16:22:34 GMT 1991 + * + * (C) Copyright 1991, Hewlett-Packard Ltd. + ** Licensed under the Apache License, Version 2.0 (the "License"); + ** you may not use this file except in compliance with the License. + ** You may obtain a copy of the License at + ** http://www.apache.org/licenses/LICENSE-2.0 + ** Unless required by applicable law or agreed to in writing, software + ** distributed under the License is distributed on an "AS IS" BASIS, + ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + ** See the License for the specific language governing permissions and + ** limitations under the License. 
+ * + **********************************************************************/ + +#include "mfcpch.h" //precompiled headers + +#include +#include +#include + +#include "genericvector.h" +#include "scanutils.h" +#include "tprintf.h" +#include "params.h" + +#define PLUS '+' //flag states +#define MINUS '-' +#define EQUAL '=' + +tesseract::ParamsVectors *GlobalParams() { + static tesseract::ParamsVectors *global_params = + new tesseract::ParamsVectors(); + return global_params; +} + +namespace tesseract { + +bool ParamUtils::ReadParamsFile(const char *file, bool init_only, + ParamsVectors *member_params) { + char flag; // file flag + inT16 nameoffset; // offset for real name + FILE *fp; // file pointer + // iterators + + if (*file == PLUS) { + flag = PLUS; // file has flag + nameoffset = 1; + } else if (*file == MINUS) { + flag = MINUS; + nameoffset = 1; + } else { + flag = EQUAL; + nameoffset = 0; + } + + fp = fopen(file + nameoffset, "r"); + if (fp == NULL) { + tprintf("read_params_file: Can't open %s\n", file + nameoffset); + return true; + } + return ReadParamsFromFp(fp, -1, init_only, member_params); + fclose(fp); +} + +bool ParamUtils::ReadParamsFromFp(FILE *fp, inT64 end_offset, bool init_only, + ParamsVectors *member_params) { + char line[MAX_PATH]; // input line + bool anyerr = false; // true if any error + bool foundit; // found parameter + inT16 length; // length of line + char *valptr; // value field + + while ((end_offset < 0 || ftell(fp) < end_offset) && + fgets(line, MAX_PATH, fp)) { + if (line[0] != '\n' && line[0] != '#') { + length = strlen (line); + if (line[length - 1] == '\n') + line[length - 1] = '\0'; // cut newline + for (valptr = line; *valptr && *valptr != ' ' && *valptr != '\t'; + valptr++); + if (*valptr) { // found blank + *valptr = '\0'; // make name a string + do + valptr++; // find end of blanks + while (*valptr == ' ' || *valptr == '\t'); + } + foundit = SetParam(line, valptr, init_only, member_params); + + if (!foundit) { + anyerr = 
true; // had an error + tprintf("read_params_file: parameter not found: %s\n", line); + exit(1); + } + } + } + return anyerr; +} + +bool ParamUtils::SetParam(const char *name, const char* value, + bool init_only, ParamsVectors *member_params) { + // Look for the parameter among string parameters. + StringParam *sp = FindParam(name, GlobalParams()->string_params, + member_params->string_params); + if (sp != NULL && (!init_only || sp->is_init())) sp->set_value(value); + if (*value == '\0') return (sp != NULL); + + // Look for the parameter among int parameters. + int intval; + IntParam *ip = FindParam(name, GlobalParams()->int_params, + member_params->int_params); + if (ip && (!init_only || ip->is_init()) && + sscanf(value, INT32FORMAT, &intval) == 1) ip->set_value(intval); + + // Look for the parameter among bool parameters. + BoolParam *bp = FindParam(name, GlobalParams()->bool_params, + member_params->bool_params); + if (bp != NULL && (!init_only || bp->is_init())) { + if (*value == 'T' || *value == 't' || + *value == 'Y' || *value == 'y' || *value == '1') { + bp->set_value(true); + } else if (*value == 'F' || *value == 'f' || + *value == 'N' || *value == 'n' || *value == '0') { + bp->set_value(false); + } + } + + // Look for the parameter among double parameters. + double doubleval; + DoubleParam *dp = FindParam(name, GlobalParams()->double_params, + member_params->double_params); + if (dp != NULL && (!init_only || dp->is_init())) { +#ifdef EMBEDDED + doubleval = strtofloat(value); +#else + if (sscanf(value, "%lf", &doubleval) == 1) +#endif + dp->set_value(doubleval); + } + return (sp || ip || bp || dp); +} + +void ParamUtils::PrintParams(FILE *fp, const ParamsVectors *member_params) { + int v, i; + int num_iterations = (member_params == NULL) ? 1 : 2; + for (v = 0; v < num_iterations; ++v) { + const ParamsVectors *vec = (v == 0) ? 
GlobalParams() : member_params; + for (i = 0; i < vec->int_params.size(); ++i) { + fprintf(fp, "%s\t%d\n", vec->int_params[i]->name_str(), + (inT32)(*vec->int_params[i])); + } + for (i = 0; i < vec->bool_params.size(); ++i) { + fprintf(fp, "%s\t%d\n", vec->bool_params[i]->name_str(), + (BOOL8)(*vec->bool_params[i])); + } + for (int i = 0; i < vec->string_params.size(); ++i) { + fprintf(fp, "%s\t%s\n", vec->string_params[i]->name_str(), + vec->string_params[i]->string()); + } + for (int i = 0; i < vec->double_params.size(); ++i) { + fprintf(fp, "%s\t%g\n", vec->double_params[i]->name_str(), + (double)(*vec->double_params[i])); + } + } +} + +} // namespace tesseract diff --git a/ccutil/params.h b/ccutil/params.h new file mode 100644 index 0000000000..2818d4062c --- /dev/null +++ b/ccutil/params.h @@ -0,0 +1,268 @@ +/********************************************************************** + * File: params.h + * Description: Class definitions of the *_VAR classes for tunable constants. + * Author: Ray Smith + * Created: Fri Feb 22 11:26:25 GMT 1991 + * + * (C) Copyright 1991, Hewlett-Packard Ltd. + ** Licensed under the Apache License, Version 2.0 (the "License"); + ** you may not use this file except in compliance with the License. + ** You may obtain a copy of the License at + ** http://www.apache.org/licenses/LICENSE-2.0 + ** Unless required by applicable law or agreed to in writing, software + ** distributed under the License is distributed on an "AS IS" BASIS, + ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + ** See the License for the specific language governing permissions and + ** limitations under the License. 
+ * + **********************************************************************/ + +#ifndef PARAMS_H +#define PARAMS_H + +#include + +#include "genericvector.h" +#include "strngs.h" + +namespace tesseract { + +class IntParam; +class BoolParam; +class StringParam; +class DoubleParam; + +struct ParamsVectors { + GenericVector int_params; + GenericVector bool_params; + GenericVector string_params; + GenericVector double_params; +}; + +// Utility functions for working with Tesseract parameters. +class ParamUtils { + public: + // Reads a file of parameter definitions and set/modify the values therein. + // If the filename begins with a + or -, the BoolVariables will be + // ORed or ANDed with any current values. + // Blank lines and lines beginning # are ignored. + // Values may have any whitespace after the name and are the rest of line. + static bool ReadParamsFile( + const char *file, // filename to read + bool init_only, // only set parameters that need to be + // initialized when Init() is called + ParamsVectors *member_params); + + // Read parameters from the given file pointer (stop at end_offset). + static bool ReadParamsFromFp(FILE *fp, inT64 end_offset, bool init_only, + ParamsVectors *member_params); + + // Set a parameters to have the given value. + static bool SetParam(const char *name, const char* value, + bool init_only, ParamsVectors *member_params); + + // Returns the pointer to the parameter with the given name (of the + // appropriate type) if it was found in the vector obtained from + // GlobalParams() or in the given member_params. 
+ template + static T *FindParam(const char *name, + const GenericVector &global_vec, + const GenericVector &member_vec) { + int i; + for (i = 0; i < global_vec.size(); ++i) { + if (strcmp(global_vec[i]->name_str(), name) == 0) return global_vec[i]; + } + for (i = 0; i < member_vec.size(); ++i) { + if (strcmp(member_vec[i]->name_str(), name) == 0) return member_vec[i]; + } + return NULL; + } + // Removes the given pointer to the param from the given vector. + template + static void RemoveParam(T *param_ptr, GenericVector *vec) { + for (int i = 0; i < vec->size(); ++i) { + if ((*vec)[i] == param_ptr) { + vec->remove(i); + return; + } + } + } + + // Print parameters to the given file. + static void PrintParams(FILE *fp, const ParamsVectors *member_params); +}; + +// Definition of various parameter types. +class Param { + public: + ~Param() {} + + const char *name_str() const { return name_; } + const char *info_str() const { return info_; } + bool is_init() const { return init_; } + + protected: + Param(const char *name, const char *comment, bool init) : + name_(name), info_(comment), init_(init) {} + + const char *name_; // name of this parameter + const char *info_; // for menus + bool init_; // needs to be set before init +}; + +class IntParam : public Param { + public: + IntParam(inT32 value, const char *name, const char *comment, bool init, + ParamsVectors *vec) : Param(name, comment, init) { + value_ = value; + params_vec_ = &(vec->int_params); + vec->int_params.push_back(this); + } + ~IntParam() { ParamUtils::RemoveParam(this, params_vec_); } + operator inT32() { return value_; } + void set_value(inT32 value) { value_ = value; } + + private: + inT32 value_; + // Pointer to the vector that contains this param (not owened by this class). 
+ GenericVector *params_vec_; +}; + +class BoolParam : public Param { + public: + BoolParam(bool value, const char *name, const char *comment, bool init, + ParamsVectors *vec) : Param(name, comment, init) { + value_ = value; + params_vec_ = &(vec->bool_params); + vec->bool_params.push_back(this); + } + ~BoolParam() { ParamUtils::RemoveParam(this, params_vec_); } + operator BOOL8() { return value_; } + void set_value(BOOL8 value) { value_ = value; } + + private: + BOOL8 value_; + // Pointer to the vector that contains this param (not owened by this class). + GenericVector *params_vec_; +}; + +class StringParam : public Param { + public: + StringParam(const char *value, const char *name, + const char *comment, bool init, + ParamsVectors *vec) : Param(name, comment, init) { + value_ = value; + params_vec_ = &(vec->string_params); + vec->string_params.push_back(this); + } + ~StringParam() { ParamUtils::RemoveParam(this, params_vec_); } + operator STRING &() { return value_; } + const char *string() const { return value_.string(); } + void set_value(const STRING &value) { value_ = value; } + + private: + STRING value_; + // Pointer to the vector that contains this param (not owened by this class). + GenericVector *params_vec_; +}; + +class DoubleParam : public Param { + public: + DoubleParam(double value, const char *name, const char *comment, + bool init, ParamsVectors *vec) : Param(name, comment, init) { + value_ = value; + params_vec_ = &(vec->double_params); + vec->double_params.push_back(this); + } + ~DoubleParam() { ParamUtils::RemoveParam(this, params_vec_); } + operator double() { return value_; } + void set_value(double value) { value_ = value; } + + private: + double value_; + // Pointer to the vector that contains this param (not owened by this class). + GenericVector *params_vec_; +}; + +} // namespace tesseract + +// Global parameter lists. 
//
// To avoid the problem of undetermined order of static initialization
// global_params are accessed through the GlobalParams function that
// initializes the static pointer to global_params only on the first
// time GlobalParams() is called.
//
// TODO(daria): remove GlobalParams() when all global Tesseract
// parameters are converted to members.
tesseract::ParamsVectors *GlobalParams();

/*************************************************************************
 * Note on defining parameters.
 *
 * The values of the parameters defined with *_INIT_* macros are guaranteed
 * to be loaded from config files before Tesseract initialization is done
 * (there is no such guarantee for parameters defined with the other macros).
 *************************************************************************/

// *_VAR_H macros declare a parameter as a class member (use the matching
// *_MEMBER macro in the constructor's initializer list to define it).

#define INT_VAR_H(name,val,comment)\
  tesseract::IntParam      name

#define BOOL_VAR_H(name,val,comment)\
  tesseract::BoolParam     name

#define STRING_VAR_H(name,val,comment)\
  tesseract::StringParam     name

#define double_VAR_H(name,val,comment)\
  tesseract::DoubleParam     name

// *_VAR macros define a global parameter, registered in GlobalParams().

#define INT_VAR(name,val,comment)\
  tesseract::IntParam      name(val,#name,comment,false,GlobalParams())

#define BOOL_VAR(name,val,comment)\
  tesseract::BoolParam     name(val,#name,comment,false,GlobalParams())

#define STRING_VAR(name,val,comment)\
  tesseract::StringParam     name(val,#name,comment,false,GlobalParams())

#define double_VAR(name,val,comment)\
  tesseract::DoubleParam     name(val,#name,comment,false,GlobalParams())

// *_INIT_VAR macros define a global parameter that must be set before
// Tesseract initialization (loaded from config files before Init()).

#define INT_INIT_VAR(name,val,comment)\
  tesseract::IntParam      name(val,#name,comment,true,GlobalParams())

#define BOOL_INIT_VAR(name,val,comment)\
  tesseract::BoolParam     name(val,#name,comment,true,GlobalParams())

#define STRING_INIT_VAR(name,val,comment)\
  tesseract::StringParam     name(val,#name,comment,true,GlobalParams())

#define double_INIT_VAR(name,val,comment)\
  tesseract::DoubleParam     name(val,#name,comment,true,GlobalParams())
+ +#define INT_MEMBER(name, val, comment, vec)\ + name(val, #name, comment, false, vec) + +#define BOOL_MEMBER(name, val, comment, vec)\ + name(val, #name, comment, false, vec) + +#define STRING_MEMBER(name, val, comment, vec)\ + name(val, #name, comment, false, vec) + +#define double_MEMBER(name, val, comment, vec)\ + name(val, #name, comment, false, vec) + +#define INT_INIT_MEMBER(name, val, comment, vec)\ + name(val, #name, comment, true, vec) + +#define BOOL_INIT_MEMBER(name, val, comment, vec)\ + name(val, #name, comment, true, vec) + +#define STRING_INIT_MEMBER(name, val, comment, vec)\ + name(val, #name, comment, true, vec) + +#define double_INIT_MEMBER(name, val, comment, vec)\ + name(val, #name, comment, true, vec) + +#endif diff --git a/ccutil/platform.h b/ccutil/platform.h index 6d9c697467..1e0da58824 100644 --- a/ccutil/platform.h +++ b/ccutil/platform.h @@ -3,14 +3,6 @@ #ifdef __MSW32__ #define SIGNED #define snprintf _snprintf -#define read _read -#define write _write -#define close _close -#define lseek _lseek -#define open _open -#define ultoa _ultoa -#define ltoa _ltoa -#define strtok_r(s, d, p) strtok(s, d) #if (_MSC_VER <= 1400) #define vsnprintf _vsnprintf #endif diff --git a/ccutil/qrsequence.h b/ccutil/qrsequence.h index ed63789082..d49d755028 100644 --- a/ccutil/qrsequence.h +++ b/ccutil/qrsequence.h @@ -33,7 +33,7 @@ class QRSequenceGenerator { public: // Object is initalized with the size of the output range. explicit QRSequenceGenerator(int N) : N_(N), next_num_(0) { - num_bits_ = ceil(log(static_cast(N)) / log(2.0)); + num_bits_ = static_cast(ceil(log(static_cast(N)) / log(2.0))); } // Main worker method that retrieves the next number in the sequence. 
diff --git a/ccutil/scanutils.cpp b/ccutil/scanutils.cpp index 41045f2099..6ec29976ca 100644 --- a/ccutil/scanutils.cpp +++ b/ccutil/scanutils.cpp @@ -47,7 +47,6 @@ enum Ranks { RANK_LONG = 1, RANK_LONGLONG = 2, RANK_PTR = INT_MAX // Special value used for pointers -// RANK_PTR = 3 // Special value used for pointers }; const enum Ranks kMinRank = RANK_CHAR; @@ -214,56 +213,26 @@ double strtofloat(const char* s) return minus ? -f : f; } -static int tess_vfscanf(FILE* stream, const char *format, va_list ap); - -int tess_fscanf(FILE* stream, const char *format, ...) -{ - va_list ap; - int rv; - - va_start(ap, format); - rv = tess_vfscanf(stream, format, ap); - va_end(ap); - - return rv; -} - -#ifdef EMBEDDED int fscanf(FILE* stream, const char *format, ...) { va_list ap; int rv; va_start(ap, format); - rv = tess_vfscanf(stream, format, ap); + rv = vfscanf(stream, format, ap); va_end(ap); return rv; } -int vfscanf(FILE* stream, const char *format, ...) -{ - va_list ap; - int rv; - - va_start(ap, format); - rv = tess_vfscanf(stream, format, ap); - va_end(ap); - - return rv; -} -#endif - -#ifndef _MSV_VER -static -int tess_vfscanf(FILE* stream, const char *format, va_list ap) +int vfscanf(FILE* stream, const char *format, va_list ap) { const char *p = format; char ch; int q = 0; uintmax_t val = 0; int rank = RANK_INT; // Default rank - unsigned int width = ~0; + unsigned int width = UINT_MAX; int base; int flags = 0; enum { @@ -283,7 +252,6 @@ int tess_vfscanf(FILE* stream, const char *format, va_list ap) int matchinv = 0; // Is match map inverted? 
unsigned char range_start = 0; off_t start_off = ftell(stream); - double fval; // Skip leading spaces SkipSpace(stream); @@ -293,7 +261,7 @@ int tess_vfscanf(FILE* stream, const char *format, va_list ap) case ST_NORMAL: if (ch == '%') { state = ST_FLAGS; - flags = 0; rank = RANK_INT; width = ~0; + flags = 0; rank = RANK_INT; width = UINT_MAX; } else if (isspace(static_cast(ch))) { SkipSpace(stream); } else { @@ -445,7 +413,8 @@ int tess_vfscanf(FILE* stream, const char *format, va_list ap) break; } - fval = streamtofloat(stream); + { + double fval = streamtofloat(stream); switch(rank) { case RANK_INT: *va_arg(ap, float *) = static_cast(fval); @@ -455,6 +424,7 @@ int tess_vfscanf(FILE* stream, const char *format, va_list ap) break; } converted++; + } break; case 'c': // Character @@ -569,11 +539,8 @@ int tess_vfscanf(FILE* stream, const char *format, va_list ap) return converted; } -#endif -#ifdef EMBEDDED int creat(const char *pathname, mode_t mode) { return open(pathname, O_CREAT | O_TRUNC | O_WRONLY, mode); } -#endif diff --git a/ccutil/scanutils.h b/ccutil/scanutils.h index b73b410181..fb96616f6b 100644 --- a/ccutil/scanutils.h +++ b/ccutil/scanutils.h @@ -19,23 +19,14 @@ #ifndef SCANUTILS_H #define SCANUTILS_H +#ifdef EMBEDDED + #include #include #include +//#include #include -#ifndef _MSC_VER -// Parse a file stream according to the given format. See the fscanf manpage -// for more information, as this function attempts to mimic its behavior. -// Note that scientific floating-point notation is not supported. -// This variant is used to ensure correct reading regardless of locale. -int tess_fscanf(FILE* stream, const char *format, ...); -#endif - -#ifdef EMBEDDED - -#include - // Attempts to parse the given file stream s as an integer of the base // 'base'. Returns the first successfully parsed integer as a uintmax_t, or // 0, if none was found. @@ -43,12 +34,12 @@ uintmax_t streamtoumax(FILE* s, int base); // Parse a file stream according to the given format. 
See the fscanf manpage // for more information, as this function attempts to mimic its behavior. -// Note that scientific floating-point notation is not supported. +// Note that scientific loating-point notation is not supported. int fscanf(FILE* stream, const char *format, ...); // Parse a file stream according to the given format. See the fscanf manpage // for more information, as this function attempts to mimic its behavior. -// Note that scientific floating-point notation is not supported. +// Note that scientific loating-point notation is not supported. int vfscanf(FILE* stream, const char *format, va_list ap); // Create a file at the specified path. See the creat manpage for more diff --git a/ccutil/serialis.cpp b/ccutil/serialis.cpp index 624f9c01ce..f9cc031512 100644 --- a/ccutil/serialis.cpp +++ b/ccutil/serialis.cpp @@ -79,11 +79,7 @@ DLLSYM void serialise_FLOAT64(FILE *f, double the_float) { DLLSYM double de_serialise_FLOAT64(FILE *f) { double the_float; -#ifndef _MSC_VER - if (tess_fscanf (f, "%lg", &the_float) != 1) -#else if (fscanf (f, "%lg", &the_float) != 1) -#endif READFAILED.error ("de_serialise_FLOAT64", ABORT, NULL); return the_float; } diff --git a/ccutil/sorthelper.h b/ccutil/sorthelper.h new file mode 100644 index 0000000000..4e6542de9b --- /dev/null +++ b/ccutil/sorthelper.h @@ -0,0 +1,106 @@ +/////////////////////////////////////////////////////////////////////// +// File: sorthelper.h +// Description: Generic sort and maxfinding class. +// Author: Ray Smith +// Created: Thu May 20 17:48:21 PDT 2010 +// +// (C) Copyright 2010, Google Inc. +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +/////////////////////////////////////////////////////////////////////// + +#ifndef TESSERACT_CCUTIL_SORTHELPER_H_ +#define TESSERACT_CCUTIL_SORTHELPER_H_ + +#include +#include "genericvector.h" + +// Generic class to provide functions based on a pair. +// T is the value type. +// The class keeps a count of each value and can return the most frequent +// value or a sorted array of the values with counts. +// Note that this class uses linear search for adding. It is better +// to use the STATS class to get the mode of a large number of values +// in a small space. SortHelper is better to get the mode of a small number +// of values from a large space. +// T must have a copy constructor. +template +class SortHelper { + public: + // Simple pair class to hold the values and counts. + template struct SortPair { + PairT value; + int count; + }; + // qsort function to sort by decreasing count. + static int SortPairsByCount(const void* v1, const void* v2) { + const SortPair* p1 = reinterpret_cast*>(v1); + const SortPair* p2 = reinterpret_cast*>(v2); + return p2->count - p1->count; + } + // qsort function to sort by decreasing value. + static int SortPairsByValue(const void* v1, const void* v2) { + const SortPair* p1 = reinterpret_cast*>(v1); + const SortPair* p2 = reinterpret_cast*>(v2); + if (p2->value - p1->value < 0) return -1; + if (p2->value - p1->value > 0) return 1; + return 0; + } + + // Constructor takes a hint of the array size, but it need not be accurate. 
+ explicit SortHelper(int sizehint) : counts_(sizehint) {} + + // Add a value that may be a duplicate of an existing value. + // Uses a linear search. + void Add(T value, int count) { + // Linear search for value. + for (int i = 0; i < counts_.size(); ++i) { + if (counts_[i].value == value) { + counts_[i].count += count; + return; + } + } + SortPair new_pair = {value, count}; + counts_.push_back(SortPair(new_pair)); + } + + // Returns the frequency of the most frequent value. + // If max_value is not NULL, returns the most frequent value. + // If the array is empty, returns -MAX_INT32 and max_value is unchanged. + int MaxCount(T* max_value) const { + int best_count = -MAX_INT32; + for (int i = 0; i < counts_.size(); ++i) { + if (counts_[i].count > best_count) { + best_count = counts_[i].count; + if (max_value != NULL) + *max_value = counts_[i].value; + } + } + return best_count; + } + + // Returns the data array sorted by decreasing frequency. + const GenericVector >& SortByCount() { + counts_.sort(&SortPairsByCount); + return counts_; + } + // Returns the data array sorted by decreasing value. + const GenericVector >& SortByValue() { + counts_.sort(&SortPairsByValue); + return counts_; + } + + private: + GenericVector > counts_; +}; + + +#endif // TESSERACT_CCUTIL_SORTHELPER_H_. diff --git a/ccutil/strngs.cpp b/ccutil/strngs.cpp index 398392ad99..1060a93890 100644 --- a/ccutil/strngs.cpp +++ b/ccutil/strngs.cpp @@ -21,86 +21,11 @@ #include "tprintf.h" #include "strngs.h" -/********************************************************************** - * DataCache for reducing initial allocations, such as the default - * constructor. The memory in this cache is not special, it is just - * held locally rather than freeing. Only blocks with the default - * capacity are considered for the cache. - * - * In practice it does not appear that this cache grows very big, - * so even 2-4 elements are probably sufficient to realize most - * gains. 
- * - * The cache is maintained globally with a global destructor to - * avoid memory leaks being reported on exit. - **********************************************************************/ -// kDataCacheSize is cache of last n min sized buffers freed for -// cheap recyling -const int kDataCacheSize = 8; // max number of buffers cached +#include // Size of buffer needed to host the decimal representation of the maximum // possible length of an int (in 64 bits, being -<20 digits>. const int kMaxIntSize = 22; - -#if 1 -#define CHECK_INVARIANT(s) // EMPTY -#else -static void check_used_(int len, const char *s) { - bool ok; - - if (len == 0) - ok = (s == NULL); - else - ok = (len == (strlen(s) + 1)); - - if (!ok) - abort(); -} - -#define CHECK_INVARIANT(s) check_used_(s->GetHeader()->used_, s->string()) -#endif - -// put recycled buffers into a class so we can destroy it on exit -class DataCache { - public: - DataCache() { - top_ = 0; - } - ~DataCache() { - while (--top_ >= 0) - free_string((char *)stack_[top_]); - } - - // Allocate a buffer out of this cache. - // Returs NULL if there are no cached buffers. - // The buffers in the cache can be freed using string_free. - void* alloc() { - if (top_ == 0) - return NULL; - - return stack_[--top_]; - } - - // Free pointer either by caching it on the stack of pointers - // or freeing it with string_free if there isnt space left to cache it. - // s should have capacity kMinCapacity. - void free(void* p) { - if (top_ == kDataCacheSize) - free_string((char *)p); - else - stack_[top_++] = p; - } - - // Stack of discarded but not-yet freed pointers. - void* stack_[kDataCacheSize]; - - // Top of stack, points to element after last cached pointer - int top_; -}; - -static DataCache MinCapacityDataCache; - - /********************************************************************** * STRING_HEADER provides metadata about the allocated buffer, * including total capacity and how much used (strlen with '\0'). 
@@ -118,9 +43,7 @@ static DataCache MinCapacityDataCache; const int kMinCapacity = 16; char* STRING::AllocData(int used, int capacity) { - if ((capacity != kMinCapacity) - || ((data_ = (STRING_HEADER *)MinCapacityDataCache.alloc()) == NULL)) - data_ = (STRING_HEADER *)alloc_string(capacity + sizeof(STRING_HEADER)); + data_ = (STRING_HEADER *)alloc_string(capacity + sizeof(STRING_HEADER)); // header is the metadata for this memory block STRING_HEADER* header = GetHeader(); @@ -130,11 +53,7 @@ char* STRING::AllocData(int used, int capacity) { } void STRING::DiscardData() { - STRING_HEADER* header = GetHeader(); - if (header->capacity_ == kMinCapacity) - MinCapacityDataCache.free(data_); - else - free_string((char *)data_); + free_string((char *)data_); } // This is a private method; ensure FixHeader is called (or used_ is well defined) @@ -161,7 +80,7 @@ char* STRING::ensure_cstr(inT32 min_capacity) { DiscardData(); data_ = new_header; - CHECK_INVARIANT(this); + assert(InvariantOk()); return ((char *)data_) + sizeof(STRING_HEADER); } @@ -185,7 +104,7 @@ STRING::STRING(const STRING& str) { int str_used = str_header->used_; char *this_cstr = AllocData(str_used, str_used); memcpy(this_cstr, str.GetCStr(), str_used); - CHECK_INVARIANT(this); + assert(InvariantOk()); } STRING::STRING(const char* cstr) { @@ -196,7 +115,7 @@ STRING::STRING(const char* cstr) { char* this_cstr = AllocData(len, len); memcpy(this_cstr, cstr, len); } - CHECK_INVARIANT(this); + assert(InvariantOk()); } STRING::~STRING() { @@ -265,7 +184,7 @@ void STRING::insert_range(inT32 index, const char* str, int len) { memcpy(this_cstr + index, str, len); this_header->used_ += len; - CHECK_INVARIANT(this); + assert(InvariantOk()); } void STRING::erase_range(inT32 index, int len) { @@ -275,14 +194,14 @@ void STRING::erase_range(inT32 index, int len) { memcpy(this_cstr+index, this_cstr+index+len, this_header->used_ - index - len); this_header->used_ -= len; - CHECK_INVARIANT(this); + assert(InvariantOk()); } 
void STRING::truncate_at(inT32 index) { char* this_cstr = ensure_cstr(index); this_cstr[index] = '\0'; GetHeader()->used_ = index; - CHECK_INVARIANT(this); + assert(InvariantOk()); } #else @@ -343,7 +262,7 @@ STRING& STRING::operator=(const STRING& str) { memcpy(this_cstr, str.GetCStr(), str_used); this_header->used_ = str_used; - CHECK_INVARIANT(this); + assert(InvariantOk()); return *this; } @@ -366,20 +285,17 @@ STRING & STRING::operator+=(const STRING& str) { this_header->used_ = str_used; } - CHECK_INVARIANT(this); + assert(InvariantOk()); return *this; } -// Appends the given string and int (as a %d) to this. -// += cannot be used for ints as there as a char += operator that would -// be ambiguous, and ints usually need a string before or between them -// anyway. void STRING::add_str_int(const char* str, int number) { - *this += str; + if (str != NULL) + *this += str; // Allow space for the maximum possible length of inT64. char num_buffer[kMaxIntSize]; - num_buffer[kMaxIntSize - 1] = '\0'; snprintf(num_buffer, kMaxIntSize - 1, "%d", number); + num_buffer[kMaxIntSize - 1] = '\0'; *this += num_buffer; } @@ -410,7 +326,7 @@ void STRING::de_dump(FILE* f) { this_header->used_ = len; free_mem(instring); - CHECK_INVARIANT(this); + assert(InvariantOk()); } @@ -432,7 +348,7 @@ STRING & STRING::operator=(const char* cstr) { AllocData(0, 0); } - CHECK_INVARIANT(this); + assert(InvariantOk()); return *this; } @@ -441,7 +357,7 @@ STRING STRING::operator+(const STRING& str) const { STRING result(*this); result += str; - CHECK_INVARIANT(this); + assert(InvariantOk()); return result; } @@ -461,7 +377,7 @@ STRING STRING::operator+(const char ch) const { result_cstr[result_used + 1] = '\0'; // append on '\0' ++result_header->used_; - CHECK_INVARIANT(this); + assert(InvariantOk()); return result; } @@ -486,7 +402,7 @@ STRING& STRING::operator+=(const char *str) { this_header->used_ = len; } - CHECK_INVARIANT(this); + assert(InvariantOk()); return *this; } @@ -507,6 +423,6 @@ 
STRING& STRING::operator+=(const char ch) { this_cstr[this_used++] = '\0'; // append '\0' after ch this_header->used_ = this_used; - CHECK_INVARIANT(this); + assert(InvariantOk()); return *this; } diff --git a/ccutil/strngs.h b/ccutil/strngs.h index 7a0439ccb9..f4536ac54f 100644 --- a/ccutil/strngs.h +++ b/ccutil/strngs.h @@ -130,6 +130,14 @@ class DLLSYM STRING inline const char* GetCStr() const { return ((const char *)data_) + sizeof(STRING_HEADER); }; + inline bool InvariantOk() const { +#if STRING_IS_PROTECTED + return (GetHeader()->used_ == 0) ? + (string() == NULL) : (GetHeader()->used_ == (strlen(string()) + 1)); +#else + return true; +#endif + } // Ensure string has requested capacity as optimization // to avoid unnecessary reallocations. diff --git a/ccutil/callback.h b/ccutil/tesscallback.h similarity index 65% rename from ccutil/callback.h rename to ccutil/tesscallback.h index f24ae3ff76..c0e02aec3e 100644 --- a/ccutil/callback.h +++ b/ccutil/tesscallback.h @@ -1,5 +1,5 @@ /////////////////////////////////////////////////////////////////////// -// File: callback.h +// File: tesscallback.h // Description: classes and functions to replace pointer-to-functions // Author: Samuel Charron // @@ -16,31 +16,33 @@ // /////////////////////////////////////////////////////////////////////// -#ifndef _CALLBACK_SPECIALIZATIONS_H -#define _CALLBACK_SPECIALIZATIONS_H +#ifndef _TESS_CALLBACK_SPECIALIZATIONS_H +#define _TESS_CALLBACK_SPECIALIZATIONS_H -struct CallbackUtils_ { +#include "host.h" // For NULL. 
+ +struct TessCallbackUtils_ { static void FailIsRepeatable(const char* name); }; -class Closure { +class TessClosure { public: - virtual ~Closure() { } + virtual ~TessClosure() { } virtual void Run() = 0; }; template -class ResultCallback { +class TessResultCallback { public: - virtual ~ResultCallback() { } + virtual ~TessResultCallback() { } virtual R Run() = 0; }; template -class _ConstMemberResultCallback_0_0 : public ResultCallback { +class _ConstTessMemberResultCallback_0_0 : public TessResultCallback { public: - typedef ResultCallback base; + typedef TessResultCallback base; typedef R (T::*MemberSignature)() const; private: @@ -48,7 +50,7 @@ class _ConstMemberResultCallback_0_0 : public ResultCallback { MemberSignature member_; public: - inline _ConstMemberResultCallback_0_0( + inline _ConstTessMemberResultCallback_0_0( const T* object, MemberSignature member) : object_(object), member_(member) { @@ -69,10 +71,10 @@ class _ConstMemberResultCallback_0_0 : public ResultCallback { }; template -class _ConstMemberResultCallback_0_0 - : public Closure { +class _ConstTessMemberResultCallback_0_0 + : public TessClosure { public: - typedef Closure base; + typedef TessClosure base; typedef void (T::*MemberSignature)() const; private: @@ -80,7 +82,7 @@ class _ConstMemberResultCallback_0_0 MemberSignature member_; public: - inline _ConstMemberResultCallback_0_0( + inline _ConstTessMemberResultCallback_0_0( const T* object, MemberSignature member) : object_(object), member_(member) { @@ -100,36 +102,36 @@ class _ConstMemberResultCallback_0_0 #ifndef SWIG template -inline typename _ConstMemberResultCallback_0_0::base* -NewCallback( +inline typename _ConstTessMemberResultCallback_0_0::base* +NewTessCallback( const T1* obj, R (T2::*member)() const) { - return new _ConstMemberResultCallback_0_0( + return new _ConstTessMemberResultCallback_0_0( obj, member); } #endif #ifndef SWIG template -inline typename _ConstMemberResultCallback_0_0::base* -NewPermanentCallback( +inline 
typename _ConstTessMemberResultCallback_0_0::base* +NewPermanentTessCallback( const T1* obj, R (T2::*member)() const) { - return new _ConstMemberResultCallback_0_0( + return new _ConstTessMemberResultCallback_0_0( obj, member); } #endif template -class _MemberResultCallback_0_0 : public ResultCallback { +class _TessMemberResultCallback_0_0 : public TessResultCallback { public: - typedef ResultCallback base; + typedef TessResultCallback base; typedef R (T::*MemberSignature)() ; private: - T* object_; + T* object_; MemberSignature member_; public: - inline _MemberResultCallback_0_0( + inline _TessMemberResultCallback_0_0( T* object, MemberSignature member) : object_(object), member_(member) { @@ -150,18 +152,18 @@ class _MemberResultCallback_0_0 : public ResultCallback { }; template -class _MemberResultCallback_0_0 - : public Closure { +class _TessMemberResultCallback_0_0 + : public TessClosure { public: - typedef Closure base; + typedef TessClosure base; typedef void (T::*MemberSignature)() ; private: - T* object_; + T* object_; MemberSignature member_; public: - inline _MemberResultCallback_0_0( + inline _TessMemberResultCallback_0_0( T* object, MemberSignature member) : object_(object), member_(member) { @@ -181,35 +183,35 @@ class _MemberResultCallback_0_0 #ifndef SWIG template -inline typename _MemberResultCallback_0_0::base* -NewCallback( +inline typename _TessMemberResultCallback_0_0::base* +NewTessCallback( T1* obj, R (T2::*member)() ) { - return new _MemberResultCallback_0_0( + return new _TessMemberResultCallback_0_0( obj, member); } #endif #ifndef SWIG template -inline typename _MemberResultCallback_0_0::base* -NewPermanentCallback( +inline typename _TessMemberResultCallback_0_0::base* +NewPermanentTessCallback( T1* obj, R (T2::*member)() ) { - return new _MemberResultCallback_0_0( + return new _TessMemberResultCallback_0_0( obj, member); } #endif template -class _FunctionResultCallback_0_0 : public ResultCallback { +class _TessFunctionResultCallback_0_0 : 
public TessResultCallback { public: - typedef ResultCallback base; + typedef TessResultCallback base; typedef R (*FunctionSignature)(); private: FunctionSignature function_; public: - inline _FunctionResultCallback_0_0( + inline _TessFunctionResultCallback_0_0( FunctionSignature function) : function_(function) { } @@ -229,17 +231,17 @@ class _FunctionResultCallback_0_0 : public ResultCallback { }; template -class _FunctionResultCallback_0_0 - : public Closure { +class _TessFunctionResultCallback_0_0 + : public TessClosure { public: - typedef Closure base; + typedef TessClosure base; typedef void (*FunctionSignature)(); private: FunctionSignature function_; public: - inline _FunctionResultCallback_0_0( + inline _TessFunctionResultCallback_0_0( FunctionSignature function) : function_(function) { } @@ -257,35 +259,35 @@ class _FunctionResultCallback_0_0 }; template -inline typename _FunctionResultCallback_0_0::base* -NewCallback(R (*function)()) { - return new _FunctionResultCallback_0_0(function); +inline typename _TessFunctionResultCallback_0_0::base* +NewTessCallback(R (*function)()) { + return new _TessFunctionResultCallback_0_0(function); } template -inline typename _FunctionResultCallback_0_0::base* -NewPermanentCallback(R (*function)()) { - return new _FunctionResultCallback_0_0(function); +inline typename _TessFunctionResultCallback_0_0::base* +NewPermanentTessCallback(R (*function)()) { + return new _TessFunctionResultCallback_0_0(function); } template -class Callback1 { +class TessCallback1 { public: - virtual ~Callback1() { } + virtual ~TessCallback1() { } virtual void Run(A1) = 0; }; template -class ResultCallback1 { +class TessResultCallback1 { public: - virtual ~ResultCallback1() { } + virtual ~TessResultCallback1() { } virtual R Run(A1) = 0; }; template -class _ConstMemberResultCallback_0_1 : public ResultCallback1 { +class _ConstTessMemberResultCallback_0_1 : public TessResultCallback1 { public: - typedef ResultCallback1 base; + typedef 
TessResultCallback1 base; typedef R (T::*MemberSignature)(A1) const; private: @@ -293,7 +295,7 @@ class _ConstMemberResultCallback_0_1 : public ResultCallback1 { MemberSignature member_; public: - inline _ConstMemberResultCallback_0_1( + inline _ConstTessMemberResultCallback_0_1( const T* object, MemberSignature member) : object_(object), member_(member) { @@ -314,10 +316,10 @@ class _ConstMemberResultCallback_0_1 : public ResultCallback1 { }; template -class _ConstMemberResultCallback_0_1 - : public Callback1 { +class _ConstTessMemberResultCallback_0_1 + : public TessCallback1 { public: - typedef Callback1 base; + typedef TessCallback1 base; typedef void (T::*MemberSignature)(A1) const; private: @@ -325,7 +327,7 @@ class _ConstMemberResultCallback_0_1 MemberSignature member_; public: - inline _ConstMemberResultCallback_0_1( + inline _ConstTessMemberResultCallback_0_1( const T* object, MemberSignature member) : object_(object), member_(member) { @@ -345,28 +347,28 @@ class _ConstMemberResultCallback_0_1 #ifndef SWIG template -inline typename _ConstMemberResultCallback_0_1::base* -NewCallback( +inline typename _ConstTessMemberResultCallback_0_1::base* +NewTessCallback( const T1* obj, R (T2::*member)(A1) const) { - return new _ConstMemberResultCallback_0_1( + return new _ConstTessMemberResultCallback_0_1( obj, member); } #endif #ifndef SWIG template -inline typename _ConstMemberResultCallback_0_1::base* -NewPermanentCallback( +inline typename _ConstTessMemberResultCallback_0_1::base* +NewPermanentTessCallback( const T1* obj, R (T2::*member)(A1) const) { - return new _ConstMemberResultCallback_0_1( + return new _ConstTessMemberResultCallback_0_1( obj, member); } #endif template -class _MemberResultCallback_0_1 : public ResultCallback1 { +class _TessMemberResultCallback_0_1 : public TessResultCallback1 { public: - typedef ResultCallback1 base; + typedef TessResultCallback1 base; typedef R (T::*MemberSignature)(A1) ; private: @@ -374,7 +376,7 @@ class 
_MemberResultCallback_0_1 : public ResultCallback1 { MemberSignature member_; public: - inline _MemberResultCallback_0_1( + inline _TessMemberResultCallback_0_1( T* object, MemberSignature member) : object_(object), member_(member) { @@ -395,10 +397,10 @@ class _MemberResultCallback_0_1 : public ResultCallback1 { }; template -class _MemberResultCallback_0_1 - : public Callback1 { +class _TessMemberResultCallback_0_1 + : public TessCallback1 { public: - typedef Callback1 base; + typedef TessCallback1 base; typedef void (T::*MemberSignature)(A1) ; private: @@ -406,7 +408,7 @@ class _MemberResultCallback_0_1 MemberSignature member_; public: - inline _MemberResultCallback_0_1( + inline _TessMemberResultCallback_0_1( T* object, MemberSignature member) : object_(object), member_(member) { @@ -426,35 +428,35 @@ class _MemberResultCallback_0_1 #ifndef SWIG template -inline typename _MemberResultCallback_0_1::base* -NewCallback( +inline typename _TessMemberResultCallback_0_1::base* +NewTessCallback( T1* obj, R (T2::*member)(A1) ) { - return new _MemberResultCallback_0_1( + return new _TessMemberResultCallback_0_1( obj, member); } #endif #ifndef SWIG template -inline typename _MemberResultCallback_0_1::base* -NewPermanentCallback( +inline typename _TessMemberResultCallback_0_1::base* +NewPermanentTessCallback( T1* obj, R (T2::*member)(A1) ) { - return new _MemberResultCallback_0_1( + return new _TessMemberResultCallback_0_1( obj, member); } #endif template -class _FunctionResultCallback_0_1 : public ResultCallback1 { +class _TessFunctionResultCallback_0_1 : public TessResultCallback1 { public: - typedef ResultCallback1 base; + typedef TessResultCallback1 base; typedef R (*FunctionSignature)(A1); private: FunctionSignature function_; public: - inline _FunctionResultCallback_0_1( + inline _TessFunctionResultCallback_0_1( FunctionSignature function) : function_(function) { } @@ -474,17 +476,17 @@ class _FunctionResultCallback_0_1 : public ResultCallback1 { }; template -class 
_FunctionResultCallback_0_1 - : public Callback1 { +class _TessFunctionResultCallback_0_1 + : public TessCallback1 { public: - typedef Callback1 base; + typedef TessCallback1 base; typedef void (*FunctionSignature)(A1); private: FunctionSignature function_; public: - inline _FunctionResultCallback_0_1( + inline _TessFunctionResultCallback_0_1( FunctionSignature function) : function_(function) { } @@ -502,35 +504,35 @@ class _FunctionResultCallback_0_1 }; template -inline typename _FunctionResultCallback_0_1::base* -NewCallback(R (*function)(A1)) { - return new _FunctionResultCallback_0_1(function); +inline typename _TessFunctionResultCallback_0_1::base* +NewTessCallback(R (*function)(A1)) { + return new _TessFunctionResultCallback_0_1(function); } template -inline typename _FunctionResultCallback_0_1::base* -NewPermanentCallback(R (*function)(A1)) { - return new _FunctionResultCallback_0_1(function); +inline typename _TessFunctionResultCallback_0_1::base* +NewPermanentTessCallback(R (*function)(A1)) { + return new _TessFunctionResultCallback_0_1(function); } template -class Callback2 { +class TessCallback2 { public: - virtual ~Callback2() { } + virtual ~TessCallback2() { } virtual void Run(A1,A2) = 0; }; template -class ResultCallback2 { +class TessResultCallback2 { public: - virtual ~ResultCallback2() { } + virtual ~TessResultCallback2() { } virtual R Run(A1,A2) = 0; }; template -class _ConstMemberResultCallback_0_2 : public ResultCallback2 { +class _ConstTessMemberResultCallback_0_2 : public TessResultCallback2 { public: - typedef ResultCallback2 base; + typedef TessResultCallback2 base; typedef R (T::*MemberSignature)(A1,A2) const; private: @@ -538,7 +540,7 @@ class _ConstMemberResultCallback_0_2 : public ResultCallback2 { MemberSignature member_; public: - inline _ConstMemberResultCallback_0_2( + inline _ConstTessMemberResultCallback_0_2( const T* object, MemberSignature member) : object_(object), member_(member) { @@ -559,10 +561,10 @@ class 
_ConstMemberResultCallback_0_2 : public ResultCallback2 { }; template -class _ConstMemberResultCallback_0_2 - : public Callback2 { +class _ConstTessMemberResultCallback_0_2 + : public TessCallback2 { public: - typedef Callback2 base; + typedef TessCallback2 base; typedef void (T::*MemberSignature)(A1,A2) const; private: @@ -570,7 +572,7 @@ class _ConstMemberResultCallback_0_2 MemberSignature member_; public: - inline _ConstMemberResultCallback_0_2( + inline _ConstTessMemberResultCallback_0_2( const T* object, MemberSignature member) : object_(object), member_(member) { @@ -590,28 +592,28 @@ class _ConstMemberResultCallback_0_2 #ifndef SWIG template -inline typename _ConstMemberResultCallback_0_2::base* -NewCallback( +inline typename _ConstTessMemberResultCallback_0_2::base* +NewTessCallback( const T1* obj, R (T2::*member)(A1,A2) const) { - return new _ConstMemberResultCallback_0_2( + return new _ConstTessMemberResultCallback_0_2( obj, member); } #endif #ifndef SWIG template -inline typename _ConstMemberResultCallback_0_2::base* -NewPermanentCallback( +inline typename _ConstTessMemberResultCallback_0_2::base* +NewPermanentTessCallback( const T1* obj, R (T2::*member)(A1,A2) const) { - return new _ConstMemberResultCallback_0_2( + return new _ConstTessMemberResultCallback_0_2( obj, member); } #endif template -class _MemberResultCallback_0_2 : public ResultCallback2 { +class _TessMemberResultCallback_0_2 : public TessResultCallback2 { public: - typedef ResultCallback2 base; + typedef TessResultCallback2 base; typedef R (T::*MemberSignature)(A1,A2) ; private: @@ -619,7 +621,7 @@ class _MemberResultCallback_0_2 : public ResultCallback2 { MemberSignature member_; public: - inline _MemberResultCallback_0_2( + inline _TessMemberResultCallback_0_2( T* object, MemberSignature member) : object_(object), member_(member) { @@ -640,10 +642,10 @@ class _MemberResultCallback_0_2 : public ResultCallback2 { }; template -class _MemberResultCallback_0_2 - : public Callback2 { +class 
_TessMemberResultCallback_0_2 + : public TessCallback2 { public: - typedef Callback2 base; + typedef TessCallback2 base; typedef void (T::*MemberSignature)(A1,A2) ; private: @@ -651,7 +653,7 @@ class _MemberResultCallback_0_2 MemberSignature member_; public: - inline _MemberResultCallback_0_2( + inline _TessMemberResultCallback_0_2( T* object, MemberSignature member) : object_(object), member_(member) { @@ -671,35 +673,35 @@ class _MemberResultCallback_0_2 #ifndef SWIG template -inline typename _MemberResultCallback_0_2::base* -NewCallback( +inline typename _TessMemberResultCallback_0_2::base* +NewTessCallback( T1* obj, R (T2::*member)(A1,A2) ) { - return new _MemberResultCallback_0_2( + return new _TessMemberResultCallback_0_2( obj, member); } #endif #ifndef SWIG template -inline typename _MemberResultCallback_0_2::base* -NewPermanentCallback( +inline typename _TessMemberResultCallback_0_2::base* +NewPermanentTessCallback( T1* obj, R (T2::*member)(A1,A2) ) { - return new _MemberResultCallback_0_2( + return new _TessMemberResultCallback_0_2( obj, member); } #endif template -class _FunctionResultCallback_0_2 : public ResultCallback2 { +class _TessFunctionResultCallback_0_2 : public TessResultCallback2 { public: - typedef ResultCallback2 base; + typedef TessResultCallback2 base; typedef R (*FunctionSignature)(A1,A2); private: FunctionSignature function_; public: - inline _FunctionResultCallback_0_2( + inline _TessFunctionResultCallback_0_2( FunctionSignature function) : function_(function) { } @@ -719,17 +721,17 @@ class _FunctionResultCallback_0_2 : public ResultCallback2 { }; template -class _FunctionResultCallback_0_2 - : public Callback2 { +class _TessFunctionResultCallback_0_2 + : public TessCallback2 { public: - typedef Callback2 base; + typedef TessCallback2 base; typedef void (*FunctionSignature)(A1,A2); private: FunctionSignature function_; public: - inline _FunctionResultCallback_0_2( + inline _TessFunctionResultCallback_0_2( FunctionSignature function) : 
function_(function) { } @@ -747,35 +749,35 @@ class _FunctionResultCallback_0_2 }; template -inline typename _FunctionResultCallback_0_2::base* -NewCallback(R (*function)(A1,A2)) { - return new _FunctionResultCallback_0_2(function); +inline typename _TessFunctionResultCallback_0_2::base* +NewTessCallback(R (*function)(A1,A2)) { + return new _TessFunctionResultCallback_0_2(function); } template -inline typename _FunctionResultCallback_0_2::base* -NewPermanentCallback(R (*function)(A1,A2)) { - return new _FunctionResultCallback_0_2(function); +inline typename _TessFunctionResultCallback_0_2::base* +NewPermanentTessCallback(R (*function)(A1,A2)) { + return new _TessFunctionResultCallback_0_2(function); } template -class Callback3 { +class TessCallback3 { public: - virtual ~Callback3() { } + virtual ~TessCallback3() { } virtual void Run(A1,A2,A3) = 0; }; template -class ResultCallback3 { +class TessResultCallback3 { public: - virtual ~ResultCallback3() { } + virtual ~TessResultCallback3() { } virtual R Run(A1,A2,A3) = 0; }; template -class _ConstMemberResultCallback_0_3 : public ResultCallback3 { +class _ConstTessMemberResultCallback_0_3 : public TessResultCallback3 { public: - typedef ResultCallback3 base; + typedef TessResultCallback3 base; typedef R (T::*MemberSignature)(A1,A2,A3) const; private: @@ -783,7 +785,7 @@ class _ConstMemberResultCallback_0_3 : public ResultCallback3 { MemberSignature member_; public: - inline _ConstMemberResultCallback_0_3( + inline _ConstTessMemberResultCallback_0_3( const T* object, MemberSignature member) : object_(object), member_(member) { @@ -804,10 +806,10 @@ class _ConstMemberResultCallback_0_3 : public ResultCallback3 { }; template -class _ConstMemberResultCallback_0_3 - : public Callback3 { +class _ConstTessMemberResultCallback_0_3 + : public TessCallback3 { public: - typedef Callback3 base; + typedef TessCallback3 base; typedef void (T::*MemberSignature)(A1,A2,A3) const; private: @@ -815,7 +817,7 @@ class 
_ConstMemberResultCallback_0_3 MemberSignature member_; public: - inline _ConstMemberResultCallback_0_3( + inline _ConstTessMemberResultCallback_0_3( const T* object, MemberSignature member) : object_(object), member_(member) { @@ -835,28 +837,28 @@ class _ConstMemberResultCallback_0_3 #ifndef SWIG template -inline typename _ConstMemberResultCallback_0_3::base* -NewCallback( +inline typename _ConstTessMemberResultCallback_0_3::base* +NewTessCallback( const T1* obj, R (T2::*member)(A1,A2,A3) const) { - return new _ConstMemberResultCallback_0_3( + return new _ConstTessMemberResultCallback_0_3( obj, member); } #endif #ifndef SWIG template -inline typename _ConstMemberResultCallback_0_3::base* -NewPermanentCallback( +inline typename _ConstTessMemberResultCallback_0_3::base* +NewPermanentTessCallback( const T1* obj, R (T2::*member)(A1,A2,A3) const) { - return new _ConstMemberResultCallback_0_3( + return new _ConstTessMemberResultCallback_0_3( obj, member); } #endif template -class _MemberResultCallback_0_3 : public ResultCallback3 { +class _TessMemberResultCallback_0_3 : public TessResultCallback3 { public: - typedef ResultCallback3 base; + typedef TessResultCallback3 base; typedef R (T::*MemberSignature)(A1,A2,A3) ; private: @@ -864,7 +866,7 @@ class _MemberResultCallback_0_3 : public ResultCallback3 { MemberSignature member_; public: - inline _MemberResultCallback_0_3( + inline _TessMemberResultCallback_0_3( T* object, MemberSignature member) : object_(object), member_(member) { @@ -885,10 +887,10 @@ class _MemberResultCallback_0_3 : public ResultCallback3 { }; template -class _MemberResultCallback_0_3 - : public Callback3 { +class _TessMemberResultCallback_0_3 + : public TessCallback3 { public: - typedef Callback3 base; + typedef TessCallback3 base; typedef void (T::*MemberSignature)(A1,A2,A3) ; private: @@ -896,7 +898,7 @@ class _MemberResultCallback_0_3 MemberSignature member_; public: - inline _MemberResultCallback_0_3( + inline _TessMemberResultCallback_0_3( T* 
object, MemberSignature member) : object_(object), member_(member) { @@ -916,35 +918,35 @@ class _MemberResultCallback_0_3 #ifndef SWIG template -inline typename _MemberResultCallback_0_3::base* -NewCallback( +inline typename _TessMemberResultCallback_0_3::base* +NewTessCallback( T1* obj, R (T2::*member)(A1,A2,A3) ) { - return new _MemberResultCallback_0_3( + return new _TessMemberResultCallback_0_3( obj, member); } #endif #ifndef SWIG template -inline typename _MemberResultCallback_0_3::base* -NewPermanentCallback( +inline typename _TessMemberResultCallback_0_3::base* +NewPermanentTessCallback( T1* obj, R (T2::*member)(A1,A2,A3) ) { - return new _MemberResultCallback_0_3( + return new _TessMemberResultCallback_0_3( obj, member); } #endif template -class _FunctionResultCallback_0_3 : public ResultCallback3 { +class _TessFunctionResultCallback_0_3 : public TessResultCallback3 { public: - typedef ResultCallback3 base; + typedef TessResultCallback3 base; typedef R (*FunctionSignature)(A1,A2,A3); private: FunctionSignature function_; public: - inline _FunctionResultCallback_0_3( + inline _TessFunctionResultCallback_0_3( FunctionSignature function) : function_(function) { } @@ -964,17 +966,17 @@ class _FunctionResultCallback_0_3 : public ResultCallback3 { }; template -class _FunctionResultCallback_0_3 - : public Callback3 { +class _TessFunctionResultCallback_0_3 + : public TessCallback3 { public: - typedef Callback3 base; + typedef TessCallback3 base; typedef void (*FunctionSignature)(A1,A2,A3); private: FunctionSignature function_; public: - inline _FunctionResultCallback_0_3( + inline _TessFunctionResultCallback_0_3( FunctionSignature function) : function_(function) { } @@ -992,15 +994,15 @@ class _FunctionResultCallback_0_3 }; template -inline typename _FunctionResultCallback_0_3::base* -NewCallback(R (*function)(A1,A2,A3)) { - return new _FunctionResultCallback_0_3(function); +inline typename _TessFunctionResultCallback_0_3::base* +NewTessCallback(R 
(*function)(A1,A2,A3)) { + return new _TessFunctionResultCallback_0_3(function); } template -inline typename _FunctionResultCallback_0_3::base* -NewPermanentCallback(R (*function)(A1,A2,A3)) { - return new _FunctionResultCallback_0_3(function); +inline typename _TessFunctionResultCallback_0_3::base* +NewPermanentTessCallback(R (*function)(A1,A2,A3)) { + return new _TessFunctionResultCallback_0_3(function); } -#endif /* _CALLBACK_SPECIALIZATIONS_H */ +#endif /* _TESS_CALLBACK_SPECIALIZATIONS_H */ diff --git a/ccutil/tessclas.h b/ccutil/tessclas.h deleted file mode 100644 index 2475fd043e..0000000000 --- a/ccutil/tessclas.h +++ /dev/null @@ -1,135 +0,0 @@ -#ifndef TESSCLAS_H -#define TESSCLAS_H 1 - -#define SPLINESIZE 23 /*max spline parts to a line */ - -#define TBLOBFLAGS 4 /*No of flags in a blob */ -#define MAX_WO_CLASSES 3 -#define EDGEPTFLAGS 4 /*concavity,length etc. */ - -typedef struct -{ - double a; /*x squared */ - double b; /*x */ - double c; /*constant */ -} QUAD_SPEC; /*definiton of quadratic */ - -typedef struct -{ - int segments; /*no of spline segments */ - int xstarts[SPLINESIZE]; /*start x coords */ - QUAD_SPEC quads[SPLINESIZE]; /*quadratic sections */ -} SPLINE_SPEC; /*quadratic spline */ - -typedef struct -{ - short x; /*absolute x coord */ - short y; /*absolute y coord */ -} TPOINT; -typedef TPOINT VECTOR; /*structure for coordinates */ - -typedef struct -{ - char dx; /*compact vectors */ - char dy; -} BYTEVEC; - -typedef struct edgeptstruct -{ - TPOINT pos; /*position */ - VECTOR vec; /*vector to next point */ - char flags[EDGEPTFLAGS]; /*concavity, length etc */ - struct edgeptstruct *next; /*anticlockwise element */ - struct edgeptstruct *prev; /*clockwise element */ -} EDGEPT; /*point on expanded outline */ - -typedef struct blobstruct -{ - struct olinestruct *outlines; /*list of outlines in blob */ - char flags[TBLOBFLAGS]; /*blob flags */ - char correct; /*correct text */ - char guess; /*best guess */ - /*quickie classification */ - 
unsigned char classes[MAX_WO_CLASSES]; - /*quickie ratings */ - unsigned char values[MAX_WO_CLASSES]; - struct blobstruct *next; /*next blob in block */ -} TBLOB; /*blob structure */ - -typedef struct olinestruct -{ - TPOINT topleft; /*top left of loop */ - TPOINT botright; /*bottom right of loop */ - TPOINT start; /*start of loop */ - BYTEVEC *compactloop; /*ptr to compacted loop */ - EDGEPT *loop; /*edgeloop */ - void *node; /*1st node on outline */ - struct olinestruct *next; /*next at this level */ - struct olinestruct *child; /*inner outline */ -} TESSLINE; /*outline structure */ - -typedef struct wordstruct -{ - struct textrowstruct *row; /*row it came from */ - char *correct; /*correct word string */ - char *guess; /*guess word string */ - TBLOB *blobs; /*blobs in word */ - int blanks; /*blanks before word */ - int blobcount; /*no of blobs in word */ - struct wordstruct *next; /*next word */ -} TWERD; /*word structure */ - -typedef struct textrowstruct -{ - int blobcount; /** count of blobs in row. 
**/ - TBLOB *blobs; /*list of blobs in row */ - TWERD *words; /*list of words in row */ - int mean_y; /** y coordinate of centre of row **/ - int max_y; /** y coordinate of top of row **/ - int min_y; /** y coordinate of bottom of row **/ - SPLINE_SPEC xheight; /*top of row */ - SPLINE_SPEC baseline; /*bottom of row */ - float descdrop; /*descender drop */ - float ascrise; /*ascender rise */ - float lineheight; /*average xheight-baseline */ - int kerning; /*kerning of row */ - int space; /*spacing of row */ - float space_threshold; /*Bayesian space limit */ - int p_spaced; /*proportinal flag */ - int b_space; /*block spacing */ - int b_kern; /*block kerning */ - struct textrowstruct *next; /*next row in block */ -} TEXTROW; - -typedef struct blockstruct /** list of coordinates **/ -{ - TBLOB *blobs; /*blobs in block */ - TEXTROW *rows; /*rows in block */ - int blobcount; /*no of blobs */ - short xmin; - short xmax; - short ymin; - short ymax; - char type; /** block type **/ - char p_spaced; /** flag to show propertianal spacing **/ - short rowcount; /** number of rows **/ - short leading; /** space between rows **/ - short kerning; /** space between characters **/ - short space; /** distance between char centres **/ - short minwidth; /*min width of char in block */ - short p_size; /** point size of text **/ - short l_margin; /** posn of left margin **/ - short italic; /** flag to show italic block **/ - short spurious; /** percentage of spurious characters **/ - struct blockstruct *next; /*next text block */ -} TEXTBLOCK; /*block from image */ - -/********************************************************************** - * iterate_blobs - * - * Visit all the words in a list using a local variable. 
- **********************************************************************/ - -#define iterate_blobs(blob,blobs) \ -for (blob = blobs; blob != NULL; blob = blob->next) -#endif diff --git a/ccutil/tessdatamanager.cpp b/ccutil/tessdatamanager.cpp index b4b2cd0ac2..04503f5a25 100644 --- a/ccutil/tessdatamanager.cpp +++ b/ccutil/tessdatamanager.cpp @@ -28,23 +28,16 @@ #include "serialis.h" #include "strngs.h" #include "tprintf.h" -#include "varable.h" - -BOOL_VAR(global_load_system_dawg, true, "Load system word dawg."); -BOOL_VAR(global_load_freq_dawg, true, "Load frequent word dawg."); -BOOL_VAR(global_load_punc_dawg, true, "Load dawg with punctuation patterns."); -BOOL_VAR(global_load_number_dawg, true, "Load dawg with number patterns."); - -INT_VAR(global_tessdata_manager_debug_level, 0, - "Debug level for TessdataManager functions."); +#include "params.h" namespace tesseract { -void TessdataManager::Init(const char *data_file_name) { +void TessdataManager::Init(const char *data_file_name, int debug_level) { int i; + debug_level_ = debug_level; data_file_ = fopen(data_file_name, "rb"); if (data_file_ == NULL) { - tprintf("Error openning data file %s\n", data_file_name); + tprintf("Error opening data file %s\n", data_file_name); exit(1); } fread(&actual_tessdata_num_entries_, sizeof(inT32), 1, data_file_); @@ -60,7 +53,7 @@ void TessdataManager::Init(const char *data_file_name) { offset_table_[i] = reverse64(offset_table_[i]); } } - if (global_tessdata_manager_debug_level) { + if (debug_level_) { tprintf("TessdataManager loaded %d types of tesseract data files.\n", actual_tessdata_num_entries_); for (i = 0; i < actual_tessdata_num_entries_; ++i) { @@ -124,8 +117,8 @@ bool TessdataManager::CombineDataFiles( fseek(output_file, sizeof(inT32) + sizeof(inT64) * TESSDATA_NUM_ENTRIES, SEEK_SET); - TessdataType type; - bool text_file; + TessdataType type = TESSDATA_NUM_ENTRIES; + bool text_file = false; FILE *file_ptr[TESSDATA_NUM_ENTRIES]; // Load individual tessdata 
components from files. @@ -167,8 +160,8 @@ bool TessdataManager::OverwriteComponents( int num_new_components) { int i; inT64 offset_table[TESSDATA_NUM_ENTRIES]; - TessdataType type; - bool text_file; + TessdataType type = TESSDATA_NUM_ENTRIES; + bool text_file = false; FILE *file_ptr[TESSDATA_NUM_ENTRIES]; for (i = 0; i < TESSDATA_NUM_ENTRIES; ++i) { offset_table[i] = -1; @@ -235,8 +228,8 @@ bool TessdataManager::TessdataTypeFromFileName( } bool TessdataManager::ExtractToFile(const char *filename) { - TessdataType type; - bool text_file; + TessdataType type = TESSDATA_NUM_ENTRIES; + bool text_file = false; ASSERT_HOST(tesseract::TessdataManager::TessdataTypeFromFileName( filename, &type, &text_file)); if (!SeekToStart(type)) return false; diff --git a/ccutil/tessdatamanager.h b/ccutil/tessdatamanager.h index 51ed60d84d..860e63cc4a 100644 --- a/ccutil/tessdatamanager.h +++ b/ccutil/tessdatamanager.h @@ -23,17 +23,6 @@ #include #include "host.h" #include "tprintf.h" -#include "varable.h" - -extern BOOL_VAR_H(global_load_punc_dawg, true, - "Load dawg with punctuation patterns."); -extern BOOL_VAR_H(global_load_system_dawg, true, "Load system word dawg."); -extern BOOL_VAR_H(global_load_number_dawg, true, - "Load dawg with number patterns."); -extern BOOL_VAR_H(global_load_freq_dawg, true, "Load frequent word dawg."); - -extern INT_VAR_H(global_tessdata_manager_debug_level, 0, - "Debug level for TessdataManager functions."); static const char kTrainedDataSuffix[] = "traineddata"; @@ -49,20 +38,26 @@ static const char kPuncDawgFileSuffix[] = "punc-dawg"; static const char kSystemDawgFileSuffix[] = "word-dawg"; static const char kNumberDawgFileSuffix[] = "number-dawg"; static const char kFreqDawgFileSuffix[] = "freq-dawg"; +static const char kFixedLengthDawgsFileSuffix[] = "fixed-length-dawgs"; +static const char kCubeUnicharsetFileSuffix[] = "cube-unicharset"; +static const char kCubeSystemDawgFileSuffix[] = "cube-word-dawg"; namespace tesseract { enum TessdataType { - 
TESSDATA_LANG_CONFIG, // 0 - TESSDATA_UNICHARSET, // 1 - TESSDATA_AMBIGS, // 2 - TESSDATA_INTTEMP, // 3 - TESSDATA_PFFMTABLE, // 4 - TESSDATA_NORMPROTO, // 5 - TESSDATA_PUNC_DAWG, // 6 - TESSDATA_SYSTEM_DAWG, // 7 - TESSDATA_NUMBER_DAWG, // 8 - TESSDATA_FREQ_DAWG, // 9 + TESSDATA_LANG_CONFIG, // 0 + TESSDATA_UNICHARSET, // 1 + TESSDATA_AMBIGS, // 2 + TESSDATA_INTTEMP, // 3 + TESSDATA_PFFMTABLE, // 4 + TESSDATA_NORMPROTO, // 5 + TESSDATA_PUNC_DAWG, // 6 + TESSDATA_SYSTEM_DAWG, // 7 + TESSDATA_NUMBER_DAWG, // 8 + TESSDATA_FREQ_DAWG, // 9 + TESSDATA_FIXED_LENGTH_DAWGS, // 10 + TESSDATA_CUBE_UNICHARSET, // 11 + TESSDATA_CUBE_SYSTEM_DAWG, // 12 TESSDATA_NUM_ENTRIES }; @@ -82,6 +77,9 @@ static const char * const kTessdataFileSuffixes[] = { kSystemDawgFileSuffix, // 7 kNumberDawgFileSuffix, // 8 kFreqDawgFileSuffix, // 9 + kFixedLengthDawgsFileSuffix, // 10 + kCubeUnicharsetFileSuffix, // 11 + kCubeSystemDawgFileSuffix, // 12 }; /** @@ -99,6 +97,9 @@ static const bool kTessdataFileIsText[] = { false, // 7 false, // 8 false, // 9 + false, // 10 + true, // 11 + false, // 12 }; /** @@ -121,9 +122,10 @@ class TessdataManager { } } ~TessdataManager() {} + int DebugLevel() { return debug_level_; } /** Opens the given data file and reads the offset table. */ - void Init(const char *data_file_name); + void Init(const char *data_file_name, int debug_level); /** Returns data file pointer. */ inline FILE *GetDataFilePtr() const { return data_file_; } @@ -134,9 +136,10 @@ class TessdataManager { * at the start of the data of the given type. 
*/ inline bool SeekToStart(TessdataType tessdata_type) { - if (global_tessdata_manager_debug_level) { - tprintf("TessdataManager: seek to offset %lld (start of tessdata" - "type %d)\n", offset_table_[tessdata_type], tessdata_type); + if (debug_level_) { + tprintf("TessdataManager: seek to offset %lld - start of tessdata" + "type %d (%s))\n", offset_table_[tessdata_type], + tessdata_type, kTessdataFileSuffixes[tessdata_type]); } if (offset_table_[tessdata_type] < 0) { return false; @@ -153,7 +156,7 @@ class TessdataManager { while (index < actual_tessdata_num_entries_ && offset_table_[index] == -1) { ++index; // skip tessdata types not present in the combined file } - if (global_tessdata_manager_debug_level) { + if (debug_level_) { tprintf("TessdataManager: end offset for type %d is %lld\n", tessdata_type, (index == actual_tessdata_num_entries_) ? -1 @@ -230,6 +233,14 @@ class TessdataManager { bool *text_file); private: + + /** + * Opens the file whose name is a concatenation of language_data_path_prefix + * and file_suffix. Returns a file pointer to the opened file. + */ + static FILE *GetFilePtr(const char *language_data_path_prefix, + const char *file_suffix, bool text_file); + /** * Each offset_table_[i] contains a file offset in the combined data file * where the data of TessdataFileType i is stored. @@ -245,6 +256,7 @@ class TessdataManager { */ inT32 actual_tessdata_num_entries_; FILE *data_file_; ///< pointer to the data file. 
+ int debug_level_; }; diff --git a/ccutil/tordvars.cpp b/ccutil/tordvars.cpp deleted file mode 100644 index 5bcbde6e73..0000000000 --- a/ccutil/tordvars.cpp +++ /dev/null @@ -1,66 +0,0 @@ -/* -*-C-*- - ******************************************************************************** - * - * File: tordvars.cpp - * Description: Text Ordering Control Variables - * Author: Mark Seaman, OCR Technology - * Created: Wed Jan 17 12:47:29 1990 - * Modified: Tue Jul 30 16:22:40 1991 (Mark Seaman) marks@hpgrlt - * Language: C - * Package: N/A - * Status: Experimental (Do Not Distribute) - * - * (c) Copyright 1990, Hewlett-Packard Company. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. 
- * - *********************************************************************************/ -/*---------------------------------------------------------------------- - I n c l u d e s -----------------------------------------------------------------------*/ -#include "mfcpch.h" - -#include - -#include "varable.h" - -/*---------------------------------------------------------------------- - V a r i a b l e s -----------------------------------------------------------------------*/ -FILE *rawfile; /* Text before dictionary */ -FILE *textfile; /* Text output file */ -FILE *matcher_fp; //matcher log -FILE *correct_fp; //correct text - -BOOL_VAR(tord_write_output, 0, "Text file output"); - -BOOL_VAR(tord_write_raw_output, 0, "Text before context"); - -BOOL_VAR(tord_similarity_enable, 0, "Switch for Similarity"); - -double_VAR(tord_certainty_threshold, -2.25, "Certainty Value"); - -INT_VAR(tord_num_word_choices, 30, "Number of choices"); - -BOOL_VAR(tord_blob_skip, 0, "Skip to Next selection"); - -double_VAR(tord_overlap_threshold, 0.33, "Overlap Threshold"); - -BOOL_VAR(tord_debug_3, 0, "Textord Debug #3"); - -BOOL_VAR(tord_debug_5, 0, "Textord Debug #5"); - -BOOL_VAR(tord_debug_8, 0, "Textord Debug #8"); - -INT_VAR(tord_display_ratings, 0, "Ratings display"); - -BOOL_VAR(tord_display_text, 0, "Display Text"); - -BOOL_VAR(tord_show_bold, 1, "Show Bold Text"); diff --git a/ccutil/tordvars.h b/ccutil/tordvars.h deleted file mode 100644 index e6c93d366e..0000000000 --- a/ccutil/tordvars.h +++ /dev/null @@ -1,66 +0,0 @@ -/* -*-C-*- - ******************************************************************************** - * - * File: tordvars.h - * Description: Text Ordering Control Variables - * Author: Mark Seaman, OCR Technology - * Created: Wed Oct 25 16:33:01 1989 - * Modified: Mon Jul 1 14:28:23 1991 (Mark Seaman) marks@hpgrlt - * Language: C - * Package: N/A - * Status: Experimental (Do Not Distribute) - * - * (c) Copyright 1989, Hewlett-Packard Company. 
- ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. - * - *********************************************************************************/ -#ifndef TOVARS_H -#define TOVARS_H - -#include - -#include "varable.h" - -/*---------------------------------------------------------------------- - V a r i a b l e s -----------------------------------------------------------------------*/ -extern FILE *rawfile; /* Text before dictionary */ -extern FILE *textfile; /* Text output file */ -extern FILE *correct_fp; //correct text -extern FILE *matcher_fp; - -extern BOOL_VAR_H(tord_write_output, 0, "Text file output"); - -extern BOOL_VAR_H(tord_write_raw_output, 0, "Text before context"); - -extern BOOL_VAR_H(tord_similarity_enable, 0, "Switch for Similarity"); - -extern double_VAR_H(tord_certainty_threshold, -2.25, "Certainty Value"); - -extern INT_VAR_H(tord_num_word_choices, 30, "Number of choices"); - -extern BOOL_VAR_H(tord_blob_skip, 0, "Skip to Next selection"); - -extern double_VAR_H(tord_overlap_threshold, 0.33, "Overlap Threshold"); - -extern BOOL_VAR_H(tord_debug_3, 0, "Textord Debug #3"); - -extern BOOL_VAR_H(tord_debug_5, 0, "Textord Debug #5"); - -extern BOOL_VAR_H(tord_debug_8, 0, "Textord Debug #8"); - -extern INT_VAR_H(tord_display_ratings, 0, "Ratings display"); - -extern BOOL_VAR_H(tord_display_text, 0, "Display Text"); - -extern BOOL_VAR_H(tord_show_bold, 1, "Show Bold Text"); - -#endif diff --git a/ccutil/tprintf.cpp b/ccutil/tprintf.cpp index 
f5c129482b..21973ae550 100644 --- a/ccutil/tprintf.cpp +++ b/ccutil/tprintf.cpp @@ -19,23 +19,24 @@ #include "mfcpch.h" //precompiled headers +// Include automatically generated configuration file if running autoconf. +#ifdef HAVE_CONFIG_H +#include "config_auto.h" +#endif + #include #include #include "strngs.h" -#include "varable.h" +#include "params.h" #include "debugwin.h" //#include "ipeerr.h" #include "tprintf.h" #include "ccutil.h" -// Include automatically generated configuration file if running autoconf. -#ifdef HAVE_CONFIG_H -#include "config_auto.h" -#endif - #define MAX_MSG_LEN 1024 #define EXTERN +// Since tprintf is protected by a mutex, these parameters can rmain global. DLLSYM STRING_VAR (debug_file, "", "File to send tprintf output to"); DLLSYM BOOL_VAR (debug_window_on, FALSE, "Send tprintf to window unless file set"); diff --git a/ccutil/tprintf.h b/ccutil/tprintf.h index 81a9783c31..3a6e4fd848 100644 --- a/ccutil/tprintf.h +++ b/ccutil/tprintf.h @@ -20,7 +20,7 @@ #ifndef TPRINTF_H #define TPRINTF_H -#include "varable.h" +#include "params.h" extern DLLSYM STRING_VAR_H (debug_file, "", "File to send tprintf output to"); extern DLLSYM BOOL_VAR_H (debug_window_on, TRUE, diff --git a/ccutil/unicharset.cpp b/ccutil/unicharset.cpp index 86275767f0..5ec37ed042 100644 --- a/ccutil/unicharset.cpp +++ b/ccutil/unicharset.cpp @@ -25,38 +25,57 @@ #include "tprintf.h" #include "unichar.h" #include "unicharset.h" -#include "varable.h" +#include "params.h" static const int ISALPHA_MASK = 0x1; static const int ISLOWER_MASK = 0x2; static const int ISUPPER_MASK = 0x4; static const int ISDIGIT_MASK = 0x8; static const int ISPUNCTUATION_MASK = 0x10; +// Y coordinate threshold for determining cap-height vs x-height. +// TODO(rays) Bring the global definition down to the ccutil library level, +// so this constant is relative to some other constants. 
+static const int kMeanlineThreshold = 220; +// Let C be the number of alpha chars for which all tops exceed +// kMeanlineThreshold, and X the number of alpha chars for which all tops +// are below kMeanlineThreshold, then if X > C * kMinXHeightFraction or +// more than half the alpha characters have upper or lower case, then +// the unicharset "has x-height". +const double kMinXHeightFraction = 0.25; + +UNICHARSET::UNICHAR_PROPERTIES::UNICHAR_PROPERTIES() { + Init(); +} +void UNICHARSET::UNICHAR_PROPERTIES::Init() { + isalpha = false; + islower = false; + isupper = false; + isdigit = false; + ispunctuation = false; + isngram = false; + enabled = false; + min_bottom = 0; + max_bottom = MAX_UINT8; + min_top = 0; + max_top = MAX_UINT8; + script_id = 0; + other_case = 0; + fragment = NULL; +} UNICHARSET::UNICHARSET() : unichars(NULL), ids(), size_used(0), size_reserved(0), - script_table(0), + script_table(NULL), script_table_size_used(0), - script_table_size_reserved(0), - null_script("NULL"), - null_sid_(0), - common_sid_(0), - latin_sid_(0), - cyrillic_sid_(0), - greek_sid_(0), - han_sid_(0) {} + null_script("NULL") { + clear(); +} UNICHARSET::~UNICHARSET() { - if (size_reserved > 0) { - for (int i = 0; i < script_table_size_used; ++i) - delete[] script_table[i]; - delete[] script_table; - delete_pointers_in_unichars(); - delete[] unichars; - } + clear(); } void UNICHARSET::reserve(int unichars_number) { @@ -66,7 +85,6 @@ void UNICHARSET::reserve(int unichars_number) { memcpy(&unichars_new[i], &unichars[i], sizeof(UNICHAR_SLOT)); for (int j = size_used; j < unichars_number; ++j) { unichars_new[j].properties.script_id = add_script(null_script); - unichars_new[j].properties.fragment = NULL; } delete[] unichars; unichars = unichars_new; @@ -146,6 +164,7 @@ STRING UNICHARSET::debug_utf8_str(const char* str) { // Return a STRING containing debug information on the unichar, including // the id_to_unichar, its hex unicodes and the properties. 
STRING UNICHARSET::debug_str(UNICHAR_ID id) const { + if (id == INVALID_UNICHAR_ID) return STRING(id_to_unichar(id)); const CHAR_FRAGMENT *fragment = this->get_fragment(id); if (fragment) { STRING base = debug_str(fragment->get_unichar()); @@ -153,7 +172,6 @@ STRING UNICHARSET::debug_str(UNICHAR_ID id) const { fragment->get_total()); } const char* str = id_to_unichar(id); - if (id == INVALID_UNICHAR_ID) return STRING(str); STRING result = debug_utf8_str(str); // Append a for lower alpha, A for upper alpha, and x if alpha but neither. if (get_isalpha(id)) { @@ -175,7 +193,29 @@ STRING UNICHARSET::debug_str(UNICHAR_ID id) const { return result; } +unsigned int UNICHARSET::get_properties(UNICHAR_ID id) const { + unsigned int properties = 0; + if (this->get_isalpha(id)) + properties |= ISALPHA_MASK; + if (this->get_islower(id)) + properties |= ISLOWER_MASK; + if (this->get_isupper(id)) + properties |= ISUPPER_MASK; + if (this->get_isdigit(id)) + properties |= ISDIGIT_MASK; + if (this->get_ispunctuation(id)) + properties |= ISPUNCTUATION_MASK; + return properties; +} +char UNICHARSET::get_chartype(UNICHAR_ID id) const { + if (this->get_isupper(id)) return 'A'; + if (this->get_islower(id)) return 'a'; + if (this->get_isalpha(id)) return 'x'; + if (this->get_isdigit(id)) return '0'; + if (this->get_ispunctuation(id)) return 'p'; + return 0; +} void UNICHARSET::unichar_insert(const char* const unichar_repr) { if (!ids.contains(unichar_repr)) { @@ -192,12 +232,6 @@ void UNICHARSET::unichar_insert(const char* const unichar_repr) { } strcpy(unichars[size_used].representation, unichar_repr); - this->set_isalpha(size_used, false); - this->set_islower(size_used, false); - this->set_isupper(size_used, false); - this->set_isdigit(size_used, false); - this->set_ispunctuation(size_used, false); - this->set_isngram(size_used, false); this->set_script(size_used, null_script); // If the given unichar_repr represents a fragmented character, set // fragment property to a pointer to 
CHAR_FRAGMENT class instance with @@ -235,27 +269,19 @@ bool UNICHARSET::eq(UNICHAR_ID unichar_id, bool UNICHARSET::save_to_file(FILE *file) const { fprintf(file, "%d\n", this->size()); for (UNICHAR_ID id = 0; id < this->size(); ++id) { - unsigned int properties = 0; - - if (this->get_isalpha(id)) - properties |= ISALPHA_MASK; - if (this->get_islower(id)) - properties |= ISLOWER_MASK; - if (this->get_isupper(id)) - properties |= ISUPPER_MASK; - if (this->get_isdigit(id)) - properties |= ISDIGIT_MASK; - if (this->get_ispunctuation(id)) - properties |= ISPUNCTUATION_MASK; - + int min_bottom, max_bottom, min_top, max_top; + get_top_bottom(id, &min_bottom, &max_bottom, &min_top, &max_top); + unsigned int properties = this->get_properties(id); if (strcmp(this->id_to_unichar(id), " ") == 0) fprintf(file, "%s %x %s %d\n", "NULL", properties, this->get_script_from_script_id(this->get_script(id)), this->get_other_case(id)); else - fprintf(file, "%s %x %s %d\n", this->id_to_unichar(id), properties, + fprintf(file, "%s %x %d,%d,%d,%d %s %d\t# %s\n", + this->id_to_unichar(id), properties, + min_bottom, max_bottom, min_top, max_top, this->get_script_from_script_id(this->get_script(id)), - this->get_other_case(id)); + this->get_other_case(id), this->debug_str(id).string()); } return true; } @@ -277,8 +303,15 @@ bool UNICHARSET::load_from_file(FILE *file) { strcpy(script, null_script); this->unichars[id].properties.other_case = id; + int min_bottom = 0; + int max_bottom = MAX_UINT8; + int min_top = 0; + int max_top = MAX_UINT8; if (fgets(buffer, sizeof (buffer), file) == NULL || - (sscanf(buffer, "%s %x %63s %d", unichar, &properties, + (sscanf(buffer, "%s %x %d,%d,%d,%d %63s %d", unichar, &properties, + &min_bottom, &max_bottom, &min_top, &max_top, + script, &(this->unichars[id].properties.other_case)) != 8 && + sscanf(buffer, "%s %x %63s %d", unichar, &properties, script, &(this->unichars[id].properties.other_case)) != 4 && sscanf(buffer, "%s %x %63s", unichar, &properties, 
script) != 3 && sscanf(buffer, "%s %x", unichar, &properties) != 2)) { @@ -289,15 +322,52 @@ bool UNICHARSET::load_from_file(FILE *file) { else this->unichar_insert(unichar); - this->set_isalpha(id, (properties & ISALPHA_MASK) != 0); - this->set_islower(id, (properties & ISLOWER_MASK) != 0); - this->set_isupper(id, (properties & ISUPPER_MASK) != 0); - this->set_isdigit(id, (properties & ISDIGIT_MASK) != 0); - this->set_ispunctuation(id, (properties & ISPUNCTUATION_MASK) != 0); + this->set_isalpha(id, properties & ISALPHA_MASK); + this->set_islower(id, properties & ISLOWER_MASK); + this->set_isupper(id, properties & ISUPPER_MASK); + this->set_isdigit(id, properties & ISDIGIT_MASK); + this->set_ispunctuation(id, properties & ISPUNCTUATION_MASK); this->set_isngram(id, false); this->set_script(id, script); this->unichars[id].properties.enabled = true; + this->set_top_bottom(id, min_bottom, max_bottom, min_top, max_top); } + post_load_setup(); + return true; +} + +// Sets up internal data after loading the file, based on the char +// properties. Called from load_from_file, but also needs to be run +// during set_unicharset_properties. +void UNICHARSET::post_load_setup() { + // Number of alpha chars with the case property minus those without, + // in order to determine that half the alpha chars have case. 
+ int net_case_alphas = 0; + int x_height_alphas = 0; + int cap_height_alphas = 0; + top_bottom_set_ = false; + for (UNICHAR_ID id = 0; id < size_used; ++id) { + int min_bottom = 0; + int max_bottom = MAX_UINT8; + int min_top = 0; + int max_top = MAX_UINT8; + get_top_bottom(id, &min_bottom, &max_bottom, &min_top, &max_top); + if (min_top > 0) + top_bottom_set_ = true; + if (get_isalpha(id)) { + if (get_islower(id) || get_isupper(id)) + ++net_case_alphas; + else + --net_case_alphas; + if (min_top < kMeanlineThreshold && max_top < kMeanlineThreshold) + ++x_height_alphas; + else if (min_top > kMeanlineThreshold && max_top > kMeanlineThreshold) + ++cap_height_alphas; + } + } + script_has_upper_lower_ = net_case_alphas > 0; + script_has_xheight_ = script_has_upper_lower_ || + x_height_alphas > cap_height_alphas * kMinXHeightFraction; null_sid_ = get_script_id_from_name(null_script); ASSERT_HOST(null_sid_ == 0); @@ -306,7 +376,31 @@ bool UNICHARSET::load_from_file(FILE *file) { cyrillic_sid_ = get_script_id_from_name("Cyrillic"); greek_sid_ = get_script_id_from_name("Greek"); han_sid_ = get_script_id_from_name("Han"); - return true; + hiragana_sid_ = get_script_id_from_name("Hiragana"); + katakana_sid_ = get_script_id_from_name("Katakana"); + + // Compute default script. + int* script_counts = new int[script_table_size_used]; + memset(script_counts, 0, sizeof(*script_counts) * script_table_size_used); + for (int id = 0; id < size_used; ++id) + ++script_counts[get_script(id)]; + default_sid_ = 0; + for (int s = 1; s < script_table_size_used; ++s) { + if (script_counts[s] > script_counts[default_sid_] && s != common_sid_) + default_sid_ = s; + } + delete [] script_counts; +} + +// Returns true if any script entry in the unicharset is for a +// right_to_left language. 
+bool UNICHARSET::any_right_to_left() const { + for (int id = 0; id < script_table_size_used; ++id) { + if (strcmp(script_table[id], "Arabic") == 0 || + strcmp(script_table[id], "Hebrew") == 0) + return true; + } + return false; } // Set a whitelist and/or blacklist of characters to recognize. @@ -325,7 +419,9 @@ void UNICHARSET::set_black_and_whitelist(const char* blacklist, ch_step = step(whitelist + w_ind); if (ch_step > 0) { UNICHAR_ID u_id = unichar_to_id(whitelist + w_ind, ch_step); - unichars[u_id].properties.enabled = true; + if (u_id != INVALID_UNICHAR_ID) { + unichars[u_id].properties.enabled = true; + } } else { ch_step = 1; } @@ -337,7 +433,9 @@ void UNICHARSET::set_black_and_whitelist(const char* blacklist, ch_step = step(blacklist + b_ind); if (ch_step > 0) { UNICHAR_ID u_id = unichar_to_id(blacklist + b_ind, ch_step); - unichars[u_id].properties.enabled = false; + if (u_id != INVALID_UNICHAR_ID) { + unichars[u_id].properties.enabled = false; + } } else { ch_step = 1; } diff --git a/ccutil/unicharset.h b/ccutil/unicharset.h index 66800580c7..28cb56a64f 100644 --- a/ccutil/unicharset.h +++ b/ccutil/unicharset.h @@ -24,7 +24,7 @@ #include "strngs.h" #include "unichar.h" #include "unicharmap.h" -#include "varable.h" +#include "params.h" class CHAR_FRAGMENT { public: @@ -56,6 +56,7 @@ class CHAR_FRAGMENT { // Returns the string that represents a fragment // with the given unichar, pos and total. static STRING to_string(const char *unichar, int pos, int total) { + if (total == 1) return STRING(unichar); STRING result = ""; result += kSeparator; result += unichar; @@ -185,20 +186,33 @@ class UNICHARSET { // Clear the UNICHARSET (all the previous data is lost). 
void clear() { - if (size_reserved > 0) { + if (script_table != NULL) { for (int i = 0; i < script_table_size_used; ++i) delete[] script_table[i]; delete[] script_table; - script_table = 0; - script_table_size_reserved = 0; + script_table = NULL; script_table_size_used = 0; + } + if (unichars != NULL) { delete_pointers_in_unichars(); delete[] unichars; - unichars = 0; - size_reserved = 0; - size_used = 0; + unichars = NULL; } + script_table_size_reserved = 0; + size_reserved = 0; + size_used = 0; ids.clear(); + top_bottom_set_ = false; + script_has_upper_lower_ = false; + script_has_xheight_ = false; + null_sid_ = 0; + common_sid_ = 0; + latin_sid_ = 0; + cyrillic_sid_ = 0; + greek_sid_ = 0; + han_sid_ = 0; + hiragana_sid_ = 0; + katakana_sid_ = 0; } // Return the size of the set (the number of different UNICHAR it holds). @@ -238,6 +252,15 @@ class UNICHARSET { // Returns true if the operation is successful. bool load_from_file(FILE *file); + // Sets up internal data after loading the file, based on the char + // properties. Called from load_from_file, but also needs to be run + // during set_unicharset_properties. + void post_load_setup(); + + // Returns true if any script entry in the unicharset is for a + // right_to_left language. + bool any_right_to_left() const; + // Set a whitelist and/or blacklist of characters to recognize. // An empty or NULL whitelist enables everything (minus any blacklist). // An empty or NULL blacklist disables nothing. @@ -318,6 +341,35 @@ class UNICHARSET { return unichars[unichar_id].properties.isngram; } + // Returns true if the ids have useful min/max top/bottom values. + bool top_bottom_useful() const { + return top_bottom_set_; + } + // Returns the min and max bottom and top of the given unichar in + // baseline-normalized coordinates, ie, where the baseline is + // kBlnBaselineOffset and the meanline is kBlnBaselineOffset + kBlnXHeight + // (See polyblob.h for the definitions). 
+ void get_top_bottom(UNICHAR_ID unichar_id, + int* min_bottom, int* max_bottom, + int* min_top, int* max_top) const { + *min_bottom = unichars[unichar_id].properties.min_bottom; + *max_bottom = unichars[unichar_id].properties.max_bottom; + *min_top = unichars[unichar_id].properties.min_top; + *max_top = unichars[unichar_id].properties.max_top; + } + void set_top_bottom(UNICHAR_ID unichar_id, + int min_bottom, int max_bottom, + int min_top, int max_top) { + unichars[unichar_id].properties.min_bottom = + static_cast(ClipToRange(min_bottom, 0, MAX_UINT8)); + unichars[unichar_id].properties.max_bottom = + static_cast(ClipToRange(max_bottom, 0, MAX_UINT8)); + unichars[unichar_id].properties.min_top = + static_cast(ClipToRange(min_top, 0, MAX_UINT8)); + unichars[unichar_id].properties.max_top = + static_cast(ClipToRange(max_top, 0, MAX_UINT8)); + } + // Return the script name of the given unichar. // The returned pointer will always be the same for the same script, it's // managed by unicharset and thus MUST NOT be deleted @@ -325,6 +377,19 @@ class UNICHARSET { return unichars[unichar_id].properties.script_id; } + // Return the character properties, eg. alpha/upper/lower/digit/punct, + // as a bit field of unsigned int. + unsigned int get_properties(UNICHAR_ID unichar_id) const; + + // Return the character property as a single char. If a character has + // multiple attributes, the main property is defined by the following order: + // upper_case : 'A' + // lower_case : 'a' + // alpha : 'x' + // digit : '0' + // punctuation: 'p' + char get_chartype(UNICHAR_ID unichar_id) const; + // Get other_case unichar id in the properties for the given unichar id. UNICHAR_ID get_other_case(UNICHAR_ID unichar_id) const { return unichars[unichar_id].properties.other_case; @@ -373,6 +438,16 @@ class UNICHARSET { return get_ispunctuation(unichar_to_id(unichar_repr)); } + // Return the character properties, eg. 
alpha/upper/lower/digit/punct, + // of the given unichar representation + unsigned int get_properties(const char* const unichar_repr) const { + return get_properties(unichar_to_id(unichar_repr)); + } + + char get_chartype(const char* const unichar_repr) const { + return get_chartype(unichar_to_id(unichar_repr)); + } + // Return the script name of the given unichar representation. // The returned pointer will always be the same for the same script, it's // managed by unicharset and thus MUST NOT be deleted @@ -475,10 +550,28 @@ class UNICHARSET { int cyrillic_sid() const { return cyrillic_sid_; } int greek_sid() const { return greek_sid_; } int han_sid() const { return han_sid_; } + int hiragana_sid() const { return hiragana_sid_; } + int katakana_sid() const { return katakana_sid_; } + int default_sid() const { return default_sid_; } + + // Returns true if the unicharset has the concept of upper/lower case. + bool script_has_upper_lower() const { + return script_has_upper_lower_; + } + // Returns true if the unicharset has the concept of x-height. + // script_has_xheight can be true even if script_has_upper_lower is not, + // when the script has a sufficiently predominant top line with ascenders, + // such as Devanagari and Thai. + bool script_has_xheight() const { + return script_has_xheight_; + } private: struct UNICHAR_PROPERTIES { + UNICHAR_PROPERTIES(); + void Init(); + bool isalpha; bool islower; bool isupper; @@ -486,6 +579,14 @@ class UNICHARSET { bool ispunctuation; bool isngram; bool enabled; + // Possible limits of the top and bottom of the bounding box in + // baseline-normalized coordinates, ie, where the baseline is + // kBlnBaselineOffset and the meanline is kBlnBaselineOffset + kBlnXHeight + // (See polyblob.h for the definitions). 
+ uinT8 min_bottom; + uinT8 max_bottom; + uinT8 min_top; + uinT8 max_top; int script_id; UNICHAR_ID other_case; // id of the corresponding upper/lower case unichar @@ -509,6 +610,13 @@ class UNICHARSET { int script_table_size_used; int script_table_size_reserved; const char* null_script; + // True if the unichars have their tops/bottoms set. + bool top_bottom_set_; + // True if the unicharset has significant upper/lower case chars. + bool script_has_upper_lower_; + // True if the unicharset has a significant mean-line with significant + // ascenders above that. + bool script_has_xheight_; // A few convenient script name-to-id mapping without using hash. // These are initialized when unicharset file is loaded. Anything @@ -519,6 +627,10 @@ class UNICHARSET { int cyrillic_sid_; int greek_sid_; int han_sid_; + int hiragana_sid_; + int katakana_sid_; + // The most frequently occurring script in the charset. + int default_sid_; }; #endif // TESSERACT_CCUTIL_UNICHARSET_H__ diff --git a/ccutil/unicity_table.h b/ccutil/unicity_table.h index 9bfe8e5d8d..c6273b5d7f 100644 --- a/ccutil/unicity_table.h +++ b/ccutil/unicity_table.h @@ -20,7 +20,7 @@ #ifndef TESSERACT_CCUTIL_UNICITY_TABLE_H_ #define TESSERACT_CCUTIL_UNICITY_TABLE_H_ -#include "callback.h" +#include "tesscallback.h" #include "errcode.h" #include "genericvector.h" @@ -62,11 +62,11 @@ class UnicityTable { /// Add a callback to be called to delete the elements when the table took /// their ownership. - void set_clear_callback(Callback1* cb); + void set_clear_callback(TessCallback1* cb); /// Add a callback to be called to compare the elements when needed (contains, /// get_id, ...) - void set_compare_callback(ResultCallback2* cb); + void set_compare_callback(TessResultCallback2* cb); /// Clear the table, calling the callback function if any. /// All the owned Callbacks are also deleted. @@ -82,14 +82,14 @@ class UnicityTable { /// The Callback given must be permanent since they will be called more than /// once. 
The given callback will be deleted at the end. /// Returns false on read/write error. - bool write(FILE* f, ResultCallback2* cb); + bool write(FILE* f, TessResultCallback2* cb); /// swap is used to switch the endianness. - bool read(FILE* f, ResultCallback3* cb, bool swap); + bool read(FILE* f, TessResultCallback3* cb, bool swap); private: GenericVector table_; // Mutable because Run method is not const - mutable ResultCallback2* compare_cb_; + mutable TessResultCallback2* compare_cb_; }; template @@ -97,7 +97,7 @@ class UnicityTableEqEq : public UnicityTable { public: UnicityTableEqEq() { UnicityTable::set_compare_callback( - NewPermanentCallback(tesseract::cmp_eq)); + NewPermanentTessCallback(tesseract::cmp_eq)); } }; @@ -161,14 +161,14 @@ int UnicityTable::push_back(T object) { // Add a callback to be called to delete the elements when the table took // their ownership. template -void UnicityTable::set_clear_callback(Callback1* cb) { +void UnicityTable::set_clear_callback(TessCallback1* cb) { table_.set_clear_callback(cb); } // Add a callback to be called to delete the elements when the table took // their ownership. 
template -void UnicityTable::set_compare_callback(ResultCallback2* cb) { +void UnicityTable::set_compare_callback(TessResultCallback2* cb) { table_.set_compare_callback(cb); compare_cb_ = cb; } @@ -181,13 +181,13 @@ void UnicityTable::clear() { template bool UnicityTable::write(FILE* f, - ResultCallback2* cb) { + TessResultCallback2* cb) { return table_.write(f, cb); } template bool UnicityTable::read( - FILE* f, ResultCallback3* cb, bool swap) { + FILE* f, TessResultCallback3* cb, bool swap) { return table_.read(f, cb, swap); } diff --git a/ccutil/varable.cpp b/ccutil/varable.cpp deleted file mode 100644 index 7187f609ca..0000000000 --- a/ccutil/varable.cpp +++ /dev/null @@ -1,659 +0,0 @@ -/********************************************************************** - * File: varable.c (Formerly variable.c) - * Description: Initialization and setting of VARIABLEs. - * Author: Ray Smith - * Created: Fri Feb 22 16:22:34 GMT 1991 - * - * (C) Copyright 1991, Hewlett-Packard Ltd. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. 
- * - **********************************************************************/ - -#include "mfcpch.h" //precompiled headers - -#include -#include -#include - -#include "scanutils.h" -#include "tprintf.h" -#include "varable.h" - -#define PLUS '+' //flag states -#define MINUS '-' -#define EQUAL '=' - -CLISTIZE (INT_VARIABLE) -CLISTIZE (BOOL_VARIABLE) CLISTIZE (STRING_VARIABLE) CLISTIZE (double_VARIABLE) -INT_VAR_FROM -INT_VARIABLE::copy; -INT_VARIABLE_CLIST -INT_VARIABLE::head; //global definition -INT_VAR_TO -INT_VARIABLE::replace; -BOOL_VAR_FROM -BOOL_VARIABLE::copy; -BOOL_VARIABLE_CLIST -BOOL_VARIABLE::head; //global definition -BOOL_VAR_TO -BOOL_VARIABLE::replace; -STRING_VAR_FROM -STRING_VARIABLE::copy; -STRING_VARIABLE_CLIST -STRING_VARIABLE::head; //global definition -STRING_VAR_TO -STRING_VARIABLE::replace; -double_VAR_FROM -double_VARIABLE::copy; -double_VARIABLE_CLIST -double_VARIABLE::head; //global definition -double_VAR_TO -double_VARIABLE::replace; - -/********************************************************************** - * INT_VAR_FROM::INT_VAR_FROM - * - * Constructor to copy the list to a temporary location while the - * list head gets constructed. - **********************************************************************/ - -INT_VAR_FROM::INT_VAR_FROM() { //constructor - INT_VARIABLE_C_IT start_it = &INT_VARIABLE::head; - INT_VARIABLE_C_IT end_it = &INT_VARIABLE::head; - - if (!start_it.empty ()) { - while (!end_it.at_last ()) - end_it.forward (); - //move to copy - list.assign_to_sublist (&start_it, &end_it); - } -} - - -/********************************************************************** - * INT_VAR_TO::INT_VAR_TO - * - * Constructor to copy the list back to its rightful place. 
- **********************************************************************/ - -INT_VAR_TO::INT_VAR_TO() { //constructor - INT_VARIABLE_C_IT start_it = &INT_VARIABLE::copy.list; - INT_VARIABLE_C_IT end_it = &INT_VARIABLE::copy.list; - - if (!start_it.empty ()) { - while (!end_it.at_last ()) - end_it.forward (); - INT_VARIABLE::head.assign_to_sublist (&start_it, &end_it); - } -} - - -/********************************************************************** - * INT_VARIABLE::INT_VARIABLE - * - * Constructor for INT_VARIABLE. Add the variable to the static list. - **********************************************************************/ - -INT_VARIABLE::INT_VARIABLE( //constructor - inT32 v, //the variable - const char *vname, //of variable - const char *comment //info on variable - ) { - INT_VARIABLE_C_IT it = &head; //list iterator - - //tprintf("Constructing %s\n",vname); - set_value(v); //set the value - name = vname; //strings must be static - info = comment; - it.add_before_stay_put (this); //add it to stack -} - - -INT_VARIABLE::~INT_VARIABLE ( //constructor -) { - INT_VARIABLE_C_IT it = &head; //list iterator - - for (it.mark_cycle_pt (); !it.cycled_list (); it.forward ()) - if (it.data () == this) - it.extract (); -} - - -/********************************************************************** - * INT_VARIABLE::get_head - * - * Get the head of the list of the variables. - **********************************************************************/ - -INT_VARIABLE_CLIST *INT_VARIABLE::get_head() { //access to static - return &head; -} - - -/********************************************************************** - * INT_VARIABLE::print - * - * Print the entire list of INT_VARIABLEs. 
- **********************************************************************/ - -void INT_VARIABLE::print( //print full list - FILE *fp //file to print on - ) { - INT_VARIABLE_C_IT it = &head; //list iterator - INT_VARIABLE *elt; //current element - - if (fp == stdout) { - tprintf ("#Variables of type INT_VARIABLE:\n"); - for (it.mark_cycle_pt (); !it.cycled_list (); it.forward ()) { - elt = it.data (); - tprintf ("%s %d #%s\n", elt->name, elt->value, elt->info); - } - } - else { - fprintf (fp, "#Variables of type INT_VARIABLE:\n"); - for (it.mark_cycle_pt (); !it.cycled_list (); it.forward ()) { - elt = it.data (); - fprintf (fp, "%s " INT32FORMAT " #%s\n", elt->name, elt->value, - elt->info); - } - } -} - - -/********************************************************************** - * BOOL_VAR_FROM::BOOL_VAR_FROM - * - * Constructor to copy the list to a temporary location while the - * list head gets constructed. - **********************************************************************/ - -BOOL_VAR_FROM::BOOL_VAR_FROM() { //constructor - BOOL_VARIABLE_C_IT start_it = &BOOL_VARIABLE::head; - BOOL_VARIABLE_C_IT end_it = &BOOL_VARIABLE::head; - - if (!start_it.empty ()) { - while (!end_it.at_last ()) - end_it.forward (); - //move to copy - list.assign_to_sublist (&start_it, &end_it); - } -} - - -/********************************************************************** - * BOOL_VAR_TO::BOOL_VAR_TO - * - * Constructor to copy the list back to its rightful place. 
- **********************************************************************/ - -BOOL_VAR_TO::BOOL_VAR_TO() { //constructor - BOOL_VARIABLE_C_IT start_it = &BOOL_VARIABLE::copy.list; - BOOL_VARIABLE_C_IT end_it = &BOOL_VARIABLE::copy.list; - - if (!start_it.empty ()) { - while (!end_it.at_last ()) - end_it.forward (); - BOOL_VARIABLE::head.assign_to_sublist (&start_it, &end_it); - } -} - - -/********************************************************************** - * BOOL_VARIABLE::BOOL_VARIABLE - * - * Constructor for BOOL_VARIABLE. Add the variable to the static list. - **********************************************************************/ - -BOOL_VARIABLE::BOOL_VARIABLE( //constructor - BOOL8 v, //the variable - const char *vname, //of variable - const char *comment //info on variable - ) { - BOOL_VARIABLE_C_IT it = &head; //list iterator - - //tprintf("Constructing %s\n",vname); - set_value(v); //set the value - name = vname; //strings must be static - info = comment; - it.add_before_stay_put (this); //add it to stack - -} - - -/********************************************************************** - * BOOL_VARIABLE::BOOL_VARIABLE - * - * Constructor for BOOL_VARIABLE. Add the variable to the static list. - **********************************************************************/ - -BOOL_VARIABLE::~BOOL_VARIABLE () { - BOOL_VARIABLE_C_IT it = &head; //list iterator - - for (it.mark_cycle_pt (); !it.cycled_list (); it.forward ()) - if (it.data () == this) - it.extract (); -} - - -/********************************************************************** - * BOOL_VARIABLE::get_head - * - * Get the head of the list of the variables. - **********************************************************************/ - -BOOL_VARIABLE_CLIST *BOOL_VARIABLE::get_head() { //access to static - return &head; -} - - -/********************************************************************** - * BOOL_VARIABLE::print - * - * Print the entire list of BOOL_VARIABLEs. 
- **********************************************************************/ - -void BOOL_VARIABLE::print( //print full list - FILE *fp //file to print on - ) { - BOOL_VARIABLE_C_IT it = &head; //list iterator - BOOL_VARIABLE *elt; //current element - - if (fp == stdout) { - tprintf ("#Variables of type BOOL_VARIABLE:\n"); - for (it.mark_cycle_pt (); !it.cycled_list (); it.forward ()) { - elt = it.data (); - tprintf ("%s %c #%s\n", - elt->name, elt->value ? 'T' : 'F', elt->info); - } - } - else { - fprintf (fp, "#Variables of type BOOL_VARIABLE:\n"); - for (it.mark_cycle_pt (); !it.cycled_list (); it.forward ()) { - elt = it.data (); - fprintf (fp, "%s %c #%s\n", - elt->name, elt->value ? 'T' : 'F', elt->info); - } - } -} - - -/********************************************************************** - * STRING_VAR_FROM::STRING_VAR_FROM - * - * Constructor to copy the list to a temporary location while the - * list head gets constructed. - **********************************************************************/ - -STRING_VAR_FROM::STRING_VAR_FROM() { //constructor - STRING_VARIABLE_C_IT start_it = &STRING_VARIABLE::head; - STRING_VARIABLE_C_IT end_it = &STRING_VARIABLE::head; - - if (!start_it.empty ()) { - while (!end_it.at_last ()) - end_it.forward (); - //move to copy - list.assign_to_sublist (&start_it, &end_it); - } -} - - -/********************************************************************** - * STRING_VAR_TO::STRING_VAR_TO - * - * Constructor to copy the list back to its rightful place. 
- **********************************************************************/ - -STRING_VAR_TO::STRING_VAR_TO() { //constructor - STRING_VARIABLE_C_IT start_it = &STRING_VARIABLE::copy.list; - STRING_VARIABLE_C_IT end_it = &STRING_VARIABLE::copy.list; - - if (!start_it.empty ()) { - while (!end_it.at_last ()) - end_it.forward (); - STRING_VARIABLE::head.assign_to_sublist (&start_it, &end_it); - } -} - - -/********************************************************************** - * STRING_VARIABLE::STRING_VARIABLE - * - * Constructor for STRING_VARIABLE. Add the variable to the static list. - **********************************************************************/ - -STRING_VARIABLE::STRING_VARIABLE ( - //constructor -const char *v, //the variable -const char *vname, //of variable -const char *comment //info on variable -): -value(v) { - // list iterator - STRING_VARIABLE_C_IT it = &head; - - name = vname; // strings must be static - info = comment; - it.add_before_stay_put(this); // add it to stack -} - - -/********************************************************************** - * STRING_VARIABLE::~STRING_VARIABLE - * - * Destructor for STRING_VARIABLE. Add the variable to the static list. - **********************************************************************/ - - // constructor -STRING_VARIABLE::~STRING_VARIABLE( -) { - // list iterator - STRING_VARIABLE_C_IT it = &head; - - for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) - if (it.data() == this) - it.extract(); -} - - -/********************************************************************** - * STRING_VARIABLE::get_head - * - * Get the head of the list of the variables. - **********************************************************************/ - -STRING_VARIABLE_CLIST *STRING_VARIABLE::get_head() { // access to static - return &head; -} - - -/********************************************************************** - * STRING_VARIABLE::print - * - * Print the entire list of STRING_VARIABLEs. 
- **********************************************************************/ - -void STRING_VARIABLE::print(FILE *fp) { - STRING_VARIABLE_C_IT it = &head; // list iterator - STRING_VARIABLE *elt; // current element - - // Comments aren't allowed with string variables, so the # character can - // be part of a string. - if (fp == stdout) { - tprintf("#Variables of type STRING_VARIABLE:\n"); - for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { - elt = it.data(); - tprintf("%s %s\n", elt->name, elt->value.string()); - } - } else { - fprintf(fp, "#Variables of type STRING_VARIABLE:\n"); - for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { - elt = it.data(); - fprintf(fp, "%s %s\n", elt->name, elt->value.string()); - } - } -} - - -/********************************************************************** - * double_VAR_FROM::double_VAR_FROM - * - * Constructor to copy the list to a temporary location while the - * list head gets constructed. - **********************************************************************/ - -double_VAR_FROM::double_VAR_FROM() { // constructor - double_VARIABLE_C_IT start_it = &double_VARIABLE::head; - double_VARIABLE_C_IT end_it = &double_VARIABLE::head; - - if (!start_it.empty()) { - while (!end_it.at_last()) - end_it.forward(); - // move to copy - list.assign_to_sublist(&start_it, &end_it); - } -} - - -/********************************************************************** - * double_VAR_TO::double_VAR_TO - * - * Constructor to copy the list back to its rightful place. 
- **********************************************************************/ - -double_VAR_TO::double_VAR_TO() { // constructor - double_VARIABLE_C_IT start_it = &double_VARIABLE::copy.list; - double_VARIABLE_C_IT end_it = &double_VARIABLE::copy.list; - - if (!start_it.empty()) { - while (!end_it.at_last()) - end_it.forward(); - double_VARIABLE::head.assign_to_sublist(&start_it, &end_it); - } -} - - -/********************************************************************** - * double_VARIABLE::double_VARIABLE - * - * Constructor for double_VARIABLE. Add the variable to the static list. - **********************************************************************/ - -double_VARIABLE::double_VARIABLE(double v, // the variable - const char *vname, // of variable - const char *comment // info on variable - ) { - // list iterator - double_VARIABLE_C_IT it = &head; - - set_value(v); // set the value - name = vname; // strings must be static - info = comment; - it.add_before_stay_put(this); // add it to stack -} - - -double_VARIABLE::~double_VARIABLE() { - // list iterator - double_VARIABLE_C_IT it = &head; - - for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) - if (it.data() == this) - it.extract(); -} - - -/********************************************************************** - * double_VARIABLE::get_head - * - * Get the head of the list of the variables. - **********************************************************************/ - -double_VARIABLE_CLIST *double_VARIABLE::get_head() { // access to static - return &head; -} - - -/********************************************************************** - * double_VARIABLE::print - * - * Print the entire list of double_VARIABLEs. 
- **********************************************************************/ - -void double_VARIABLE::print(FILE *fp // file to print on - ) { - // list iterator - double_VARIABLE_C_IT it = &head; - double_VARIABLE *elt; // current element - - if (fp == stdout) { - tprintf("#Variables of type double_VARIABLE:\n"); - for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { - elt = it.data(); - tprintf ("%s %lg #%s\n", elt->name, elt->value, elt->info); - } - } else { - fprintf(fp, "#Variables of type double_VARIABLE:\n"); - for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { - elt = it.data(); - fprintf(fp, "%s %g #%s\n", elt->name, elt->value, elt->info); - } - } -} - - -/********************************************************************** - * read_variables_file - * - * Read a file of variables definitions and set/modify the values therein. - * If the filename begins with a + or -, the BOOL_VARIABLEs will be - * ORed or ANDed with any current values. - * Blank lines and lines beginning # are ignored. - * Values may have any whitespace after the name and are the rest of line. 
- **********************************************************************/ - -DLLSYM BOOL8 read_variables_file(const char *file, // name to read - bool global_only // only set variables - ) { // starting with "global_" - char flag; // file flag - inT16 nameoffset; // offset for real name - FILE *fp; // file pointer - // iterators - bool ret; - - if (*file == PLUS) { - flag = PLUS; // file has flag - nameoffset = 1; - } else if (*file == MINUS) { - flag = MINUS; - nameoffset = 1; - } else { - flag = EQUAL; - nameoffset = 0; - } - - fp = fopen(file + nameoffset, "r"); - if (fp == NULL) { - tprintf("read_variables_file: Can't open %s\n", file + nameoffset); - return TRUE; // can't open it - } - ret = read_variables_from_fp(fp, -1, global_only); - fclose(fp); - return ret; -} - -bool read_variables_from_fp(FILE *fp, inT64 end_offset, bool global_only) { - char line[MAX_PATH]; // input line - bool anyerr = false; // true if any error - bool foundit; // found variable - inT16 length; // length of line - char *valptr; // value field - - while ((end_offset < 0 || ftell(fp) < end_offset) && - fgets(line, MAX_PATH, fp)) { - if (line[0] != '\n' && line[0] != '#') { - length = strlen (line); - if (line[length - 1] == '\n') - line[length - 1] = '\0'; // cut newline - for (valptr = line; *valptr && *valptr != ' ' && *valptr != '\t'; - valptr++); - if (*valptr) { // found blank - *valptr = '\0'; // make name a string - do - valptr++; // find end of blanks - while (*valptr == ' ' || *valptr == '\t'); - } - if (global_only && strstr(line, kGlobalVariablePrefix) == NULL) continue; - foundit = set_variable(line, valptr); - - if (!foundit) { - anyerr = TRUE; // had an error - tprintf("read_variables_file: variable not found: %s\n", line); - exit(1); - } - } - } - return anyerr; -} - -bool set_variable(const char *variable, const char* value) { - INT_VARIABLE_C_IT int_it = &INT_VARIABLE::head; - BOOL_VARIABLE_C_IT BOOL_it = &BOOL_VARIABLE::head; - STRING_VARIABLE_C_IT STRING_it = 
&STRING_VARIABLE::head; - double_VARIABLE_C_IT double_it = &double_VARIABLE::head; - - bool foundit = false; - // find name - for (STRING_it.mark_cycle_pt(); - !STRING_it.cycled_list() && strcmp(variable, STRING_it.data()->name); - STRING_it.forward()); - if (!STRING_it.cycled_list()) { - foundit = true; // found the varaible - STRING_it.data()->set_value(value); // set its value - } - - if (*value) { - // find name - for (int_it.mark_cycle_pt(); - !int_it.cycled_list() && strcmp(variable, int_it.data()->name); - int_it.forward()); - int intval; - if (!int_it.cycled_list() - && sscanf(value, INT32FORMAT, &intval) == 1) { - foundit = true; // found the varaible - int_it.data()->set_value(intval); // set its value. - } - for (BOOL_it.mark_cycle_pt(); - !BOOL_it.cycled_list() && strcmp(variable, BOOL_it.data()->name); - BOOL_it.forward()); - if (!BOOL_it.cycled_list()) { - if (*value == 'T' || *value == 't' || - *value == 'Y' || *value == 'y' || *value == '1') { - foundit = true; - BOOL_it.data()->set_value(TRUE); - } - else if (*value == 'F' || *value == 'f' || - *value == 'N' || *value == 'n' || *value == '0') { - foundit = true; - BOOL_it.data()->set_value(FALSE); - } - } - for (double_it.mark_cycle_pt(); - !double_it.cycled_list() && strcmp(variable, double_it.data ()->name); - double_it.forward()); - double doubleval; -#ifdef EMBEDDED - if (!double_it.cycled_list ()) { - doubleval = strtofloat(value); -#else - if (!double_it.cycled_list() - && sscanf(value, "%lf", &doubleval) == 1) { -#endif - foundit = true; // found the variable - double_it.data()->set_value(doubleval); - } - } - return foundit; -} - -/********************************************************************** - * print_variables - * - * Print all variable types to the given file - **********************************************************************/ - -DLLSYM void print_variables( //print all vars - FILE *fp //file to print on - ) { - INT_VARIABLE::print(fp); //print INTs - 
BOOL_VARIABLE::print(fp); //print BOOLs - STRING_VARIABLE::print(fp); //print STRINGs - double_VARIABLE::print(fp); //print doubles -} diff --git a/ccutil/varable.h b/ccutil/varable.h deleted file mode 100644 index 54a6dca23c..0000000000 --- a/ccutil/varable.h +++ /dev/null @@ -1,428 +0,0 @@ -/********************************************************************** - * File: varable.h (Formerly variable.h) - * Description: Class definitions of the *_VAR classes for tunable constants. - * Author: Ray Smith - * Created: Fri Feb 22 11:26:25 GMT 1991 - * - * (C) Copyright 1991, Hewlett-Packard Ltd. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. - * - **********************************************************************/ - -#ifndef VARABLE_H -#define VARABLE_H - -#include - -#include "clst.h" -#include "strngs.h" - -class DLLSYM INT_VARIABLE; - -// Read config file. -extern DLLSYM BOOL8 read_variables_file( - const char *file, // filename to read - bool global_only); // only set variables starting with "global_" - -// Read variables from the given file pointer (stop at end_offset). -bool read_variables_from_fp(FILE *fp, inT64 end_offset, bool global_only); - -// Set a variable to have the given value. -bool set_variable(const char *variable, const char* value); - -// Print variables to a file. 
-extern DLLSYM void print_variables(FILE *fp); - -const char kGlobalVariablePrefix[] = "global_"; - -CLISTIZEH (INT_VARIABLE) -class DLLSYM INT_VAR_FROM -{ - friend class INT_VAR_TO; - public: - INT_VAR_FROM(); //constructor - private: - INT_VARIABLE_CLIST list; //copy of list -}; - -class DLLSYM INT_VAR_TO -{ - public: - INT_VAR_TO(); //constructor - private: - INT_VARIABLE_CLIST dummy; -}; - -class DLLSYM INT_VARIABLE -{ - friend class INT_VAR_TO; - friend class INT_VAR_FROM; - //for setting values - friend bool set_variable(const char *variable, const char* value); - - public: - INT_VARIABLE(inT32 v, // initial value - const char *vname, // name of variable - const char *comment); // info on variable - - INT_VARIABLE() { // for elist only - value = 0; - name = "NONAME"; - info = "Uninitialized"; - } - ~INT_VARIABLE(); // for elist only - - operator inT32() { // conversion - return value; // access as int - } - - void set_value(inT32 v) { // value to set - value = v; - } - - const char *name_str() { // access name - return name; - } - - const char *info_str() { // access name - return info; - } - - // access list head - static INT_VARIABLE_CLIST *get_head(); - - static void print(FILE *fp); // file to print on - - private: - inT32 value; // the variable - const char *name; // name of variable - const char *info; // for menus - static INT_VAR_FROM copy; // pre constructor - // start of list - static INT_VARIABLE_CLIST head; - static INT_VAR_TO replace; // post constructor -}; - -class DLLSYM BOOL_VARIABLE; - -CLISTIZEH(BOOL_VARIABLE) -class DLLSYM BOOL_VAR_FROM { - friend class BOOL_VAR_TO; - public: - BOOL_VAR_FROM(); // constructor - private: - BOOL_VARIABLE_CLIST list; // copy of list -}; - -class DLLSYM BOOL_VAR_TO { - public: - BOOL_VAR_TO(); // constructor - private: - BOOL_VARIABLE_CLIST dummy; -}; - -class DLLSYM BOOL_VARIABLE { - friend class BOOL_VAR_FROM; - friend class BOOL_VAR_TO; - //for setting values - friend bool set_variable(const char *variable, 
const char* value); - - public: - BOOL_VARIABLE( //constructor - BOOL8 v, //initial value - const char *vname, //name of variable - const char *comment); //info on variable - - BOOL_VARIABLE() { //for elist only - value = FALSE; - name = "NONAME"; - info = "Uninitialized"; - } - ~BOOL_VARIABLE (); //for elist only - - operator BOOL8() { //conversion - return value; //access as int - } - - void set_value( //assign to value - BOOL8 v) { //value to set - value = v; - } - - const char *name_str() { //access name - return name; - } - - const char *info_str() { //access name - return info; - } - - //access list head - static BOOL_VARIABLE_CLIST *get_head(); - - static void print( //print whole list - FILE *fp); //file to print on - - private: - BOOL8 value; //the variable - const char *name; //name of variable - const char *info; //for menus - static BOOL_VAR_FROM copy; //pre constructor - //start of list - static BOOL_VARIABLE_CLIST head; - static BOOL_VAR_TO replace; //post constructor -}; - -class DLLSYM STRING_VARIABLE; - -CLISTIZEH (STRING_VARIABLE) -class DLLSYM STRING_VAR_FROM -{ - friend class STRING_VAR_TO; - public: - STRING_VAR_FROM(); //constructor - private: - STRING_VARIABLE_CLIST list; //copy of list -}; - -class DLLSYM STRING_VAR_TO -{ - public: - STRING_VAR_TO(); //constructor - private: - STRING_VARIABLE_CLIST dummy; -}; - -class DLLSYM STRING_VARIABLE -{ - friend class STRING_VAR_TO; - friend class STRING_VAR_FROM; - //for setting values - friend bool set_variable(const char *variable, const char* value); - - public: - STRING_VARIABLE( //constructor - const char *v, //initial value - const char *vname, //name of variable - const char *comment); //info on variable - - STRING_VARIABLE() { //for elist only - name = "NONAME"; - info = "Uninitialized"; - } - ~STRING_VARIABLE (); //for elist only - - //conversion - operator const STRING &() { - return value; //access as int - } - - void set_value( //assign to value - STRING v) { //value to set - value = v; - 
} - - const char *string() const { //get string - return value.string (); - } - - const char *name_str() { //access name - return name; - } - - const char *info_str() { //access name - return info; - } - - //access list head - static STRING_VARIABLE_CLIST *get_head(); - - static void print( //print whole list - FILE *fp); //file to print on - - private: - STRING value; //the variable - const char *name; //name of variable - const char *info; //for menus - static STRING_VAR_FROM copy; //pre constructor - //start of list - static STRING_VARIABLE_CLIST head; - static STRING_VAR_TO replace;//post constructor -}; - -class DLLSYM double_VARIABLE; - -CLISTIZEH (double_VARIABLE) -class DLLSYM double_VAR_FROM -{ - friend class double_VAR_TO; - public: - double_VAR_FROM(); //constructor - private: - double_VARIABLE_CLIST list; //copy of list -}; - -class DLLSYM double_VAR_TO -{ - public: - double_VAR_TO(); //constructor - private: - double_VARIABLE_CLIST dummy; -}; - -class DLLSYM double_VARIABLE -{ - friend class double_VAR_TO; - friend class double_VAR_FROM; - //for setting values - friend bool set_variable(const char *variable, const char* value); - - public: - double_VARIABLE( //constructor - double v, //initial value - const char *vname, //name of variable - const char *comment); //info on variable - - double_VARIABLE() { //for elist only - value = 0.0; - name = "NONAME"; - info = "Uninitialized"; - } - ~double_VARIABLE (); //for elist only - - operator double() { //conversion - return value; //access as int - } - - void set_value( //assign to value - double v) { //value to set - value = v; - } - - const char *name_str() { //access name - return name; - } - - const char *info_str() { //access name - return info; - } - - //access list head - static double_VARIABLE_CLIST *get_head(); - - static void print( //print whole list - FILE *fp); //file to print on - - private: - double value; //the variable - const char *name; //name of variable - const char *info; //for menus - 
static double_VAR_FROM copy; //pre constructor - //start of list - static double_VARIABLE_CLIST head; - static double_VAR_TO replace;//post constructor -}; - -/************************************************************************* - * NOTE ON DEFINING VARIABLES - * - * For our normal code, the ***_VAR and ***_EVAR macros for variable - * definitions are identical. HOWEVER, for the code version to ship to NEVADA - * (or anywhere else where we want to hide the majority of variables) the - * **_VAR macros are changed so that the "#name" and "comment" parameters - * to the variable constructor are changed to empty strings. This prevents the - * variable name or comment string appearing in the object code file (after it - * has gone through strip). - * - * Certain variables can remain EXPOSED and hence be used in config files given - * to UNLV. These are variable which have been declared with the ***_EVAR - * macros. - * - *************************************************************************/ - -/* SECURE_NAMES is defined in senames.h when necessary */ -#ifdef SECURE_NAMES - -#define INT_VAR(name,val,comment) /*make INT_VARIABLE*/\ - INT_VARIABLE name(val,"","") - -#define BOOL_VAR(name,val,comment) /*make BOOL_VARIABLE*/\ - BOOL_VARIABLE name(val,"","") - -#define STRING_VAR(name,val,comment) /*make STRING_VARIABLE*/\ - STRING_VARIABLE name(val,"","") - -#define double_VAR(name,val,comment) /*make double_VARIABLE*/\ - double_VARIABLE name(val,"","") - -#else - -#define INT_VAR(name,val,comment) /*make INT_VARIABLE*/\ - INT_VARIABLE name(val,#name,comment) - -#define BOOL_VAR(name,val,comment) /*make BOOL_VARIABLE*/\ - BOOL_VARIABLE name(val,#name,comment) - -#define STRING_VAR(name,val,comment) /*make STRING_VARIABLE*/\ - STRING_VARIABLE name(val,#name,comment) - -#define double_VAR(name,val,comment) /*make double_VARIABLE*/\ - double_VARIABLE name(val,#name,comment) -#endif - -#define INT_VAR_H(name,val,comment) /*declare one*/\ - INT_VARIABLE name - -#define 
BOOL_VAR_H(name,val,comment) /*declare one*/\ - BOOL_VARIABLE name - -#define STRING_VAR_H(name,val,comment) /*declare one*/\ - STRING_VARIABLE name - -#define double_VAR_H(name,val,comment) /*declare one*/\ - double_VARIABLE name - -#define INT_MEMBER(name, val, comment) /*make INT_VARIABLE*/\ - name(val, #name, comment) - -#define BOOL_MEMBER(name, val, comment) /*make BOOL_VARIABLE*/\ - name(val, #name, comment) - -#define STRING_MEMBER(name, val, comment) /*make STRING_VARIABLE*/\ - name(val, #name, comment) - -#define double_MEMBER(name, val, comment) /*make double_VARIABLE*/\ - name(val, #name, comment) - -#define INT_EVAR(name,val,comment) /*make INT_VARIABLE*/\ - INT_VARIABLE name(val,#name,comment) - -#define INT_EVAR_H(name,val,comment) /*declare one*/\ - INT_VARIABLE name - -#define BOOL_EVAR(name,val,comment) /*make BOOL_VARIABLE*/\ - BOOL_VARIABLE name(val,#name,comment) - -#define BOOL_EVAR_H(name,val,comment) /*declare one*/\ - BOOL_VARIABLE name - -#define STRING_EVAR(name,val,comment) /*make STRING_VARIABLE*/\ - STRING_VARIABLE name(val,#name,comment) - -#define STRING_EVAR_H(name,val,comment) /*declare one*/\ - STRING_VARIABLE name - -#define double_EVAR(name,val,comment) /*make double_VARIABLE*/\ - double_VARIABLE name(val,#name,comment) - -#define double_EVAR_H(name,val,comment) /*declare one*/\ - double_VARIABLE name -#endif diff --git a/classify/Makefile.am b/classify/Makefile.am index 7d55efbf8d..88e9bc229a 100644 --- a/classify/Makefile.am +++ b/classify/Makefile.am @@ -4,14 +4,12 @@ AM_CPPFLAGS = \ -I$(top_srcdir)/ccstruct -I$(top_srcdir)/dict \ -I$(top_srcdir)/image -I$(top_srcdir)/viewer -EXTRA_DIST = classify.vcproj - include_HEADERS = \ - adaptive.h adaptmatch.h baseline.h blobclass.h chartoname.h \ + adaptive.h baseline.h blobclass.h chartoname.h \ classify.h cluster.h clusttool.h cutoffs.h \ extern.h extract.h \ - featdefs.h flexfx.h float2int.h fpoint.h fxdefs.h fxid.h \ - hideedge.h intfx.h intmatcher.h intproto.h kdtree.h \ + 
featdefs.h flexfx.h float2int.h fpoint.h fxdefs.h \ + intfx.h intmatcher.h intproto.h kdtree.h \ mf.h mfdefs.h mfoutline.h mfx.h \ normfeat.h normmatch.h \ ocrfeatures.h outfeat.h picofeat.h protos.h \ @@ -19,11 +17,11 @@ include_HEADERS = \ lib_LTLIBRARIES = libtesseract_classify.la libtesseract_classify_la_SOURCES = \ - adaptive.cpp adaptmatch.cpp baseline.cpp blobclass.cpp \ + adaptive.cpp adaptmatch.cpp blobclass.cpp \ chartoname.cpp classify.cpp cluster.cpp clusttool.cpp cutoffs.cpp \ extract.cpp \ featdefs.cpp flexfx.cpp float2int.cpp fpoint.cpp fxdefs.cpp \ - hideedge.cpp intfx.cpp intmatcher.cpp intproto.cpp kdtree.cpp \ + intfx.cpp intmatcher.cpp intproto.cpp kdtree.cpp \ mf.cpp mfdefs.cpp mfoutline.cpp mfx.cpp \ normfeat.cpp normmatch.cpp \ ocrfeatures.cpp outfeat.cpp picofeat.cpp protos.cpp \ diff --git a/classify/Makefile.in b/classify/Makefile.in index 66a0645daf..5eacf4423c 100644 --- a/classify/Makefile.in +++ b/classify/Makefile.in @@ -72,12 +72,12 @@ am__installdirs = "$(DESTDIR)$(libdir)" "$(DESTDIR)$(includedir)" LTLIBRARIES = $(lib_LTLIBRARIES) libtesseract_classify_la_LIBADD = am_libtesseract_classify_la_OBJECTS = adaptive.lo adaptmatch.lo \ - baseline.lo blobclass.lo chartoname.lo classify.lo cluster.lo \ - clusttool.lo cutoffs.lo extract.lo featdefs.lo flexfx.lo \ - float2int.lo fpoint.lo fxdefs.lo hideedge.lo intfx.lo \ - intmatcher.lo intproto.lo kdtree.lo mf.lo mfdefs.lo \ - mfoutline.lo mfx.lo normfeat.lo normmatch.lo ocrfeatures.lo \ - outfeat.lo picofeat.lo protos.lo speckle.lo xform2d.lo + blobclass.lo chartoname.lo classify.lo cluster.lo clusttool.lo \ + cutoffs.lo extract.lo featdefs.lo flexfx.lo float2int.lo \ + fpoint.lo fxdefs.lo intfx.lo intmatcher.lo intproto.lo \ + kdtree.lo mf.lo mfdefs.lo mfoutline.lo mfx.lo normfeat.lo \ + normmatch.lo ocrfeatures.lo outfeat.lo picofeat.lo protos.lo \ + speckle.lo xform2d.lo libtesseract_classify_la_OBJECTS = \ $(am_libtesseract_classify_la_OBJECTS) libtesseract_classify_la_LINK = $(LIBTOOL) 
--tag=CXX \ @@ -252,7 +252,6 @@ libdir = @libdir@ libexecdir = @libexecdir@ localedir = @localedir@ localstatedir = @localstatedir@ -lt_ECHO = @lt_ECHO@ mandir = @mandir@ mkdir_p = @mkdir_p@ oldincludedir = @oldincludedir@ @@ -274,13 +273,12 @@ AM_CPPFLAGS = \ -I$(top_srcdir)/ccstruct -I$(top_srcdir)/dict \ -I$(top_srcdir)/image -I$(top_srcdir)/viewer -EXTRA_DIST = classify.vcproj include_HEADERS = \ - adaptive.h adaptmatch.h baseline.h blobclass.h chartoname.h \ + adaptive.h baseline.h blobclass.h chartoname.h \ classify.h cluster.h clusttool.h cutoffs.h \ extern.h extract.h \ - featdefs.h flexfx.h float2int.h fpoint.h fxdefs.h fxid.h \ - hideedge.h intfx.h intmatcher.h intproto.h kdtree.h \ + featdefs.h flexfx.h float2int.h fpoint.h fxdefs.h \ + intfx.h intmatcher.h intproto.h kdtree.h \ mf.h mfdefs.h mfoutline.h mfx.h \ normfeat.h normmatch.h \ ocrfeatures.h outfeat.h picofeat.h protos.h \ @@ -288,11 +286,11 @@ include_HEADERS = \ lib_LTLIBRARIES = libtesseract_classify.la libtesseract_classify_la_SOURCES = \ - adaptive.cpp adaptmatch.cpp baseline.cpp blobclass.cpp \ + adaptive.cpp adaptmatch.cpp blobclass.cpp \ chartoname.cpp classify.cpp cluster.cpp clusttool.cpp cutoffs.cpp \ extract.cpp \ featdefs.cpp flexfx.cpp float2int.cpp fpoint.cpp fxdefs.cpp \ - hideedge.cpp intfx.cpp intmatcher.cpp intproto.cpp kdtree.cpp \ + intfx.cpp intmatcher.cpp intproto.cpp kdtree.cpp \ mf.cpp mfdefs.cpp mfoutline.cpp mfx.cpp \ normfeat.cpp normmatch.cpp \ ocrfeatures.cpp outfeat.cpp picofeat.cpp protos.cpp \ @@ -375,7 +373,6 @@ distclean-compile: @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/adaptive.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/adaptmatch.Plo@am__quote@ -@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/baseline.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/blobclass.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/chartoname.Plo@am__quote@ @AMDEP_TRUE@@am__include@ 
@am__quote@./$(DEPDIR)/classify.Plo@am__quote@ @@ -388,7 +385,6 @@ distclean-compile: @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/float2int.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/fpoint.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/fxdefs.Plo@am__quote@ -@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/hideedge.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/intfx.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/intmatcher.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/intproto.Plo@am__quote@ diff --git a/classify/adaptive.cpp b/classify/adaptive.cpp index 40b20b7a53..0dd424a25e 100644 --- a/classify/adaptive.cpp +++ b/classify/adaptive.cpp @@ -112,7 +112,8 @@ ADAPT_CLASS NewAdaptedClass() { Class = (ADAPT_CLASS) Emalloc (sizeof (ADAPT_CLASS_STRUCT)); Class->NumPermConfigs = 0; - Class->TempProtos = NIL; + Class->MaxNumTimesSeen = 0; + Class->TempProtos = NIL_LIST; Class->PermProtos = NewBitVector (MAX_NUM_PROTOS); Class->PermConfigs = NewBitVector (MAX_NUM_CONFIGS); @@ -218,7 +219,7 @@ TEMP_CONFIG NewTempConfig(int MaxProtoId) { Config->NumTimesSeen = 1; Config->MaxProtoId = MaxProtoId; Config->ProtoVectorSize = WordsInVectorOfSize (NumProtos); - Config->ContextsSeen = NIL; + Config->ContextsSeen = NIL_LIST; zero_all_bits (Config->Protos, Config->ProtoVectorSize); return (Config); @@ -318,7 +319,7 @@ ADAPT_CLASS ReadAdaptedClass(FILE *File) { /* then read in the list of temporary protos */ fread ((char *) &NumTempProtos, sizeof (int), 1, File); - Class->TempProtos = NIL; + Class->TempProtos = NIL_LIST; for (i = 0; i < NumTempProtos; i++) { TempProto = (TEMP_PROTO) alloc_struct (sizeof (TEMP_PROTO_STRUCT), diff --git a/classify/adaptive.h b/classify/adaptive.h index 72b8f019c9..025632e513 100644 --- a/classify/adaptive.h +++ b/classify/adaptive.h @@ -57,7 +57,8 @@ typedef union typedef struct { uinT8 NumPermConfigs; - uinT8 dummy[3]; + uinT8 MaxNumTimesSeen; // maximum 
number of times any TEMP_CONFIG was seen + uinT8 dummy[2]; // (cut at matcher_min_examples_for_prototyping) BIT_VECTOR PermProtos; BIT_VECTOR PermConfigs; LIST TempProtos; @@ -81,7 +82,7 @@ typedef ADAPT_TEMPLATES_STRUCT *ADAPT_TEMPLATES; #define NumNonEmptyClassesIn(Template) ((Template)->NumNonEmptyClasses) #define IsEmptyAdaptedClass(Class) ((Class)->NumPermConfigs == 0 && \ -(Class)->TempProtos == NIL) +(Class)->TempProtos == NIL_LIST) #define ConfigIsPermanent(Class,ConfigId) \ (test_bit ((Class)->PermConfigs, ConfigId)) diff --git a/classify/adaptmatch.cpp b/classify/adaptmatch.cpp index 2e2c6fd697..175802b0ae 100644 --- a/classify/adaptmatch.cpp +++ b/classify/adaptmatch.cpp @@ -20,7 +20,10 @@ Include Files and Type Defines -----------------------------------------------------------------------------*/ #include -#include "adaptmatch.h" +#include "ambigs.h" +#include "blobclass.h" +#include "blobs.h" +#include "helpers.h" #include "normfeat.h" #include "mfoutline.h" #include "picofeat.h" @@ -32,17 +35,18 @@ #include "efio.h" #include "normmatch.h" #include "permute.h" -#include "context.h" #include "ndminx.h" #include "intproto.h" #include "const.h" #include "globals.h" #include "werd.h" #include "callcpp.h" -#include "tordvars.h" -#include "varable.h" +#include "pageres.h" +#include "params.h" #include "classify.h" #include "unicharset.h" +#include "dict.h" +#include "featdefs.h" #include #include @@ -65,23 +69,25 @@ #define NO_DEBUG 0 #define MAX_ADAPTABLE_WERD_SIZE 40 -#define ADAPTABLE_WERD (GOOD_WERD + 0.05) +#define ADAPTABLE_WERD_ADJUSTMENT (0.05) #define Y_DIM_OFFSET (Y_SHIFT - BASELINE_Y_SHIFT) #define WORST_POSSIBLE_RATING (1.0) -struct ADAPT_RESULTS -{ +struct ScoredClass { + CLASS_ID id; + FLOAT32 rating; + inT16 config; + inT16 config2; +}; + +struct ADAPT_RESULTS { inT32 BlobLength; int NumMatches; bool HasNonfragment; - CLASS_ID Classes[MAX_NUM_CLASSES]; - FLOAT32 Ratings[MAX_CLASS_ID + 1]; - uinT8 Configs[MAX_CLASS_ID + 1]; - FLOAT32 
BestRating; - CLASS_ID BestClass; - uinT8 BestConfig; + ScoredClass match[MAX_NUM_CLASSES]; + ScoredClass best_match; CLASS_PRUNER_RESULTS CPResults; /// Initializes data members to the default values. Sets the initial @@ -90,26 +96,18 @@ struct ADAPT_RESULTS BlobLength = MAX_INT32; NumMatches = 0; HasNonfragment = false; - BestRating = WORST_POSSIBLE_RATING; - BestClass = NO_CLASS; - BestConfig = 0; - for (int i = 0; i <= MAX_CLASS_ID; ++i) { - Ratings[i] = WORST_POSSIBLE_RATING; - } + best_match.id = NO_CLASS; + best_match.rating = WORST_POSSIBLE_RATING; + best_match.config = 0; + best_match.config2 = 0; } }; - - -typedef struct -{ +struct PROTO_KEY { ADAPT_TEMPLATES Templates; CLASS_ID ClassId; int ConfigId; -} - - -PROTO_KEY; +}; /*----------------------------------------------------------------------------- Private Macros @@ -117,200 +115,24 @@ PROTO_KEY; #define MarginalMatch(Rating) \ ((Rating) > matcher_great_threshold) -#define TempConfigReliable(Config) \ -((Config)->NumTimesSeen >= matcher_min_examples_for_prototyping) - #define InitIntFX() (FeaturesHaveBeenExtracted = FALSE) /*----------------------------------------------------------------------------- Private Function Prototypes -----------------------------------------------------------------------------*/ -void AdaptToChar(TBLOB *Blob, - LINE_STATS *LineStats, - CLASS_ID ClassId, - FLOAT32 Threshold); - -void AdaptToPunc(TBLOB *Blob, - LINE_STATS *LineStats, - CLASS_ID ClassId, - FLOAT32 Threshold); - -void AmbigClassifier(TBLOB *Blob, - LINE_STATS *LineStats, - INT_TEMPLATES Templates, - UNICHAR_ID *Ambiguities, - ADAPT_RESULTS *Results); - -UNICHAR_ID *BaselineClassifier(TBLOB *Blob, - LINE_STATS *LineStats, - ADAPT_TEMPLATES Templates, - ADAPT_RESULTS *Results); - -void make_config_pruner(INT_TEMPLATES templates, CONFIG_PRUNER *config_pruner); - -void CharNormClassifier(TBLOB *Blob, - LINE_STATS *LineStats, - INT_TEMPLATES Templates, - ADAPT_RESULTS *Results); - -void 
ClassifyAsNoise(ADAPT_RESULTS *Results); - -int CompareCurrentRatings(const void *arg1, - const void *arg2); +int CompareByRating(const void *arg1, const void *arg2); -void ConvertMatchesToChoices(ADAPT_RESULTS *Results, - BLOB_CHOICE_LIST *Choices); +ScoredClass *FindScoredUnichar(ADAPT_RESULTS *results, UNICHAR_ID id); -void DebugAdaptiveClassifier(TBLOB *Blob, - LINE_STATS *LineStats, - ADAPT_RESULTS *Results); - -void DoAdaptiveMatch(TBLOB *Blob, - LINE_STATS *LineStats, - ADAPT_RESULTS *Results); - -void GetAdaptThresholds(TWERD * Word, -LINE_STATS * LineStats, -const WERD_CHOICE& BestChoice, -const WERD_CHOICE& BestRawChoice, FLOAT32 Thresholds[]); - -UNICHAR_ID *GetAmbiguities(TBLOB *Blob, - LINE_STATS *LineStats, - CLASS_ID CorrectClass); - -namespace tesseract { -int GetBaselineFeatures(TBLOB *Blob, - LINE_STATS *LineStats, - INT_TEMPLATES Templates, - INT_FEATURE_ARRAY IntFeatures, - CLASS_NORMALIZATION_ARRAY CharNormArray, - inT32 *BlobLength); - - -int GetIntBaselineFeatures(TBLOB *Blob, - LINE_STATS *LineStats, - INT_TEMPLATES Templates, - INT_FEATURE_ARRAY IntFeatures, - CLASS_NORMALIZATION_ARRAY CharNormArray, - inT32 *BlobLength); - -} // namespace tesseract. 
+ScoredClass ScoredUnichar(ADAPT_RESULTS *results, UNICHAR_ID id); void InitMatcherRatings(register FLOAT32 *Rating); -PROTO_ID MakeNewTempProtos(FEATURE_SET Features, -int NumBadFeat, -FEATURE_ID BadFeat[], -INT_CLASS IClass, -ADAPT_CLASS Class, BIT_VECTOR TempProtoMask); - -void MakePermanent(ADAPT_TEMPLATES Templates, - CLASS_ID ClassId, - int ConfigId, - TBLOB *Blob, - LINE_STATS *LineStats); - int MakeTempProtoPerm(void *item1, void *item2); -int NumBlobsIn(TWERD *Word); - -int NumOutlinesInBlob(TBLOB *Blob); - -void PrintAdaptiveMatchResults(FILE *File, ADAPT_RESULTS *Results); - -void RemoveBadMatches(ADAPT_RESULTS *Results); - -void RemoveExtraPuncs(ADAPT_RESULTS *Results); - void SetAdaptiveThreshold(FLOAT32 Threshold); -void ShowBestMatchFor(TBLOB *Blob, - LINE_STATS *LineStats, - CLASS_ID ClassId, - BOOL8 AdaptiveOn, - BOOL8 PreTrainedOn); -/*----------------------------------------------------------------------------- - Global Data Definitions and Declarations ------------------------------------------------------------------------------*/ -/* variables used to hold performance statistics */ -static int AdaptiveMatcherCalls = 0; -static int BaselineClassifierCalls = 0; -static int CharNormClassifierCalls = 0; -static int AmbigClassifierCalls = 0; -static int NumWordsAdaptedTo = 0; -static int NumCharsAdaptedTo = 0; -static int NumBaselineClassesTried = 0; -static int NumCharNormClassesTried = 0; -static int NumAmbigClassesTried = 0; -static int NumClassesOutput = 0; -static int NumAdaptationsFailed = 0; - -/* define globals used to hold onto extracted features. This is used -to map from the old scheme in which baseline features and char norm -features are extracted separately, to the new scheme in which they -are extracted at the same time. 
*/ -static BOOL8 FeaturesHaveBeenExtracted = FALSE; -static BOOL8 FeaturesOK = TRUE; -static INT_FEATURE_ARRAY BaselineFeatures; -static INT_FEATURE_ARRAY CharNormFeatures; -static INT_FX_RESULT_STRUCT FXInfo; - -/** use a global variable to hold onto the current ratings so that the -comparison function passes to qsort can get at them */ -static FLOAT32 *CurrentRatings; - -/* define globals to hold filename of training data */ -static CLASS_CUTOFF_ARRAY CharNormCutoffs; -static CLASS_CUTOFF_ARRAY BaselineCutoffs; - -/* define control knobs for adaptive matcher */ -BOOL_VAR(classify_enable_adaptive_matcher, 1, "Enable adaptive classifier"); - -BOOL_VAR(classify_use_pre_adapted_templates, 0, - "Use pre-adapted classifier templates"); - -BOOL_VAR(classify_save_adapted_templates, 0, - "Save adapted templates to a file"); - -BOOL_VAR(classify_enable_adaptive_debugger, 0, "Enable match debugger"); - -INT_VAR(matcher_debug_level, 0, "Matcher Debug Level"); -INT_VAR(matcher_debug_flags, 0, "Matcher Debug Flags"); - -INT_VAR(classify_learning_debug_level, 0, "Learning Debug Level: "); - -double_VAR(matcher_good_threshold, 0.125, "Good Match (0-1)"); -double_VAR(matcher_great_threshold, 0.0, "Great Match (0-1)"); - -double_VAR(matcher_perfect_threshold, 0.02, "Perfect Match (0-1)"); -double_VAR(matcher_bad_match_pad, 0.15, "Bad Match Pad (0-1)"); -double_VAR(matcher_rating_margin, 0.1, "New template margin (0-1)"); -double_VAR(matcher_avg_noise_size, 12.0, "Avg. 
noise blob length: "); - -INT_VAR(matcher_permanent_classes_min, 1, "Min # of permanent classes"); - -INT_VAR(matcher_min_examples_for_prototyping, 3, "Reliable Config Threshold"); - -double_VAR(matcher_clustering_max_angle_delta, 0.015, - "Maximum angle delta for prototype clustering"); - -BOOL_VAR(classify_enable_int_fx, 1, "Enable integer fx"); - -BOOL_VAR(classify_enable_new_adapt_rules, 1, "Enable new adaptation rules"); - -double_VAR(rating_scale, 1.5, "Rating scaling factor"); -extern double_VAR_H(certainty_scale, 20.0, "Certainty scaling factor"); - -INT_VAR(matcher_failed_adaptations_before_reset, 150, - "Number of failed adaptions before adapted templates reset"); - -double_VAR(tessedit_class_miss_scale, 0.00390625, - "Scale factor for features not used"); - -BOOL_VAR(tess_cn_matching, 0, "Character Normalized Matching"); -BOOL_VAR(tess_bn_matching, 0, "Baseline Normalized Matching"); - /*----------------------------------------------------------------------------- Public Code -----------------------------------------------------------------------------*/ @@ -336,50 +158,32 @@ namespace tesseract { * @note History: Mon Mar 11 10:00:58 1991, DSJ, Created. * * @param Blob blob to be classified - * @param DotBlob (obsolete) - * @param Row row of text that word appears in * @param[out] Choices List of choices found by adaptive matcher. * @param[out] CPResults Array of CPResultStruct of size MAX_NUM_CLASSES is * filled on return with the choices found by the * class pruner and the ratings therefrom. Also * contains the detailed results of the integer matcher. 
* - * Globals: - * - CurrentRatings used by compare function for qsort */ void Classify::AdaptiveClassifier(TBLOB *Blob, - TBLOB *DotBlob, - TEXTROW *Row, BLOB_CHOICE_LIST *Choices, CLASS_PRUNER_RESULTS CPResults) { assert(Choices != NULL); ADAPT_RESULTS *Results = new ADAPT_RESULTS(); - LINE_STATS LineStats; - if (matcher_failed_adaptations_before_reset >= 0 && - NumAdaptationsFailed >= matcher_failed_adaptations_before_reset) { - NumAdaptationsFailed = 0; - ResetAdaptiveClassifier(); - } if (AdaptedTemplates == NULL) AdaptedTemplates = NewAdaptedTemplates (true); - EnterClassifyMode; - Results->Initialize(); - GetLineStatsFromRow(Row, &LineStats); - DoAdaptiveMatch(Blob, &LineStats, Results); + DoAdaptiveMatch(Blob, Results); if (CPResults != NULL) memcpy(CPResults, Results->CPResults, sizeof(CPResults[0]) * Results->NumMatches); - RemoveBadMatches(Results); - - /* save ratings in a global so that CompareCurrentRatings() can see them */ - CurrentRatings = Results->Ratings; - qsort ((void *) (Results->Classes), Results->NumMatches, - sizeof (CLASS_ID), CompareCurrentRatings); + RemoveBadMatches(Results); + qsort((void *)Results->match, Results->NumMatches, + sizeof(ScoredClass), CompareByRating); RemoveExtraPuncs(Results); ConvertMatchesToChoices(Results, Choices); @@ -388,155 +192,163 @@ void Classify::AdaptiveClassifier(TBLOB *Blob, PrintAdaptiveMatchResults(stdout, Results); } - if (LargeSpeckle (Blob, Row)) + if (LargeSpeckle(Blob)) AddLargeSpeckleTo(Choices); #ifndef GRAPHICS_DISABLED if (classify_enable_adaptive_debugger) - DebugAdaptiveClassifier(Blob, &LineStats, Results); + DebugAdaptiveClassifier(Blob, Results); #endif NumClassesOutput += Choices->length(); if (Choices->length() == 0) { - if (!bln_numericmode) + if (!classify_bln_numeric_mode) tprintf ("Empty classification!\n"); // Should never normally happen. 
Choices = new BLOB_CHOICE_LIST(); BLOB_CHOICE_IT temp_it; temp_it.set_to_list(Choices); - temp_it.add_to_end(new BLOB_CHOICE(0, 50.0f, -20.0f, -1, NULL)); + temp_it.add_to_end(new BLOB_CHOICE(0, 50.0f, -20.0f, -1, -1, NULL)); } delete Results; } /* AdaptiveClassifier */ +// Learns the given word using its chopped_word, seam_array, denorm, +// box_word, best_state, and correct_text to learn both correctly and +// incorrectly segmented blobs. If filename is not NULL, then LearnBlob +// is called and the data will be written to a file for static training. +// Otherwise AdaptToBlob is called for adaption within a document. +// If rejmap is not NULL, then only chars with a rejmap entry of '1' will +// be learned, otherwise all chars with good correct_text are learned. +void Classify::LearnWord(const char* filename, const char *rejmap, + WERD_RES *word) { + int word_len = word->correct_text.size(); + if (word_len == 0) return; + + float* thresholds = NULL; + if (filename == NULL) { + // Adaption mode. + if (!EnableLearning || word->best_choice == NULL) + return; // Can't or won't adapt. -/*---------------------------------------------------------------------------*/ -/** - * This routine implements a preliminary - * version of the rules which are used to decide - * which characters to adapt to. - * - * A word is adapted to if it is in the dictionary or - * if it is a "good" number (no trailing units, etc.). - * It cannot contain broken or merged characters. - * - * Within that word, only letters and digits are - * adapted to (no punctuation). - * - * @param Word word to be adapted to - * @param Row row of text that word is found in - * @param BestChoice best choice for word found by system - * @param BestRawChoice best choice for word found by classifier only - * @param rejmap Reject map - * - * Globals: - * - #EnableLearning TRUE if learning is enabled - * - * @note Exceptions: none - * @note History: Thu Mar 14 07:40:36 1991, DSJ, Created. 
-*/ -void Classify::AdaptToWord(TWERD *Word, - TEXTROW *Row, - const WERD_CHOICE& BestChoice, - const WERD_CHOICE& BestRawChoice, - const char *rejmap) { - TBLOB *Blob; - LINE_STATS LineStats; - FLOAT32 Thresholds[MAX_ADAPTABLE_WERD_SIZE]; - FLOAT32 *Threshold; - const char *map = rejmap; - char map_char = '1'; - const char* BestChoice_string = BestChoice.unichar_string().string(); - const char* BestChoice_lengths = BestChoice.unichar_lengths().string(); - - if (strlen(BestChoice_lengths) > MAX_ADAPTABLE_WERD_SIZE) - return; - - if (EnableLearning) { NumWordsAdaptedTo++; - - #ifndef SECURE_NAMES if (classify_learning_debug_level >= 1) - cprintf ("\n\nAdapting to word = %s\n", - BestChoice.debug_string(unicharset).string()); - #endif - GetLineStatsFromRow(Row, &LineStats); + tprintf("\n\nAdapting to word = %s\n", + word->best_choice->debug_string(unicharset).string()); + thresholds = new float[word_len]; + GetAdaptThresholds(word->rebuild_word, *word->best_choice, + *word->raw_choice, thresholds); + } + int start_blob = 0; + char prev_map_char = '0'; + for (int ch = 0; ch < word_len; ++ch) { + char rej_map_char = rejmap != NULL ? *rejmap++ : '1'; + char next_map_char = ch + 1 < word_len + ? (rejmap != NULL ? *rejmap : '1') + : '0'; + if (word->correct_text[ch].length() > 0 && rej_map_char == '1') { + float threshold = thresholds != NULL ? thresholds[ch] : 0.0f; + LearnPieces(filename, start_blob, word->best_state[ch], + threshold, CST_WHOLE, word->correct_text[ch].string(), word); + if (word->best_state[ch] > 1) { + // Blob includes fragments, so learn them. + for (int frag = 0; frag < word->best_state[ch]; ++frag) { + STRING frag_str = CHAR_FRAGMENT::to_string( + word->correct_text[ch].string(), frag, word->best_state[ch]); + LearnPieces(filename, start_blob + frag, 1, + threshold, CST_FRAGMENT, frag_str.string(), word); + } + // If the next blob is good, make junk with the rightmost fragment. 
+ if (ch + 1 < word_len && word->correct_text[ch + 1].length() > 0 && + next_map_char == '1') { + LearnPieces(filename, start_blob + word->best_state[ch] - 1, + word->best_state[ch + 1] + 1, + threshold, CST_IMPROPER, INVALID_UNICHAR, word); + } + // If the previous blob is good, make junk with the leftmost fragment. + if (ch > 0 && word->correct_text[ch - 1].length() > 0 && + prev_map_char == '1') { + LearnPieces(filename, start_blob - word->best_state[ch - 1], + word->best_state[ch - 1] + 1, + threshold, CST_IMPROPER, INVALID_UNICHAR, word); + } + } + // If the next blob is good, make a join with it. + if (ch + 1 < word_len && word->correct_text[ch + 1].length() > 0 && + next_map_char == '1') { + STRING joined_text = word->correct_text[ch]; + joined_text += word->correct_text[ch + 1]; + LearnPieces(filename, start_blob, + word->best_state[ch] + word->best_state[ch + 1], + threshold, CST_NGRAM, joined_text.string(), word); + } + } + start_blob += word->best_state[ch]; + prev_map_char = rej_map_char; + } + delete [] thresholds; +} // LearnWord. + +// Builds a blob of length fragments, from the word, starting at start, +// and then learns it, as having the given correct_text. +// If filename is not NULL, then LearnBlob +// is called and the data will be written to a file for static training. +// Otherwise AdaptToBlob is called for adaption within a document. +// threshold is a magic number required by AdaptToChar and generated by +// GetAdaptThresholds. +// Although it can be partly inferred from the string, segmentation is +// provided to explicitly clarify the character segmentation. +void Classify::LearnPieces(const char* filename, int start, int length, + float threshold, CharSegmentationType segmentation, + const char* correct_text, WERD_RES *word) { + // TODO(daria) Remove/modify this if when we want to train and/or adapt + // on fragments and/or n-grams. 
+ if (segmentation != CST_WHOLE) + return; - GetAdaptThresholds(Word, - &LineStats, - BestChoice, - BestRawChoice, - Thresholds); + join_pieces(word->chopped_word->blobs, word->seam_array, + start, start + length - 1); + TBLOB* blob = word->chopped_word->blobs; + for (int i = 0; i < start; ++i) + blob = blob->next; + if (strcmp(classify_learn_debug_str.string(), correct_text) == 0) { + const int kSampleSpaceWidth = 500; + if (learn_debug_win_ == NULL) { + learn_debug_win_ = new ScrollView(classify_learn_debug_str.string(), + 100, 400, kSampleSpaceWidth * 2, 200, + kSampleSpaceWidth* 2, 200, true); + } + learn_debug_win_->Clear(); + learn_debug_win_->Pen(64, 64, 64); + learn_debug_win_->Line(-kSampleSpaceWidth, kBlnBaselineOffset, + kSampleSpaceWidth, kBlnBaselineOffset); + learn_debug_win_->Line(-kSampleSpaceWidth, kBlnXHeight + kBlnBaselineOffset, + kSampleSpaceWidth, kBlnXHeight + kBlnBaselineOffset); + blob->plot(learn_debug_win_, ScrollView::GREEN, ScrollView::BROWN); + learn_debug_win_->Update(); + window_wait(learn_debug_win_); + } - for (Blob = Word->blobs, Threshold = Thresholds; Blob != NULL; - Blob = Blob->next, BestChoice_string += *(BestChoice_lengths++), - Threshold++) { - InitIntFX(); - - if (rejmap != NULL) - map_char = *map++; - - assert (map_char == '1' || map_char == '0'); - - if (map_char == '1') { - -// if (unicharset.get_isalpha (BestChoice_string, *BestChoice_lengths) || -// unicharset.get_isdigit (BestChoice_string, *BestChoice_lengths)) { - /* SPECIAL RULE: don't adapt to an 'i' which is the first char - in a word because they are too ambiguous with 'I'. - The new adaptation rules should account for this - automatically, since they exclude ambiguous words from - adaptation, but for safety's sake we'll leave the rule in. - Also, don't adapt to i's that have only 1 blob in them - because this creates too much ambiguity for broken - characters. 
*/ - if (*BestChoice_lengths == 1 && - (*BestChoice_string == 'i' - || (il1_adaption_test && *BestChoice_string == 'I' && - (Blob->next == NULL || - unicharset.get_islower (BestChoice_string + *BestChoice_lengths, - *(BestChoice_lengths + 1))))) - && (Blob == Word->blobs - || (!(unicharset.get_isalpha (BestChoice_string - - *(BestChoice_lengths - 1), - *(BestChoice_lengths - 1)) || - unicharset.get_isdigit (BestChoice_string - - *(BestChoice_lengths - 1), - *(BestChoice_lengths - 1)))) - - || (!il1_adaption_test && NumOutlinesInBlob(Blob) != 2))) { - if (classify_learning_debug_level >= 1) - cprintf ("Rejecting char = %s\n", unicharset.id_to_unichar( - unicharset.unichar_to_id(BestChoice_string, - *BestChoice_lengths))); - } - else { - #ifndef SECURE_NAMES - if (classify_learning_debug_level >= 1) - cprintf ("Adapting to char = %s, thr= %g\n", - unicharset.id_to_unichar( - unicharset.unichar_to_id(BestChoice_string, - *BestChoice_lengths)), - *Threshold); - #endif - AdaptToChar(Blob, &LineStats, - unicharset.unichar_to_id(BestChoice_string, - *BestChoice_lengths), - *Threshold); - } -// } -// else -// AdaptToPunc(Blob, &LineStats, -// unicharset.unichar_to_id(BestChoice_string, -// *BestChoice_lengths), -// *Threshold); - } + if (filename != NULL) { + classify_norm_method.set_value(character); // force char norm spc 30/11/93 + tess_bn_matching.set_value(false); // turn it off + tess_cn_matching.set_value(false); + LearnBlob(feature_defs_, filename, blob, word->denorm, correct_text); + } else { + if (!unicharset.contains_unichar(correct_text)) { + unicharset.unichar_insert(correct_text); + // TODO(rays) We may need to worry about exceeding MAX_NUM_CLASSES. + // if (unicharset_boxes->size() > MAX_NUM_CLASSES) ... 
 } + UNICHAR_ID class_id = unicharset.unichar_to_id(correct_text); if (classify_learning_debug_level >= 1) - cprintf ("\n"); + tprintf("Adapting to char = %s, thr= %g\n", + unicharset.id_to_unichar(class_id), threshold); + AdaptToChar(blob, class_id, threshold); } -} /* AdaptToWord */ + break_pieces(blob, word->seam_array, start, start + length - 1); +} // LearnPieces. /*---------------------------------------------------------------------------*/ /** @@ -607,21 +419,20 @@ void Classify::EndAdaptiveClassifier() { * This routine reads in the training * information needed by the adaptive classifier * and saves it into global variables. - * - * Globals: - * - BuiltInTemplatesFile file to get built-in temps from - * - BuiltInCutoffsFile file to get avg. feat per class from - * - #PreTrainedTemplates pre-trained configs and protos - * - #AdaptedTemplates templates adapted to current page - * - CharNormCutoffs avg # of features per class - * - #AllProtosOn dummy proto mask with all bits 1 - * - #AllConfigsOn dummy config mask with all bits 1 - * - #classify_use_pre_adapted_templates enables use of pre-adapted templates - * - * @note Exceptions: none - * @note History: Mon Mar 11 12:49:34 1991, DSJ, Created. + * Parameters: + * load_pre_trained_templates Indicates whether the pre-trained + * templates (inttemp, normproto and pffmtable components) + * should be loaded. Should only be set to true if the + * necessary classifier components are present in the + * [lang].traineddata file. + * Globals: + * BuiltInTemplatesFile file to get built-in temps from + * BuiltInCutoffsFile file to get avg. feat per class from + * classify_use_pre_adapted_templates + * enables use of pre-adapted templates + * @note History: Mon Mar 11 12:49:34 1991, DSJ, Created. 
*/ -void Classify::InitAdaptiveClassifier() { +void Classify::InitAdaptiveClassifier(bool load_pre_trained_templates) { if (!classify_enable_adaptive_matcher) return; if (AllProtosOn != NULL) @@ -629,31 +440,27 @@ void Classify::InitAdaptiveClassifier() { // If there is no language_data_path_prefix, the classifier will be // adaptive only. - if (language_data_path_prefix.length() > 0) { - if (!tessdata_manager.SeekToStart(TESSDATA_INTTEMP)) { - inttemp_loaded_ = false; - } else { - PreTrainedTemplates = - ReadIntTemplates(tessdata_manager.GetDataFilePtr()); - if (global_tessdata_manager_debug_level) tprintf("Loaded inttemp\n"); - - ASSERT_HOST(tessdata_manager.SeekToStart(TESSDATA_PFFMTABLE)); - ReadNewCutoffs(tessdata_manager.GetDataFilePtr(), - tessdata_manager.GetEndOffset(TESSDATA_PFFMTABLE), - CharNormCutoffs); - if (global_tessdata_manager_debug_level) tprintf("Loaded pffmtable\n"); - - ASSERT_HOST(tessdata_manager.SeekToStart(TESSDATA_NORMPROTO)); - NormProtos = - ReadNormProtos(tessdata_manager.GetDataFilePtr(), - tessdata_manager.GetEndOffset(TESSDATA_NORMPROTO)); - if (global_tessdata_manager_debug_level) tprintf("Loaded normproto\n"); - - inttemp_loaded_ = true; - } + if (language_data_path_prefix.length() > 0 && + load_pre_trained_templates) { + ASSERT_HOST(tessdata_manager.SeekToStart(TESSDATA_INTTEMP)); + PreTrainedTemplates = + ReadIntTemplates(tessdata_manager.GetDataFilePtr()); + if (tessdata_manager.DebugLevel() > 0) tprintf("Loaded inttemp\n"); + + ASSERT_HOST(tessdata_manager.SeekToStart(TESSDATA_PFFMTABLE)); + ReadNewCutoffs(tessdata_manager.GetDataFilePtr(), + tessdata_manager.GetEndOffset(TESSDATA_PFFMTABLE), + CharNormCutoffs); + if (tessdata_manager.DebugLevel() > 0) tprintf("Loaded pffmtable\n"); + + ASSERT_HOST(tessdata_manager.SeekToStart(TESSDATA_NORMPROTO)); + NormProtos = + ReadNormProtos(tessdata_manager.GetDataFilePtr(), + tessdata_manager.GetEndOffset(TESSDATA_NORMPROTO)); + if (tessdata_manager.DebugLevel() > 0) tprintf("Loaded 
normproto\n"); } - InitIntegerMatcher(); + im_.Init(&classify_debug_level, classify_integer_matcher_multiplier); InitIntegerFX(); AllProtosOn = NewBitVector(MAX_NUM_PROTOS); @@ -668,6 +475,10 @@ void Classify::InitAdaptiveClassifier() { zero_all_bits(AllProtosOff, WordsInVectorOfSize(MAX_NUM_PROTOS)); zero_all_bits(AllConfigsOff, WordsInVectorOfSize(MAX_NUM_CONFIGS)); + for (int i = 0; i < MAX_NUM_CLASSES; i++) { + BaselineCutoffs[i] = 0; + } + if (classify_use_pre_adapted_templates) { FILE *File; STRING Filename; @@ -700,14 +511,17 @@ void Classify::InitAdaptiveClassifier() { } /* InitAdaptiveClassifier */ void Classify::ResetAdaptiveClassifier() { + if (classify_learning_debug_level > 0) { + tprintf("Resetting adaptive classifier (NumAdaptationsFailed=%d)\n", + NumAdaptationsFailed); + } free_adapted_templates(AdaptedTemplates); AdaptedTemplates = NULL; + NumAdaptationsFailed = 0; } -} // namespace tesseract /*---------------------------------------------------------------------------*/ -namespace tesseract { /** * Print to File the statistics which have * been gathered for the adaptive matcher. @@ -804,7 +618,6 @@ void Classify::SettupPass2() { * config in that class. * * @param Blob blob to model new class after - * @param LineStats statistics for text row blob is in * @param ClassId id of the class to be initialized * @param Class adapted class to be initialized * @param Templates adapted templates to add new class to @@ -818,7 +631,6 @@ void Classify::SettupPass2() { * @note History: Thu Mar 14 12:49:39 1991, DSJ, Created. 
*/ void Classify::InitAdaptedClass(TBLOB *Blob, - LINE_STATS *LineStats, CLASS_ID ClassId, ADAPT_CLASS Class, ADAPT_TEMPLATES Templates) { @@ -832,7 +644,7 @@ void Classify::InitAdaptedClass(TBLOB *Blob, TEMP_CONFIG Config; classify_norm_method.set_value(baseline); - Features = ExtractOutlineFeatures (Blob, LineStats); + Features = ExtractOutlineFeatures(Blob); NumFeatures = Features->NumFeatures; if (NumFeatures > UNLIKELY_NUM_FEAT || NumFeatures <= 0) { FreeFeatureSet(Features); @@ -869,7 +681,8 @@ void Classify::InitAdaptedClass(TBLOB *Blob, SET_BIT (Config->Protos, Pid); ConvertProto(Proto, Pid, IClass); - AddProtoToProtoPruner(Proto, Pid, IClass); + AddProtoToProtoPruner(Proto, Pid, IClass, + classify_learning_debug_level >= 2); Class->TempProtos = push (Class->TempProtos, TempProto); } @@ -886,7 +699,6 @@ void Classify::InitAdaptedClass(TBLOB *Blob, if (IsEmptyAdaptedClass(Class)) (Templates->NumNonEmptyClasses)++; } /* InitAdaptedClass */ -} // namespace tesseract /*---------------------------------------------------------------------------*/ @@ -911,27 +723,25 @@ void Classify::InitAdaptedClass(TBLOB *Blob, * @note Exceptions: none * @note History: Tue Mar 12 17:55:18 1991, DSJ, Created. 
*/ -int GetAdaptiveFeatures(TBLOB *Blob, - LINE_STATS *LineStats, - INT_FEATURE_ARRAY IntFeatures, - FEATURE_SET *FloatFeatures) { +int Classify::GetAdaptiveFeatures(TBLOB *Blob, + INT_FEATURE_ARRAY IntFeatures, + FEATURE_SET *FloatFeatures) { FEATURE_SET Features; int NumFeatures; classify_norm_method.set_value(baseline); - Features = ExtractPicoFeatures (Blob, LineStats); + Features = ExtractPicoFeatures(Blob); NumFeatures = Features->NumFeatures; if (NumFeatures > UNLIKELY_NUM_FEAT) { FreeFeatureSet(Features); - return (0); + return 0; } ComputeIntFeatures(Features, IntFeatures); *FloatFeatures = Features; - return (NumFeatures); - + return NumFeatures; } /* GetAdaptiveFeatures */ @@ -939,7 +749,6 @@ int GetAdaptiveFeatures(TBLOB *Blob, Private Code -----------------------------------------------------------------------------*/ /*---------------------------------------------------------------------------*/ -namespace tesseract { /** * Return TRUE if the specified word is * acceptable for adaptation. 
@@ -958,18 +767,20 @@ int Classify::AdaptableWord(TWERD *Word, const WERD_CHOICE &BestChoiceWord, const WERD_CHOICE &RawChoiceWord) { int BestChoiceLength = BestChoiceWord.length(); + float adaptable_score = + getDict().segment_penalty_dict_case_ok + ADAPTABLE_WERD_ADJUSTMENT; return ( // rules that apply in general - simplest to compute first BestChoiceLength > 0 && - BestChoiceLength == NumBlobsIn (Word) && + BestChoiceLength == Word->NumBlobs() && BestChoiceLength <= MAX_ADAPTABLE_WERD_SIZE && ( (classify_enable_new_adapt_rules && - getDict().CurrentBestChoiceAdjustFactor() <= ADAPTABLE_WERD && - getDict().AlternativeChoicesWorseThan(ADAPTABLE_WERD) && + getDict().CurrentBestChoiceAdjustFactor() <= adaptable_score && + getDict().AlternativeChoicesWorseThan(adaptable_score) && getDict().CurrentBestChoiceIs(BestChoiceWord)) || (!classify_enable_new_adapt_rules && // old rules BestChoiceLength == RawChoiceWord.length() && ((getDict().valid_word_or_number(BestChoiceWord) && - Context::case_ok(BestChoiceWord, getDict().getUnicharset())))))); + getDict().case_ok(BestChoiceWord, getDict().getUnicharset())))))); } /*---------------------------------------------------------------------------*/ @@ -988,10 +799,9 @@ int Classify::AdaptableWord(TWERD *Word, * @note Exceptions: none * @note History: Thu Mar 14 09:36:03 1991, DSJ, Created. 
*/ - void Classify::AdaptToChar(TBLOB *Blob, - LINE_STATS *LineStats, - CLASS_ID ClassId, - FLOAT32 Threshold) { +void Classify::AdaptToChar(TBLOB *Blob, + CLASS_ID ClassId, + FLOAT32 Threshold) { int NumFeatures; INT_FEATURE_ARRAY IntFeatures; INT_RESULT_STRUCT IntResult; @@ -1008,20 +818,20 @@ int Classify::AdaptableWord(TWERD *Word, Class = AdaptedTemplates->Class[ClassId]; assert(Class != NULL); if (IsEmptyAdaptedClass(Class)) { - InitAdaptedClass(Blob, LineStats, ClassId, Class, AdaptedTemplates); + InitAdaptedClass(Blob, ClassId, Class, AdaptedTemplates); } else { IClass = ClassForClassId (AdaptedTemplates->Templates, ClassId); - NumFeatures = GetAdaptiveFeatures (Blob, LineStats, - IntFeatures, &FloatFeatures); + NumFeatures = GetAdaptiveFeatures(Blob, IntFeatures, &FloatFeatures); if (NumFeatures <= 0) return; - SetBaseLineMatch(); - IntegerMatcher (IClass, AllProtosOn, AllConfigsOn, - NumFeatures, NumFeatures, IntFeatures, 0, - &IntResult, NO_DEBUG); + im_.SetBaseLineMatch(); + im_.Match(IClass, AllProtosOn, AllConfigsOn, + NumFeatures, NumFeatures, IntFeatures, 0, + &IntResult, classify_adapt_feature_threshold, + NO_DEBUG, matcher_debug_separate_windows); SetAdaptiveThreshold(Threshold); @@ -1036,13 +846,17 @@ int Classify::AdaptableWord(TWERD *Word, TempConfig = TempConfigFor (Class, IntResult.Config); IncreaseConfidence(TempConfig); + if (TempConfig->NumTimesSeen > Class->MaxNumTimesSeen) { + Class->MaxNumTimesSeen = TempConfig->NumTimesSeen; + } if (classify_learning_debug_level >= 1) cprintf ("Increasing reliability of temp config %d to %d.\n", IntResult.Config, TempConfig->NumTimesSeen); - if (TempConfigReliable (TempConfig)) - MakePermanent (AdaptedTemplates, ClassId, IntResult.Config, - Blob, LineStats); + if (TempConfigReliable(ClassId, TempConfig)) { + MakePermanent(AdaptedTemplates, ClassId, IntResult.Config, Blob); + UpdateAmbigsGroup(ClassId, Blob); + } } else { if (classify_learning_debug_level >= 1) @@ -1055,24 +869,27 @@ int 
Classify::AdaptableWord(TWERD *Word, FloatFeatures); if (NewTempConfigId >= 0 && - TempConfigReliable (TempConfigFor (Class, NewTempConfigId))) - MakePermanent (AdaptedTemplates, ClassId, NewTempConfigId, - Blob, LineStats); + TempConfigReliable(ClassId, TempConfigFor(Class, NewTempConfigId))) { + MakePermanent(AdaptedTemplates, ClassId, NewTempConfigId, Blob); + UpdateAmbigsGroup(ClassId, Blob); + } #ifndef GRAPHICS_DISABLED if (classify_learning_debug_level >= 1) { - IntegerMatcher (IClass, AllProtosOn, AllConfigsOn, - NumFeatures, NumFeatures, IntFeatures, 0, - &IntResult, NO_DEBUG); + im_.Match(IClass, AllProtosOn, AllConfigsOn, + NumFeatures, NumFeatures, IntFeatures, 0, + &IntResult, classify_adapt_feature_threshold, + NO_DEBUG, matcher_debug_separate_windows); cprintf ("Best match to temp config %d = %4.1f%%.\n", IntResult.Config, (1.0 - IntResult.Rating) * 100.0); if (classify_learning_debug_level >= 2) { uinT32 ConfigMask; ConfigMask = 1 << IntResult.Config; ShowMatchDisplay(); - IntegerMatcher (IClass, AllProtosOn, (BIT_VECTOR)&ConfigMask, - NumFeatures, NumFeatures, IntFeatures, 0, - &IntResult, 6 | 0x19); + im_.Match(IClass, AllProtosOn, (BIT_VECTOR)&ConfigMask, + NumFeatures, NumFeatures, IntFeatures, 0, + &IntResult, classify_adapt_feature_threshold, + 6 | 0x19, matcher_debug_separate_windows); UpdateMatchDisplay(); GetClassToDebug ("Adapting"); } @@ -1098,14 +915,13 @@ int Classify::AdaptableWord(TWERD *Word, * @note History: Thu Mar 14 09:36:03 1991, DSJ, Created. 
*/ void Classify::AdaptToPunc(TBLOB *Blob, - LINE_STATS *LineStats, CLASS_ID ClassId, FLOAT32 Threshold) { ADAPT_RESULTS *Results = new ADAPT_RESULTS(); int i; Results->Initialize(); - CharNormClassifier(Blob, LineStats, PreTrainedTemplates, Results); + CharNormClassifier(Blob, PreTrainedTemplates, Results); RemoveBadMatches(Results); if (Results->NumMatches != 1) { @@ -1114,17 +930,16 @@ void Classify::AdaptToPunc(TBLOB *Blob, unicharset.id_to_unichar(ClassId)); for (i = 0; i < Results->NumMatches; i++) - cprintf ("%s", unicharset.id_to_unichar(Results->Classes[i])); - cprintf (")\n"); + cprintf("%s", unicharset.id_to_unichar(Results->match[i].id)); + cprintf(")\n"); } } else { - #ifndef SECURE_NAMES if (classify_learning_debug_level >= 1) cprintf ("Adapting to punc = %s, thr= %g\n", unicharset.id_to_unichar(ClassId), Threshold); #endif - AdaptToChar(Blob, LineStats, ClassId, Threshold); + AdaptToChar(Blob, ClassId, Threshold); } delete Results; } /* AdaptToPunc */ @@ -1145,49 +960,47 @@ void Classify::AdaptToPunc(TBLOB *Blob, * Globals: * - #matcher_bad_match_pad defines limits of an acceptable match * - * @param[out] Results results to add new result to - * @param ClassId class of new result - * @param Rating rating of new result - * @param ConfigId config id of new result + * @param[out] results results to add new result to + * @param class_id class of new result + * @param rating rating of new result + * @param config_id config id of new result + * @param config2_id config id of 2nd choice result * * @note Exceptions: none * @note History: Tue Mar 12 18:19:29 1991, DSJ, Created. 
*/ -void Classify::AddNewResult(ADAPT_RESULTS *Results, - CLASS_ID ClassId, - FLOAT32 Rating, - int ConfigId) { - FLOAT32 OldRating; - INT_CLASS_STRUCT* CharClass = NULL; - - OldRating = Results->Ratings[ClassId]; - if (Rating <= Results->BestRating + matcher_bad_match_pad && Rating < OldRating) { - if (!unicharset.get_fragment(ClassId)) { - Results->HasNonfragment = true; - } - Results->Ratings[ClassId] = Rating; - if (ClassId != NO_CLASS) - CharClass = ClassForClassId(PreTrainedTemplates, ClassId); - if (CharClass != NULL) - Results->Configs[ClassId] = ConfigId; - else - Results->Configs[ClassId] = ~0; - - if (Rating < Results->BestRating && - // Ensure that fragments do not affect best rating, class and config. - // This is needed so that at least one non-fragmented character is - // always present in the Results. - // TODO(daria): verify that this helps accuracy and does not - // hurt performance. - !unicharset.get_fragment(ClassId)) { - Results->BestRating = Rating; - Results->BestClass = ClassId; - Results->BestConfig = ConfigId; - } +void Classify::AddNewResult(ADAPT_RESULTS *results, + CLASS_ID class_id, + FLOAT32 rating, + int config_id, + int config2_id) { + ScoredClass *old_match = FindScoredUnichar(results, class_id); + ScoredClass match = {class_id, rating, config_id, config2_id}; + + if (rating > results->best_match.rating + matcher_bad_match_pad || + (old_match && rating >= old_match->rating)) + return; + + if (!unicharset.get_fragment(class_id)) + results->HasNonfragment = true; - /* if this is first rating for class, add to list of classes matched */ - if (OldRating == WORST_POSSIBLE_RATING) - Results->Classes[Results->NumMatches++] = ClassId; + if (class_id == NO_CLASS || + !ClassForClassId(PreTrainedTemplates, class_id)) + match.config = ~0; + + if (old_match) + old_match->rating = rating; + else + results->match[results->NumMatches++] = match; + + if (rating < results->best_match.rating && + // Ensure that fragments do not affect best rating, 
class and config. + // This is needed so that at least one non-fragmented character is + // always present in the results. + // TODO(daria): verify that this helps accuracy and does not + // hurt performance. + !unicharset.get_fragment(class_id)) { + results->best_match = match; } } /* AddNewResult */ @@ -1204,7 +1017,6 @@ void Classify::AddNewResult(ADAPT_RESULTS *Results, * - #AllConfigsOn mask that enables all configs * * @param Blob blob to be classified - * @param LineStats statistics for text line Blob is in * @param Templates built-in templates to classify against * @param Ambiguities array of class id's to match against * @param[out] Results place to put match results @@ -1213,7 +1025,6 @@ void Classify::AddNewResult(ADAPT_RESULTS *Results, * @note History: Tue Mar 12 19:40:36 1991, DSJ, Created. */ void Classify::AmbigClassifier(TBLOB *Blob, - LINE_STATS *LineStats, INT_TEMPLATES Templates, UNICHAR_ID *Ambiguities, ADAPT_RESULTS *Results) { @@ -1225,10 +1036,8 @@ void Classify::AmbigClassifier(TBLOB *Blob, AmbigClassifierCalls++; - NumFeatures = GetCharNormFeatures (Blob, LineStats, - Templates, - IntFeatures, CharNormArray, - &(Results->BlobLength)); + NumFeatures = GetCharNormFeatures(Blob, Templates, IntFeatures, CharNormArray, + &(Results->BlobLength), NULL); if (NumFeatures <= 0) return; @@ -1238,18 +1047,21 @@ void Classify::AmbigClassifier(TBLOB *Blob, while (*Ambiguities >= 0) { ClassId = *Ambiguities; - SetCharNormMatch(); - IntegerMatcher (ClassForClassId (Templates, ClassId), - AllProtosOn, AllConfigsOn, - Results->BlobLength, NumFeatures, IntFeatures, - CharNormArray[ClassId], &IntResult, NO_DEBUG); + im_.SetCharNormMatch(classify_integer_matcher_multiplier); + im_.Match(ClassForClassId(Templates, ClassId), + AllProtosOn, AllConfigsOn, + Results->BlobLength, NumFeatures, IntFeatures, + CharNormArray[ClassId], &IntResult, + classify_adapt_feature_threshold, NO_DEBUG, + matcher_debug_separate_windows); if (matcher_debug_level >= 2) cprintf 
("%s-%-2d %2.0f ", unicharset.id_to_unichar(ClassId), IntResult.Config, IntResult.Rating * 100.0); - AddNewResult (Results, ClassId, IntResult.Rating, IntResult.Config); + AddNewResult(Results, ClassId, IntResult.Rating, + IntResult.Config, IntResult.Config2); Ambiguities++; @@ -1270,8 +1082,11 @@ void Classify::MasterMatcher(INT_TEMPLATES templates, ADAPT_CLASS* classes, int debug, int num_classes, + const TBOX& blob_box, CLASS_PRUNER_RESULTS results, ADAPT_RESULTS* final_results) { + int top = blob_box.top(); + int bottom = blob_box.bottom(); for (int c = 0; c < num_classes; c++) { CLASS_ID class_id = results[c].Class; INT_RESULT_STRUCT& int_result = results[c].IMResult; @@ -1280,14 +1095,15 @@ void Classify::MasterMatcher(INT_TEMPLATES templates, BIT_VECTOR configs = classes != NULL ? classes[class_id]->PermConfigs : AllConfigsOn; - IntegerMatcher(ClassForClassId(templates, class_id), - protos, configs, final_results->BlobLength, - num_features, features, norm_factors[class_id], - &int_result, debug); + im_.Match(ClassForClassId(templates, class_id), + protos, configs, final_results->BlobLength, + num_features, features, norm_factors[class_id], + &int_result, classify_adapt_feature_threshold, debug, + matcher_debug_separate_windows); // Compute class feature corrections. double miss_penalty = tessedit_class_miss_scale * int_result.FeatureMisses; - if (matcher_debug_level >= 2 || tord_display_ratings > 1) { + if (matcher_debug_level >= 2 || classify_debug_level > 1) { cprintf("%s-%-2d %2.1f(CP%2.1f, IM%2.1f + MP%2.1f) ", unicharset.id_to_unichar(class_id), int_result.Config, (int_result.Rating + miss_penalty) * 100.0, @@ -1296,10 +1112,27 @@ void Classify::MasterMatcher(INT_TEMPLATES templates, if (c % 4 == 3) cprintf ("\n"); } + // Penalize non-alnums for being vertical misfits. 
+ if (!unicharset.get_isalpha(class_id) && + !unicharset.get_isdigit(class_id) && + norm_factors[class_id] != 0 && classify_misfit_junk_penalty > 0.0) { + int min_bottom, max_bottom, min_top, max_top; + unicharset.get_top_bottom(class_id, &min_bottom, &max_bottom, + &min_top, &max_top); + if (classify_debug_level > 1) { + tprintf("top=%d, vs [%d, %d], bottom=%d, vs [%d, %d]\n", + top, min_top, max_top, bottom, min_bottom, max_bottom); + } + if (top < min_top || top > max_top || + bottom < min_bottom || bottom > max_bottom) { + miss_penalty += classify_misfit_junk_penalty; + } + } int_result.Rating += miss_penalty; if (int_result.Rating > WORST_POSSIBLE_RATING) int_result.Rating = WORST_POSSIBLE_RATING; - AddNewResult(final_results, class_id, int_result.Rating, int_result.Config); + AddNewResult(final_results, class_id, int_result.Rating, + int_result.Config, int_result.Config2); // Add unichars ambiguous with class_id with the same rating as class_id. if (use_definite_ambigs_for_classifier) { const UnicharIdVector *definite_ambigs = @@ -1307,31 +1140,34 @@ void Classify::MasterMatcher(INT_TEMPLATES templates, int ambigs_size = (definite_ambigs == NULL) ? 0 : definite_ambigs->size(); for (int ambig = 0; ambig < ambigs_size; ++ambig) { UNICHAR_ID ambig_class_id = (*definite_ambigs)[ambig]; + // Do not include ambig_class_id if it has permanent adapted templates. + if (classes[class_id]->NumPermConfigs > 0) continue; + ScoredClass* ambig_match = + FindScoredUnichar(final_results, ambig_class_id); if (matcher_debug_level >= 3) { tprintf("class: %d definite ambig: %d rating: old %.4f new %.4f\n", class_id, ambig_class_id, - final_results->Ratings[ambig_class_id], int_result.Rating); + ambig_match ? ambig_match->rating : WORST_POSSIBLE_RATING, + int_result.Rating); } - if (final_results->Ratings[ambig_class_id] < WORST_POSSIBLE_RATING) { + if (ambig_match) { // ambig_class_id was already added to final_results, // so just need to modify the rating. 
- if (int_result.Rating < final_results->Ratings[ambig_class_id]) { - final_results->Ratings[ambig_class_id] = int_result.Rating; + if (int_result.Rating < ambig_match->rating) { + ambig_match->rating = int_result.Rating; } } else { - AddNewResult(final_results, ambig_class_id, - int_result.Rating, int_result.Config); + AddNewResult(final_results, ambig_class_id, int_result.Rating, + int_result.Config, int_result.Config2); } } } } - if (matcher_debug_level >= 2 || tord_display_ratings > 1) + if (matcher_debug_level >= 2 || classify_debug_level > 1) cprintf("\n"); } -} // namespace tesseract /*---------------------------------------------------------------------------*/ -namespace tesseract { /** * This routine extracts baseline normalized features * from the unknown character and matches them against the @@ -1342,7 +1178,6 @@ namespace tesseract { * - BaselineCutoffs expected num features for each class * * @param Blob blob to be classified - * @param LineStats statistics for text line Blob is in * @param Templates current set of adapted templates * @param Results place to put match results * @@ -1351,7 +1186,6 @@ namespace tesseract { * @note History: Tue Mar 12 19:38:03 1991, DSJ, Created. 
*/ UNICHAR_ID *Classify::BaselineClassifier(TBLOB *Blob, - LINE_STATS *LineStats, ADAPT_TEMPLATES Templates, ADAPT_RESULTS *Results) { int NumFeatures; @@ -1362,34 +1196,31 @@ UNICHAR_ID *Classify::BaselineClassifier(TBLOB *Blob, BaselineClassifierCalls++; - NumFeatures = GetBaselineFeatures (Blob, LineStats, - Templates->Templates, - IntFeatures, CharNormArray, - &(Results->BlobLength)); + NumFeatures = GetBaselineFeatures( + Blob, Templates->Templates, IntFeatures, CharNormArray, + &(Results->BlobLength)); if (NumFeatures <= 0) return NULL; - NumClasses = ClassPruner (Templates->Templates, NumFeatures, - IntFeatures, CharNormArray, - BaselineCutoffs, Results->CPResults, - matcher_debug_flags); + NumClasses = ClassPruner(Templates->Templates, NumFeatures, IntFeatures, + CharNormArray, BaselineCutoffs, Results->CPResults); NumBaselineClassesTried += NumClasses; - if (matcher_debug_level >= 2 || tord_display_ratings > 1) + if (matcher_debug_level >= 2 || classify_debug_level > 1) cprintf ("BL Matches = "); - SetBaseLineMatch(); + im_.SetBaseLineMatch(); MasterMatcher(Templates->Templates, NumFeatures, IntFeatures, CharNormArray, Templates->Class, matcher_debug_flags, NumClasses, - Results->CPResults, Results); + Blob->bounding_box(), Results->CPResults, Results); - ClassId = Results->BestClass; + ClassId = Results->best_match.id; if (ClassId == NO_CLASS) return (NULL); /* this is a bug - maybe should return "" */ - return (Templates->Class[ClassId]->Config[Results->BestConfig].Perm); + return Templates->Class[ClassId]->Config[Results->best_match.config].Perm; } /* BaselineClassifier */ @@ -1401,7 +1232,6 @@ UNICHAR_ID *Classify::BaselineClassifier(TBLOB *Blob, * are added to Results. 
* * @param Blob blob to be classified - * @param LineStats statistics for text line Blob is in * @param Templates templates to classify unknown against * @param Results place to put match results * @@ -1414,7 +1244,6 @@ UNICHAR_ID *Classify::BaselineClassifier(TBLOB *Blob, * @note History: Tue Mar 12 16:02:52 1991, DSJ, Created. */ int Classify::CharNormClassifier(TBLOB *Blob, - LINE_STATS *LineStats, INT_TEMPLATES Templates, ADAPT_RESULTS *Results) { int NumFeatures; @@ -1424,26 +1253,22 @@ int Classify::CharNormClassifier(TBLOB *Blob, CharNormClassifierCalls++; - NumFeatures = GetCharNormFeatures(Blob, LineStats, - Templates, - IntFeatures, CharNormArray, - &(Results->BlobLength)); + NumFeatures = GetCharNormFeatures(Blob, Templates, IntFeatures, CharNormArray, + &(Results->BlobLength), NULL); if (NumFeatures <= 0) return 0; - NumClasses = ClassPruner(Templates, NumFeatures, - IntFeatures, CharNormArray, - CharNormCutoffs, Results->CPResults, - matcher_debug_flags); + NumClasses = ClassPruner(Templates, NumFeatures, IntFeatures, CharNormArray, + CharNormCutoffs, Results->CPResults); if (tessedit_single_match && NumClasses > 1) NumClasses = 1; NumCharNormClassesTried += NumClasses; - SetCharNormMatch(); + im_.SetCharNormMatch(classify_integer_matcher_multiplier); MasterMatcher(Templates, NumFeatures, IntFeatures, CharNormArray, NULL, matcher_debug_flags, NumClasses, - Results->CPResults, Results); + Blob->bounding_box(), Results->CPResults, Results); return NumFeatures; } /* CharNormClassifier */ @@ -1470,47 +1295,47 @@ void Classify::ClassifyAsNoise(ADAPT_RESULTS *Results) { Rating *= Rating; Rating /= 1.0 + Rating; - AddNewResult (Results, NO_CLASS, Rating, 0); + AddNewResult(Results, NO_CLASS, Rating, -1, -1); } /* ClassifyAsNoise */ } // namespace tesseract /*---------------------------------------------------------------------------*/ -/** - * This routine gets the ratings for the 2 specified classes - * from a global variable (CurrentRatings) and returns: - 
* - -1 if Rating1 < Rating2 - * - 0 if Rating1 = Rating2 - * - 1 if Rating1 > Rating2 - * - * @param arg1 - * @param arg2 classes whose ratings are to be compared - * - * Globals: - * - CurrentRatings contains actual ratings for each class - * - * @return Order of classes based on their ratings (see above). - * @note Exceptions: none - * @note History: Tue Mar 12 14:18:31 1991, DSJ, Created. - */ -int CompareCurrentRatings(const void *arg1, - const void *arg2) { - FLOAT32 Rating1, Rating2; - CLASS_ID *Class1 = (CLASS_ID *) arg1; - CLASS_ID *Class2 = (CLASS_ID *) arg2; - - Rating1 = CurrentRatings[*Class1]; - Rating2 = CurrentRatings[*Class2]; - - if (Rating1 < Rating2) - return (-1); - else if (Rating1 > Rating2) - return (1); - else - return (0); +// Return a pointer to the scored unichar in results, or NULL if not present. +ScoredClass *FindScoredUnichar(ADAPT_RESULTS *results, UNICHAR_ID id) { + for (int i = 0; i < results->NumMatches; i++) { + if (results->match[i].id == id) + return &results->match[i]; + } + return NULL; +} + +// Retrieve the current rating for a unichar id if we have rated it, defaulting +// to WORST_POSSIBLE_RATING. +ScoredClass ScoredUnichar(ADAPT_RESULTS *results, UNICHAR_ID id) { + ScoredClass poor_result = {id, WORST_POSSIBLE_RATING, -1, -1}; + ScoredClass *entry = FindScoredUnichar(results, id); + return (entry == NULL) ? poor_result : *entry; +} + +// Compare character classes by rating as for qsort(3). +// For repeatability, use character class id as a tie-breaker. 
+int CompareByRating(const void *arg1, // ScoredClass *class1 + const void *arg2) { // ScoredClass *class2 + const ScoredClass *class1 = (const ScoredClass *)arg1; + const ScoredClass *class2 = (const ScoredClass *)arg2; -} /* CompareCurrentRatings */ + if (class1->rating < class2->rating) + return -1; + else if (class1->rating > class2->rating) + return 1; + if (class1->id < class2->id) + return -1; + else if (class1->id > class2->id) + return 1; + return 0; +} /*---------------------------------------------------------------------------*/ namespace tesseract { @@ -1523,17 +1348,16 @@ namespace tesseract { void Classify::ConvertMatchesToChoices(ADAPT_RESULTS *Results, BLOB_CHOICE_LIST *Choices) { assert(Choices != NULL); - int i; - CLASS_ID NextMatch; FLOAT32 Rating; FLOAT32 Certainty; BLOB_CHOICE_IT temp_it; bool contains_nonfrag = false; temp_it.set_to_list(Choices); int choices_length = 0; - for (i = 0; i < Results->NumMatches; i++) { - NextMatch = Results->Classes[i]; - bool current_is_frag = (unicharset.get_fragment(NextMatch) != NULL); + + for (int i = 0; i < Results->NumMatches; i++) { + ScoredClass next = Results->match[i]; + bool current_is_frag = (unicharset.get_fragment(next.id) != NULL); if (temp_it.length()+1 == MAX_MATCHES && !contains_nonfrag && current_is_frag) { continue; // look for a non-fragmented character to fill the @@ -1547,13 +1371,13 @@ void Classify::ConvertMatchesToChoices(ADAPT_RESULTS *Results, Certainty = -20; Rating = 100; // should be -certainty * real_blob_length } else { - Rating = Certainty = Results->Ratings[NextMatch]; + Rating = Certainty = next.rating; Rating *= rating_scale * Results->BlobLength; - Certainty *= -certainty_scale; + Certainty *= -(getDict().certainty_scale); } - temp_it.add_to_end(new BLOB_CHOICE(NextMatch, Rating, Certainty, - Results->Configs[NextMatch], - unicharset.get_script(NextMatch))); + temp_it.add_to_end(new BLOB_CHOICE(next.id, Rating, Certainty, + next.config, next.config2, + 
unicharset.get_script(next.id))); contains_nonfrag |= !current_is_frag; // update contains_nonfrag choices_length++; if (choices_length >= MAX_MATCHES) break; @@ -1567,7 +1391,6 @@ void Classify::ConvertMatchesToChoices(ADAPT_RESULTS *Results, /** * * @param Blob blob whose classification is being debugged - * @param LineStats statistics for text line blob is in * @param Results results of match being debugged * * Globals: none @@ -1576,12 +1399,11 @@ void Classify::ConvertMatchesToChoices(ADAPT_RESULTS *Results, * @note History: Wed Mar 13 16:44:41 1991, DSJ, Created. */ void Classify::DebugAdaptiveClassifier(TBLOB *Blob, - LINE_STATS *LineStats, ADAPT_RESULTS *Results) { const char *Prompt = "Left-click in IntegerMatch Window to continue or right click to debug..."; const char *DebugMode = "All Templates"; - CLASS_ID LastClass = Results->BestClass; + CLASS_ID LastClass = Results->best_match.id; CLASS_ID ClassId; BOOL8 AdaptiveOn = TRUE; BOOL8 PreTrainedOn = TRUE; @@ -1589,7 +1411,7 @@ void Classify::DebugAdaptiveClassifier(TBLOB *Blob, ShowMatchDisplay(); cprintf ("\nDebugging class = %s (%s) ...\n", unicharset.id_to_unichar(LastClass), DebugMode); - ShowBestMatchFor(Blob, LineStats, LastClass, AdaptiveOn, PreTrainedOn); + ShowBestMatchFor(Blob, LastClass, AdaptiveOn, PreTrainedOn); UpdateMatchDisplay(); while ((ClassId = GetClassToDebug (Prompt)) != 0) { @@ -1623,7 +1445,7 @@ void Classify::DebugAdaptiveClassifier(TBLOB *Blob, ShowMatchDisplay(); cprintf ("\nDebugging class = %d = %s (%s) ...\n", LastClass, unicharset.id_to_unichar(LastClass), DebugMode); - ShowBestMatchFor(Blob, LineStats, LastClass, AdaptiveOn, PreTrainedOn); + ShowBestMatchFor(Blob, LastClass, AdaptiveOn, PreTrainedOn); UpdateMatchDisplay(); } } /* DebugAdaptiveClassifier */ @@ -1642,7 +1464,6 @@ void Classify::DebugAdaptiveClassifier(TBLOB *Blob, * of these classifications are merged together into Results. 
* * @param Blob blob to be classified - * @param LineStats statistics for text line Blob is in * @param Results place to put match results * * Globals: @@ -1654,26 +1475,25 @@ void Classify::DebugAdaptiveClassifier(TBLOB *Blob, * @note History: Tue Mar 12 08:50:11 1991, DSJ, Created. */ void Classify::DoAdaptiveMatch(TBLOB *Blob, - LINE_STATS *LineStats, - ADAPT_RESULTS *Results) { + ADAPT_RESULTS *Results) { UNICHAR_ID *Ambiguities; AdaptiveMatcherCalls++; InitIntFX(); - if (AdaptedTemplates->NumPermClasses < matcher_permanent_classes_min - || tess_cn_matching) { - CharNormClassifier(Blob, LineStats, PreTrainedTemplates, Results); + if (AdaptedTemplates->NumPermClasses < matcher_permanent_classes_min || + tess_cn_matching) { + CharNormClassifier(Blob, PreTrainedTemplates, Results); } else { - Ambiguities = BaselineClassifier(Blob, LineStats, - AdaptedTemplates, Results); - if ((Results->NumMatches > 0 && MarginalMatch (Results->BestRating) - && !tess_bn_matching) || Results->NumMatches == 0) { - CharNormClassifier(Blob, LineStats, PreTrainedTemplates, Results); + Ambiguities = BaselineClassifier(Blob, AdaptedTemplates, Results); + if ((Results->NumMatches > 0 && + MarginalMatch (Results->best_match.rating) && + !tess_bn_matching) || + Results->NumMatches == 0) { + CharNormClassifier(Blob, PreTrainedTemplates, Results); } else if (Ambiguities && *Ambiguities >= 0) { AmbigClassifier(Blob, - LineStats, PreTrainedTemplates, Ambiguities, Results); @@ -1703,7 +1523,6 @@ void Classify::DoAdaptiveMatch(TBLOB *Blob, * desired thresholds. * * @param Word current word - * @param LineStats line stats for row word is in * @param BestChoice best choice for current word with context * @param BestRawChoice best choice for current word without context * @param[out] Thresholds array of thresholds to be filled in @@ -1718,12 +1537,10 @@ void Classify::DoAdaptiveMatch(TBLOB *Blob, * @note Exceptions: none * @note History: Fri May 31 09:22:08 1991, DSJ, Created. 
*/ -void -Classify::GetAdaptThresholds (TWERD * Word, - LINE_STATS * LineStats, - const WERD_CHOICE& BestChoice, - const WERD_CHOICE& BestRawChoice, - FLOAT32 Thresholds[]) { +void Classify::GetAdaptThresholds(TWERD * Word, + const WERD_CHOICE& BestChoice, + const WERD_CHOICE& BestRawChoice, + FLOAT32 Thresholds[]) { TBLOB *Blob; const char* BestChoice_string = BestChoice.unichar_string().string(); const char* BestChoice_lengths = BestChoice.unichar_lengths().string(); @@ -1736,8 +1553,7 @@ Classify::GetAdaptThresholds (TWERD * Word, matcher_good_threshold, matcher_rating_margin, Thresholds); - } - else { /* old rules */ + } else { /* old rules */ for (Blob = Word->blobs; Blob != NULL; Blob = Blob->next, BestChoice_string += *(BestChoice_lengths++), @@ -1751,15 +1567,12 @@ Classify::GetAdaptThresholds (TWERD * Word, needed to create a template which will correct the error with some margin. However, don't waste time trying to make templates which are too tight. */ - *Thresholds = GetBestRatingFor (Blob, LineStats, - unicharset.unichar_to_id( - BestChoice_string, - *BestChoice_lengths)); + *Thresholds = GetBestRatingFor( + Blob, unicharset.unichar_to_id(BestChoice_string, + *BestChoice_lengths)); *Thresholds *= (1.0 - matcher_rating_margin); - if (*Thresholds > matcher_good_threshold) - *Thresholds = matcher_good_threshold; - if (*Thresholds < matcher_perfect_threshold) - *Thresholds = matcher_perfect_threshold; + *Thresholds = ClipToRange( + *Thresholds, matcher_perfect_threshold, matcher_good_threshold); } } } /* GetAdaptThresholds */ @@ -1771,7 +1584,6 @@ Classify::GetAdaptThresholds (TWERD * Word, * class which are potential ambiguities. * * @param Blob blob to get classification ambiguities for - * @param LineStats statistics for text line blob is in * @param CorrectClass correct class for Blob * * Globals: @@ -1783,39 +1595,33 @@ Classify::GetAdaptThresholds (TWERD * Word, * @note History: Fri Mar 15 08:08:22 1991, DSJ, Created. 
*/ UNICHAR_ID *Classify::GetAmbiguities(TBLOB *Blob, - LINE_STATS *LineStats, CLASS_ID CorrectClass) { ADAPT_RESULTS *Results = new ADAPT_RESULTS(); UNICHAR_ID *Ambiguities; int i; - EnterClassifyMode; - Results->Initialize(); - CharNormClassifier(Blob, LineStats, PreTrainedTemplates, Results); + CharNormClassifier(Blob, PreTrainedTemplates, Results); RemoveBadMatches(Results); - - /* save ratings in a global so that CompareCurrentRatings() can see them */ - CurrentRatings = Results->Ratings; - qsort ((void *) (Results->Classes), Results->NumMatches, - sizeof (CLASS_ID), CompareCurrentRatings); + qsort((void *)Results->match, Results->NumMatches, + sizeof(ScoredClass), CompareByRating); /* copy the class id's into an string of ambiguities - don't copy if the correct class is the only class id matched */ Ambiguities = (UNICHAR_ID *) Emalloc (sizeof (UNICHAR_ID) * (Results->NumMatches + 1)); if (Results->NumMatches > 1 || - (Results->NumMatches == 1 && Results->Classes[0] != CorrectClass)) { + (Results->NumMatches == 1 && Results->match[0].id != CorrectClass)) { for (i = 0; i < Results->NumMatches; i++) - Ambiguities[i] = Results->Classes[i]; + Ambiguities[i] = Results->match[i].id; Ambiguities[i] = -1; - } - else + } else { Ambiguities[0] = -1; + } delete Results; - return (Ambiguities); + return Ambiguities; } /* GetAmbiguities */ /*---------------------------------------------------------------------------*/ @@ -1832,7 +1638,6 @@ UNICHAR_ID *Classify::GetAmbiguities(TBLOB *Blob, * in baseline normalized units is also returned. * * @param Blob blob to extract features from - * @param LineStats statistics about text row blob is in * @param Templates used to compute char norm adjustments * @param IntFeatures array to fill with integer features * @param CharNormArray array to fill with dummy char norm adjustments @@ -1844,21 +1649,21 @@ UNICHAR_ID *Classify::GetAmbiguities(TBLOB *Blob, * @note Exceptions: none * @note History: Tue Mar 12 17:55:18 1991, DSJ, Created. 
*/ -int GetBaselineFeatures(TBLOB *Blob, - LINE_STATS *LineStats, - INT_TEMPLATES Templates, - INT_FEATURE_ARRAY IntFeatures, - CLASS_NORMALIZATION_ARRAY CharNormArray, - inT32 *BlobLength) { +int Classify::GetBaselineFeatures(TBLOB *Blob, + INT_TEMPLATES Templates, + INT_FEATURE_ARRAY IntFeatures, + CLASS_NORMALIZATION_ARRAY CharNormArray, + inT32 *BlobLength) { FEATURE_SET Features; int NumFeatures; - if (classify_enable_int_fx) - return (GetIntBaselineFeatures (Blob, LineStats, Templates, - IntFeatures, CharNormArray, BlobLength)); + if (classify_enable_int_fx) { + return GetIntBaselineFeatures(Blob, Templates, + IntFeatures, CharNormArray, BlobLength); + } classify_norm_method.set_value(baseline); - Features = ExtractPicoFeatures (Blob, LineStats); + Features = ExtractPicoFeatures(Blob); NumFeatures = Features->NumFeatures; *BlobLength = NumFeatures; @@ -1880,7 +1685,6 @@ int GetBaselineFeatures(TBLOB *Blob, * rating found. * * @param Blob blob to get best rating for - * @param LineStats statistics about text line blob is in * @param ClassId class blob is to be compared to * * Globals: @@ -1894,7 +1698,6 @@ int GetBaselineFeatures(TBLOB *Blob, * @note History: Tue Apr 9 09:01:24 1991, DSJ, Created. 
*/ FLOAT32 Classify::GetBestRatingFor(TBLOB *Blob, - LINE_STATS *LineStats, CLASS_ID ClassId) { int NumCNFeatures, NumBLFeatures; INT_FEATURE_ARRAY CNFeatures, BLFeatures; @@ -1904,35 +1707,39 @@ FLOAT32 Classify::GetBestRatingFor(TBLOB *Blob, CNResult.Rating = BLResult.Rating = 1.0; if (!LegalClassId(ClassId)) - return (1.0); + return 1.0; uinT8 *CNAdjust = new uinT8[MAX_NUM_CLASSES]; uinT8 *BLAdjust = new uinT8[MAX_NUM_CLASSES]; if (!UnusedClassIdIn(PreTrainedTemplates, ClassId)) { - NumCNFeatures = GetCharNormFeatures(Blob, LineStats, - PreTrainedTemplates, - CNFeatures, CNAdjust, &BlobLength); + NumCNFeatures = GetCharNormFeatures(Blob, PreTrainedTemplates, + CNFeatures, CNAdjust, &BlobLength, + NULL); if (NumCNFeatures > 0) { - SetCharNormMatch(); - IntegerMatcher(ClassForClassId(PreTrainedTemplates, ClassId), - AllProtosOn, AllConfigsOn, - BlobLength, NumCNFeatures, CNFeatures, - CNAdjust[ClassId], &CNResult, NO_DEBUG); + im_.SetCharNormMatch(classify_integer_matcher_multiplier); + im_.Match(ClassForClassId(PreTrainedTemplates, ClassId), + AllProtosOn, AllConfigsOn, + BlobLength, NumCNFeatures, CNFeatures, + CNAdjust[ClassId], &CNResult, + classify_adapt_feature_threshold, NO_DEBUG, + matcher_debug_separate_windows); } } if (!UnusedClassIdIn(AdaptedTemplates->Templates, ClassId)) { - NumBLFeatures = GetBaselineFeatures(Blob, LineStats, + NumBLFeatures = GetBaselineFeatures(Blob, AdaptedTemplates->Templates, BLFeatures, BLAdjust, &BlobLength); if (NumBLFeatures > 0) { - SetBaseLineMatch(); - IntegerMatcher(ClassForClassId(AdaptedTemplates->Templates, ClassId), - AdaptedTemplates->Class[ClassId]->PermProtos, - AdaptedTemplates->Class[ClassId]->PermConfigs, - BlobLength, NumBLFeatures, BLFeatures, - BLAdjust[ClassId], &BLResult, NO_DEBUG); + im_.SetBaseLineMatch(); + im_.Match(ClassForClassId(AdaptedTemplates->Templates, ClassId), + AdaptedTemplates->Class[ClassId]->PermProtos, + AdaptedTemplates->Class[ClassId]->PermConfigs, + BlobLength, NumBLFeatures, 
BLFeatures, + BLAdjust[ClassId], &BLResult, + classify_adapt_feature_threshold, NO_DEBUG, + matcher_debug_separate_windows); } } @@ -1940,7 +1747,7 @@ FLOAT32 Classify::GetBestRatingFor(TBLOB *Blob, delete[] CNAdjust; delete[] BLAdjust; - return (MIN (BLResult.Rating, CNResult.Rating)); + return MIN(BLResult.Rating, CNResult.Rating); } /* GetBestRatingFor */ /*---------------------------------------------------------------------------*/ @@ -1955,7 +1762,6 @@ FLOAT32 Classify::GetBestRatingFor(TBLOB *Blob, * in baseline normalized units is also returned. * * @param Blob blob to extract features from - * @param LineStats statistics about text row blob is in * @param Templates used to compute char norm adjustments * @param IntFeatures array to fill with integer features * @param CharNormArray array to fill with char norm adjustments @@ -1968,13 +1774,13 @@ FLOAT32 Classify::GetBestRatingFor(TBLOB *Blob, * @note History: Tue Mar 12 17:55:18 1991, DSJ, Created. */ int Classify::GetCharNormFeatures(TBLOB *Blob, - LINE_STATS *LineStats, INT_TEMPLATES Templates, INT_FEATURE_ARRAY IntFeatures, CLASS_NORMALIZATION_ARRAY CharNormArray, - inT32 *BlobLength) { - return (GetIntCharNormFeatures (Blob, LineStats, Templates, - IntFeatures, CharNormArray, BlobLength)); + inT32 *BlobLength, + inT32 *FeatureOutlineIndex) { + return GetIntCharNormFeatures(Blob, Templates, IntFeatures, CharNormArray, + BlobLength, FeatureOutlineIndex); } /* GetCharNormFeatures */ /*---------------------------------------------------------------------------*/ @@ -1988,7 +1794,6 @@ int Classify::GetCharNormFeatures(TBLOB *Blob, * array provided by the caller. 
* * @param Blob blob to extract features from - * @param LineStats statistics about text row blob is in * @param Templates used to compute char norm adjustments * @param IntFeatures array to fill with integer features * @param CharNormArray array to fill with dummy char norm adjustments @@ -2004,23 +1809,22 @@ int Classify::GetCharNormFeatures(TBLOB *Blob, * @note Exceptions: none * @note History: Tue May 28 10:40:52 1991, DSJ, Created. */ -int GetIntBaselineFeatures(TBLOB *Blob, - LINE_STATS *LineStats, - INT_TEMPLATES Templates, - INT_FEATURE_ARRAY IntFeatures, - CLASS_NORMALIZATION_ARRAY CharNormArray, - inT32 *BlobLength) { +int Classify::GetIntBaselineFeatures(TBLOB *Blob, + INT_TEMPLATES Templates, + INT_FEATURE_ARRAY IntFeatures, + CLASS_NORMALIZATION_ARRAY CharNormArray, + inT32 *BlobLength) { register INT_FEATURE Src, Dest, End; if (!FeaturesHaveBeenExtracted) { - FeaturesOK = ExtractIntFeat (Blob, BaselineFeatures, - CharNormFeatures, &FXInfo); + FeaturesOK = ExtractIntFeat(Blob, denorm_, BaselineFeatures, + CharNormFeatures, &FXInfo); FeaturesHaveBeenExtracted = TRUE; } if (!FeaturesOK) { *BlobLength = FXInfo.NumBL; - return (0); + return 0; } for (Src = BaselineFeatures, End = Src + FXInfo.NumBL, Dest = IntFeatures; @@ -2029,9 +1833,13 @@ int GetIntBaselineFeatures(TBLOB *Blob, ClearCharNormArray(Templates, CharNormArray); *BlobLength = FXInfo.NumBL; - return (FXInfo.NumBL); + return FXInfo.NumBL; } /* GetIntBaselineFeatures */ +void Classify::ResetFeaturesHaveBeenExtracted() { + FeaturesHaveBeenExtracted = FALSE; +} + /*---------------------------------------------------------------------------*/ /** * This routine calls the integer (Hardware) feature @@ -2045,7 +1853,6 @@ int GetIntBaselineFeatures(TBLOB *Blob, * array provided by the caller. 
* * @param Blob blob to extract features from - * @param LineStats statistics about text row blob is in * @param Templates used to compute char norm adjustments * @param IntFeatures array to fill with integer features * @param CharNormArray array to fill with dummy char norm adjustments @@ -2062,18 +1869,20 @@ int GetIntBaselineFeatures(TBLOB *Blob, * @note History: Tue May 28 10:40:52 1991, DSJ, Created. */ int Classify::GetIntCharNormFeatures(TBLOB *Blob, - LINE_STATS *LineStats, INT_TEMPLATES Templates, INT_FEATURE_ARRAY IntFeatures, CLASS_NORMALIZATION_ARRAY CharNormArray, - inT32 *BlobLength) { + inT32 *BlobLength, + inT32 *FeatureOutlineArray) { register INT_FEATURE Src, Dest, End; FEATURE NormFeature; FLOAT32 Baseline, Scale; + inT32 FeatureOutlineIndex[MAX_NUM_INT_FEATURES]; if (!FeaturesHaveBeenExtracted) { - FeaturesOK = ExtractIntFeat(Blob, BaselineFeatures, - CharNormFeatures, &FXInfo); + FeaturesOK = ExtractIntFeat(Blob, denorm_, BaselineFeatures, + CharNormFeatures, &FXInfo, + FeatureOutlineIndex); FeaturesHaveBeenExtracted = TRUE; } @@ -2085,10 +1894,13 @@ int Classify::GetIntCharNormFeatures(TBLOB *Blob, for (Src = CharNormFeatures, End = Src + FXInfo.NumCN, Dest = IntFeatures; Src < End; *Dest++ = *Src++); + for (int i = 0; FeatureOutlineArray && i < FXInfo.NumCN; ++i) { + FeatureOutlineArray[i] = FeatureOutlineIndex[i]; + } NormFeature = NewFeature(&CharNormDesc); - Baseline = BaselineAt(LineStats, FXInfo.Xmean); - Scale = ComputeScaleFactor(LineStats); + Baseline = BASELINE_OFFSET; + Scale = MF_SCALE_FACTOR; NormFeature->Params[CharNormY] = (FXInfo.Ymean - Baseline) * Scale; NormFeature->Params[CharNormLength] = FXInfo.Length * Scale / LENGTH_COMPRESSION; @@ -2110,21 +1922,16 @@ int Classify::GetIntCharNormFeatures(TBLOB *Blob, * @param Features features describing model for new config * @param FloatFeatures floating-pt representation of features * - * Globals: - * - AllProtosOn mask to enable all protos - * - AllConfigsOff mask to disable all 
configs - * - TempProtoMask defines old protos matched in new config - * * @return The id of the new config created, a negative integer in * case of error. * @note Exceptions: none * @note History: Fri Mar 15 08:49:46 1991, DSJ, Created. */ int Classify::MakeNewTemporaryConfig(ADAPT_TEMPLATES Templates, - CLASS_ID ClassId, - int NumFeatures, - INT_FEATURE_ARRAY Features, - FEATURE_SET FloatFeatures) { + CLASS_ID ClassId, + int NumFeatures, + INT_FEATURE_ARRAY Features, + FEATURE_SET FloatFeatures) { INT_CLASS IClass; ADAPT_CLASS Class; PROTO_ID OldProtos[MAX_NUM_PROTOS]; @@ -2155,18 +1962,21 @@ int Classify::MakeNewTemporaryConfig(ADAPT_TEMPLATES Templates, OldMaxProtoId = IClass->NumProtos - 1; - NumOldProtos = FindGoodProtos(IClass, AllProtosOn, AllConfigsOff, - BlobLength, NumFeatures, Features, - OldProtos, debug_level); + NumOldProtos = im_.FindGoodProtos(IClass, AllProtosOn, AllConfigsOff, + BlobLength, NumFeatures, Features, + OldProtos, classify_adapt_proto_threshold, + debug_level); MaskSize = WordsInVectorOfSize(MAX_NUM_PROTOS); zero_all_bits(TempProtoMask, MaskSize); for (i = 0; i < NumOldProtos; i++) SET_BIT(TempProtoMask, OldProtos[i]); - NumBadFeatures = FindBadFeatures(IClass, TempProtoMask, AllConfigsOn, - BlobLength, NumFeatures, Features, - BadFeatures, debug_level); + NumBadFeatures = im_.FindBadFeatures(IClass, TempProtoMask, AllConfigsOn, + BlobLength, NumFeatures, Features, + BadFeatures, + classify_adapt_feature_threshold, + debug_level); MaxProtoId = MakeNewTempProtos(FloatFeatures, NumBadFeatures, BadFeatures, IClass, Class, TempProtoMask); @@ -2189,7 +1999,6 @@ int Classify::MakeNewTemporaryConfig(ADAPT_TEMPLATES Templates, return ConfigId; } /* MakeNewTemporaryConfig */ -} // namespace tesseract /*---------------------------------------------------------------------------*/ /** @@ -2212,12 +2021,12 @@ int Classify::MakeNewTemporaryConfig(ADAPT_TEMPLATES Templates, * Exceptions: none * History: Fri Mar 15 11:39:38 1991, DSJ, Created. 
*/ -PROTO_ID -MakeNewTempProtos(FEATURE_SET Features, - int NumBadFeat, - FEATURE_ID BadFeat[], - INT_CLASS IClass, - ADAPT_CLASS Class, BIT_VECTOR TempProtoMask) { +PROTO_ID Classify::MakeNewTempProtos(FEATURE_SET Features, + int NumBadFeat, + FEATURE_ID BadFeat[], + INT_CLASS IClass, + ADAPT_CLASS Class, + BIT_VECTOR TempProtoMask) { FEATURE_ID *ProtoStart; FEATURE_ID *ProtoEnd; FEATURE_ID *LastBad; @@ -2280,7 +2089,8 @@ MakeNewTempProtos(FEATURE_SET Features, SET_BIT(TempProtoMask, Pid); ConvertProto(Proto, Pid, IClass); - AddProtoToProtoPruner(Proto, Pid, IClass); + AddProtoToProtoPruner(Proto, Pid, IClass, + classify_learning_debug_level >= 2); Class->TempProtos = push(Class->TempProtos, TempProto); } @@ -2288,14 +2098,12 @@ MakeNewTempProtos(FEATURE_SET Features, } /* MakeNewTempProtos */ /*---------------------------------------------------------------------------*/ -namespace tesseract { /** * * @param Templates current set of adaptive templates * @param ClassId class containing config to be made permanent * @param ConfigId config to be made permanent * @param Blob current blob being adapted to - * @param LineStats statistics about text line Blob is in * * Globals: none * @@ -2305,8 +2113,7 @@ namespace tesseract { void Classify::MakePermanent(ADAPT_TEMPLATES Templates, CLASS_ID ClassId, int ConfigId, - TBLOB *Blob, - LINE_STATS *LineStats) { + TBLOB *Blob) { UNICHAR_ID *Ambigs; TEMP_CONFIG Config; ADAPT_CLASS Class; @@ -2327,7 +2134,7 @@ void Classify::MakePermanent(ADAPT_TEMPLATES Templates, MakeTempProtoPerm); FreeTempConfig(Config); - Ambigs = GetAmbiguities(Blob, LineStats, ClassId); + Ambigs = GetAmbiguities(Blob, ClassId); PermConfigFor(Class, ConfigId) = Ambigs; if (classify_learning_debug_level >= 1) { @@ -2380,58 +2187,6 @@ int MakeTempProtoPerm(void *item1, void *item2) { return TRUE; } /* MakeTempProtoPerm */ -/*---------------------------------------------------------------------------*/ -/** - * This routine returns the number of blobs in 
Word. - * - * @param Word word to count blobs in - * - * Globals: none - * - * @return Number of blobs in Word. - * @note Exceptions: none - * @note History: Thu Mar 14 08:30:27 1991, DSJ, Created. - */ -int NumBlobsIn(TWERD *Word) { - register TBLOB *Blob; - register int NumBlobs; - - if (Word == NULL) - return (0); - - for (Blob = Word->blobs, NumBlobs = 0; - Blob != NULL; Blob = Blob->next, NumBlobs++); - - return (NumBlobs); - -} /* NumBlobsIn */ - -/*---------------------------------------------------------------------------*/ -/** - * This routine returns the number of OUTER outlines - * in Blob. - * - * @param Blob blob to count outlines in - * - * Globals: none - * @return Number of outer outlines in Blob. - * @note Exceptions: none - * @note History: Mon Jun 10 15:46:20 1991, DSJ, Created. - */ -int NumOutlinesInBlob(TBLOB *Blob) { - register TESSLINE *Outline; - register int NumOutlines; - - if (Blob == NULL) - return (0); - - for (Outline = Blob->outlines, NumOutlines = 0; - Outline != NULL; Outline = Outline->next, NumOutlines++); - - return (NumOutlines); - -} /* NumOutlinesInBlob */ - /*---------------------------------------------------------------------------*/ namespace tesseract { /** @@ -2448,9 +2203,9 @@ namespace tesseract { void Classify::PrintAdaptiveMatchResults(FILE *File, ADAPT_RESULTS *Results) { for (int i = 0; i < Results->NumMatches; ++i) { cprintf("%s(%d) %.2f ", - unicharset.debug_str(Results->Classes[i]).string(), - Results->Classes[i], - Results->Ratings[Results->Classes[i]] * 100.0); + unicharset.debug_str(Results->match[i].id).string(), + Results->match[i].id, + Results->match[i].rating * 100.0); } printf("\n"); } /* PrintAdaptiveMatchResults */ @@ -2473,43 +2228,44 @@ void Classify::PrintAdaptiveMatchResults(FILE *File, ADAPT_RESULTS *Results) { */ void Classify::RemoveBadMatches(ADAPT_RESULTS *Results) { int Next, NextGood; - FLOAT32 *Rating = Results->Ratings; - CLASS_ID *Match = Results->Classes; FLOAT32 BadMatchThreshold; 
static const char* romans = "i v x I V X"; - BadMatchThreshold = Results->BestRating + matcher_bad_match_pad; + BadMatchThreshold = Results->best_match.rating + matcher_bad_match_pad; - if (bln_numericmode) { + if (classify_bln_numeric_mode) { UNICHAR_ID unichar_id_one = unicharset.contains_unichar("1") ? unicharset.unichar_to_id("1") : -1; UNICHAR_ID unichar_id_zero = unicharset.contains_unichar("0") ? unicharset.unichar_to_id("0") : -1; + ScoredClass scored_one = ScoredUnichar(Results, unichar_id_one); + ScoredClass scored_zero = ScoredUnichar(Results, unichar_id_zero); + for (Next = NextGood = 0; Next < Results->NumMatches; Next++) { - if (Rating[Match[Next]] <= BadMatchThreshold) { - if (!unicharset.get_isalpha(Match[Next]) || - strstr(romans, unicharset.id_to_unichar(Match[Next])) != NULL) { - Match[NextGood++] = Match[Next]; - } else if (unichar_id_one >= 0 && unicharset.eq(Match[Next], "l") && - Rating[unichar_id_one] >= BadMatchThreshold) { - Match[NextGood++] = unichar_id_one; - Rating[unichar_id_one] = Rating[unicharset.unichar_to_id("l")]; - } else if (unichar_id_zero >= 0 && unicharset.eq(Match[Next], "O") && - Rating[unichar_id_zero] >= BadMatchThreshold) { - Match[NextGood++] = unichar_id_zero; - Rating[unichar_id_zero] = Rating[unicharset.unichar_to_id("O")]; + if (Results->match[Next].rating <= BadMatchThreshold) { + ScoredClass match = Results->match[Next]; + if (!unicharset.get_isalpha(match.id) || + strstr(romans, unicharset.id_to_unichar(match.id)) != NULL) { + Results->match[NextGood++] = Results->match[Next]; + } else if (unicharset.eq(match.id, "l") && + scored_one.rating >= BadMatchThreshold) { + Results->match[NextGood] = scored_one; + Results->match[NextGood].rating = match.rating; + NextGood++; + } else if (unicharset.eq(match.id, "O") && + scored_zero.rating >= BadMatchThreshold) { + Results->match[NextGood] = scored_zero; + Results->match[NextGood].rating = match.rating; + NextGood++; } } } - } - else { + } else { for (Next = NextGood = 
0; Next < Results->NumMatches; Next++) { - if (Rating[Match[Next]] <= BadMatchThreshold) - Match[NextGood++] = Match[Next]; + if (Results->match[Next].rating <= BadMatchThreshold) + Results->match[NextGood++] = Results->match[Next]; } } - Results->NumMatches = NextGood; - } /* RemoveBadMatches */ /*----------------------------------------------------------------------------*/ @@ -2532,7 +2288,6 @@ void Classify::RemoveExtraPuncs(ADAPT_RESULTS *Results) { int Next, NextGood; int punc_count; /*no of garbage characters */ int digit_count; - CLASS_ID *Match = Results->Classes; /*garbage characters */ static char punc_chars[] = ". , ; : / ` ~ ' - = \\ | \" ! _ ^"; static char digit_chars[] = "0 1 2 3 4 5 6 7 8 9"; @@ -2540,27 +2295,23 @@ void Classify::RemoveExtraPuncs(ADAPT_RESULTS *Results) { punc_count = 0; digit_count = 0; for (Next = NextGood = 0; Next < Results->NumMatches; Next++) { - if (strstr (punc_chars, - unicharset.id_to_unichar(Match[Next])) == NULL) { - if (strstr (digit_chars, - unicharset.id_to_unichar(Match[Next])) == NULL) { - Match[NextGood++] = Match[Next]; - } - else { + ScoredClass match = Results->match[Next]; + if (strstr(punc_chars, unicharset.id_to_unichar(match.id)) != NULL) { + if (punc_count < 2) + Results->match[NextGood++] = match; + punc_count++; + } else { + if (strstr(digit_chars, unicharset.id_to_unichar(match.id)) != NULL) { if (digit_count < 1) - Match[NextGood++] = Match[Next]; + Results->match[NextGood++] = match; digit_count++; + } else { + Results->match[NextGood++] = match; } } - else { - if (punc_count < 2) - Match[NextGood++] = Match[Next]; - punc_count++; /*count them */ - } } Results->NumMatches = NextGood; } /* RemoveExtraPuncs */ -} // namespace tesseract /*---------------------------------------------------------------------------*/ /** @@ -2576,29 +2327,21 @@ void Classify::RemoveExtraPuncs(ADAPT_RESULTS *Results) { * @note Exceptions: none * @note History: Tue Apr 9 08:33:13 1991, DSJ, Created. 
*/ -void SetAdaptiveThreshold(FLOAT32 Threshold) { - if (Threshold == matcher_good_threshold) { - /* the blob was probably classified correctly - use the default rating - threshold */ - SetProtoThresh (0.9); - SetFeatureThresh (0.9); - } - else { - /* the blob was probably incorrectly classified */ - SetProtoThresh (1.0 - Threshold); - SetFeatureThresh (1.0 - Threshold); - } +void Classify::SetAdaptiveThreshold(FLOAT32 Threshold) { + Threshold = (Threshold == matcher_good_threshold) ? 0.9: (1.0 - Threshold); + classify_adapt_proto_threshold.set_value( + ClipToRange(255 * Threshold, 0, 255)); + classify_adapt_feature_threshold.set_value( + ClipToRange(255 * Threshold, 0, 255)); } /* SetAdaptiveThreshold */ /*---------------------------------------------------------------------------*/ -namespace tesseract { /** * This routine compares Blob to both sets of templates * (adaptive and pre-trained) and then displays debug * information for the config which matched best. * * @param Blob blob to show best matching config for - * @param LineStats statistics for text line Blob is in * @param ClassId class whose configs are to be searched * @param AdaptiveOn TRUE if adaptive configs are enabled * @param PreTrainedOn TRUE if pretrained configs are enabled @@ -2613,7 +2356,6 @@ namespace tesseract { * @note History: Fri Mar 22 08:43:52 1991, DSJ, Created. 
*/ void Classify::ShowBestMatchFor(TBLOB *Blob, - LINE_STATS *LineStats, CLASS_ID ClassId, BOOL8 AdaptiveOn, BOOL8 PreTrainedOn) { @@ -2641,18 +2383,18 @@ void Classify::ShowBestMatchFor(TBLOB *Blob, cprintf ("No built-in templates for class %d = %s\n", ClassId, unicharset.id_to_unichar(ClassId)); else { - NumCNFeatures = GetCharNormFeatures (Blob, LineStats, - PreTrainedTemplates, - CNFeatures, CNAdjust, - &BlobLength); + NumCNFeatures = GetCharNormFeatures( + Blob, PreTrainedTemplates, CNFeatures, CNAdjust, &BlobLength, NULL); if (NumCNFeatures <= 0) cprintf ("Illegal blob (char norm features)!\n"); else { - SetCharNormMatch(); - IntegerMatcher (ClassForClassId (PreTrainedTemplates, ClassId), - AllProtosOn, AllConfigsOn, - BlobLength, NumCNFeatures, CNFeatures, - CNAdjust[ClassId], &CNResult, NO_DEBUG); + im_.SetCharNormMatch(classify_integer_matcher_multiplier); + im_.Match(ClassForClassId (PreTrainedTemplates, ClassId), + AllProtosOn, AllConfigsOn, + BlobLength, NumCNFeatures, CNFeatures, + CNAdjust[ClassId], &CNResult, + classify_adapt_feature_threshold, NO_DEBUG, + matcher_debug_separate_windows); cprintf ("Best built-in template match is config %2d (%4.1f) (cn=%d)\n", CNResult.Config, CNResult.Rating * 100.0, CNAdjust[ClassId]); @@ -2665,21 +2407,21 @@ void Classify::ShowBestMatchFor(TBLOB *Blob, cprintf ("No AD templates for class %d = %s\n", ClassId, unicharset.id_to_unichar(ClassId)); else { - NumBLFeatures = GetBaselineFeatures (Blob, LineStats, - AdaptedTemplates->Templates, - BLFeatures, BLAdjust, - &BlobLength); + NumBLFeatures = GetBaselineFeatures(Blob, + AdaptedTemplates->Templates, + BLFeatures, BLAdjust, + &BlobLength); if (NumBLFeatures <= 0) cprintf ("Illegal blob (baseline features)!\n"); else { - SetBaseLineMatch(); - IntegerMatcher (ClassForClassId - (AdaptedTemplates->Templates, ClassId), - AllProtosOn, AllConfigsOn, - // AdaptedTemplates->Class[ClassId]->PermProtos, - // AdaptedTemplates->Class[ClassId]->PermConfigs, - BlobLength, 
NumBLFeatures, BLFeatures, - BLAdjust[ClassId], &BLResult, NO_DEBUG); + im_.SetBaseLineMatch(); + im_.Match(ClassForClassId + (AdaptedTemplates->Templates, ClassId), + AllProtosOn, AllConfigsOn, + BlobLength, NumBLFeatures, BLFeatures, + BLAdjust[ClassId], &BLResult, + classify_adapt_feature_threshold, NO_DEBUG, + matcher_debug_separate_windows); #ifndef SECURE_NAMES ADAPT_CLASS Class = AdaptedTemplates->Class[ClassId]; @@ -2702,13 +2444,14 @@ void Classify::ShowBestMatchFor(TBLOB *Blob, } classify_norm_method.set_value(baseline); - SetBaseLineMatch(); - IntegerMatcher (ClassForClassId (AdaptedTemplates->Templates, ClassId), - AllProtosOn, - // AdaptedTemplates->Class[ClassId]->PermProtos, - (BIT_VECTOR) & ConfigMask, - BlobLength, NumBLFeatures, BLFeatures, - BLAdjust[ClassId], &BLResult, matcher_debug_flags); + im_.SetBaseLineMatch(); + im_.Match(ClassForClassId(AdaptedTemplates->Templates, ClassId), + AllProtosOn, (BIT_VECTOR) &ConfigMask, + BlobLength, NumBLFeatures, BLFeatures, + BLAdjust[ClassId], &BLResult, + classify_adapt_feature_threshold, + matcher_debug_flags, + matcher_debug_separate_windows); cprintf ("Adaptive template match for config %2d is %4.1f\n", BLResult.Config, BLResult.Rating * 100.0); } @@ -2716,15 +2459,85 @@ void Classify::ShowBestMatchFor(TBLOB *Blob, ConfigMask = 1 << CNResult.Config; classify_norm_method.set_value(character); - SetCharNormMatch(); - //xiaofan - IntegerMatcher (ClassForClassId (PreTrainedTemplates, ClassId), AllProtosOn, (BIT_VECTOR) & ConfigMask, - BlobLength, NumCNFeatures, CNFeatures, - CNAdjust[ClassId], &CNResult, matcher_debug_flags); + im_.SetCharNormMatch(classify_integer_matcher_multiplier); + im_.Match(ClassForClassId (PreTrainedTemplates, ClassId), + AllProtosOn, (BIT_VECTOR) & ConfigMask, + BlobLength, NumCNFeatures, CNFeatures, + CNAdjust[ClassId], &CNResult, + classify_adapt_feature_threshold, + matcher_debug_flags, + matcher_debug_separate_windows); } // Clean up. 
delete[] CNAdjust; delete[] BLAdjust; } /* ShowBestMatchFor */ + +// Returns true if the given TEMP_CONFIG is good enough to make it +// a permanent config. +bool Classify::TempConfigReliable(CLASS_ID class_id, + const TEMP_CONFIG &config) { + if (classify_learning_debug_level >= 1) { + tprintf("NumTimesSeen for config of %s is %d\n", + getDict().getUnicharset().debug_str(class_id).string(), + config->NumTimesSeen); + } + if (config->NumTimesSeen >= matcher_sufficient_examples_for_prototyping) { + return true; + } else if (config->NumTimesSeen < matcher_min_examples_for_prototyping) { + return false; + } else if (use_ambigs_for_adaption) { + // Go through the ambigs vector and see whether we have already seen + // enough times all the characters represented by the ambigs vector. + const UnicharIdVector *ambigs = + getDict().getUnicharAmbigs().AmbigsForAdaption(class_id); + int ambigs_size = (ambigs == NULL) ? 0 : ambigs->size(); + for (int ambig = 0; ambig < ambigs_size; ++ambig) { + ADAPT_CLASS ambig_class = AdaptedTemplates->Class[(*ambigs)[ambig]]; + assert(ambig_class != NULL); + if (ambig_class->NumPermConfigs == 0 && + ambig_class->MaxNumTimesSeen < + matcher_min_examples_for_prototyping) { + if (classify_learning_debug_level >= 1) { + tprintf("Ambig %s has not been seen enough times," + " not making config for %s permanent\n", + getDict().getUnicharset().debug_str( + (*ambigs)[ambig]).string(), + getDict().getUnicharset().debug_str(class_id).string()); + } + return false; + } + } + } + return true; +} + +void Classify::UpdateAmbigsGroup(CLASS_ID class_id, TBLOB *Blob) { + const UnicharIdVector *ambigs = + getDict().getUnicharAmbigs().ReverseAmbigsForAdaption(class_id); + int ambigs_size = (ambigs == NULL) ? 
0 : ambigs->size(); + if (classify_learning_debug_level >= 1) { + tprintf("Running UpdateAmbigsGroup for %s class_id=%d\n", + getDict().getUnicharset().debug_str(class_id).string(), class_id); + } + for (int ambig = 0; ambig < ambigs_size; ++ambig) { + CLASS_ID ambig_class_id = (*ambigs)[ambig]; + const ADAPT_CLASS ambigs_class = AdaptedTemplates->Class[ambig_class_id]; + for (int cfg = 0; cfg < MAX_NUM_CONFIGS; ++cfg) { + if (ConfigIsPermanent(ambigs_class, cfg)) continue; + const TEMP_CONFIG config = + TempConfigFor(AdaptedTemplates->Class[ambig_class_id], cfg); + if (config != NULL && TempConfigReliable(ambig_class_id, config)) { + if (classify_learning_debug_level >= 1) { + tprintf("Making config %d of %s permanent\n", cfg, + getDict().getUnicharset().debug_str( + ambig_class_id).string()); + } + MakePermanent(AdaptedTemplates, ambig_class_id, cfg, Blob); + } + } + } +} + } // namespace tesseract diff --git a/classify/adaptmatch.h b/classify/adaptmatch.h deleted file mode 100644 index 1c4f7a5be0..0000000000 --- a/classify/adaptmatch.h +++ /dev/null @@ -1,56 +0,0 @@ -/****************************************************************************** - ** Filename: adaptmatch.h - ** Purpose: Interface to high-level adaptive matcher - ** Author: Dan Johnson - ** History: Mon Mar 11 11:48:48 1991, DSJ, Created. - ** - ** (c) Copyright Hewlett-Packard Company, 1988. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. 
- ******************************************************************************/ -#ifndef ADAPTMATCH_H -#define ADAPTMATCH_H - -/*----------------------------------------------------------------------------- - Include Files and Type Defines ------------------------------------------------------------------------------*/ -#include "oldlist.h" -#include "tessclas.h" -#include "fxdefs.h" -#include "matchdefs.h" -#include "adaptive.h" -#include "ocrfeatures.h" -#include "ratngs.h" - -/*--------------------------------------------------------------------------- - Variables -----------------------------------------------------------------------------*/ -extern double_VAR_H(matcher_good_threshold, 0.125, "Good Match (0-1)"); -extern double_VAR_H(matcher_great_threshold, 0.0, "Great Match (0-1)"); -extern INT_VAR_H(matcher_failed_adaptations_before_reset, 150, - "Number of failed adaptions before adapted templates reset"); -extern INT_VAR_H(matcher_min_examples_for_prototyping, 2, - "Reliable Config Threshold"); -extern BOOL_VAR_H(tess_cn_matching, 0, "Character Normalized Matching"); -extern BOOL_VAR_H(tess_bn_matching, 0, "Baseline Normalized Matching"); -extern INT_VAR_H(classify_learning_debug_level, 0, "Learning Debug Level: "); - -/*----------------------------------------------------------------------------- - Public Function Prototypes ------------------------------------------------------------------------------*/ -int GetAdaptiveFeatures(TBLOB *Blob, - LINE_STATS *LineStats, - INT_FEATURE_ARRAY IntFeatures, - FEATURE_SET *FloatFeatures); - -/*----------------------------------------------------------------------------- - Global Data Definitions and Declarations ------------------------------------------------------------------------------*/ -#endif diff --git a/classify/baseline.cpp b/classify/baseline.cpp deleted file mode 100644 index be645b2366..0000000000 --- a/classify/baseline.cpp +++ /dev/null @@ -1,42 +0,0 @@ -/* -*-C-*- - 
******************************************************************************** - * - * File: baseline.c (Formerly baseline.c) - * Description: - * Author: Mark Seaman, OCR Technology - * Created: Fri Oct 16 14:37:00 1987 - * Modified: Tue Jul 30 16:16:13 1991 (Mark Seaman) marks@hpgrlt - * Language: C - * Package: N/A - * Status: Reusable Software Component - * - * (c) Copyright 1987, Hewlett-Packard Company. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. - * - **************************************************************************/ - -/*---------------------------------------------------------------------- - I n c l u d e s ----------------------------------------------------------------------*/ -#include "baseline.h" -#include "hideedge.h" -#include "varable.h" - -/*---------------------------------------------------------------------- - T y p e s -----------------------------------------------------------------------*/ -typedef TPOINT SCALE; - -/*---------------------------------------------------------------------- - V a r i a b l e s -----------------------------------------------------------------------*/ - -INT_VAR(classify_baseline_normalized, 1, "Baseline Enable"); diff --git a/classify/baseline.h b/classify/baseline.h index d7353144ae..e4addca8bc 100644 --- a/classify/baseline.h +++ b/classify/baseline.h @@ -28,9 +28,9 @@ /*---------------------------------------------------------------------- I n c l u d e s 
----------------------------------------------------------------------*/ -#include "general.h" -#include "tessclas.h" -#include "varable.h" +#include "host.h" +#include "blobs.h" +#include "params.h" /*---------------------------------------------------------------------- T y p e s @@ -38,10 +38,4 @@ #define BASELINE_OFFSET 64 #define BASELINE_SCALE 128 -/*---------------------------------------------------------------------- - V a r i a b l e s -----------------------------------------------------------------------*/ - -extern INT_VAR_H(classify_baseline_normalized, 1, "Baseline Enable"); - #endif diff --git a/classify/blobclass.cpp b/classify/blobclass.cpp index 384c80863b..568bedb2f3 100644 --- a/classify/blobclass.cpp +++ b/classify/blobclass.cpp @@ -1,122 +1,122 @@ -/****************************************************************************** - ** Filename: blobclass.c - ** Purpose: High level blob classification and training routines. - ** Author: Dan Johnson - ** History: 7/21/89, DSJ, Created. - ** - ** (c) Copyright Hewlett-Packard Company, 1988. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. 
- ******************************************************************************/ - -/**---------------------------------------------------------------------------- - Include Files and Type Defines -----------------------------------------------------------------------------**/ -#include "blobclass.h" -#include "fxdefs.h" -#include "extract.h" -#include "efio.h" -#include "callcpp.h" -#include "chartoname.h" - -#include -#include -#include - -#define MAXFILENAME 80 -#define MAXMATCHES 10 - -static const char kUnknownFontName[] = "UnknownFont"; - -STRING_VAR(classify_font_name, kUnknownFontName, - "Default font name to be used in training"); - -/**---------------------------------------------------------------------------- - Global Data Definitions and Declarations -----------------------------------------------------------------------------**/ -/* name of current image file being processed */ -extern char imagefile[]; - -/**---------------------------------------------------------------------------- - Public Code -----------------------------------------------------------------------------**/ - -/*---------------------------------------------------------------------------*/ -void LearnBlob(const STRING& filename, - TBLOB * Blob, TEXTROW * Row, const char* BlobText) { -/* - ** Parameters: - ** Blob blob whose micro-features are to be learned - ** Row row of text that blob came from - ** BlobText text that corresponds to blob - ** TextLength number of characters in blob - ** Globals: - ** imagefile base filename of the page being learned - ** classify_font_name - ** name of font currently being trained on - ** Operation: - ** Extract micro-features from the specified blob and append - ** them to the appropriate file. - ** Return: none - ** Exceptions: none - ** History: 7/28/89, DSJ, Created. 
- */ -#define TRAIN_SUFFIX ".tr" - static FILE *FeatureFile = NULL; - STRING Filename(filename); - - // If no fontname was set, try to extract it from the filename - STRING CurrFontName = classify_font_name; - if (CurrFontName == kUnknownFontName) { - // filename is expected to be of the form [lang].[fontname].exp[num] - // The [lang], [fontname] and [num] fields should not have '.' characters. - const char *basename = strrchr(filename.string(), '/'); - const char *firstdot = strchr(basename ? basename : filename.string(), '.'); - const char *lastdot = strrchr(filename.string(), '.'); - if (firstdot != lastdot && firstdot != NULL && lastdot != NULL) { - strncpy(&CurrFontName[0], firstdot + 1, lastdot - firstdot - 1); - CurrFontName[lastdot - firstdot - 1] = '\0'; - } - } - - // if a feature file is not yet open, open it - // the name of the file is the name of the image plus TRAIN_SUFFIX - if (FeatureFile == NULL) { - Filename += TRAIN_SUFFIX; - FeatureFile = Efopen(Filename.string(), "w"); - cprintf("TRAINING ... Font name = %s\n", CurrFontName.string()); - } - - LearnBlob(FeatureFile, Blob, Row, BlobText, CurrFontName.string()); -} // LearnBlob - -void LearnBlob(FILE* FeatureFile, TBLOB* Blob, TEXTROW* Row, - const char* BlobText, const char* FontName) { - CHAR_DESC CharDesc; - LINE_STATS LineStats; - - EnterLearnMode; - - GetLineStatsFromRow(Row, &LineStats); - - CharDesc = ExtractBlobFeatures (Blob, &LineStats); - if (CharDesc == NULL) { - cprintf("LearnBLob: CharDesc was NULL. Aborting.\n"); - return; - } - - // label the features with a class name and font name - fprintf (FeatureFile, "\n%s %s ", FontName, BlobText); - - // write micro-features to file and clean up - WriteCharDescription(FeatureFile, CharDesc); - FreeCharDescription(CharDesc); - -} // LearnBlob +/****************************************************************************** + ** Filename: blobclass.c + ** Purpose: High level blob classification and training routines. 
+ ** Author: Dan Johnson + ** History: 7/21/89, DSJ, Created. + ** + ** (c) Copyright Hewlett-Packard Company, 1988. + ** Licensed under the Apache License, Version 2.0 (the "License"); + ** you may not use this file except in compliance with the License. + ** You may obtain a copy of the License at + ** http://www.apache.org/licenses/LICENSE-2.0 + ** Unless required by applicable law or agreed to in writing, software + ** distributed under the License is distributed on an "AS IS" BASIS, + ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + ** See the License for the specific language governing permissions and + ** limitations under the License. + ******************************************************************************/ + +/**---------------------------------------------------------------------------- + Include Files and Type Defines +----------------------------------------------------------------------------**/ +#include "blobclass.h" +#include "extract.h" +#include "efio.h" +#include "featdefs.h" +#include "callcpp.h" +#include "chartoname.h" + +#include +#include +#include + +#define MAXFILENAME 80 +#define MAXMATCHES 10 + +static const char kUnknownFontName[] = "UnknownFont"; + +STRING_VAR(classify_font_name, kUnknownFontName, + "Default font name to be used in training"); + +/**---------------------------------------------------------------------------- + Global Data Definitions and Declarations +----------------------------------------------------------------------------**/ +/* name of current image file being processed */ +extern char imagefile[]; + +/**---------------------------------------------------------------------------- + Public Code +----------------------------------------------------------------------------**/ + +/*---------------------------------------------------------------------------*/ +void LearnBlob(const FEATURE_DEFS_STRUCT &FeatureDefs, const STRING& filename, + TBLOB * Blob, const DENORM& denorm, const 
char* BlobText) { +/* + ** Parameters: + ** Blob blob whose micro-features are to be learned + ** Row row of text that blob came from + ** BlobText text that corresponds to blob + ** TextLength number of characters in blob + ** Globals: + ** imagefile base filename of the page being learned + ** classify_font_name + ** name of font currently being trained on + ** Operation: + ** Extract micro-features from the specified blob and append + ** them to the appropriate file. + ** Return: none + ** Exceptions: none + ** History: 7/28/89, DSJ, Created. + */ +#define TRAIN_SUFFIX ".tr" + static FILE *FeatureFile = NULL; + STRING Filename(filename); + + // If no fontname was set, try to extract it from the filename + STRING CurrFontName = classify_font_name; + if (CurrFontName == kUnknownFontName) { + // filename is expected to be of the form [lang].[fontname].exp[num] + // The [lang], [fontname] and [num] fields should not have '.' characters. + const char *basename = strrchr(filename.string(), '/'); + const char *firstdot = strchr(basename ? basename : filename.string(), '.'); + const char *lastdot = strrchr(filename.string(), '.'); + if (firstdot != lastdot && firstdot != NULL && lastdot != NULL) { + ++firstdot; + CurrFontName = firstdot; + CurrFontName[lastdot - firstdot] = '\0'; + } + } + + // if a feature file is not yet open, open it + // the name of the file is the name of the image plus TRAIN_SUFFIX + if (FeatureFile == NULL) { + Filename += TRAIN_SUFFIX; + FeatureFile = Efopen(Filename.string(), "w"); + cprintf("TRAINING ... 
Font name = %s\n", CurrFontName.string()); + } + + LearnBlob(FeatureDefs, FeatureFile, Blob, denorm, BlobText, + CurrFontName.string()); +} // LearnBlob + +void LearnBlob(const FEATURE_DEFS_STRUCT &FeatureDefs, FILE* FeatureFile, + TBLOB* Blob, const DENORM& denorm, + const char* BlobText, const char* FontName) { + CHAR_DESC CharDesc; + + ASSERT_HOST(FeatureFile != NULL); + + CharDesc = ExtractBlobFeatures(FeatureDefs, denorm, Blob); + if (CharDesc == NULL) { + cprintf("LearnBLob: CharDesc was NULL. Aborting.\n"); + return; + } + + // label the features with a class name and font name + fprintf (FeatureFile, "\n%s %s ", FontName, BlobText); + + // write micro-features to file and clean up + WriteCharDescription(FeatureDefs, FeatureFile, CharDesc); + FreeCharDescription(CharDesc); + +} // LearnBlob diff --git a/classify/blobclass.h b/classify/blobclass.h index 0183541340..57d27a0daa 100644 --- a/classify/blobclass.h +++ b/classify/blobclass.h @@ -21,8 +21,9 @@ /**---------------------------------------------------------------------------- Include Files and Type Defines ----------------------------------------------------------------------------**/ +#include "featdefs.h" #include "oldlist.h" -#include "tessclas.h" +#include "blobs.h" /*--------------------------------------------------------------------------- Macros @@ -38,11 +39,12 @@ /**---------------------------------------------------------------------------- Public Function Prototypes ----------------------------------------------------------------------------**/ -void LearnBlob(const STRING& filename, - TBLOB * Blob, TEXTROW * Row, const char* BlobText); +void LearnBlob(const FEATURE_DEFS_STRUCT &FeatureDefs, const STRING& filename, + TBLOB * Blob, const DENORM& denorm, const char* BlobText); -void LearnBlob(FILE* File, TBLOB* Blob, TEXTROW* Row, - const char* BlobText, const char* FontName); +void LearnBlob(const FEATURE_DEFS_STRUCT &FeatureDefs, FILE* File, TBLOB* Blob, + const DENORM& denorm, const char* 
BlobText, + const char* FontName); /**---------------------------------------------------------------------------- Global Data Definitions and Declarations diff --git a/classify/classify.cpp b/classify/classify.cpp index aeff801cab..4e2566cf1f 100644 --- a/classify/classify.cpp +++ b/classify/classify.cpp @@ -18,6 +18,8 @@ #include "classify.h" #include "intproto.h" +#include "mfoutline.h" +#include "scrollview.h" #include "unicity_table.h" #include @@ -53,23 +55,114 @@ void delete_callback_fs(FontSet fs) { namespace tesseract { Classify::Classify() - : INT_MEMBER(tessedit_single_match, FALSE, "Top choice only from CP"), - BOOL_MEMBER(classify_enable_learning, true, "Enable adaptive classifier"), - BOOL_MEMBER(classify_recog_devanagari, false, - "Whether recognizing a language with devanagari script."), + : INT_MEMBER(tessedit_single_match, FALSE, + "Top choice only from CP", this->params()), + BOOL_MEMBER(classify_enable_learning, true, + "Enable adaptive classifier", this->params()), + INT_MEMBER(classify_debug_level, 0, "Classify debug level", + this->params()), + INT_MEMBER(classify_norm_method, character, "Normalization Method ...", + this->params()), + double_MEMBER(classify_char_norm_range, 0.2, + "Character Normalization Range ...", this->params()), + double_MEMBER(classify_min_norm_scale_x, 0.0, "Min char x-norm scale ...", + this->params()), /* PREV DEFAULT 0.1 */ + double_MEMBER(classify_max_norm_scale_x, 0.325, "Max char x-norm scale ...", + this->params()), /* PREV DEFAULT 0.3 */ + double_MEMBER(classify_min_norm_scale_y, 0.0, "Min char y-norm scale ...", + this->params()), /* PREV DEFAULT 0.1 */ + double_MEMBER(classify_max_norm_scale_y, 0.325, "Max char y-norm scale ...", + this->params()), /* PREV DEFAULT 0.3 */ + BOOL_MEMBER(tess_cn_matching, 0, "Character Normalized Matching", + this->params()), + BOOL_MEMBER(tess_bn_matching, 0, "Baseline Normalized Matching", + this->params()), + BOOL_MEMBER(classify_enable_adaptive_matcher, 1, + "Enable 
adaptive classifier", + this->params()), + BOOL_MEMBER(classify_use_pre_adapted_templates, 0, + "Use pre-adapted classifier templates", this->params()), + BOOL_MEMBER(classify_save_adapted_templates, 0, + "Save adapted templates to a file", this->params()), + BOOL_MEMBER(classify_enable_adaptive_debugger, 0, "Enable match debugger", + this->params()), + INT_MEMBER(matcher_debug_level, 0, "Matcher Debug Level", this->params()), + INT_MEMBER(matcher_debug_flags, 0, "Matcher Debug Flags", this->params()), + INT_MEMBER(classify_learning_debug_level, 0, "Learning Debug Level: ", + this->params()), + double_MEMBER(matcher_good_threshold, 0.125, "Good Match (0-1)", + this->params()), + double_MEMBER(matcher_great_threshold, 0.0, "Great Match (0-1)", + this->params()), + double_MEMBER(matcher_perfect_threshold, 0.02, "Perfect Match (0-1)", + this->params()), + double_MEMBER(matcher_bad_match_pad, 0.15, "Bad Match Pad (0-1)", + this->params()), + double_MEMBER(matcher_rating_margin, 0.1, "New template margin (0-1)", + this->params()), + double_MEMBER(matcher_avg_noise_size, 12.0, "Avg. 
noise blob length", + this->params()), + INT_MEMBER(matcher_permanent_classes_min, 1, "Min # of permanent classes", + this->params()), + INT_MEMBER(matcher_min_examples_for_prototyping, 3, + "Reliable Config Threshold", this->params()), + INT_MEMBER(matcher_sufficient_examples_for_prototyping, 5, + "Enable adaption even if the ambiguities have not been seen", + this->params()), + double_MEMBER(matcher_clustering_max_angle_delta, 0.015, + "Maximum angle delta for prototype clustering", + this->params()), + double_MEMBER(classify_misfit_junk_penalty, 0.0, + "Penalty to apply when a non-alnum is vertically out of " + "its expected textline position", + this->params()), + BOOL_MEMBER(classify_enable_int_fx, 1, "Enable integer fx", + this->params()), + BOOL_MEMBER(classify_enable_new_adapt_rules, 1, + "Enable new adaptation rules", this->params()), + double_MEMBER(rating_scale, 1.5, "Rating scaling factor", this->params()), + double_MEMBER(certainty_scale, 20.0, "Certainty scaling factor", + this->params()), + double_MEMBER(tessedit_class_miss_scale, 0.00390625, + "Scale factor for features not used", this->params()), + INT_MEMBER(classify_adapt_proto_threshold, 230, + "Threshold for good protos during adaptive 0-255", + this->params()), + INT_MEMBER(classify_adapt_feature_threshold, 230, + "Threshold for good features during adaptive 0-255", + this->params()), + BOOL_MEMBER(disable_character_fragments, FALSE, + "Do not include character fragments in the" + " results of the classifier", this->params()), + BOOL_MEMBER(matcher_debug_separate_windows, FALSE, + "Use two different windows for debugging the matching: " + "One for the protos and one for the features.", this->params()), + STRING_MEMBER(classify_learn_debug_str, "", "Class str to debug learning", + this->params()), + INT_INIT_MEMBER(classify_class_pruner_threshold, 229, + "Class Pruner Threshold 0-255: ", this->params()), + INT_INIT_MEMBER(classify_class_pruner_multiplier, 30, + "Class Pruner Multiplier 0-255: 
", this->params()), + INT_INIT_MEMBER(classify_cp_cutoff_strength, 7, + "Class Pruner CutoffStrength: ", this->params()), + INT_INIT_MEMBER(classify_integer_matcher_multiplier, 14, + "Integer Matcher Multiplier 0-255: ", this->params()), EnableLearning(true), + INT_MEMBER(il1_adaption_test, 0, "Dont adapt to i/I at beginning of word", + this->params()), + BOOL_MEMBER(classify_bln_numeric_mode, 0, + "Assume the input is numbers [0-9].", this->params()), dict_(&image_) { fontinfo_table_.set_compare_callback( - NewPermanentCallback(compare_fontinfo)); + NewPermanentTessCallback(compare_fontinfo)); fontinfo_table_.set_clear_callback( - NewPermanentCallback(delete_callback)); + NewPermanentTessCallback(delete_callback)); fontset_table_.set_compare_callback( - NewPermanentCallback(compare_font_set)); + NewPermanentTessCallback(compare_font_set)); fontset_table_.set_clear_callback( - NewPermanentCallback(delete_callback_fs)); + NewPermanentTessCallback(delete_callback_fs)); AdaptedTemplates = NULL; PreTrainedTemplates = NULL; - inttemp_loaded_ = false; AllProtosOn = NULL; PrunedProtos = NULL; AllConfigsOn = NULL; @@ -77,10 +170,27 @@ Classify::Classify() AllConfigsOff = NULL; TempProtoMask = NULL; NormProtos = NULL; + + AdaptiveMatcherCalls = 0; + BaselineClassifierCalls = 0; + CharNormClassifierCalls = 0; + AmbigClassifierCalls = 0; + NumWordsAdaptedTo = 0; + NumCharsAdaptedTo = 0; + NumBaselineClassesTried = 0; + NumCharNormClassesTried = 0; + NumAmbigClassesTried = 0; + NumClassesOutput = 0; + NumAdaptationsFailed = 0; + + FeaturesHaveBeenExtracted = false; + FeaturesOK = true; + learn_debug_win_ = NULL; } Classify::~Classify() { EndAdaptiveClassifier(); + delete learn_debug_win_; } } // namespace tesseract diff --git a/classify/classify.h b/classify/classify.h index 179ca3d8d2..e676bd8e04 100644 --- a/classify/classify.h +++ b/classify/classify.h @@ -23,24 +23,43 @@ #include "ccstruct.h" #include "classify.h" #include "dict.h" -#include "fxdefs.h" +#include 
"featdefs.h" +#include "intfx.h" #include "intmatcher.h" #include "ratngs.h" #include "ocrfeatures.h" #include "unicity_table.h" +class ScrollView; class WERD_CHOICE; +class WERD_RES; struct ADAPT_RESULTS; struct NORM_PROTOS; namespace tesseract { + +// How segmented is a blob. In this enum, character refers to a classifiable +// unit, but that is too long and character is usually easier to understand. +enum CharSegmentationType { + CST_FRAGMENT, // A partial character. + CST_WHOLE, // A correctly segmented character. + CST_IMPROPER, // More than one but less than 2 characters. + CST_NGRAM // Multiple characters. +}; + class Classify : public CCStruct { public: Classify(); - ~Classify(); + virtual ~Classify(); Dict& getDict() { return dict_; } + + // Set the denorm for classification. Takes a copy. + void set_denorm(const DENORM* denorm) { + denorm_ = *denorm; + } + /* adaptive.cpp ************************************************************/ ADAPT_TEMPLATES NewAdaptedTemplates(bool InitFromUnicharset); int ClassPruner(INT_TEMPLATES IntTemplates, @@ -48,8 +67,7 @@ class Classify : public CCStruct { INT_FEATURE_ARRAY Features, CLASS_NORMALIZATION_ARRAY NormalizationFactors, CLASS_CUTOFF_ARRAY ExpectedNumFeatures, - CLASS_PRUNER_RESULTS Results, - int Debug); + CLASS_PRUNER_RESULTS Results); void ReadNewCutoffs(FILE *CutoffFile, inT64 end_offset, CLASS_CUTOFF_ARRAY Cutoffs); void PrintAdaptedTemplates(FILE *File, ADAPT_TEMPLATES Templates); @@ -61,27 +79,40 @@ class Classify : public CCStruct { NORM_PROTOS *ReadNormProtos(FILE *File, inT64 end_offset); /* protos.cpp ***************************************************************/ void ReadClassFile(); - INT_TEMPLATES - CreateIntTemplates(CLASSES FloatProtos, - const UNICHARSET& target_unicharset); + void ConvertProto(PROTO Proto, int ProtoId, INT_CLASS Class); + INT_TEMPLATES CreateIntTemplates(CLASSES FloatProtos, + const UNICHARSET& target_unicharset); /* adaptmatch.cpp 
***********************************************************/ - void AdaptToWord(TWERD *Word, - TEXTROW *Row, - const WERD_CHOICE& BestChoice, - const WERD_CHOICE& BestRawChoice, - const char *rejmap); - void InitAdaptiveClassifier(); + // Learn the given word using its chopped_word, seam_array, denorm, + // box_word, best_state, and correct_text to learn both correctly and + // incorrectly segmented blobs. If filename is not NULL, then LearnBlob + // is called and the data will be written to a file for static training. + // Otherwise AdaptToBlob is called for adaption within a document. + // If rejmap is not NULL, then only chars with a rejmap entry of '1' will + // be learned, otherwise all chars with good correct_text are learned. + void LearnWord(const char* filename, const char *rejmap, WERD_RES *word); + + // Builds a blob of length fragments, from the word, starting at start, + // and then learn it, as having the given correct_text. + // If filename is not NULL, then LearnBlob + // is called and the data will be written to a file for static training. + // Otherwise AdaptToBlob is called for adaption within a document. + // threshold is a magic number required by AdaptToChar and generated by + // GetAdaptThresholds. + // Although it can be partly inferred from the string, segmentation is + // provided to explicitly clarify the character segmentation. 
+ void LearnPieces(const char* filename, int start, int length, + float threshold, CharSegmentationType segmentation, + const char* correct_text, WERD_RES *word); + void InitAdaptiveClassifier(bool load_pre_trained_templates); void InitAdaptedClass(TBLOB *Blob, - LINE_STATS *LineStats, CLASS_ID ClassId, ADAPT_CLASS Class, ADAPT_TEMPLATES Templates); void AdaptToPunc(TBLOB *Blob, - LINE_STATS *LineStats, CLASS_ID ClassId, FLOAT32 Threshold); void AmbigClassifier(TBLOB *Blob, - LINE_STATS *LineStats, INT_TEMPLATES Templates, UNICHAR_ID *Ambiguities, ADAPT_RESULTS *Results); @@ -92,6 +123,7 @@ class Classify : public CCStruct { ADAPT_CLASS* classes, int debug, int num_classes, + const TBOX& blob_box, CLASS_PRUNER_RESULTS results, ADAPT_RESULTS* final_results); void ConvertMatchesToChoices(ADAPT_RESULTS *Results, @@ -99,18 +131,27 @@ class Classify : public CCStruct { void AddNewResult(ADAPT_RESULTS *Results, CLASS_ID ClassId, FLOAT32 Rating, - int ConfigId); + int ConfigId, + int config2); + int GetAdaptiveFeatures(TBLOB *Blob, + INT_FEATURE_ARRAY IntFeatures, + FEATURE_SET *FloatFeatures); + #ifndef GRAPHICS_DISABLED void DebugAdaptiveClassifier(TBLOB *Blob, - LINE_STATS *LineStats, ADAPT_RESULTS *Results); #endif void GetAdaptThresholds (TWERD * Word, - LINE_STATS * LineStats, const WERD_CHOICE& BestChoice, const WERD_CHOICE& BestRawChoice, FLOAT32 Thresholds[]); + PROTO_ID MakeNewTempProtos(FEATURE_SET Features, + int NumBadFeat, + FEATURE_ID BadFeat[], + INT_CLASS IClass, + ADAPT_CLASS Class, + BIT_VECTOR TempProtoMask); int MakeNewTemporaryConfig(ADAPT_TEMPLATES Templates, CLASS_ID ClassId, int NumFeatures, @@ -119,32 +160,26 @@ class Classify : public CCStruct { void MakePermanent(ADAPT_TEMPLATES Templates, CLASS_ID ClassId, int ConfigId, - TBLOB *Blob, - LINE_STATS *LineStats); + TBLOB *Blob); void PrintAdaptiveMatchResults(FILE *File, ADAPT_RESULTS *Results); void RemoveExtraPuncs(ADAPT_RESULTS *Results); void RemoveBadMatches(ADAPT_RESULTS *Results); + void 
SetAdaptiveThreshold(FLOAT32 Threshold); void ShowBestMatchFor(TBLOB *Blob, - LINE_STATS *LineStats, CLASS_ID ClassId, BOOL8 AdaptiveOn, BOOL8 PreTrainedOn); UNICHAR_ID *BaselineClassifier(TBLOB *Blob, - LINE_STATS *LineStats, ADAPT_TEMPLATES Templates, ADAPT_RESULTS *Results); int CharNormClassifier(TBLOB *Blob, - LINE_STATS *LineStats, INT_TEMPLATES Templates, ADAPT_RESULTS *Results); UNICHAR_ID *GetAmbiguities(TBLOB *Blob, - LINE_STATS *LineStats, CLASS_ID CorrectClass); void DoAdaptiveMatch(TBLOB *Blob, - LINE_STATS *LineStats, ADAPT_RESULTS *Results); void AdaptToChar(TBLOB *Blob, - LINE_STATS *LineStats, CLASS_ID ClassId, FLOAT32 Threshold); int AdaptableWord(TWERD *Word, @@ -155,38 +190,52 @@ class Classify : public CCStruct { void SettupPass1(); void SettupPass2(); void AdaptiveClassifier(TBLOB *Blob, - TBLOB *DotBlob, - TEXTROW *Row, BLOB_CHOICE_LIST *Choices, CLASS_PRUNER_RESULTS cp_results); void ClassifyAsNoise(ADAPT_RESULTS *Results); void ResetAdaptiveClassifier(); + int GetBaselineFeatures(TBLOB *Blob, + INT_TEMPLATES Templates, + INT_FEATURE_ARRAY IntFeatures, + CLASS_NORMALIZATION_ARRAY CharNormArray, + inT32 *BlobLength); FLOAT32 GetBestRatingFor(TBLOB *Blob, - LINE_STATS *LineStats, CLASS_ID ClassId); int GetCharNormFeatures(TBLOB *Blob, - LINE_STATS *LineStats, INT_TEMPLATES Templates, INT_FEATURE_ARRAY IntFeatures, CLASS_NORMALIZATION_ARRAY CharNormArray, - inT32 *BlobLength); - int GetIntCharNormFeatures(TBLOB *Blob, - LINE_STATS *LineStats, + inT32 *BlobLength, + inT32 *FeatureOutlineIndex); + int GetIntBaselineFeatures(TBLOB *Blob, INT_TEMPLATES Templates, INT_FEATURE_ARRAY IntFeatures, CLASS_NORMALIZATION_ARRAY CharNormArray, inT32 *BlobLength); + int GetIntCharNormFeatures(TBLOB *Blob, + INT_TEMPLATES Templates, + INT_FEATURE_ARRAY IntFeatures, + CLASS_NORMALIZATION_ARRAY CharNormArray, + inT32 *BlobLength, + inT32 *FeatureOutlineArray); + + bool TempConfigReliable(CLASS_ID class_id, const TEMP_CONFIG &config); + void 
UpdateAmbigsGroup(CLASS_ID class_id, TBLOB *Blob); + void ResetFeaturesHaveBeenExtracted(); + bool AdaptiveClassifierIsFull() { return NumAdaptationsFailed > 0; } /* float2int.cpp ************************************************************/ void ComputeIntCharNormArray(FEATURE NormFeature, INT_TEMPLATES Templates, CLASS_NORMALIZATION_ARRAY CharNormArray); + void ComputeIntFeatures(FEATURE_SET Features, INT_FEATURE_ARRAY IntFeatures); /* intproto.cpp *************************************************************/ INT_TEMPLATES ReadIntTemplates(FILE *File); void WriteIntTemplates(FILE *File, INT_TEMPLATES Templates, const UNICHARSET& target_unicharset); CLASS_ID GetClassToDebug(const char *Prompt); + void ShowMatchDisplay(); /* font detection ***********************************************************/ UnicityTable& get_fontinfo_table() { return fontinfo_table_; @@ -194,36 +243,145 @@ class Classify : public CCStruct { UnicityTable& get_fontset_table() { return fontset_table_; } - /* adaptmatch.cpp ***********************************************************/ - /* name of current image file being processed */ + /* mfoutline.cpp ***********************************************************/ + void NormalizeOutlines(LIST Outlines, FLOAT32 *XScale, FLOAT32 *YScale); + /* outfeat.cpp ***********************************************************/ + FEATURE_SET ExtractOutlineFeatures(TBLOB *Blob); + /* picofeat.cpp ***********************************************************/ + FEATURE_SET ExtractPicoFeatures(TBLOB *Blob); + + + // Member variables. + + // Parameters. 
INT_VAR_H(tessedit_single_match, FALSE, "Top choice only from CP"); - /* use class variables to hold onto built-in templates and adapted - templates */ + BOOL_VAR_H(classify_enable_learning, true, "Enable adaptive classifier"); + INT_VAR_H(classify_debug_level, 0, "Classify debug level"); + + /* mfoutline.cpp ***********************************************************/ + /* control knobs used to control normalization of outlines */ + INT_VAR_H(classify_norm_method, character, "Normalization Method ..."); + double_VAR_H(classify_char_norm_range, 0.2, + "Character Normalization Range ..."); + double_VAR_H(classify_min_norm_scale_x, 0.0, "Min char x-norm scale ..."); + double_VAR_H(classify_max_norm_scale_x, 0.325, "Max char x-norm scale ..."); + double_VAR_H(classify_min_norm_scale_y, 0.0, "Min char y-norm scale ..."); + double_VAR_H(classify_max_norm_scale_y, 0.325, "Max char y-norm scale ..."); + + /* adaptmatch.cpp ***********************************************************/ + BOOL_VAR_H(tess_cn_matching, 0, "Character Normalized Matching"); + BOOL_VAR_H(tess_bn_matching, 0, "Baseline Normalized Matching"); + BOOL_VAR_H(classify_enable_adaptive_matcher, 1, "Enable adaptive classifier"); + BOOL_VAR_H(classify_use_pre_adapted_templates, 0, + "Use pre-adapted classifier templates"); + BOOL_VAR_H(classify_save_adapted_templates, 0, + "Save adapted templates to a file"); + BOOL_VAR_H(classify_enable_adaptive_debugger, 0, "Enable match debugger"); + INT_VAR_H(matcher_debug_level, 0, "Matcher Debug Level"); + INT_VAR_H(matcher_debug_flags, 0, "Matcher Debug Flags"); + INT_VAR_H(classify_learning_debug_level, 0, "Learning Debug Level: "); + double_VAR_H(matcher_good_threshold, 0.125, "Good Match (0-1)"); + double_VAR_H(matcher_great_threshold, 0.0, "Great Match (0-1)"); + double_VAR_H(matcher_perfect_threshold, 0.02, "Perfect Match (0-1)"); + double_VAR_H(matcher_bad_match_pad, 0.15, "Bad Match Pad (0-1)"); + double_VAR_H(matcher_rating_margin, 0.1, "New template margin 
(0-1)"); + double_VAR_H(matcher_avg_noise_size, 12.0, "Avg. noise blob length: "); + INT_VAR_H(matcher_permanent_classes_min, 1, "Min # of permanent classes"); + INT_VAR_H(matcher_min_examples_for_prototyping, 3, + "Reliable Config Threshold"); + INT_VAR_H(matcher_sufficient_examples_for_prototyping, 5, + "Enable adaption even if the ambiguities have not been seen"); + double_VAR_H(matcher_clustering_max_angle_delta, 0.015, + "Maximum angle delta for prototype clustering"); + double_VAR_H(classify_misfit_junk_penalty, 0.0, + "Penalty to apply when a non-alnum is vertically out of " + "its expected textline position"); + BOOL_VAR_H(classify_enable_int_fx, 1, "Enable integer fx"); + BOOL_VAR_H(classify_enable_new_adapt_rules, 1, "Enable new adaptation rules"); + double_VAR_H(rating_scale, 1.5, "Rating scaling factor"); + double_VAR_H(certainty_scale, 20.0, "Certainty scaling factor"); + double_VAR_H(tessedit_class_miss_scale, 0.00390625, + "Scale factor for features not used"); + INT_VAR_H(classify_adapt_proto_threshold, 230, + "Threshold for good protos during adaptive 0-255"); + INT_VAR_H(classify_adapt_feature_threshold, 230, + "Threshold for good features during adaptive 0-255"); + BOOL_VAR_H(disable_character_fragments, FALSE, + "Do not include character fragments in the" + " results of the classifier"); + BOOL_VAR_H(matcher_debug_separate_windows, FALSE, + "Use two different windows for debugging the matching: " + "One for the protos and one for the features."); + STRING_VAR_H(classify_learn_debug_str, "", "Class str to debug learning"); + + /* intmatcher.cpp **********************************************************/ + INT_VAR_H(classify_class_pruner_threshold, 229, + "Class Pruner Threshold 0-255: "); + INT_VAR_H(classify_class_pruner_multiplier, 30, + "Class Pruner Multiplier 0-255: "); + INT_VAR_H(classify_cp_cutoff_strength, 7, + "Class Pruner CutoffStrength: "); + INT_VAR_H(classify_integer_matcher_multiplier, 14, + "Integer Matcher Multiplier 0-255: "); 
+ + // Use class variables to hold onto built-in templates and adapted templates. INT_TEMPLATES PreTrainedTemplates; ADAPT_TEMPLATES AdaptedTemplates; - // Successful load of inttemp allows base tesseract classfier to be used. - bool inttemp_loaded_; - /* create dummy proto and config masks for use with the built-in templates */ + // Create dummy proto and config masks for use with the built-in templates. BIT_VECTOR AllProtosOn; BIT_VECTOR PrunedProtos; BIT_VECTOR AllConfigsOn; BIT_VECTOR AllProtosOff; BIT_VECTOR AllConfigsOff; BIT_VECTOR TempProtoMask; - // External control of adaption. - BOOL_VAR_H(classify_enable_learning, true, "Enable adaptive classifier"); - // Internal control of Adaption so it doesn't work on pass2. - BOOL_VAR_H(classify_recog_devanagari, false, - "Whether recognizing a language with devanagari script."); bool EnableLearning; /* normmatch.cpp */ NORM_PROTOS *NormProtos; /* font detection ***********************************************************/ UnicityTable fontinfo_table_; UnicityTable fontset_table_; + + INT_VAR_H(il1_adaption_test, 0, "Dont adapt to i/I at beginning of word"); + BOOL_VAR_H(classify_bln_numeric_mode, 0, + "Assume the input is numbers [0-9]."); + protected: + IntegerMatcher im_; + FEATURE_DEFS_STRUCT feature_defs_; + // Must be set for the classifier to operate. Usually set in + // Tesseract::recog_word_recursive, being the main word-level entry point. + DENORM denorm_; + private: + Dict dict_; + + /* variables used to hold performance statistics */ + int AdaptiveMatcherCalls; + int BaselineClassifierCalls; + int CharNormClassifierCalls; + int AmbigClassifierCalls; + int NumWordsAdaptedTo; + int NumCharsAdaptedTo; + int NumBaselineClassesTried; + int NumCharNormClassesTried; + int NumAmbigClassesTried; + int NumClassesOutput; + int NumAdaptationsFailed; + + /* variables used to hold onto extracted features. 
This is used + to map from the old scheme in which baseline features and char norm + features are extracted separately, to the new scheme in which they + are extracted at the same time. */ + bool FeaturesHaveBeenExtracted; + bool FeaturesOK; + INT_FEATURE_ARRAY BaselineFeatures; + INT_FEATURE_ARRAY CharNormFeatures; + INT_FX_RESULT_STRUCT FXInfo; + + CLASS_CUTOFF_ARRAY CharNormCutoffs; + CLASS_CUTOFF_ARRAY BaselineCutoffs; + ScrollView* learn_debug_win_; }; } // namespace tesseract diff --git a/classify/cluster.cpp b/classify/cluster.cpp index a67cb30c07..7c8d08a773 100644 --- a/classify/cluster.cpp +++ b/classify/cluster.cpp @@ -19,6 +19,7 @@ #include "const.h" #include "cluster.h" #include "emalloc.h" +#include "helpers.h" #include "tprintf.h" #include "danerror.h" #include "freelist.h" @@ -29,7 +30,7 @@ #define FTABLE_Y 100 // Size of FTable. // Table of values approximating the cumulative F-distribution for a confidence of 1%. -double FTable[FTABLE_Y][FTABLE_X] = { +const double FTable[FTABLE_Y][FTABLE_X] = { {4052.19, 4999.52, 5403.34, 5624.62, 5763.65, 5858.97, 5928.33, 5981.10, 6022.50, 6055.85,}, {98.502, 99.000, 99.166, 99.249, 99.300, 99.333, 99.356, 99.374, 99.388, 99.399,}, {34.116, 30.816, 29.457, 28.710, 28.237, 27.911, 27.672, 27.489, 27.345, 27.229,}, @@ -158,28 +159,19 @@ double FTable[FTABLE_Y][FTABLE_X] = { #define BUCKETTABLESIZE 1024 #define NORMALEXTENT 3.0 -typedef struct -{ +struct TEMPCLUSTER { CLUSTER *Cluster; CLUSTER *Neighbor; -} - - -TEMPCLUSTER; +}; -typedef struct -{ +struct STATISTICS { FLOAT32 AvgVariance; FLOAT32 *CoVariance; FLOAT32 *Min; // largest negative distance from the mean FLOAT32 *Max; // largest positive distance from the mean -} - - -STATISTICS; +}; -typedef struct -{ +struct BUCKETS { DISTRIBUTION Distribution; // distribution being tested for uinT32 SampleCount; // # of samples in histogram FLOAT64 Confidence; // confidence level of test @@ -188,20 +180,21 @@ typedef struct uinT16 Bucket[BUCKETTABLESIZE];// mapping 
to histogram buckets uinT32 *Count; // frequency of occurence histogram FLOAT32 *ExpectedCount; // expected histogram -} - - -BUCKETS; +}; -typedef struct -{ +struct CHISTRUCT{ uinT16 DegreesOfFreedom; FLOAT64 Alpha; FLOAT64 ChiSquared; -} - +}; -CHISTRUCT; +// For use with KDWalk / MakePotentialClusters +struct ClusteringContext { + HEAP *heap; // heap used to hold temp clusters, "best" on top + TEMPCLUSTER *candidates; // array of potential clusters + KDTREE *tree; // kd-tree to be searched for neighbors + inT32 next; // next candidate to be used +}; typedef FLOAT64 (*DENSITYFUNC) (inT32); typedef FLOAT64 (*SOLVEFUNC) (CHISTRUCT *, double); @@ -211,13 +204,6 @@ typedef FLOAT64 (*SOLVEFUNC) (CHISTRUCT *, double); #define Abs(N) ( ( (N) < 0 ) ? ( -(N) ) : (N) ) //--------------Global Data Definitions and Declarations---------------------- -/* the following variables are declared as global so that routines which -are called from the kd-tree walker can get to them. */ -static HEAP *Heap; -static TEMPCLUSTER *TempCluster; -static KDTREE *Tree; -static inT32 CurrentTemp; - /* the following variables describe a discrete normal distribution which is used by NormalDensity() and NormalBucket(). The constant NORMALEXTENT determines how many standard @@ -226,15 +212,12 @@ static inT32 CurrentTemp; deviations and x=BUCKETTABLESIZE is mapped to +NORMALEXTENT standard deviations. 
*/ #define SqrtOf2Pi 2.506628275 -static FLOAT64 NormalStdDev = BUCKETTABLESIZE / (2.0 * NORMALEXTENT); -static FLOAT64 NormalVariance = -(BUCKETTABLESIZE * BUCKETTABLESIZE) / (4.0 * NORMALEXTENT * NORMALEXTENT); -static FLOAT64 NormalMagnitude = -(2.0 * NORMALEXTENT) / (SqrtOf2Pi * BUCKETTABLESIZE); -static FLOAT64 NormalMean = BUCKETTABLESIZE / 2; - -// keep a list of histogram buckets to minimize recomputing them -static LIST OldBuckets[] = { NIL, NIL, NIL }; +static const FLOAT64 kNormalStdDev = BUCKETTABLESIZE / (2.0 * NORMALEXTENT); +static const FLOAT64 kNormalVariance = + (BUCKETTABLESIZE * BUCKETTABLESIZE) / (4.0 * NORMALEXTENT * NORMALEXTENT); +static const FLOAT64 kNormalMagnitude = + (2.0 * NORMALEXTENT) / (SqrtOf2Pi * BUCKETTABLESIZE); +static const FLOAT64 kNormalMean = BUCKETTABLESIZE / 2; /* define lookup tables used to compute the number of histogram buckets that should be used for a given number of samples. */ @@ -242,19 +225,21 @@ static LIST OldBuckets[] = { NIL, NIL, NIL }; #define MAXBUCKETS 39 #define MAXDEGREESOFFREEDOM MAXBUCKETS -static uinT32 CountTable[LOOKUPTABLESIZE] = { +static const uinT32 kCountTable[LOOKUPTABLESIZE] = { MINSAMPLES, 200, 400, 600, 800, 1000, 1500, 2000 -}; -static uinT16 BucketsTable[LOOKUPTABLESIZE] = { +}; // number of samples + +static const uinT16 kBucketsTable[LOOKUPTABLESIZE] = { MINBUCKETS, 16, 20, 24, 27, 30, 35, MAXBUCKETS -}; +}; // number of buckets /*------------------------------------------------------------------------- Private Function Prototypes --------------------------------------------------------------------------*/ void CreateClusterTree(CLUSTERER *Clusterer); -void MakePotentialClusters(CLUSTER *Cluster, VISIT Order, inT32 Level); +void MakePotentialClusters(ClusteringContext *context, CLUSTER *Cluster, + inT32 Level); CLUSTER *FindNearestNeighbor(KDTREE *Tree, CLUSTER *Cluster, @@ -324,7 +309,8 @@ PROTOTYPE *NewSimpleProto(inT16 N, CLUSTER *Cluster); BOOL8 Independent (PARAM_DESC 
ParamDesc[], inT16 N, FLOAT32 * CoVariance, FLOAT32 Independence); -BUCKETS *GetBuckets(DISTRIBUTION Distribution, +BUCKETS *GetBuckets(CLUSTERER* clusterer, + DISTRIBUTION Distribution, uinT32 SampleCount, FLOAT64 Confidence); @@ -363,14 +349,15 @@ BOOL8 DistributionOK(BUCKETS *Buckets); void FreeStatistics(STATISTICS *Statistics); -void FreeBuckets(BUCKETS *Buckets); +void FreeBuckets(CLUSTERER* clusterer, + BUCKETS *Buckets); void FreeCluster(CLUSTER *Cluster); uinT16 DegreesOfFreedom(DISTRIBUTION Distribution, uinT16 HistogramBuckets); -int NumBucketsMatch(void *arg1, //BUCKETS *Histogram, - void *arg2); //uinT16 *DesiredNumberOfBuckets); +int NumBucketsMatch(void *arg1, // BUCKETS *Histogram, + void *arg2); // uinT16 *DesiredNumberOfBuckets); int ListEntryMatch(void *arg1, void *arg2); @@ -378,8 +365,8 @@ void AdjustBuckets(BUCKETS *Buckets, uinT32 NewSampleCount); void InitBuckets(BUCKETS *Buckets); -int AlphaMatch(void *arg1, //CHISTRUCT *ChiStruct, - void *arg2); //CHISTRUCT *SearchKey); +int AlphaMatch(void *arg1, // CHISTRUCT *ChiStruct, + void *arg2); // CHISTRUCT *SearchKey); CHISTRUCT *NewChiStruct(uinT16 DegreesOfFreedom, FLOAT64 Alpha); @@ -400,7 +387,6 @@ double InvertMatrix(const float* input, int size, float* inv); /** MakeClusterer ********************************************************** Parameters: SampleSize number of dimensions in feature space ParamDesc description of each dimension -Globals: None Operation: This routine creates a new clusterer data structure, initializes it, and returns a pointer to it. Return: pointer to the new clusterer data structure @@ -408,7 +394,7 @@ Exceptions: None History: 5/29/89, DSJ, Created. 
****************************************************************************/ CLUSTERER * -MakeClusterer (inT16 SampleSize, PARAM_DESC ParamDesc[]) { +MakeClusterer (inT16 SampleSize, const PARAM_DESC ParamDesc[]) { CLUSTERER *Clusterer; int i; @@ -420,7 +406,7 @@ MakeClusterer (inT16 SampleSize, PARAM_DESC ParamDesc[]) { // init fields which will not be used initially Clusterer->Root = NULL; - Clusterer->ProtoList = NIL; + Clusterer->ProtoList = NIL_LIST; // maintain a copy of param descriptors in the clusterer data structure Clusterer->ParamDesc = @@ -439,10 +425,12 @@ MakeClusterer (inT16 SampleSize, PARAM_DESC ParamDesc[]) { // allocate a kd tree to hold the samples Clusterer->KDTree = MakeKDTree (SampleSize, ParamDesc); - // execute hook for monitoring clustering operation - // (*ClustererCreationHook)( Clusterer ); + // keep a list of histogram buckets to minimize recomputing them + Clusterer->bucket_cache[0] = NIL_LIST; + Clusterer->bucket_cache[1] = NIL_LIST; + Clusterer->bucket_cache[2] = NIL_LIST; - return (Clusterer); + return Clusterer; } // MakeClusterer @@ -450,7 +438,6 @@ MakeClusterer (inT16 SampleSize, PARAM_DESC ParamDesc[]) { Parameters: Clusterer clusterer data structure to add sample to Feature feature to be added to clusterer CharID unique ident. of char that sample came from -Globals: None Operation: This routine creates a new sample data structure to hold the specified feature. This sample is added to the clusterer data structure (so that it knows which samples are to be @@ -501,7 +488,6 @@ MakeSample (CLUSTERER * Clusterer, FLOAT32 Feature[], inT32 CharID) { /** ClusterSamples *********************************************************** Parameters: Clusterer data struct containing samples to be clustered Config parameters which control clustering process -Globals: None Operation: This routine first checks to see if the samples in this clusterer have already been clustered before; if so, it does not bother to recreate the cluster tree. 
It simply recomputes @@ -523,7 +509,7 @@ LIST ClusterSamples(CLUSTERER *Clusterer, CLUSTERCONFIG *Config) { //deallocate the old prototype list if one exists FreeProtoList (&Clusterer->ProtoList); - Clusterer->ProtoList = NIL; + Clusterer->ProtoList = NIL_LIST; //compute prototypes starting at the root node in the tree ComputePrototypes(Clusterer, Config); @@ -533,7 +519,6 @@ LIST ClusterSamples(CLUSTERER *Clusterer, CLUSTERCONFIG *Config) { /** FreeClusterer ************************************************************* Parameters: Clusterer pointer to data structure to be freed -Globals: None Operation: This routine frees all of the memory allocated to the specified data structure. It will not, however, free the memory used by the prototype list. The pointers to @@ -562,7 +547,6 @@ void FreeClusterer(CLUSTERER *Clusterer) { /** FreeProtoList ************************************************************ Parameters: ProtoList pointer to list of prototypes to be freed -Globals: None Operation: This routine frees all of the memory allocated to the specified list of prototypes. The clusters which are pointed to by the prototypes are not freed. @@ -577,7 +561,6 @@ void FreeProtoList(LIST *ProtoList) { /** FreePrototype ************************************************************ Parameters: Prototype prototype data structure to be deallocated -Globals: None Operation: This routine deallocates the memory consumed by the specified prototype and modifies the corresponding cluster so that it is no longer marked as a prototype. The cluster is NOT @@ -612,7 +595,6 @@ void FreePrototype(void *arg) { //PROTOTYPE *Prototype) /** NextSample ************************************************************ Parameters: SearchState ptr to list containing clusters to be searched -Globals: None Operation: This routine is used to find all of the samples which belong to a cluster. It starts by removing the top cluster on the cluster list (SearchState). 
If this cluster is @@ -629,7 +611,7 @@ History: 6/16/89, DSJ, Created. CLUSTER *NextSample(LIST *SearchState) { CLUSTER *Cluster; - if (*SearchState == NIL) + if (*SearchState == NIL_LIST) return (NULL); Cluster = (CLUSTER *) first_node (*SearchState); *SearchState = pop (*SearchState); @@ -645,7 +627,6 @@ CLUSTER *NextSample(LIST *SearchState) { /** Mean *********************************************************** Parameters: Proto prototype to return mean of Dimension dimension whose mean is to be returned -Globals: none Operation: This routine returns the mean of the specified prototype in the indicated dimension. Return: Mean of Prototype in Dimension @@ -660,7 +641,6 @@ FLOAT32 Mean(PROTOTYPE *Proto, uinT16 Dimension) { /** StandardDeviation ************************************************* Parameters: Proto prototype to return standard deviation of Dimension dimension whose stddev is to be returned -Globals: none Operation: This routine returns the standard deviation of the prototype in the indicated dimension. Return: Standard deviation of Prototype in Dimension @@ -693,10 +673,6 @@ FLOAT32 StandardDeviation(PROTOTYPE *Proto, uinT16 Dimension) { ----------------------------------------------------------------------------*/ /** CreateClusterTree ******************************************************* Parameters: Clusterer data structure holdings samples to be clustered -Globals: Tree kd-tree holding samples - TempCluster array of temporary clusters - CurrentTemp index of next temp cluster to be used - Heap heap used to hold temp clusters - "best" on top Operation: This routine performs a bottoms-up clustering on the samples held in the kd-tree of the Clusterer data structure. The result is a cluster tree. Each node in the tree represents @@ -711,25 +687,22 @@ Exceptions: None History: 5/29/89, DSJ, Created. 
******************************************************************************/ void CreateClusterTree(CLUSTERER *Clusterer) { + ClusteringContext context; HEAPENTRY HeapEntry; TEMPCLUSTER *PotentialCluster; - // save the kd-tree in a global variable so kd-tree walker can get at it - Tree = Clusterer->KDTree; - - // allocate memory to to hold all of the "potential" clusters - TempCluster = (TEMPCLUSTER *) - Emalloc (Clusterer->NumberOfSamples * sizeof (TEMPCLUSTER)); - CurrentTemp = 0; - // each sample and its nearest neighbor form a "potential" cluster // save these in a heap with the "best" potential clusters on top - Heap = MakeHeap (Clusterer->NumberOfSamples); - KDWalk (Tree, (void_proc) MakePotentialClusters); + context.tree = Clusterer->KDTree; + context.candidates = (TEMPCLUSTER *) + Emalloc(Clusterer->NumberOfSamples * sizeof(TEMPCLUSTER)); + context.next = 0; + context.heap = MakeHeap(Clusterer->NumberOfSamples); + KDWalk(context.tree, (void_proc)MakePotentialClusters, &context); // form potential clusters into actual clusters - always do "best" first - while (GetTopOfHeap (Heap, &HeapEntry) != EMPTY) { - PotentialCluster = (TEMPCLUSTER *) (HeapEntry.Data); + while (GetTopOfHeap(context.heap, &HeapEntry) != EMPTY) { + PotentialCluster = (TEMPCLUSTER *)HeapEntry.Data; // if main cluster of potential cluster is already in another cluster // then we don't need to worry about it @@ -741,67 +714,61 @@ void CreateClusterTree(CLUSTERER *Clusterer) { // then we must find a new nearest neighbor else if (PotentialCluster->Neighbor->Clustered) { PotentialCluster->Neighbor = - FindNearestNeighbor (Tree, PotentialCluster->Cluster, - &(HeapEntry.Key)); + FindNearestNeighbor(context.tree, PotentialCluster->Cluster, + &HeapEntry.Key); if (PotentialCluster->Neighbor != NULL) { - HeapStore(Heap, &HeapEntry); + HeapStore(context.heap, &HeapEntry); } } // if neither cluster is already clustered, form permanent cluster else { PotentialCluster->Cluster = - 
MakeNewCluster(Clusterer, PotentialCluster); + MakeNewCluster(Clusterer, PotentialCluster); PotentialCluster->Neighbor = - FindNearestNeighbor (Tree, PotentialCluster->Cluster, - &(HeapEntry.Key)); + FindNearestNeighbor(context.tree, PotentialCluster->Cluster, + &HeapEntry.Key); if (PotentialCluster->Neighbor != NULL) { - HeapStore(Heap, &HeapEntry); + HeapStore(context.heap, &HeapEntry); } } } // the root node in the cluster tree is now the only node in the kd-tree - Clusterer->Root = (CLUSTER *) RootOf (Clusterer->KDTree); + Clusterer->Root = (CLUSTER *) RootOf(Clusterer->KDTree); // free up the memory used by the K-D tree, heap, and temp clusters - FreeKDTree(Tree); + FreeKDTree(context.tree); Clusterer->KDTree = NULL; - FreeHeap(Heap); - memfree(TempCluster); + FreeHeap(context.heap); + memfree(context.candidates); } // CreateClusterTree /** MakePotentialClusters ************************************************** -Parameters: Cluster current cluster being visited in kd-tree walk - Order order in which cluster is being visited - Level level of this cluster in the kd-tree -Globals: Tree kd-tree to be searched for neighbors - TempCluster array of temporary clusters - CurrentTemp index of next temp cluster to be used - Heap heap used to hold temp clusters - "best" on top -Operation: This routine is designed to be used in concert with the + Parameters: + context ClusteringContext (see definition above) + Cluster current cluster being visited in kd-tree walk + Level level of this cluster in the kd-tree + Operation: + This routine is designed to be used in concert with the KDWalk routine. It will create a potential cluster for each sample in the kd-tree that is being walked. This potential cluster will then be pushed on the heap. -Return: none -Exceptions: none -History: 5/29/89, DSJ, Created. - 7/13/89, DSJ, Removed visibility of kd-tree node data struct. 
******************************************************************************/ -void MakePotentialClusters(CLUSTER *Cluster, VISIT Order, inT32 Level) { +void MakePotentialClusters(ClusteringContext *context, + CLUSTER *Cluster, inT32 Level) { HEAPENTRY HeapEntry; - - if ((Order == preorder) || (Order == leaf)) { - TempCluster[CurrentTemp].Cluster = Cluster; - HeapEntry.Data = (char *) &(TempCluster[CurrentTemp]); - TempCluster[CurrentTemp].Neighbor = - FindNearestNeighbor (Tree, TempCluster[CurrentTemp].Cluster, - &(HeapEntry.Key)); - if (TempCluster[CurrentTemp].Neighbor != NULL) { - HeapStore(Heap, &HeapEntry); - CurrentTemp++; - } + int next = context->next; + context->candidates[next].Cluster = Cluster; + HeapEntry.Data = (char *) &(context->candidates[next]); + context->candidates[next].Neighbor = + FindNearestNeighbor(context->tree, + context->candidates[next].Cluster, + &HeapEntry.Key); + if (context->candidates[next].Neighbor != NULL) { + HeapStore(context->heap, &HeapEntry); + context->next++; } } // MakePotentialClusters @@ -810,7 +777,6 @@ void MakePotentialClusters(CLUSTER *Cluster, VISIT Order, inT32 Level) { Parameters: Tree kd-tree to search in for nearest neighbor Cluster cluster whose nearest neighbor is to be found Distance ptr to variable to report distance found -Globals: none Operation: This routine searches the specified kd-tree for the nearest neighbor of the specified cluster. It actually uses the kd routines to find the 2 nearest neighbors since one of them @@ -824,19 +790,19 @@ History: 5/29/89, DSJ, Created. 
7/13/89, DSJ, Removed visibility of kd-tree node data struct ********************************************************************************/ CLUSTER * -FindNearestNeighbor (KDTREE * Tree, CLUSTER * Cluster, FLOAT32 * Distance) +FindNearestNeighbor(KDTREE * Tree, CLUSTER * Cluster, FLOAT32 * Distance) #define MAXNEIGHBORS 2 #define MAXDISTANCE MAX_FLOAT32 { CLUSTER *Neighbor[MAXNEIGHBORS]; FLOAT32 Dist[MAXNEIGHBORS]; - inT32 NumberOfNeighbors; + int NumberOfNeighbors; inT32 i; CLUSTER *BestNeighbor; // find the 2 nearest neighbors of the cluster - NumberOfNeighbors = KDNearestNeighborSearch - (Tree, Cluster->Mean, MAXNEIGHBORS, MAXDISTANCE, Neighbor, Dist); + KDNearestNeighborSearch(Tree, Cluster->Mean, MAXNEIGHBORS, MAXDISTANCE, + &NumberOfNeighbors, (void **)Neighbor, Dist); // search for the nearest neighbor that is not the cluster itself *Distance = MAXDISTANCE; @@ -847,14 +813,13 @@ FindNearestNeighbor (KDTREE * Tree, CLUSTER * Cluster, FLOAT32 * Distance) BestNeighbor = Neighbor[i]; } } - return (BestNeighbor); + return BestNeighbor; } // FindNearestNeighbor /** MakeNewCluster ************************************************************* Parameters: Clusterer current clustering environment TempCluster potential cluster to make permanent -Globals: none Operation: This routine creates a new permanent cluster from the clusters specified in TempCluster. 
The 2 clusters in TempCluster are marked as "clustered" and deleted from @@ -868,9 +833,8 @@ CLUSTER *MakeNewCluster(CLUSTERER *Clusterer, TEMPCLUSTER *TempCluster) { CLUSTER *Cluster; // allocate the new cluster and initialize it - Cluster = (CLUSTER *) Emalloc (sizeof (CLUSTER) + - (Clusterer->SampleSize - - 1) * sizeof (FLOAT32)); + Cluster = (CLUSTER *) Emalloc( + sizeof(CLUSTER) + (Clusterer->SampleSize - 1) * sizeof(FLOAT32)); Cluster->Clustered = FALSE; Cluster->Prototype = FALSE; Cluster->Left = TempCluster->Cluster; @@ -880,18 +844,18 @@ CLUSTER *MakeNewCluster(CLUSTERER *Clusterer, TEMPCLUSTER *TempCluster) { // mark the old clusters as "clustered" and delete them from the kd-tree Cluster->Left->Clustered = TRUE; Cluster->Right->Clustered = TRUE; - KDDelete (Clusterer->KDTree, Cluster->Left->Mean, Cluster->Left); - KDDelete (Clusterer->KDTree, Cluster->Right->Mean, Cluster->Right); + KDDelete(Clusterer->KDTree, Cluster->Left->Mean, Cluster->Left); + KDDelete(Clusterer->KDTree, Cluster->Right->Mean, Cluster->Right); // compute the mean and sample count for the new cluster Cluster->SampleCount = - MergeClusters (Clusterer->SampleSize, Clusterer->ParamDesc, - Cluster->Left->SampleCount, Cluster->Right->SampleCount, - Cluster->Mean, Cluster->Left->Mean, Cluster->Right->Mean); + MergeClusters(Clusterer->SampleSize, Clusterer->ParamDesc, + Cluster->Left->SampleCount, Cluster->Right->SampleCount, + Cluster->Mean, Cluster->Left->Mean, Cluster->Right->Mean); // add the new cluster to the KD tree - KDStore (Clusterer->KDTree, Cluster->Mean, Cluster); - return (Cluster); + KDStore(Clusterer->KDTree, Cluster->Mean, Cluster); + return Cluster; } // MakeNewCluster @@ -901,7 +865,6 @@ Parameters: N # of dimensions (size of arrays) n1, n2 number of samples in each old cluster m array to hold mean of new cluster m1, m2 arrays containing means of old clusters -Globals: None Operation: This routine merges two clusters into one larger cluster. 
To do this it computes the number of samples in the new cluster and the mean of the new cluster. The ParamDesc @@ -911,14 +874,13 @@ Return: The number of samples in the new cluster. Exceptions: None History: 5/31/89, DSJ, Created. *********************************************************************************/ -inT32 -MergeClusters (inT16 N, -register PARAM_DESC ParamDesc[], -register inT32 n1, -register inT32 n2, -register FLOAT32 m[], -register FLOAT32 m1[], register FLOAT32 m2[]) { - register inT32 i, n; +inT32 MergeClusters(inT16 N, + PARAM_DESC ParamDesc[], + inT32 n1, + inT32 n2, + FLOAT32 m[], + FLOAT32 m1[], FLOAT32 m2[]) { + inT32 i, n; n = n1 + n2; for (i = N; i > 0; i--, ParamDesc++, m++, m1++, m2++) { @@ -942,14 +904,13 @@ register FLOAT32 m1[], register FLOAT32 m2[]) { else *m = (n1 * *m1 + n2 * *m2) / n; } - return (n); + return n; } // MergeClusters /** ComputePrototypes ******************************************************* Parameters: Clusterer data structure holding cluster tree Config parameters used to control prototype generation -Globals: None Operation: This routine decides which clusters in the cluster tree should be represented by prototypes, forms a list of these prototypes, and places the list in the Clusterer data @@ -959,23 +920,23 @@ Exceptions: None History: 5/30/89, DSJ, Created. 
*******************************************************************************/ void ComputePrototypes(CLUSTERER *Clusterer, CLUSTERCONFIG *Config) { - LIST ClusterStack = NIL; + LIST ClusterStack = NIL_LIST; CLUSTER *Cluster; PROTOTYPE *Prototype; // use a stack to keep track of clusters waiting to be processed // initially the only cluster on the stack is the root cluster if (Clusterer->Root != NULL) - ClusterStack = push (NIL, Clusterer->Root); + ClusterStack = push (NIL_LIST, Clusterer->Root); // loop until we have analyzed all clusters which are potential prototypes - while (ClusterStack != NIL) { + while (ClusterStack != NIL_LIST) { // remove the next cluster to be analyzed from the stack // try to make a prototype from the cluster // if successful, put it on the proto list, else split the cluster Cluster = (CLUSTER *) first_node (ClusterStack); ClusterStack = pop (ClusterStack); - Prototype = MakePrototype (Clusterer, Config, Cluster); + Prototype = MakePrototype(Clusterer, Config, Cluster); if (Prototype != NULL) { Clusterer->ProtoList = push (Clusterer->ProtoList, Prototype); } @@ -988,10 +949,10 @@ void ComputePrototypes(CLUSTERER *Clusterer, CLUSTERCONFIG *Config) { /** MakePrototype *********************************************************** -Parameters: Clusterer data structure holding cluster tree +Parameters: + Clusterer data structure holding cluster tree Config parameters used to control prototype generation Cluster cluster to be made into a prototype -Globals: None Operation: This routine attempts to create a prototype from the specified cluster that conforms to the distribution specified in Config. 
If there are too few samples in the @@ -1014,28 +975,27 @@ PROTOTYPE *MakePrototype(CLUSTERER *Clusterer, // filter out clusters which contain samples from the same character if (MultipleCharSamples (Clusterer, Cluster, Config->MaxIllegal)) - return (NULL); + return NULL; // compute the covariance matrix and ranges for the cluster Statistics = - ComputeStatistics (Clusterer->SampleSize, Clusterer->ParamDesc, Cluster); + ComputeStatistics(Clusterer->SampleSize, Clusterer->ParamDesc, Cluster); // check for degenerate clusters which need not be analyzed further // note that the MinSamples test assumes that all clusters with multiple // character samples have been removed (as above) - Proto = MakeDegenerateProto (Clusterer->SampleSize, Cluster, Statistics, - Config->ProtoStyle, - (inT32) (Config->MinSamples * - Clusterer->NumChar)); + Proto = MakeDegenerateProto( + Clusterer->SampleSize, Cluster, Statistics, Config->ProtoStyle, + (inT32) (Config->MinSamples * Clusterer->NumChar)); if (Proto != NULL) { FreeStatistics(Statistics); - return (Proto); + return Proto; } // check to ensure that all dimensions are independent - if (!Independent (Clusterer->ParamDesc, Clusterer->SampleSize, - Statistics->CoVariance, Config->Independence)) { + if (!Independent(Clusterer->ParamDesc, Clusterer->SampleSize, + Statistics->CoVariance, Config->Independence)) { FreeStatistics(Statistics); - return (NULL); + return NULL; } if (HOTELLING && Config->ProtoStyle == elliptical) { @@ -1047,34 +1007,35 @@ PROTOTYPE *MakePrototype(CLUSTERER *Clusterer, } // create a histogram data structure used to evaluate distributions - Buckets = GetBuckets (normal, Cluster->SampleCount, Config->Confidence); + Buckets = GetBuckets(Clusterer, normal, Cluster->SampleCount, + Config->Confidence); // create a prototype based on the statistics and test it switch (Config->ProtoStyle) { case spherical: - Proto = MakeSphericalProto (Clusterer, Cluster, Statistics, Buckets); + Proto = MakeSphericalProto(Clusterer, 
Cluster, Statistics, Buckets); break; case elliptical: - Proto = MakeEllipticalProto (Clusterer, Cluster, Statistics, Buckets); + Proto = MakeEllipticalProto(Clusterer, Cluster, Statistics, Buckets); break; case mixed: - Proto = MakeMixedProto (Clusterer, Cluster, Statistics, Buckets, - Config->Confidence); + Proto = MakeMixedProto(Clusterer, Cluster, Statistics, Buckets, + Config->Confidence); break; case automatic: - Proto = MakeSphericalProto (Clusterer, Cluster, Statistics, Buckets); + Proto = MakeSphericalProto(Clusterer, Cluster, Statistics, Buckets); if (Proto != NULL) break; - Proto = MakeEllipticalProto (Clusterer, Cluster, Statistics, Buckets); + Proto = MakeEllipticalProto(Clusterer, Cluster, Statistics, Buckets); if (Proto != NULL) break; - Proto = MakeMixedProto (Clusterer, Cluster, Statistics, Buckets, - Config->Confidence); + Proto = MakeMixedProto(Clusterer, Cluster, Statistics, Buckets, + Config->Confidence); break; } - FreeBuckets(Buckets); + FreeBuckets(Clusterer, Buckets); FreeStatistics(Statistics); - return (Proto); + return Proto; } // MakePrototype @@ -1084,7 +1045,6 @@ Parameters: N number of dimensions Statistics statistical info about cluster Style type of prototype to be generated MinSamples minimum number of samples in a cluster -Globals: None Operation: This routine checks for clusters which are degenerate and therefore cannot be analyzed in a statistically valid way. 
A cluster is defined as degenerate if it does not have at @@ -1134,7 +1094,6 @@ Parameters: Clusterer data struct containing samples being clustered Config provides the magic number of samples that make a good cluster Cluster cluster to be made into an elliptical prototype Statistics statistical info about cluster -Globals: None Operation: This routine tests the specified cluster to see if ** * there is a statistically significant difference between * the sub-clusters that would be made if the cluster were to @@ -1242,7 +1201,6 @@ Parameters: Clusterer data struct containing samples being clustered Cluster cluster to be made into a spherical prototype Statistics statistical info about cluster Buckets histogram struct used to analyze distribution -Globals: None Operation: This routine tests the specified cluster to see if it can be approximated by a spherical normal distribution. If it can be, then a new prototype is formed and returned to the @@ -1281,7 +1239,6 @@ Parameters: Clusterer data struct containing samples being clustered Cluster cluster to be made into an elliptical prototype Statistics statistical info about cluster Buckets histogram struct used to analyze distribution -Globals: None Operation: This routine tests the specified cluster to see if it can be approximated by an elliptical normal distribution. 
If it can be, then a new prototype is formed and returned to the @@ -1317,12 +1274,12 @@ PROTOTYPE *MakeEllipticalProto(CLUSTERER *Clusterer, /** MakeMixedProto *********************************************************** -Parameters: Clusterer data struct containing samples being clustered +Parameters: + Clusterer data struct containing samples being clustered Cluster cluster to be made into a prototype Statistics statistical info about cluster NormalBuckets histogram struct used to analyze distribution Confidence confidence level for alternate distributions -Globals: None Operation: This routine tests each dimension of the specified cluster to see what distribution would best approximate that dimension. Each dimension is compared to the following distributions @@ -1360,7 +1317,7 @@ PROTOTYPE *MakeMixedProto(CLUSTERER *Clusterer, if (RandomBuckets == NULL) RandomBuckets = - GetBuckets (D_random, Cluster->SampleCount, Confidence); + GetBuckets(Clusterer, D_random, Cluster->SampleCount, Confidence); MakeDimRandom (i, Proto, &(Clusterer->ParamDesc[i])); FillBuckets (RandomBuckets, Cluster, i, &(Clusterer->ParamDesc[i]), Proto->Mean[i], Proto->Variance.Elliptical[i]); @@ -1369,7 +1326,7 @@ PROTOTYPE *MakeMixedProto(CLUSTERER *Clusterer, if (UniformBuckets == NULL) UniformBuckets = - GetBuckets (uniform, Cluster->SampleCount, Confidence); + GetBuckets(Clusterer, uniform, Cluster->SampleCount, Confidence); MakeDimUniform(i, Proto, Statistics); FillBuckets (UniformBuckets, Cluster, i, &(Clusterer->ParamDesc[i]), Proto->Mean[i], Proto->Variance.Elliptical[i]); @@ -1383,9 +1340,9 @@ PROTOTYPE *MakeMixedProto(CLUSTERER *Clusterer, Proto = NULL; } if (UniformBuckets != NULL) - FreeBuckets(UniformBuckets); + FreeBuckets(Clusterer, UniformBuckets); if (RandomBuckets != NULL) - FreeBuckets(RandomBuckets); + FreeBuckets(Clusterer, RandomBuckets); return (Proto); } // MakeMixedProto @@ -1394,7 +1351,6 @@ PROTOTYPE *MakeMixedProto(CLUSTERER *Clusterer, Parameters: i index of 
dimension to be changed Proto prototype whose dimension is to be altered ParamDesc description of specified dimension -Globals: None Operation: This routine alters the ith dimension of the specified mixed prototype to be D_random. Return: None @@ -1420,7 +1376,6 @@ void MakeDimRandom(uinT16 i, PROTOTYPE *Proto, PARAM_DESC *ParamDesc) { Parameters: i index of dimension to be changed Proto prototype whose dimension is to be altered Statistics statistical info about prototype -Globals: None Operation: This routine alters the ith dimension of the specified mixed prototype to be uniform. Return: None @@ -1451,7 +1406,6 @@ void MakeDimUniform(uinT16 i, PROTOTYPE *Proto, STATISTICS *Statistics) { Parameters: N number of dimensions ParamDesc array of dimension descriptions Cluster cluster whose stats are to be computed -Globals: None Operation: This routine searches the cluster tree for all leaf nodes which are samples in the specified cluster. It computes a full covariance matrix for these samples as well as @@ -1544,7 +1498,6 @@ ComputeStatistics (inT16 N, PARAM_DESC ParamDesc[], CLUSTER * Cluster) { Parameters: N number of dimensions Cluster cluster to be made into a spherical prototype Statistics statistical info about samples in cluster -Globals: None Operation: This routine creates a spherical prototype data structure to approximate the samples in the specified cluster. Spherical prototypes have a single variance which is @@ -1580,7 +1533,6 @@ PROTOTYPE *NewSphericalProto(uinT16 N, Parameters: N number of dimensions Cluster cluster to be made into an elliptical prototype Statistics statistical info about samples in cluster -Globals: None Operation: This routine creates an elliptical prototype data structure to approximate the samples in the specified cluster. Elliptical prototypes have a variance for each dimension. 
@@ -1623,7 +1575,6 @@ PROTOTYPE *NewEllipticalProto(inT16 N, Parameters: N number of dimensions Cluster cluster to be made into a mixed prototype Statistics statistical info about samples in cluster -Globals: None Operation: This routine creates a mixed prototype data structure to approximate the samples in the specified cluster. Mixed prototypes can have different distributions for @@ -1653,7 +1604,6 @@ PROTOTYPE *NewMixedProto(inT16 N, CLUSTER *Cluster, STATISTICS *Statistics) { /** NewSimpleProto *********************************************************** Parameters: N number of dimensions Cluster cluster to be made into a prototype -Globals: None Operation: This routine allocates memory to hold a simple prototype data structure, i.e. one without independent distributions and variances for each dimension. @@ -1686,7 +1636,6 @@ Parameters: ParamDesc descriptions of each feature space dimension N number of dimensions CoVariance ptr to a covariance matrix Independence max off-diagonal correlation coefficient -Globals: None Operation: This routine returns TRUE if the specified covariance matrix indicates that all N dimensions are independent of one another. One dimension is judged to be independent of @@ -1735,10 +1684,11 @@ inT16 N, FLOAT32 * CoVariance, FLOAT32 Independence) { /** GetBuckets ************************************************************** -Parameters: Distribution type of probability distribution to test for + Parameters: + Clusterer which keeps a bucket_cache for us. + Distribution type of probability distribution to test for SampleCount number of samples that are available Confidence probability of a Type I error -Globals: none Operation: This routine returns a histogram data structure which can be used by other routines to place samples into histogram buckets, and then apply a goodness of fit test to the @@ -1751,43 +1701,45 @@ Return: Bucket data structure Exceptions: none History: Thu Aug 3 12:58:10 1989, DSJ, Created. 
*****************************************************************************/ -BUCKETS *GetBuckets(DISTRIBUTION Distribution, +BUCKETS *GetBuckets(CLUSTERER* clusterer, + DISTRIBUTION Distribution, uinT32 SampleCount, FLOAT64 Confidence) { - uinT16 NumberOfBuckets; - BUCKETS *Buckets; - // search for an old bucket structure with the same number of buckets - NumberOfBuckets = OptimumNumberOfBuckets (SampleCount); - Buckets = (BUCKETS *) first_node (search (OldBuckets[(int) Distribution], - &NumberOfBuckets, NumBucketsMatch)); + LIST *bucket_cache = clusterer->bucket_cache; + uinT16 NumberOfBuckets = OptimumNumberOfBuckets(SampleCount); + BUCKETS *Buckets = (BUCKETS *) first_node(search( + bucket_cache[(int)Distribution], &NumberOfBuckets, + NumBucketsMatch)); // if a matching bucket structure is found, delete it from the list if (Buckets != NULL) { - OldBuckets[(int) Distribution] = - delete_d (OldBuckets[(int) Distribution], Buckets, ListEntryMatch); + bucket_cache[(int) Distribution] = + delete_d(bucket_cache[(int) Distribution], Buckets, ListEntryMatch); if (SampleCount != Buckets->SampleCount) AdjustBuckets(Buckets, SampleCount); if (Confidence != Buckets->Confidence) { Buckets->Confidence = Confidence; - Buckets->ChiSquared = ComputeChiSquared - (DegreesOfFreedom (Distribution, Buckets->NumberOfBuckets), - Confidence); + Buckets->ChiSquared = ComputeChiSquared( + DegreesOfFreedom(Distribution, Buckets->NumberOfBuckets), + Confidence); } InitBuckets(Buckets); + } else { + // otherwise create a new structure + Buckets = MakeBuckets(Distribution, SampleCount, Confidence); } - else // otherwise create a new structure - Buckets = MakeBuckets (Distribution, SampleCount, Confidence); - return (Buckets); + return Buckets; } // GetBuckets /** Makebuckets ************************************************************* -Parameters: Distribution type of probability distribution to test for +Parameters: + Distribution type of probability distribution to test for SampleCount 
number of samples that are available Confidence probability of a Type I error -Globals: None -Operation: This routine creates a histogram data structure which can +Operation: + This routine creates a histogram data structure which can be used by other routines to place samples into histogram buckets, and then apply a goodness of fit test to the histogram data to determine if the samples belong to the @@ -1804,7 +1756,7 @@ History: 6/4/89, DSJ, Created. BUCKETS *MakeBuckets(DISTRIBUTION Distribution, uinT32 SampleCount, FLOAT64 Confidence) { - static DENSITYFUNC DensityFunction[] = + const DENSITYFUNC DensityFunction[] = { NormalDensity, UniformDensity, UniformDensity }; int i, j; BUCKETS *Buckets; @@ -1818,14 +1770,14 @@ BUCKETS *MakeBuckets(DISTRIBUTION Distribution, BOOL8 Symmetrical; // allocate memory needed for data structure - Buckets = (BUCKETS *) Emalloc (sizeof (BUCKETS)); + Buckets = (BUCKETS *) Emalloc(sizeof(BUCKETS)); Buckets->NumberOfBuckets = OptimumNumberOfBuckets (SampleCount); Buckets->SampleCount = SampleCount; Buckets->Confidence = Confidence; Buckets->Count = - (uinT32 *) Emalloc (Buckets->NumberOfBuckets * sizeof (uinT32)); + (uinT32 *) Emalloc(Buckets->NumberOfBuckets * sizeof (uinT32)); Buckets->ExpectedCount = - (FLOAT32 *) Emalloc (Buckets->NumberOfBuckets * sizeof (FLOAT32)); + (FLOAT32 *) Emalloc(Buckets->NumberOfBuckets * sizeof (FLOAT32)); // initialize simple fields Buckets->Distribution = Distribution; @@ -1836,8 +1788,8 @@ BUCKETS *MakeBuckets(DISTRIBUTION Distribution, // all currently defined distributions are symmetrical Symmetrical = TRUE; - Buckets->ChiSquared = ComputeChiSquared - (DegreesOfFreedom (Distribution, Buckets->NumberOfBuckets), Confidence); + Buckets->ChiSquared = ComputeChiSquared( + DegreesOfFreedom(Distribution, Buckets->NumberOfBuckets), Confidence); if (Symmetrical) { // allocate buckets so that all have approx. 
equal probability @@ -1874,13 +1826,13 @@ BUCKETS *MakeBuckets(DISTRIBUTION Distribution, // copy upper half of distribution to lower half for (i = 0, j = BUCKETTABLESIZE - 1; i < j; i++, j--) Buckets->Bucket[i] = - Mirror (Buckets->Bucket[j], Buckets->NumberOfBuckets); + Mirror(Buckets->Bucket[j], Buckets->NumberOfBuckets); // copy upper half of expected counts to lower half for (i = 0, j = Buckets->NumberOfBuckets - 1; i <= j; i++, j--) Buckets->ExpectedCount[i] += Buckets->ExpectedCount[j]; } - return (Buckets); + return Buckets; } // MakeBuckets @@ -1889,10 +1841,7 @@ uinT16 OptimumNumberOfBuckets(uinT32 SampleCount) { /* ** Parameters: ** SampleCount number of samples to be tested - ** Globals: - ** CountTable lookup table for number of samples - ** BucketsTable lookup table for necessary number of buckets - ** Operation: + ** Operation: ** This routine computes the optimum number of histogram ** buckets that should be used in a chi-squared goodness of ** fit test for the specified number of samples. 
The optimum @@ -1912,18 +1861,18 @@ uinT16 OptimumNumberOfBuckets(uinT32 SampleCount) { uinT8 Last, Next; FLOAT32 Slope; - if (SampleCount < CountTable[0]) - return (BucketsTable[0]); + if (SampleCount < kCountTable[0]) + return kBucketsTable[0]; for (Last = 0, Next = 1; Next < LOOKUPTABLESIZE; Last++, Next++) { - if (SampleCount <= CountTable[Next]) { - Slope = (FLOAT32) (BucketsTable[Next] - BucketsTable[Last]) / - (FLOAT32) (CountTable[Next] - CountTable[Last]); - return ((uinT16) (BucketsTable[Last] + - Slope * (SampleCount - CountTable[Last]))); + if (SampleCount <= kCountTable[Next]) { + Slope = (FLOAT32) (kBucketsTable[Next] - kBucketsTable[Last]) / + (FLOAT32) (kCountTable[Next] - kCountTable[Last]); + return ((uinT16) (kBucketsTable[Last] + + Slope * (SampleCount - kCountTable[Last]))); } } - return (BucketsTable[Last]); + return kBucketsTable[Last]; } // OptimumNumberOfBuckets @@ -1934,7 +1883,6 @@ ComputeChiSquared (uinT16 DegreesOfFreedom, FLOAT64 Alpha) ** Parameters: ** DegreesOfFreedom determines shape of distribution ** Alpha probability of right tail - ** Globals: none ** Operation: ** This routine computes the chi-squared value which will ** leave a cumulative probability of Alpha in the right tail @@ -1961,10 +1909,7 @@ ComputeChiSquared (uinT16 DegreesOfFreedom, FLOAT64 Alpha) // limit the minimum alpha that can be used - if alpha is too small // it may not be possible to compute chi-squared. 
- if (Alpha < MINALPHA) - Alpha = MINALPHA; - if (Alpha > 1.0) - Alpha = 1.0; + Alpha = ClipToRange(Alpha, MINALPHA, 1.0); if (Odd (DegreesOfFreedom)) DegreesOfFreedom++; @@ -1998,13 +1943,13 @@ FLOAT64 NormalDensity(inT32 x) { ** Parameters: ** x number to compute the normal probability density for ** Globals: - ** NormalMean mean of a discrete normal distribution - ** NormalVariance variance of a discrete normal distribution - ** NormalMagnitude magnitude of a discrete normal distribution + ** kNormalMean mean of a discrete normal distribution + ** kNormalVariance variance of a discrete normal distribution + ** kNormalMagnitude magnitude of a discrete normal distribution ** Operation: ** This routine computes the probability density function ** of a discrete normal distribution defined by the global - ** variables NormalMean, NormalVariance, and NormalMagnitude. + ** variables kNormalMean, kNormalVariance, and kNormalMagnitude. ** Normal magnitude could, of course, be computed in terms of ** the normal variance but it is precomputed for efficiency. ** Return: @@ -2016,9 +1961,8 @@ FLOAT64 NormalDensity(inT32 x) { */ FLOAT64 Distance; - Distance = x - NormalMean; - return (NormalMagnitude * - exp (-0.5 * Distance * Distance / NormalVariance)); + Distance = x - kNormalMean; + return kNormalMagnitude * exp(-0.5 * Distance * Distance / kNormalVariance); } // NormalDensity @@ -2027,8 +1971,6 @@ FLOAT64 UniformDensity(inT32 x) { /* ** Parameters: ** x number to compute the uniform probability density for - ** Globals: - ** BUCKETTABLESIZE determines range of distribution ** Operation: ** This routine computes the probability density function ** of a uniform distribution at the specified point. 
The @@ -2043,9 +1985,9 @@ FLOAT64 UniformDensity(inT32 x) { static FLOAT64 UniformDistributionDensity = (FLOAT64) 1.0 / BUCKETTABLESIZE; if ((x >= 0.0) && (x <= BUCKETTABLESIZE)) - return (UniformDistributionDensity); + return UniformDistributionDensity; else - return ((FLOAT64) 0.0); + return (FLOAT64) 0.0; } // UniformDensity @@ -2056,8 +1998,6 @@ FLOAT64 Integral(FLOAT64 f1, FLOAT64 f2, FLOAT64 Dx) { ** f1 value of function at x1 ** f2 value of function at x2 ** Dx x2 - x1 (should always be positive) - ** Globals: - ** None ** Operation: ** This routine computes a trapezoidal approximation to the ** integral of a function over a small delta in x. @@ -2068,7 +2008,7 @@ FLOAT64 Integral(FLOAT64 f1, FLOAT64 f2, FLOAT64 Dx) { ** History: ** 6/5/89, DSJ, Created. */ - return ((f1 + f2) * Dx / 2.0); + return (f1 + f2) * Dx / 2.0; } // Integral @@ -2087,8 +2027,6 @@ void FillBuckets(BUCKETS *Buckets, ** ParamDesc description of the dimension ** Mean "mean" of the distribution ** StdDev "standard deviation" of the distribution - ** Globals: - ** None ** Operation: ** This routine counts the number of cluster samples which ** fall within the various histogram buckets in Buckets. Only @@ -2173,14 +2111,10 @@ uinT16 NormalBucket(PARAM_DESC *ParamDesc, ** x value to be normalized ** Mean mean of normal distribution ** StdDev standard deviation of normal distribution - ** Globals: - ** NormalMean mean of discrete normal distribution - ** NormalStdDev standard deviation of discrete normal dist. - ** BUCKETTABLESIZE range of the discrete distribution ** Operation: ** This routine determines which bucket x falls into in the - ** discrete normal distribution defined by NormalMean - ** and NormalStdDev. x values which exceed the range of + ** discrete normal distribution defined by kNormalMean + ** and kNormalStdDev. x values which exceed the range of ** the discrete distribution are clipped. 
** Return: ** Bucket number into which x falls @@ -2199,12 +2133,12 @@ uinT16 NormalBucket(PARAM_DESC *ParamDesc, x += ParamDesc->Range; } - X = ((x - Mean) / StdDev) * NormalStdDev + NormalMean; + X = ((x - Mean) / StdDev) * kNormalStdDev + kNormalMean; if (X < 0) - return ((uinT16) 0); + return 0; if (X > BUCKETTABLESIZE - 1) return ((uinT16) (BUCKETTABLESIZE - 1)); - return ((uinT16) floor ((FLOAT64) X)); + return (uinT16) floor((FLOAT64) X); } // NormalBucket @@ -2219,8 +2153,6 @@ uinT16 UniformBucket(PARAM_DESC *ParamDesc, ** x value to be normalized ** Mean center of range of uniform distribution ** StdDev 1/2 the range of the uniform distribution - ** Globals: - ** BUCKETTABLESIZE range of the discrete distribution ** Operation: ** This routine determines which bucket x falls into in the ** discrete uniform distribution defined by @@ -2245,10 +2177,10 @@ uinT16 UniformBucket(PARAM_DESC *ParamDesc, X = ((x - Mean) / (2 * StdDev) * BUCKETTABLESIZE + BUCKETTABLESIZE / 2.0); if (X < 0) - return ((uinT16) 0); + return 0; if (X > BUCKETTABLESIZE - 1) - return ((uinT16) (BUCKETTABLESIZE - 1)); - return ((uinT16) floor ((FLOAT64) X)); + return (uinT16) (BUCKETTABLESIZE - 1); + return (uinT16) floor((FLOAT64) X); } // UniformBucket @@ -2257,8 +2189,6 @@ BOOL8 DistributionOK(BUCKETS *Buckets) { /* ** Parameters: ** Buckets histogram data to perform chi-square test on - ** Globals: - ** None ** Operation: ** This routine performs a chi-square goodness of fit test ** on the histogram data in the Buckets data structure. 
TRUE @@ -2287,9 +2217,9 @@ BOOL8 DistributionOK(BUCKETS *Buckets) { // test to see if the difference is more than expected if (TotalDifference > Buckets->ChiSquared) - return (FALSE); + return FALSE; else - return (TRUE); + return TRUE; } // DistributionOK @@ -2298,8 +2228,6 @@ void FreeStatistics(STATISTICS *Statistics) { /* ** Parameters: ** Statistics pointer to data structure to be freed - ** Globals: - ** None ** Operation: ** This routine frees the memory used by the statistics ** data structure. @@ -2318,27 +2246,23 @@ void FreeStatistics(STATISTICS *Statistics) { //--------------------------------------------------------------------------- -void FreeBuckets(BUCKETS *Buckets) { +void FreeBuckets(CLUSTERER* clusterer, BUCKETS *buckets) { /* - ** Parameters: - ** Buckets pointer to data structure to be freed - ** Globals: none - ** Operation: - ** This routine places the specified histogram data structure - ** at the front of a list of histograms so that it can be - ** reused later if necessary. A separate list is maintained - ** for each different type of distribution. - ** Return: none - ** Exceptions: none - ** History: 6/5/89, DSJ, Created. + ** Parameters: + ** clusterer->bucket_cache + ** distribution-indexed cache of old bucket structures. + ** buckets pointer to data structure to be freed + ** Operation: + ** This routine places the specified histogram data structure + ** at the front of a list of histograms so that it can be reused + ** later if necessary. A separate list is maintained for each + ** different type of distribution. 
*/ - int Dist; - - if (Buckets != NULL) { - Dist = (int) Buckets->Distribution; - OldBuckets[Dist] = (LIST) push (OldBuckets[Dist], Buckets); + LIST *bucket_cache = clusterer->bucket_cache; + if (buckets != NULL) { + int dist = (int)buckets->Distribution; + bucket_cache[dist] = (LIST) push(bucket_cache[dist], buckets); } - } // FreeBuckets @@ -2347,8 +2271,6 @@ void FreeCluster(CLUSTER *Cluster) { /* ** Parameters: ** Cluster pointer to cluster to be freed - ** Globals: - ** None ** Operation: ** This routine frees the memory consumed by the specified ** cluster and all of its subclusters. This is done by @@ -2374,7 +2296,6 @@ uinT16 DegreesOfFreedom(DISTRIBUTION Distribution, uinT16 HistogramBuckets) { ** Parameters: ** Distribution distribution being tested for ** HistogramBuckets number of buckets in chi-square test - ** Globals: none ** Operation: ** This routine computes the degrees of freedom that should ** be used in a chi-squared test with the specified number of @@ -2400,13 +2321,12 @@ uinT16 DegreesOfFreedom(DISTRIBUTION Distribution, uinT16 HistogramBuckets) { //--------------------------------------------------------------------------- -int NumBucketsMatch(void *arg1, //BUCKETS *Histogram, - void *arg2) { //uinT16 *DesiredNumberOfBuckets) +int NumBucketsMatch(void *arg1, // BUCKETS *Histogram, + void *arg2) { // uinT16 *DesiredNumberOfBuckets) /* ** Parameters: ** Histogram current histogram being tested for a match ** DesiredNumberOfBuckets match key - ** Globals: none ** Operation: ** This routine is used to search a list of histogram data ** structures to find one with the specified number of @@ -2428,7 +2348,6 @@ int ListEntryMatch(void *arg1, //ListNode void *arg2) { //Key /* ** Parameters: none - ** Globals: none ** Operation: ** This routine is used to search a list for a list node ** whose contents match Key. 
It is called by the list @@ -2448,7 +2367,6 @@ void AdjustBuckets(BUCKETS *Buckets, uinT32 NewSampleCount) { ** Parameters: ** Buckets histogram data structure to adjust ** NewSampleCount new sample count to adjust to - ** Globals: none ** Operation: ** This routine multiplies each ExpectedCount histogram entry ** by NewSampleCount/OldSampleCount so that the histogram @@ -2477,7 +2395,6 @@ void InitBuckets(BUCKETS *Buckets) { /* ** Parameters: ** Buckets histogram data structure to init - ** Globals: none ** Operation: ** This routine sets the bucket counts in the specified histogram ** to zero. @@ -2501,7 +2418,6 @@ int AlphaMatch(void *arg1, //CHISTRUCT *ChiStruct ** Parameters: ** ChiStruct chi-squared struct being tested for a match ** SearchKey chi-squared struct that is the search key - ** Globals: none ** Operation: ** This routine is used to search a list of structures which ** hold pre-computed chi-squared values for a chi-squared @@ -2526,7 +2442,6 @@ CHISTRUCT *NewChiStruct(uinT16 DegreesOfFreedom, FLOAT64 Alpha) { ** Parameters: ** DegreesOfFreedom degrees of freedom for new chi value ** Alpha confidence level for new chi value - ** Globals: none ** Operation: ** This routine allocates a new data structure which is used ** to hold a chi-squared value along with its associated @@ -2555,7 +2470,6 @@ void *FunctionParams, FLOAT64 InitialGuess, FLOAT64 Accuracy) ** FunctionParams arbitrary data to pass to function ** InitialGuess point to start solution search at ** Accuracy maximum allowed error - ** Globals: none ** Operation: ** This routine attempts to find an x value at which Function ** goes to zero (i.e. a root of the function ). 
It will only @@ -2617,7 +2531,6 @@ FLOAT64 ChiArea(CHISTRUCT *ChiParams, FLOAT64 x) { ** Parameters: ** ChiParams contains degrees of freedom and alpha ** x value of chi-squared to evaluate - ** Globals: none ** Operation: ** This routine computes the area under a chi density curve ** from 0 to x, minus the desired area under the curve. The @@ -2665,7 +2578,6 @@ CLUSTER * Cluster, FLOAT32 MaxIllegal) ** Cluster cluster containing samples to be tested ** MaxIllegal max percentage of samples allowed to have ** more than 1 feature in the cluster - ** Globals: none ** Operation: ** This routine looks at all samples in the specified cluster. ** It computes a running estimate of the percentage of the diff --git a/classify/cluster.h b/classify/cluster.h index 4843e32df9..94f9d716cb 100644 --- a/classify/cluster.h +++ b/classify/cluster.h @@ -24,8 +24,7 @@ /*---------------------------------------------------------------------- Types ----------------------------------------------------------------------*/ -typedef struct sample -{ +typedef struct sample { unsigned Clustered:1; // TRUE if included in a higher cluster unsigned Prototype:1; // TRUE if cluster represented by a proto unsigned SampleCount:30; // number of samples in this cluster @@ -33,22 +32,15 @@ typedef struct sample struct sample *Right; // ptr to right sub-cluster inT32 CharID; // identifier of char sample came from FLOAT32 Mean[1]; // mean of cluster - SampleSize floats -} - - -CLUSTER; +} CLUSTER; typedef CLUSTER SAMPLE; // can refer to as either sample or cluster typedef enum { spherical, elliptical, mixed, automatic -} - +} PROTOSTYLE; -PROTOSTYLE; - -typedef struct // parameters to control clustering -{ +typedef struct { // parameters to control clustering PROTOSTYLE ProtoStyle; // specifies types of protos to be made FLOAT32 MinSamples; // min # of samples per proto - % of total FLOAT32 MaxIllegal; // max percentage of samples in a cluster which have @@ -56,30 +48,18 @@ typedef struct // parameters to 
control clustering FLOAT32 Independence; // desired independence between dimensions FLOAT64 Confidence; // desired confidence in prototypes created int MagicSamples; // Ideal number of samples in a cluster. -} - - -CLUSTERCONFIG; +} CLUSTERCONFIG; typedef enum { normal, uniform, D_random -} - +} DISTRIBUTION; -DISTRIBUTION; - -typedef union -{ +typedef union { FLOAT32 Spherical; FLOAT32 *Elliptical; +} FLOATUNION; -} - - -FLOATUNION; - -typedef struct -{ +typedef struct { unsigned Significant:1; // TRUE if prototype is significant unsigned Merged:1; // Merged after clustering so do not output // but kept for display purposes. If it has no @@ -96,13 +76,9 @@ typedef struct FLOATUNION Variance; // prototype variance FLOATUNION Magnitude; // magnitude of density function FLOATUNION Weight; // weight of density function -} - - -PROTOTYPE; +} PROTOTYPE; -typedef struct -{ +typedef struct { inT16 SampleSize; // number of parameters per sample PARAM_DESC *ParamDesc; // description of each parameter inT32 NumberOfSamples; // total number of samples being clustered @@ -110,28 +86,22 @@ typedef struct CLUSTER *Root; // ptr to root cluster of cluster tree LIST ProtoList; // list of prototypes inT32 NumChar; // # of characters represented by samples -} + LIST bucket_cache[3]; // cache of reusable histograms by distribution type +} CLUSTERER; - -CLUSTERER; - -typedef struct -{ +typedef struct { inT32 NumSamples; // number of samples in list inT32 MaxNumSamples; // maximum size of list SAMPLE *Sample[1]; // array of ptrs to sample data structures -} - - -SAMPLELIST; +} SAMPLELIST; // low level cluster tree analysis routines. 
-#define InitSampleSearch(S,C) (((C)==NULL)?(S=NIL):(S=push(NIL,(C)))) +#define InitSampleSearch(S,C) (((C)==NULL)?(S=NIL_LIST):(S=push(NIL_LIST,(C)))) /*-------------------------------------------------------------------------- Public Function Prototypes --------------------------------------------------------------------------*/ -CLUSTERER *MakeClusterer (inT16 SampleSize, PARAM_DESC ParamDesc[]); +CLUSTERER *MakeClusterer (inT16 SampleSize, const PARAM_DESC ParamDesc[]); SAMPLE *MakeSample (CLUSTERER * Clusterer, FLOAT32 Feature[], inT32 CharID); @@ -141,7 +111,7 @@ void FreeClusterer(CLUSTERER *Clusterer); void FreeProtoList(LIST *ProtoList); -void FreePrototype(void *arg); //PROTOTYPE *Prototype); +void FreePrototype(void *arg); // PROTOTYPE *Prototype); CLUSTER *NextSample(LIST *SearchState); diff --git a/classify/clusttool.cpp b/classify/clusttool.cpp index 2ccc383150..8bb4313b4d 100644 --- a/classify/clusttool.cpp +++ b/classify/clusttool.cpp @@ -87,13 +87,8 @@ PARAM_DESC *ReadParamDesc(FILE *File, uinT16 N) { ParamDesc[i].NonEssential = FALSE; else ParamDesc[i].NonEssential = TRUE; -#ifndef _MSC_VER - if (tess_fscanf (File, "%f%f", &(ParamDesc[i].Min), &(ParamDesc[i].Max)) != - 2) -#else if (fscanf (File, "%f%f", &(ParamDesc[i].Min), &(ParamDesc[i].Max)) != 2) -#endif DoError (ILLEGALMINMAXSPEC, "Illegal min or max specification"); ParamDesc[i].Range = ParamDesc[i].Max - ParamDesc[i].Min; ParamDesc[i].HalfRange = ParamDesc[i].Range / 2; @@ -294,11 +289,7 @@ ReadNFloats (FILE * File, uinT16 N, FLOAT32 Buffer[]) { Buffer = (FLOAT32 *) Emalloc (N * sizeof (FLOAT32)); for (i = 0; i < N; i++) { -#ifndef _MSC_VER - NumFloatsRead = tess_fscanf (File, "%f", &(Buffer[i])); -#else NumFloatsRead = fscanf (File, "%f", &(Buffer[i])); -#endif if (NumFloatsRead != 1) { if ((NumFloatsRead == EOF) && (i == 0)) return (NULL); diff --git a/classify/extract.cpp b/classify/extract.cpp index 23678a7394..8075d0342b 100644 --- a/classify/extract.cpp +++ b/classify/extract.cpp @@ 
-20,7 +20,6 @@ -----------------------------------------------------------------------------*/ #include "extract.h" #include "flexfx.h" -#include "funcdefs.h" #include "danerror.h" typedef CHAR_FEATURES (*CF_FUNC) (); @@ -50,15 +49,17 @@ int CurrentFx = DEFAULT_FX; * extraction. The caller can extract any type of features * from a blob without understanding any lower level details. * + * @param FeatureDefs definitions of feature types/extractors + * @param denorm Normalize/denormalize to access original image * @param Blob blob to extract features from - * @param LineStats statistics about line blob is in * * @return The character features extracted from Blob. * @note Exceptions: none * @note History: Sun Jan 21 10:07:28 1990, DSJ, Created. */ -CHAR_DESC ExtractBlobFeatures(TBLOB *Blob, LINE_STATS *LineStats) { - return (ExtractFlexFeatures (Blob, LineStats)); +CHAR_DESC ExtractBlobFeatures(const FEATURE_DEFS_STRUCT &FeatureDefs, + const DENORM& denorm, TBLOB *Blob) { + return (ExtractFlexFeatures(FeatureDefs, Blob, denorm)); } /* ExtractBlobFeatures */ /*----------------------------------------------------------------------------- diff --git a/classify/extract.h b/classify/extract.h index 751c90cc1d..8443931579 100644 --- a/classify/extract.h +++ b/classify/extract.h @@ -18,14 +18,16 @@ #ifndef EXTRACT_H #define EXTRACT_H -#include "fxdefs.h" #include "featdefs.h" #include +class DENORM; + /*----------------------------------------------------------------------------- Public Function Prototypes -----------------------------------------------------------------------------*/ -CHAR_DESC ExtractBlobFeatures(TBLOB *Blob, LINE_STATS *LineStats); +CHAR_DESC ExtractBlobFeatures(const FEATURE_DEFS_STRUCT &FeatureDefs, + const DENORM& denorm, TBLOB *Blob); /*--------------------------------------------------------------------------- Private Function Prototypes diff --git a/classify/featdefs.cpp b/classify/featdefs.cpp index 514a4273b8..fc8a12e1d0 100644 --- 
a/classify/featdefs.cpp +++ b/classify/featdefs.cpp @@ -64,8 +64,8 @@ DefineFeature (PicoFeatDesc, 2, 1, 1, MAX_UINT8, "Pico", "pf", PicoFeatParams) StartParamDesc (CharNormParams) DefineParam (0, 0, -0.25, 0.75) DefineParam (0, 1, 0.0, 1.0) -DefineParam (0, 1, 0.0, 1.0) -DefineParam (0, 1, 0.0, 1.0) +DefineParam (0, 0, 0.0, 1.0) +DefineParam (0, 0, 0.0, 1.0) EndParamDesc /* now define the feature type itself (see features.h for info about each parameter).*/ @@ -83,22 +83,23 @@ EndParamDesc DefineFeature (OutlineFeatDesc, 3, 1, 1, MAX_OUTLINE_FEATURES, "Outline", "of", OutlineFeatParams) -/*----------------------------------------------------------------------------- - Global Data Definitions and Declarations ------------------------------------------------------------------------------*/ -FEATURE_DEFS_STRUCT FeatureDefs = { - NUM_FEATURE_TYPES, - { - &MicroFeatureDesc, - &PicoFeatDesc, - &OutlineFeatDesc, - &CharNormDesc - } +static const FEATURE_DESC_STRUCT *DescDefs[NUM_FEATURE_TYPES] = { + &MicroFeatureDesc, + &PicoFeatDesc, + &OutlineFeatDesc, + &CharNormDesc }; /*----------------------------------------------------------------------------- Public Code -----------------------------------------------------------------------------*/ +void InitFeatureDefs(FEATURE_DEFS_STRUCT *featuredefs) { + featuredefs->NumFeatureTypes = NUM_FEATURE_TYPES; + for (int i = 0; i < NUM_FEATURE_TYPES; ++i) { + featuredefs->FeatureDesc[i] = DescDefs[i]; + } +} + /*---------------------------------------------------------------------------*/ /** * Release the memory consumed by the specified character @@ -135,7 +136,7 @@ void FreeCharDescription(CHAR_DESC CharDesc) { * @note Exceptions: none * @note History: Wed May 23 15:27:10 1990, DSJ, Created. 
*/ -CHAR_DESC NewCharDescription() { +CHAR_DESC NewCharDescription(const FEATURE_DEFS_STRUCT &FeatureDefs) { CHAR_DESC CharDesc; int i; @@ -170,7 +171,8 @@ CHAR_DESC NewCharDescription() { * @note Exceptions: none * @note History: Wed May 23 17:21:18 1990, DSJ, Created. */ -void WriteCharDescription(FILE *File, CHAR_DESC CharDesc) { +void WriteCharDescription(const FEATURE_DEFS_STRUCT &FeatureDefs, + FILE *File, CHAR_DESC CharDesc) { int Type; int NumSetsToWrite = 0; @@ -208,7 +210,8 @@ void WriteCharDescription(FILE *File, CHAR_DESC CharDesc) { * - ILLEGAL_NUM_SETS * @note History: Wed May 23 17:32:48 1990, DSJ, Created. */ -CHAR_DESC ReadCharDescription(FILE *File) { +CHAR_DESC ReadCharDescription(const FEATURE_DEFS_STRUCT &FeatureDefs, + FILE *File) { int NumSetsToRead; char ShortName[FEAT_NAME_SIZE]; CHAR_DESC CharDesc; @@ -218,10 +221,10 @@ CHAR_DESC ReadCharDescription(FILE *File) { NumSetsToRead < 0 || NumSetsToRead > FeatureDefs.NumFeatureTypes) DoError (ILLEGAL_NUM_SETS, "Illegal number of feature sets"); - CharDesc = NewCharDescription (); + CharDesc = NewCharDescription(FeatureDefs); for (; NumSetsToRead > 0; NumSetsToRead--) { fscanf (File, "%s", ShortName); - Type = ShortNameToFeatureType (ShortName); + Type = ShortNameToFeatureType(FeatureDefs, ShortName); CharDesc->FeatureSets[Type] = ReadFeatureSet (File, FeatureDefs.FeatureDesc[Type]); } @@ -231,7 +234,6 @@ CHAR_DESC ReadCharDescription(FILE *File) { /*---------------------------------------------------------------------------*/ -int ShortNameToFeatureType(const char *ShortName) { /** * Search thru all features currently defined and return * the feature type for the feature with the specified short @@ -246,6 +248,8 @@ int ShortNameToFeatureType(const char *ShortName) { * - ILLEGAL_SHORT_NAME * @note History: Wed May 23 15:36:05 1990, DSJ, Created. 
*/ +int ShortNameToFeatureType(const FEATURE_DEFS_STRUCT &FeatureDefs, + const char *ShortName) { int i; for (i = 0; i < FeatureDefs.NumFeatureTypes; i++) diff --git a/classify/featdefs.h b/classify/featdefs.h index 3f90d2c629..1149bf3736 100644 --- a/classify/featdefs.h +++ b/classify/featdefs.h @@ -35,37 +35,43 @@ feature consists of a number of parameters. All features within a feature set contain the same number of parameters.*/ -typedef struct -{ +struct CHAR_DESC_STRUCT { uinT32 NumFeatureSets; FEATURE_SET FeatureSets[NUM_FEATURE_TYPES]; -} CHAR_DESC_STRUCT; +}; typedef CHAR_DESC_STRUCT *CHAR_DESC; -typedef struct -{ +struct FEATURE_DEFS_STRUCT { uinT32 NumFeatureTypes; - FEATURE_DESC FeatureDesc[NUM_FEATURE_TYPES]; - FEATURE_EXT_STRUCT* FeatureExtractors[NUM_FEATURE_TYPES]; + const FEATURE_DESC_STRUCT* FeatureDesc[NUM_FEATURE_TYPES]; + const FEATURE_EXT_STRUCT* FeatureExtractors[NUM_FEATURE_TYPES]; int FeatureEnabled[NUM_FEATURE_TYPES]; -} FEATURE_DEFS_STRUCT; +}; typedef FEATURE_DEFS_STRUCT *FEATURE_DEFS; /*---------------------------------------------------------------------- Generic functions for manipulating character descriptions ----------------------------------------------------------------------*/ +void InitFeatureDefs(FEATURE_DEFS_STRUCT *featuredefs); + void FreeCharDescription(CHAR_DESC CharDesc); -CHAR_DESC NewCharDescription(); +CHAR_DESC NewCharDescription(const FEATURE_DEFS_STRUCT &FeatureDefs); -void WriteCharDescription(FILE *File, CHAR_DESC CharDesc); +void WriteCharDescription(const FEATURE_DEFS_STRUCT &FeatureDefs, + FILE *File, CHAR_DESC CharDesc); -CHAR_DESC ReadCharDescription(FILE *File); +CHAR_DESC ReadCharDescription(const FEATURE_DEFS_STRUCT &FeatureDefs, + FILE *File); -int ShortNameToFeatureType(const char *ShortName); +int ShortNameToFeatureType(const FEATURE_DEFS_STRUCT &FeatureDefs, + const char *ShortName); /**---------------------------------------------------------------------------- Global Data Definitions and 
Declarations ----------------------------------------------------------------------------**/ -extern FEATURE_DEFS_STRUCT FeatureDefs; +extern const FEATURE_DESC_STRUCT MicroFeatureDesc; +extern const FEATURE_DESC_STRUCT PicoFeatDesc; +extern const FEATURE_DESC_STRUCT CharNormDesc; +extern const FEATURE_DESC_STRUCT OutlineFeatDesc; #endif diff --git a/classify/flexfx.cpp b/classify/flexfx.cpp index b54c815aa6..44f975e7e7 100644 --- a/classify/flexfx.cpp +++ b/classify/flexfx.cpp @@ -28,11 +28,12 @@ Public Code ----------------------------------------------------------------------------**/ /*---------------------------------------------------------------------------*/ -CHAR_DESC ExtractFlexFeatures(TBLOB *Blob, LINE_STATS *LineStats) { +CHAR_DESC ExtractFlexFeatures(const FEATURE_DEFS_STRUCT &FeatureDefs, + TBLOB *Blob, const DENORM& denorm) { /* ** Parameters: ** Blob blob to extract features from - ** LineStats statistics about text line Blob is on + ** denorm control parameter for feature extractor ** Globals: none ** Operation: Allocate a new character descriptor and fill it in by ** calling all feature extractors which are enabled. 
@@ -43,13 +44,13 @@ CHAR_DESC ExtractFlexFeatures(TBLOB *Blob, LINE_STATS *LineStats) { int Type; CHAR_DESC CharDesc; - CharDesc = NewCharDescription (); + CharDesc = NewCharDescription(FeatureDefs); for (Type = 0; Type < CharDesc->NumFeatureSets; Type++) if (FeatureDefs.FeatureExtractors[Type] != NULL && FeatureDefs.FeatureExtractors[Type]->Extractor != NULL) { CharDesc->FeatureSets[Type] = - (FeatureDefs.FeatureExtractors[Type])->Extractor(Blob, LineStats); + (FeatureDefs.FeatureExtractors[Type])->Extractor(Blob, denorm); if (CharDesc->FeatureSets[Type] == NULL) { FreeCharDescription(CharDesc); return NULL; diff --git a/classify/flexfx.h b/classify/flexfx.h index 3d9e90dedb..52e45a6a35 100644 --- a/classify/flexfx.h +++ b/classify/flexfx.h @@ -21,13 +21,13 @@ /**---------------------------------------------------------------------------- Include Files and Type Defines ----------------------------------------------------------------------------**/ -#include "fxdefs.h" #include "featdefs.h" #include /**---------------------------------------------------------------------------- Public Function Prototypes ----------------------------------------------------------------------------**/ -CHAR_DESC ExtractFlexFeatures(TBLOB *Blob, LINE_STATS *LineStats); +CHAR_DESC ExtractFlexFeatures(const FEATURE_DEFS_STRUCT &FeatureDefs, + TBLOB *Blob, const DENORM& denorm); #endif diff --git a/classify/float2int.cpp b/classify/float2int.cpp index 0098b7a375..8c9c58e9fd 100644 --- a/classify/float2int.cpp +++ b/classify/float2int.cpp @@ -93,7 +93,6 @@ void Classify::ComputeIntCharNormArray( } } /* ComputeIntCharNormArray */ -} // namespace tesseract /*---------------------------------------------------------------------------*/ /** @@ -110,7 +109,8 @@ void Classify::ComputeIntCharNormArray( * @note Exceptions: none * @note History: Wed Feb 20 10:58:45 1991, DSJ, Created. 
*/ -void ComputeIntFeatures(FEATURE_SET Features, INT_FEATURE_ARRAY IntFeatures) { +void Classify::ComputeIntFeatures(FEATURE_SET Features, + INT_FEATURE_ARRAY IntFeatures) { int Fid; FEATURE Feature; FLOAT32 YShift; @@ -132,3 +132,4 @@ void ComputeIntFeatures(FEATURE_SET Features, INT_FEATURE_ARRAY IntFeatures) { IntFeatures[Fid].CP_misses = 0; } } /* ComputeIntFeatures */ +} // namespace tesseract diff --git a/classify/float2int.h b/classify/float2int.h index 380c081351..bc422e67ba 100644 --- a/classify/float2int.h +++ b/classify/float2int.h @@ -35,6 +35,4 @@ void ClearCharNormArray(INT_TEMPLATES Templates, CLASS_NORMALIZATION_ARRAY CharNormArray); } // namespace tesseract. -void ComputeIntFeatures(FEATURE_SET Features, INT_FEATURE_ARRAY IntFeatures); - #endif diff --git a/classify/fpoint.h b/classify/fpoint.h index 74112761ec..e3fca911aa 100644 --- a/classify/fpoint.h +++ b/classify/fpoint.h @@ -21,7 +21,7 @@ /**---------------------------------------------------------------------------- Include Files and Type Defines ----------------------------------------------------------------------------**/ -#include "general.h" +#include "host.h" #include #include diff --git a/classify/fxdefs.cpp b/classify/fxdefs.cpp index a8c45c3f89..d809c3656f 100644 --- a/classify/fxdefs.cpp +++ b/classify/fxdefs.cpp @@ -25,51 +25,20 @@ /*----------------------------------------------------------------------------- Global Data Definitions and Declarations -----------------------------------------------------------------------------*/ -/** flag to control learn mode vs. classify mode */ -int ExtractMode; - // Definitions of extractors separated from feature definitions. 
-DefineFeatureExt (MicroFeatureExt, ExtractMicros) -DefineFeatureExt (PicoFeatExt, NULL) -DefineFeatureExt (CharNormExt, ExtractCharNormFeatures) -DefineFeatureExt (OutlineFeatExt, NULL) +const FEATURE_EXT_STRUCT MicroFeatureExt = { ExtractMicros }; +const FEATURE_EXT_STRUCT PicoFeatExt = { NULL }; +const FEATURE_EXT_STRUCT OutlineFeatExt = { NULL }; +const FEATURE_EXT_STRUCT CharNormExt = { ExtractCharNormFeatures }; -FEATURE_EXT_STRUCT* ExtractorDefs[NUM_FEATURE_TYPES] = { +const FEATURE_EXT_STRUCT* ExtractorDefs[NUM_FEATURE_TYPES] = { &MicroFeatureExt, &PicoFeatExt, &OutlineFeatExt, &CharNormExt }; - -/*----------------------------------------------------------------------------- - Public Code ------------------------------------------------------------------------------*/ -/*---------------------------------------------------------------------------*/ -void SetupExtractors() { +void SetupExtractors(FEATURE_DEFS_STRUCT *FeatureDefs) { for (int i = 0; i < NUM_FEATURE_TYPES; ++i) - FeatureDefs.FeatureExtractors[i] = ExtractorDefs[i]; + FeatureDefs->FeatureExtractors[i] = ExtractorDefs[i]; } - -/** - * This routine copies the relavent fields from the - * Row struct to the LineStats struct. - * - * Globals: - * - none - * - * @param Row text row to get line statistics from - * @param[out] LineStats place to put line statistics - * - * @note History: Mon Mar 11 10:38:43 1991, DSJ, Created. 
- */ -void GetLineStatsFromRow(TEXTROW *Row, LINE_STATS *LineStats) { - LineStats->Baseline = &(Row->baseline); - LineStats->XHeightLine = &(Row->xheight); - LineStats->xheight = Row->lineheight; - LineStats->AscRise = Row->ascrise; - LineStats->DescDrop = Row->descdrop; - LineStats->TextRow = Row; /* kludge - only needed by fx for */ - /* fast matcher - remove later */ - -} /* GetLineStatsFromRow */ diff --git a/classify/fxdefs.h b/classify/fxdefs.h index 41a624087b..67f1b2b3e9 100644 --- a/classify/fxdefs.h +++ b/classify/fxdefs.h @@ -18,76 +18,8 @@ #ifndef FXDEFS_H #define FXDEFS_H -/*----------------------------------------------------------------------------- - Include Files and Type Defines ------------------------------------------------------------------------------*/ -#include "tessclas.h" -#include "general.h" +#include "featdefs.h" -/* define different modes for feature extractor - learning vs. classifying */ -#define LEARN_MODE 0 -#define CLASSIFY_MODE 1 +void SetupExtractors(FEATURE_DEFS_STRUCT *FeatureDefs); -/** define a data structure to hold line statistics. These line statistics - are used to normalize character outlines to a standard size and position - relative to the baseline of the text. */ -typedef struct -{ - SPLINE_SPEC *Baseline; /**< collection of splines describing baseline */ - SPLINE_SPEC *XHeightLine; /**< collection of splines describing x-height */ - FLOAT32 xheight; /**< avg. distance from x-height to baseline */ - FLOAT32 AscRise; /**< avg. distance from ascenders to x-height */ - FLOAT32 DescDrop; /**< avg. distance from baseline to descenders */ - /* always a negative number */ - TEXTROW *TextRow; /**< kludge - only needed by fx for fast matcher */ - /* should be removed later */ -} - - -LINE_STATS; - -/** define a generic character description as a char pointer. In reality, - it will be a pointer to some data structure. 
Paired feature - extractors/matchers need to agree on the data structure to be used, - however, the high level classifier does not need to know the details - of this data structure. */ -typedef char *CHAR_FEATURES; - -/*----------------------------------------------------------------------------- - Macros ------------------------------------------------------------------------------*/ -/** macro to change and monitor the mode of the feature extractor. - In general, learn mode smears features which would otherwise be discrete - in nature; classify mode does not.*/ -#define SetExtractMode(M) (ExtractMode = (M)) -#define EnterLearnMode (SetExtractMode (LEARN_MODE)) -#define EnterClassifyMode (SetExtractMode (CLASSIFY_MODE)) - -/*---------------------------------------------------------------------------- - Public Function Prototypes ------------------------------------------------------------------------------*/ -void SetupExtractors(); - -void GetLineStatsFromRow(TEXTROW *Row, LINE_STATS *LineStats); - -/* -#if defined(__STDC__) || defined(__cplusplus) -# define _ARGS(s) s -#else -# define _ARGS(s) () -#endif*/ - -/* fxdefs.c -void GetLineStatsFromRow - _ARGS((TEXTROW *Row, - LINE_STATS *LineStats)); - -#undef _ARGS -*/ - -/*----------------------------------------------------------------------------- - Global Data Definitions and Declarations ------------------------------------------------------------------------------*/ -/** flag to control learn mode vs. 
classify mode */ -extern int ExtractMode; #endif diff --git a/classify/fxid.h b/classify/fxid.h deleted file mode 100644 index 79b3c35475..0000000000 --- a/classify/fxid.h +++ /dev/null @@ -1,69 +0,0 @@ -/* -*-C-*- - ******************************************************************************** - * - * File: fxid.h (Formerly fxid.h) - * Description: Feature extractor related includes - * Author: Mark Seaman, OCR Technology - * Created: Thu Oct 19 14:59:51 1989 - * Modified: Thu Jan 31 16:57:07 1991 (Dan Johnson) danj@hpgrlj - * Language: C - * Package: N/A - * Status: Experimental (Do Not Distribute) - * - * (c) Copyright 1989, Hewlett-Packard Company. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. - * - ******************************************************************************** - - *************************************************************************** - ********** A T T E N T I O N ******** - *************************************************************************** - -This module is divided into two sections the declarations for this module -(i.e. the function signatures) are listed in 'fxid.h'. The definitions -(i.e. the actual code and variables) are listed in 'fxid1.c' and 'fxid2.c'. -The appropriate piece of code you need for your application should be -included in your top level program file. 
- -*/ - -#ifndef FXID_H -#define FXID_H - -#include "extern.h" -#include "const.h" -#include "tessclas.h" -#include "oldlist.h" - -#define FEATURE_MATCHER 1 /**< Use micro-features */ - -#define WO_UNSCALED 0 /**< first square scaled fx */ -#define STATISTICAL_WO 1 /**< new wo */ -#define MICRO_FEATURES 2 /**< microfeature extractor */ -#define WO_SCALED 3 /**< wiseowl scaled to baseline */ -#define MAX_FX 3 /**< no of working fx-ers */ -#define NO_FUNCTION 0 /**< special value for nothing */ - -/* This file contains declarations of the top-level feature -extractor functions as used by the Classify process*/ - -typedef LIST (*LISTFUNC) (); - -//extern FUNCPTR word_matchers[MAX_FX]; - -//extern LISTFUNC blob_matchers[MAX_FX]; - -//extern FUNCPTR feature_learners[MAX_FX]; - -extern char fx_ids[MAX_FX]; /*one-char ids */ - -extern char *fx_names[MAX_FX]; -#endif diff --git a/classify/hideedge.cpp b/classify/hideedge.cpp deleted file mode 100644 index 5bf8e0a986..0000000000 --- a/classify/hideedge.cpp +++ /dev/null @@ -1,33 +0,0 @@ -/* -*-C-*- - ******************************************************************************** - * - * File: hideedge.c (Formerly hideedge.c) - * Description: - * Author: Mark Seaman, OCR Technology - * Created: Fri Oct 16 14:37:00 1987 - * Modified: Tue Apr 30 10:38:29 1991 (Mark Seaman) marks@hpgrlt - * Language: C - * Package: N/A - * Status: Reusable Software Component - * - * (c) Copyright 1987, Hewlett-Packard Company. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- ** See the License for the specific language governing permissions and - ** limitations under the License. - * - ******************************************************************************** - */ - -/* ----------------------------------------------------------------------- - I n c l u d e s ----------------------------------------------------------------------- -*/ - -#include "hideedge.h" diff --git a/classify/hideedge.h b/classify/hideedge.h deleted file mode 100644 index ba2f7b43fe..0000000000 --- a/classify/hideedge.h +++ /dev/null @@ -1,76 +0,0 @@ -/* -*-C-*- - ******************************************************************************** - * - * File: hideedge.h (Formerly hideedge.h) - * Description: - * Author: Mark Seaman, SW Productivity - * Created: Fri Oct 16 14:37:00 1987 - * Modified: Tue Apr 30 12:49:57 1991 (Mark Seaman) marks@hpgrlt - * Language: C - * Package: N/A - * Status: Reusable Software Component - * - * (c) Copyright 1987, Hewlett-Packard Company. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. 
- * - ******************************************************************************** - */ - -#ifndef HIDEEDGE_H -#define HIDEEDGE_H - -/* ----------------------------------------------------------------------- - I n c l u d e s ----------------------------------------------------------------------- -*/ - -#include "general.h" - -/* ----------------------------------------------------------------------- - M a c r o s ----------------------------------------------------------------------- -*/ - -/********************************************************************** - * is_hidden_edge - * - * Check to see if this edge is a hidden edge. This will prohibit - * feature extraction and display functions on this edge. The - * argument should be of type (EDGEPT*). - **********************************************************************/ - -#define is_hidden_edge(edge) \ -/*(hidden_edges &&*/ (edge->flags[0]) /*) */ - -/********************************************************************** - * hide_edge - * - * Make this edge a hidden edge. This will prohibit feature extraction - * and display functions on this edge. The argument should be of type - * (EDGEPT*). - **********************************************************************/ - -#define hide_edge(edge) \ -/*if (hidden_edges)*/ edge->flags[0] = TRUE - -/********************************************************************** - * reveal_edge - * - * Make this edge a unhidden edge. This will prohibit feature extraction - * and display functions on this edge. The argument should be of type - * (EDGEPT*). 
- **********************************************************************/ - -#define reveal_edge(edge) \ -/*if (hidden_edges)*/ edge->flags[0] = FALSE -#endif diff --git a/classify/intfx.cpp b/classify/intfx.cpp index b5d1d740dc..0d67b86620 100644 --- a/classify/intfx.cpp +++ b/classify/intfx.cpp @@ -21,6 +21,8 @@ #include "intfx.h" #include "intmatcher.h" #include "const.h" +#include "helpers.h" +#include "ccutil.h" #ifdef __UNIX__ #endif @@ -28,7 +30,7 @@ Private Function Prototypes ----------------------------------------------------------------------------**/ int SaveFeature(); -uinT8 TableLookup(); +uinT8 BinaryAnglePlusPi(inT32 Y, inT32 X); uinT8 MySqrt2(); void ClipRadius(); @@ -49,27 +51,58 @@ INT_VAR(classify_radius_gyr_max_exp, 8, ----------------------------------------------------------------------------**/ #define ATAN_TABLE_SIZE 64 +// Look up table for arc tangent containing: +// atan(0.0) ... atan(ATAN_TABLE_SIZE - 1 / ATAN_TABLE_SIZE) +// The entries are in binary degrees where a full circle is 256 binary degrees. static uinT8 AtanTable[ATAN_TABLE_SIZE]; +// Guards write access to AtanTable so we dont create it more than once. 
+tesseract::CCUtilMutex atan_table_mutex; + /**---------------------------------------------------------------------------- Public Code ----------------------------------------------------------------------------**/ /*---------------------------------------------------------------------------*/ void InitIntegerFX() { - int i; - - for (i = 0; i < ATAN_TABLE_SIZE; i++) - AtanTable[i] = - (uinT8) (atan ((i / (float) ATAN_TABLE_SIZE)) * 128.0 / PI + 0.5); - + static bool atan_table_init = false; + atan_table_mutex.Lock(); + if (!atan_table_init) { + for (int i = 0; i < ATAN_TABLE_SIZE; i++) { + AtanTable[i] = + (uinT8) (atan ((i / (float) ATAN_TABLE_SIZE)) * 128.0 / PI + 0.5); + } + atan_table_init = true; + } + atan_table_mutex.Unlock(); } /*--------------------------------------------------------------------------*/ +// Extract a set of standard-sized features from Blobs and write them out in +// two formats: baseline normalized and character normalized. +// +// We presume the Blobs are already scaled so that x-height=128 units +// +// Standard Features: +// We take all outline segments longer than 7 units and chop them into +// standard-sized segments of approximately 13 = (64 / 5) units. +// When writing these features out, we output their center and angle as +// measured counterclockwise from the vector <-1, 0> +// +// Baseline Normalized Output: +// We center the grapheme by aligning the x-coordinate of its centroid with +// x=0 and subtracting 128 from the y-coordinate. +// +// Character Normalized Output: +// We align the grapheme's centroid at the origin and scale it asymmetrically +// in x and y so that the result is vaguely square. 
+// int ExtractIntFeat(TBLOB *Blob, + const DENORM& denorm, INT_FEATURE_ARRAY BLFeat, INT_FEATURE_ARRAY CNFeat, - INT_FX_RESULT Results) { + INT_FX_RESULT Results, + inT32 *FeatureOutlineArray) { TESSLINE *OutLine; EDGEPT *Loop, *LoopStart, *Segment; @@ -78,6 +111,10 @@ int ExtractIntFeat(TBLOB *Blob, inT32 Xsum, Ysum; uinT32 Ix, Iy, LengthSum; uinT16 n; + // n - the number of features to extract from a given outline segment. + // We extract features from every outline segment longer than ~6 units. + // We chop these long segments into standard-sized features approximately + // 13 (= 64 / 5) units in length. uinT8 Theta; uinT16 NumBLFeatures, NumCNFeatures; uinT8 RxInv, RyInv; /* x.xxxxxxx * 2^Exp */ @@ -95,7 +132,10 @@ int ExtractIntFeat(TBLOB *Blob, Results->NumBL = 0; Results->NumCN = 0; - /* find Xmean, Ymean */ + // Calculate the centroid (Xmean, Ymean) for the blob. + // We use centroid (instead of center of bounding box or center of smallest + // enclosing circle) so the algorithm will not be too greatly influenced by + // small amounts of information at the edge of a character's bounding box. NumBLFeatures = 0; NumCNFeatures = 0; OutLine = Blob->outlines; @@ -117,10 +157,10 @@ int ExtractIntFeat(TBLOB *Blob, NormY = Loop->pos.y; n = 1; - if (!is_hidden_edge (Segment)) { + if (!Segment->IsHidden()) { DeltaX = NormX - LastX; DeltaY = NormY - LastY; - Length = MySqrt (DeltaX, DeltaY); + Length = MySqrt(DeltaX, DeltaY); n = ((Length << 2) + Length + 32) >> 6; if (n != 0) { Xsum += ((LastX << 1) + DeltaX) * (int) Length; @@ -145,8 +185,17 @@ int ExtractIntFeat(TBLOB *Blob, Results->Xmean = Xmean; Results->Ymean = Ymean; - /* extract Baseline normalized features, */ - /* and find 2nd moments & radius of gyration */ + // Extract Baseline normalized features, + // and find 2nd moments (Ix, Iy) & radius of gyration (Rx, Ry). 
+ // + // Ix = Sum y^2 dA, where: + // Ix: the second moment of area about the axis x + // dA = 1 for our standard-sized piece of outline + // y: the perependicular distance to the x axis + // Rx = sqrt(Ix / A) + // Note: 1 <= Rx <= height of blob / 2 + // Ry = sqrt(Iy / A) + // Note: 1 <= Ry <= width of blob / 2 Ix = 0; Iy = 0; NumBLFeatures = 0; @@ -166,22 +215,27 @@ int ExtractIntFeat(TBLOB *Blob, NormY = Loop->pos.y; n = 1; - if (!is_hidden_edge (Segment)) { + if (!Segment->IsHidden()) { DeltaX = NormX - LastX; DeltaY = NormY - LastY; - Length = MySqrt (DeltaX, DeltaY); + Length = MySqrt(DeltaX, DeltaY); n = ((Length << 2) + Length + 32) >> 6; if (n != 0) { - Theta = TableLookup (DeltaY, DeltaX); + Theta = BinaryAnglePlusPi(DeltaY, DeltaX); dX = (DeltaX << 8) / n; dY = (DeltaY << 8) / n; pfX = (LastX << 8) + (dX >> 1); pfY = (LastY << 8) + (dY >> 1); Ix += ((pfY >> 8) - Ymean) * ((pfY >> 8) - Ymean); + // TODO(eger): Hmmm... Xmean is not necessarily 0. + // Figure out if we should center against Xmean for these + // features, and if so fix Iy & SaveFeature(). 
Iy += (pfX >> 8) * (pfX >> 8); - if (SaveFeature (BLFeat, NumBLFeatures, (inT16) (pfX >> 8), - (inT16) ((pfY >> 8) - 128), - Theta) == FALSE) + if (SaveFeature(BLFeat, + NumBLFeatures, + (inT16) (pfX >> 8), + (inT16) ((pfY >> 8) - 128), + Theta) == FALSE) return FALSE; NumBLFeatures++; for (i = 1; i < n; i++) { @@ -189,9 +243,11 @@ int ExtractIntFeat(TBLOB *Blob, pfY += dY; Ix += ((pfY >> 8) - Ymean) * ((pfY >> 8) - Ymean); Iy += (pfX >> 8) * (pfX >> 8); - if (SaveFeature - (BLFeat, NumBLFeatures, (inT16) (pfX >> 8), - (inT16) ((pfY >> 8) - 128), Theta) == FALSE) + if (SaveFeature(BLFeat, + NumBLFeatures, + (inT16) (pfX >> 8), + (inT16) ((pfY >> 8) - 128), + Theta) == FALSE) return FALSE; NumBLFeatures++; } @@ -224,9 +280,23 @@ int ExtractIntFeat(TBLOB *Blob, } Results->NumBL = NumBLFeatures; - /* extract character normalized features */ + // Extract character normalized features + // + // Rescale the co-ordinates to "equalize" distribution in X and Y, making + // all of the following unichars be sized to look similar: , ' 1 i + // + // We calculate co-ordinates relative to the centroid, and then scale them + // as follows (accomplishing a scale of up to 102.4 / dimension): + // y *= 51.2 / Rx [ y scaled by 0.0 ... 102.4 / height of glyph ] + // x *= 51.2 / Ry [ x scaled by 0.0 ... 102.4 / width of glyph ] + // Although tempting to think so, this does not guarantee that our range + // is within [-102.4...102.4] x [-102.4...102.4] because (Xmean, Ymean) + // is the centroid, not the center of the bounding box. Instead, we can + // only bound the result to [-204 ... 204] x [-204 ... 
204] + // NumCNFeatures = 0; OutLine = Blob->outlines; + int OutLineIndex = -1; while (OutLine != NULL) { LoopStart = OutLine->loop; Loop = LoopStart; @@ -234,6 +304,8 @@ int ExtractIntFeat(TBLOB *Blob, LastY = (Loop->pos.y - Ymean) * RxInv; LastX >>= (inT8) RyExp; LastY >>= (inT8) RxExp; + OutLineIndex++; + /* Check for bad loops */ if ((Loop == NULL) || (Loop->next == NULL) || (Loop->next == LoopStart)) return FALSE; @@ -246,28 +318,39 @@ int ExtractIntFeat(TBLOB *Blob, NormY >>= (inT8) RxExp; n = 1; - if (!is_hidden_edge (Segment)) { + if (!Segment->IsHidden()) { DeltaX = NormX - LastX; DeltaY = NormY - LastY; - Length = MySqrt (DeltaX, DeltaY); + Length = MySqrt(DeltaX, DeltaY); n = ((Length << 2) + Length + 32) >> 6; if (n != 0) { - Theta = TableLookup (DeltaY, DeltaX); + Theta = BinaryAnglePlusPi(DeltaY, DeltaX); dX = (DeltaX << 8) / n; dY = (DeltaY << 8) / n; pfX = (LastX << 8) + (dX >> 1); pfY = (LastY << 8) + (dY >> 1); - if (SaveFeature (CNFeat, NumCNFeatures, (inT16) (pfX >> 8), - (inT16) ((pfY >> 8)), Theta) == FALSE) + if (SaveFeature(CNFeat, + NumCNFeatures, + (inT16) (pfX >> 8), + (inT16) (pfY >> 8), + Theta) == FALSE) return FALSE; + if (FeatureOutlineArray) { + FeatureOutlineArray[NumCNFeatures] = OutLineIndex; + } NumCNFeatures++; for (i = 1; i < n; i++) { pfX += dX; pfY += dY; - if (SaveFeature - (CNFeat, NumCNFeatures, (inT16) (pfX >> 8), - (inT16) ((pfY >> 8)), Theta) == FALSE) + if (SaveFeature(CNFeat, + NumCNFeatures, + (inT16) (pfX >> 8), + (inT16) (pfY >> 8), + Theta) == FALSE) return FALSE; + if (FeatureOutlineArray) { + FeatureOutlineArray[NumCNFeatures] = OutLineIndex; + } NumCNFeatures++; } } @@ -287,7 +370,10 @@ int ExtractIntFeat(TBLOB *Blob, /*--------------------------------------------------------------------------*/ -uinT8 TableLookup(inT32 Y, inT32 X) { +// Return the "binary angle" [0..255] +// made by vector as measured counterclockwise from <-1, 0> +// The order of the arguments follows the convention of atan2(3) +uinT8 
BinaryAnglePlusPi(inT32 Y, inT32 X) { inT16 Angle; uinT16 Ratio; uinT32 AbsX, AbsY; @@ -312,17 +398,17 @@ uinT8 TableLookup(inT32 Y, inT32 X) { if (Y >= 0) if (AbsX > AbsY) Angle = Angle; - else - Angle = 64 - Angle; - else if (AbsX > AbsY) - Angle = 256 - Angle; - else - Angle = 192 + Angle; + else + Angle = 64 - Angle; + else if (AbsX > AbsY) + Angle = 256 - Angle; + else + Angle = 192 + Angle; else if (Y >= 0) - if (AbsX > AbsY) - Angle = 128 - Angle; - else - Angle = 64 + Angle; + if (AbsX > AbsY) + Angle = 128 - Angle; + else + Angle = 64 + Angle; else if (AbsX > AbsY) Angle = 128 + Angle; else @@ -351,20 +437,8 @@ int SaveFeature(INT_FEATURE_ARRAY FeatureArray, X = X + 128; Y = Y + 128; - if (X > 255) - Feature->X = 255; - else if (X < 0) - Feature->X = 0; - else - Feature->X = X; - - if (Y > 255) - Feature->Y = 255; - else if (Y < 0) - Feature->Y = 0; - else - Feature->Y = Y; - + Feature->X = ClipToRange(X, 0, 255); + Feature->Y = ClipToRange(Y, 0, 255); Feature->Theta = Theta; return TRUE; @@ -372,11 +446,15 @@ int SaveFeature(INT_FEATURE_ARRAY FeatureArray, /*---------------------------------------------------------------------------*/ +// Return floor(sqrt(min(emm, x)^2 + min(emm, y)^2)) +// where emm = EvidenceMultMask. 
uinT16 MySqrt(inT32 X, inT32 Y) { register uinT16 SqRoot; register uinT32 Square; register uinT16 BitLocation; register uinT32 Sum; + const uinT32 EvidenceMultMask = + ((1 << IntegerMatcher::kIntEvidenceTruncBits) - 1); if (X < 0) X = -X; @@ -390,7 +468,7 @@ uinT16 MySqrt(inT32 X, inT32 Y) { Sum = X * X + Y * Y; - BitLocation = 1024; + BitLocation = (EvidenceMultMask + 1) << 1; SqRoot = 0; do { Square = (SqRoot | BitLocation) * (SqRoot | BitLocation); @@ -405,6 +483,8 @@ uinT16 MySqrt(inT32 X, inT32 Y) { /*--------------------------------------------------------------------------*/ +// Return two integers which can be used to express the sqrt(I/N): +// sqrt(I/N) = 51.2 * 2^(*Exp) / retval uinT8 MySqrt2(uinT16 N, uinT32 I, uinT8 *Exp) { register inT8 k; register uinT32 N2; diff --git a/classify/intfx.h b/classify/intfx.h index 0d4c3d191f..06f008b4a6 100644 --- a/classify/intfx.h +++ b/classify/intfx.h @@ -21,11 +21,12 @@ /**---------------------------------------------------------------------------- Include Files and Type Defines ----------------------------------------------------------------------------**/ -#include "tessclas.h" -#include "hideedge.h" +#include "blobs.h" #include "intproto.h" #include +class DENORM; + typedef struct { inT32 Length; /* total length of all outlines */ @@ -43,11 +44,13 @@ INT_FX_RESULT_STRUCT, *INT_FX_RESULT; void InitIntegerFX(); int ExtractIntFeat(TBLOB *Blob, + const DENORM& denorm, INT_FEATURE_ARRAY BLFeat, INT_FEATURE_ARRAY CNFeat, - INT_FX_RESULT Results); + INT_FX_RESULT Results, + inT32 *FeatureOutlineArray = 0); -uinT8 TableLookup(inT32 Y, inT32 X); +uinT8 BinaryAnglePlusPi(inT32 Y, inT32 X); int SaveFeature(INT_FEATURE_ARRAY FeatureArray, uinT16 FeatureNum, diff --git a/classify/intmatcher.cpp b/classify/intmatcher.cpp index d9108639ad..ae753eb1d4 100644 --- a/classify/intmatcher.cpp +++ b/classify/intmatcher.cpp @@ -21,7 +21,6 @@ ----------------------------------------------------------------------------*/ #include 
"intmatcher.h" #include "intproto.h" -#include "tordvars.h" #include "callcpp.h" #include "scrollview.h" #include "globals.h" @@ -33,17 +32,11 @@ #include "config_auto.h" #endif -#define CLASS_MASK_SIZE ((MAX_NUM_CLASSES*NUM_BITS_PER_CLASS \ - +BITS_PER_WERD-1)/BITS_PER_WERD) - /*---------------------------------------------------------------------------- Global Data Definitions and Declarations ----------------------------------------------------------------------------*/ -#define SE_TABLE_BITS 9 -#define SE_TABLE_SIZE 512 -#define TEMPLATE_CACHE 2 -static uinT8 SimilarityEvidenceTable[SE_TABLE_SIZE]; -static uinT8 offset_table[256] = { + +static const uinT8 offset_table[256] = { 255, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, @@ -61,7 +54,8 @@ static uinT8 offset_table[256] = { 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0 }; -static uinT8 next_table[256] = { + +static const uinT8 next_table[256] = { 0, 0, 0, 0x2, 0, 0x4, 0x4, 0x6, 0, 0x8, 0x8, 0x0a, 0x08, 0x0c, 0x0c, 0x0e, 0, 0x10, 0x10, 0x12, 0x10, 0x14, 0x14, 0x16, 0x10, 0x18, 0x18, 0x1a, 0x18, 0x1c, 0x1c, 0x1e, @@ -95,63 +89,41 @@ static uinT8 next_table[256] = { 0xf8, 0xfc, 0xfc, 0xfe }; -static uinT32 EvidenceTableMask; - -static uinT32 MultTruncShiftBits; - -static uinT32 TableTruncShiftBits; - -uinT32 EvidenceMultMask; - -static inT16 LocalMatcherMultiplier; - -INT_VAR(classify_class_pruner_threshold, 229, - "Class Pruner Threshold 0-255: "); - -INT_VAR(classify_class_pruner_multiplier, 30, - "Class Pruner Multiplier 0-255: "); - -INT_VAR(classify_integer_matcher_multiplier, 14, - "Integer Matcher Multiplier 0-255: "); - -INT_VAR(classify_int_theta_fudge, 128, - "Integer Matcher Theta Fudge 0-255: "); - -INT_VAR(classify_cp_cutoff_strength, 7, - "Class Pruner CutoffStrength: "); - -INT_VAR(classify_evidence_table_bits, 9, - "Bits in Similarity to Evidence 
Lookup 8-9: "); - -INT_VAR(classify_int_evidence_trunc_bits, 14, - "Integer Evidence Truncation Bits (Distance) 8-14: "); - -double_VAR(classify_se_exponential_multiplier, 0, - "Similarity to Evidence Table Exponential Multiplier: "); - -double_VAR(classify_similarity_center, 0.0075, - "Center of Similarity Curve: "); - -INT_VAR(classify_adapt_proto_thresh, 230, - "Threshold for good protos during adaptive 0-255: "); - -INT_VAR(classify_adapt_feature_thresh, 230, - "Threshold for good features during adaptive 0-255: "); +struct ClassPrunerData { + int *class_count_; + int *norm_count_; + int *sort_key_; + int *sort_index_; + int max_classes_; + + ClassPrunerData(int max_classes) { + // class_count_ and friends are referenced by indexing off of data in + // class pruner word sized chunks. Each pruner word is of sized + // BITS_PER_WERD and each entry is NUM_BITS_PER_CLASS, so there are + // BITS_PER_WERD / NUM_BITS_PER_CLASS entries. + // See Classify::ClassPruner in intmatcher.cpp. + max_classes_ = RoundUp( + max_classes, WERDS_PER_CP_VECTOR * BITS_PER_WERD / NUM_BITS_PER_CLASS); + class_count_ = new int[max_classes_]; + norm_count_ = new int[max_classes_]; + sort_key_ = new int[max_classes_ + 1]; + sort_index_ = new int[max_classes_ + 1]; + for (int i = 0; i < max_classes_; i++) { + class_count_[i] = 0; + } + } -BOOL_VAR(disable_character_fragments, FALSE, - "Do not include character fragments in the" - " results of the classifier"); + ~ClassPrunerData() { + delete []class_count_; + delete []norm_count_; + delete []sort_key_; + delete []sort_index_; + } -BOOL_VAR(matcher_debug_separate_windows, FALSE, - "Use two different windows for debugging the matching: " - "One for the protos and one for the features."); +}; -int protoword_lookups; -int zero_protowords; -int proto_shifts; -int set_proto_bits; -int config_shifts; -int set_config_bits; +const float IntegerMatcher::kSEExponentialMultiplier = 0.0; +const float IntegerMatcher::kSimilarityCenter = 0.0075; 
/*---------------------------------------------------------------------------- Public Code @@ -163,8 +135,7 @@ int Classify::ClassPruner(INT_TEMPLATES IntTemplates, INT_FEATURE_ARRAY Features, CLASS_NORMALIZATION_ARRAY NormalizationFactors, CLASS_CUTOFF_ARRAY ExpectedNumFeatures, - CLASS_PRUNER_RESULTS Results, - int Debug) { + CLASS_PRUNER_RESULTS Results) { /* ** Parameters: ** IntTemplates Class pruner tables @@ -178,10 +149,6 @@ int Classify::ClassPruner(INT_TEMPLATES IntTemplates, ** (by CLASS_INDEX) ** Results Sorted Array of pruned classes ** (by CLASS_ID) - ** Debug Debugger flag: 1=debugger on - ** Globals: - ** classify_class_pruner_threshold Cutoff threshold - ** classify_class_pruner_multiplier Normalization factor multiplier ** Operation: ** Prune the classes using a modified fast match table. ** Return a sorted list of classes along with the number @@ -201,26 +168,19 @@ int Classify::ClassPruner(INT_TEMPLATES IntTemplates, int NumPruners; inT32 feature_index; //current feature - static int ClassCount[MAX_NUM_CLASSES]; - static int NormCount[MAX_NUM_CLASSES]; - static int SortKey[MAX_NUM_CLASSES + 1]; - static int SortIndex[MAX_NUM_CLASSES + 1]; + int MaxNumClasses = IntTemplates->NumClasses; + ClassPrunerData data(IntTemplates->NumClasses); + int *ClassCount = data.class_count_; + int *NormCount = data.norm_count_; + int *SortKey = data.sort_key_; + int *SortIndex = data.sort_index_; + int out_class; - int MaxNumClasses; int MaxCount; int NumClasses; FLOAT32 max_rating; //max allowed rating - int *ClassCountPtr; CLASS_ID class_id; - MaxNumClasses = IntTemplates->NumClasses; - - /* Clear Class Counts */ - ClassCountPtr = &(ClassCount[0]); - for (class_id = 0; class_id < MaxNumClasses; class_id++) { - *ClassCountPtr++ = 0; - } - /* Update Class Counts */ NumPruners = IntTemplates->NumClassPruners; for (feature_index = 0; feature_index < NumFeatures; feature_index++) { @@ -246,37 +206,37 @@ int Classify::ClassPruner(INT_TEMPLATES IntTemplates, PrunerWord 
>>= NUM_BITS_PER_CLASS; } */ - ClassCount[class_index++] += cp_maps[PrunerWord & 3]; - PrunerWord >>= 2; - ClassCount[class_index++] += cp_maps[PrunerWord & 3]; - PrunerWord >>= 2; - ClassCount[class_index++] += cp_maps[PrunerWord & 3]; - PrunerWord >>= 2; - ClassCount[class_index++] += cp_maps[PrunerWord & 3]; - PrunerWord >>= 2; - ClassCount[class_index++] += cp_maps[PrunerWord & 3]; - PrunerWord >>= 2; - ClassCount[class_index++] += cp_maps[PrunerWord & 3]; - PrunerWord >>= 2; - ClassCount[class_index++] += cp_maps[PrunerWord & 3]; - PrunerWord >>= 2; - ClassCount[class_index++] += cp_maps[PrunerWord & 3]; - PrunerWord >>= 2; - ClassCount[class_index++] += cp_maps[PrunerWord & 3]; - PrunerWord >>= 2; - ClassCount[class_index++] += cp_maps[PrunerWord & 3]; - PrunerWord >>= 2; - ClassCount[class_index++] += cp_maps[PrunerWord & 3]; - PrunerWord >>= 2; - ClassCount[class_index++] += cp_maps[PrunerWord & 3]; - PrunerWord >>= 2; - ClassCount[class_index++] += cp_maps[PrunerWord & 3]; - PrunerWord >>= 2; - ClassCount[class_index++] += cp_maps[PrunerWord & 3]; - PrunerWord >>= 2; - ClassCount[class_index++] += cp_maps[PrunerWord & 3]; - PrunerWord >>= 2; - ClassCount[class_index++] += cp_maps[PrunerWord & 3]; + ClassCount[class_index++] += PrunerWord & CLASS_PRUNER_CLASS_MASK; + PrunerWord >>= NUM_BITS_PER_CLASS; + ClassCount[class_index++] += PrunerWord & CLASS_PRUNER_CLASS_MASK; + PrunerWord >>= NUM_BITS_PER_CLASS; + ClassCount[class_index++] += PrunerWord & CLASS_PRUNER_CLASS_MASK; + PrunerWord >>= NUM_BITS_PER_CLASS; + ClassCount[class_index++] += PrunerWord & CLASS_PRUNER_CLASS_MASK; + PrunerWord >>= NUM_BITS_PER_CLASS; + ClassCount[class_index++] += PrunerWord & CLASS_PRUNER_CLASS_MASK; + PrunerWord >>= NUM_BITS_PER_CLASS; + ClassCount[class_index++] += PrunerWord & CLASS_PRUNER_CLASS_MASK; + PrunerWord >>= NUM_BITS_PER_CLASS; + ClassCount[class_index++] += PrunerWord & CLASS_PRUNER_CLASS_MASK; + PrunerWord >>= NUM_BITS_PER_CLASS; + ClassCount[class_index++] += 
PrunerWord & CLASS_PRUNER_CLASS_MASK; + PrunerWord >>= NUM_BITS_PER_CLASS; + ClassCount[class_index++] += PrunerWord & CLASS_PRUNER_CLASS_MASK; + PrunerWord >>= NUM_BITS_PER_CLASS; + ClassCount[class_index++] += PrunerWord & CLASS_PRUNER_CLASS_MASK; + PrunerWord >>= NUM_BITS_PER_CLASS; + ClassCount[class_index++] += PrunerWord & CLASS_PRUNER_CLASS_MASK; + PrunerWord >>= NUM_BITS_PER_CLASS; + ClassCount[class_index++] += PrunerWord & CLASS_PRUNER_CLASS_MASK; + PrunerWord >>= NUM_BITS_PER_CLASS; + ClassCount[class_index++] += PrunerWord & CLASS_PRUNER_CLASS_MASK; + PrunerWord >>= NUM_BITS_PER_CLASS; + ClassCount[class_index++] += PrunerWord & CLASS_PRUNER_CLASS_MASK; + PrunerWord >>= NUM_BITS_PER_CLASS; + ClassCount[class_index++] += PrunerWord & CLASS_PRUNER_CLASS_MASK; + PrunerWord >>= NUM_BITS_PER_CLASS; + ClassCount[class_index++] += PrunerWord & CLASS_PRUNER_CLASS_MASK; } } } @@ -286,7 +246,7 @@ int Classify::ClassPruner(INT_TEMPLATES IntTemplates, if (NumFeatures < ExpectedNumFeatures[class_id]) { int deficit = ExpectedNumFeatures[class_id] - NumFeatures; ClassCount[class_id] -= ClassCount[class_id] * deficit / - (NumFeatures*classify_cp_cutoff_strength + deficit); + (NumFeatures * classify_cp_cutoff_strength + deficit); } if (!unicharset.get_enabled(class_id)) ClassCount[class_id] = 0; // This char is disabled! 
@@ -302,8 +262,8 @@ int Classify::ClassPruner(INT_TEMPLATES IntTemplates, MaxCount = 0; for (class_id = 0; class_id < MaxNumClasses; class_id++) { NormCount[class_id] = ClassCount[class_id] - - ((classify_class_pruner_multiplier * NormalizationFactors[class_id]) >> 8) - * cp_maps[3] / 3; + - ((classify_class_pruner_multiplier * NormalizationFactors[class_id]) + >> 8); if (NormCount[class_id] > MaxCount && // This additional check is added in order to ensure that // the classifier will return at least one non-fragmented @@ -334,7 +294,7 @@ int Classify::ClassPruner(INT_TEMPLATES IntTemplates, if (NumClasses > 1) HeapSort(NumClasses, SortKey, SortIndex); - if (tord_display_ratings > 1) { + if (classify_debug_level > 1) { cprintf ("CP:%d classes, %d features:\n", NumClasses, NumFeatures); for (class_id = 0; class_id < NumClasses; class_id++) { cprintf ("%s:C=%d, E=%d, N=%d, Rat=%d\n", @@ -343,9 +303,9 @@ int Classify::ClassPruner(INT_TEMPLATES IntTemplates, ExpectedNumFeatures[SortIndex[NumClasses - class_id]], SortKey[NumClasses - class_id], 1010 - 1000 * SortKey[NumClasses - class_id] / - (cp_maps[3] * NumFeatures)); + (CLASS_PRUNER_CLASS_MASK * NumFeatures)); } - if (tord_display_ratings > 2) { + if (classify_debug_level > 2) { NumPruners = IntTemplates->NumClassPruners; for (feature_index = 0; feature_index < NumFeatures; feature_index++) { @@ -368,8 +328,8 @@ int Classify::ClassPruner(INT_TEMPLATES IntTemplates, if (NormCount[class_index] >= MaxCount) cprintf (" %s=%d,", unicharset.id_to_unichar(class_index), - PrunerWord & 3); - PrunerWord >>= 2; + PrunerWord & CLASS_PRUNER_CLASS_MASK); + PrunerWord >>= NUM_BITS_PER_CLASS; } } } @@ -378,10 +338,10 @@ int Classify::ClassPruner(INT_TEMPLATES IntTemplates, cprintf ("Adjustments:"); for (class_id = 0; class_id < MaxNumClasses; class_id++) { if (NormCount[class_id] > MaxCount) - cprintf (" %s=%d,", + cprintf(" %s=%d,", unicharset.id_to_unichar(class_id), -((classify_class_pruner_multiplier * - 
NormalizationFactors[class_id]) >> 8) * cp_maps[3] / 3); + NormalizationFactors[class_id]) >> 8)); } cprintf ("\n"); } @@ -392,26 +352,28 @@ int Classify::ClassPruner(INT_TEMPLATES IntTemplates, for (class_id = 0, out_class = 0; class_id < NumClasses; class_id++) { Results[out_class].Class = SortIndex[NumClasses - class_id]; Results[out_class].Rating = - 1.0 - SortKey[NumClasses - - class_id] / ((float) cp_maps[3] * NumFeatures); + 1.0 - SortKey[NumClasses - class_id] / + (static_cast(CLASS_PRUNER_CLASS_MASK) * NumFeatures); out_class++; } NumClasses = out_class; return NumClasses; - } + } // namespace tesseract /*---------------------------------------------------------------------------*/ -void IntegerMatcher(INT_CLASS ClassTemplate, - BIT_VECTOR ProtoMask, - BIT_VECTOR ConfigMask, - uinT16 BlobLength, - inT16 NumFeatures, - INT_FEATURE_ARRAY Features, - uinT8 NormalizationFactor, - INT_RESULT Result, - int Debug) { +void IntegerMatcher::Match(INT_CLASS ClassTemplate, + BIT_VECTOR ProtoMask, + BIT_VECTOR ConfigMask, + uinT16 BlobLength, + inT16 NumFeatures, + INT_FEATURE_ARRAY Features, + uinT8 NormalizationFactor, + INT_RESULT Result, + int AdaptFeatureThreshold, + int Debug, + bool SeparateDebugWindows) { /* ** Parameters: ** ClassTemplate Prototypes & tables for a class @@ -424,9 +386,7 @@ void IntegerMatcher(INT_CLASS ClassTemplate, ** (0.0 -> 1.0), 0=good, 1=bad ** Debug Debugger flag: 1=debugger on ** Globals: - ** LocalMatcherMultiplier Normalization factor multiplier - ** classify_int_theta_fudge Theta fudge factor used for - ** evidence calculation + ** local_matcher_multiplier_ Normalization factor multiplier ** Operation: ** IntegerMatcher returns the best configuration and rating ** for a single class. The class matched against is determined @@ -436,92 +396,72 @@ void IntegerMatcher(INT_CLASS ClassTemplate, ** Exceptions: none ** History: Tue Feb 19 16:36:23 MST 1991, RWM, Created. 
*/ - static uinT8 FeatureEvidence[MAX_NUM_CONFIGS]; - static int SumOfFeatureEvidence[MAX_NUM_CONFIGS]; - static uinT8 ProtoEvidence[MAX_NUM_PROTOS][MAX_PROTO_INDEX]; + ScratchEvidence *tables = new ScratchEvidence(); int Feature; int BestMatch; if (MatchDebuggingOn (Debug)) cprintf ("Integer Matcher -------------------------------------------\n"); - IMClearTables(ClassTemplate, SumOfFeatureEvidence, ProtoEvidence); + tables->Clear(ClassTemplate); Result->FeatureMisses = 0; for (Feature = 0; Feature < NumFeatures; Feature++) { - int csum = IMUpdateTablesForFeature(ClassTemplate, ProtoMask, ConfigMask, - Feature, &(Features[Feature]), - FeatureEvidence, SumOfFeatureEvidence, - ProtoEvidence, Debug); + int csum = UpdateTablesForFeature(ClassTemplate, ProtoMask, ConfigMask, + Feature, &Features[Feature], + tables, Debug); // Count features that were missed over all configs. if (csum == 0) Result->FeatureMisses++; } #ifndef GRAPHICS_DISABLED - if (PrintProtoMatchesOn (Debug) || PrintMatchSummaryOn (Debug)) - IMDebugFeatureProtoError(ClassTemplate, - ProtoMask, - ConfigMask, - SumOfFeatureEvidence, - ProtoEvidence, - NumFeatures, - Debug); - - if (DisplayProtoMatchesOn (Debug)) - IMDisplayProtoDebugInfo(ClassTemplate, - ProtoMask, - ConfigMask, - ProtoEvidence, - Debug); - - if (DisplayFeatureMatchesOn (Debug)) - IMDisplayFeatureDebugInfo(ClassTemplate, - ProtoMask, - ConfigMask, - NumFeatures, - Features, - Debug); -#endif + if (PrintProtoMatchesOn(Debug) || PrintMatchSummaryOn(Debug)) { + DebugFeatureProtoError(ClassTemplate, ProtoMask, ConfigMask, *tables, + NumFeatures, Debug); + } + + if (DisplayProtoMatchesOn(Debug)) { + DisplayProtoDebugInfo(ClassTemplate, ProtoMask, ConfigMask, + *tables, SeparateDebugWindows); + } - IMUpdateSumOfProtoEvidences(ClassTemplate, - ConfigMask, - SumOfFeatureEvidence, - ProtoEvidence, - NumFeatures); + if (DisplayFeatureMatchesOn(Debug)) { + DisplayFeatureDebugInfo(ClassTemplate, ProtoMask, ConfigMask, NumFeatures, + Features, 
AdaptFeatureThreshold, Debug, + SeparateDebugWindows); + } +#endif - IMNormalizeSumOfEvidences(ClassTemplate, - SumOfFeatureEvidence, - NumFeatures, - NumFeatures); + tables->UpdateSumOfProtoEvidences(ClassTemplate, ConfigMask, NumFeatures); + tables->NormalizeSums(ClassTemplate, NumFeatures, NumFeatures); - BestMatch = - IMFindBestMatch(ClassTemplate, - SumOfFeatureEvidence, - BlobLength, - NormalizationFactor, - Result); + BestMatch = FindBestMatch(ClassTemplate, *tables, BlobLength, + NormalizationFactor, Result); #ifndef GRAPHICS_DISABLED - if (PrintMatchSummaryOn (Debug)) - IMDebugBestMatch(BestMatch, Result, BlobLength, NormalizationFactor); + if (PrintMatchSummaryOn(Debug)) + DebugBestMatch(BestMatch, Result, BlobLength, NormalizationFactor); - if (MatchDebuggingOn (Debug)) - cprintf ("Match Complete --------------------------------------------\n"); + if (MatchDebuggingOn(Debug)) + cprintf("Match Complete --------------------------------------------\n"); #endif + delete tables; } /*---------------------------------------------------------------------------*/ -int FindGoodProtos(INT_CLASS ClassTemplate, - BIT_VECTOR ProtoMask, - BIT_VECTOR ConfigMask, - uinT16 BlobLength, - inT16 NumFeatures, - INT_FEATURE_ARRAY Features, - PROTO_ID *ProtoArray, - int Debug) { +int IntegerMatcher::FindGoodProtos( + INT_CLASS ClassTemplate, + BIT_VECTOR ProtoMask, + BIT_VECTOR ConfigMask, + uinT16 BlobLength, + inT16 NumFeatures, + INT_FEATURE_ARRAY Features, + PROTO_ID *ProtoArray, + int AdaptProtoThreshold, + int Debug) { /* ** Parameters: ** ClassTemplate Prototypes & tables for a class @@ -531,12 +471,10 @@ int FindGoodProtos(INT_CLASS ClassTemplate, ** NumFeatures Number of features in blob ** Features Array of features ** ProtoArray Array of good protos + ** AdaptProtoThreshold Threshold for good protos ** Debug Debugger flag: 1=debugger on ** Globals: - ** LocalMatcherMultiplier Normalization factor multiplier - ** classify_int_theta_fudge Theta fudge factor used for - 
** evidence calculation - ** classify_adapt_proto_thresh Threshold for good protos + ** local_matcher_multiplier_ Normalization factor multiplier ** Operation: ** FindGoodProtos finds all protos whose normalized proto-evidence ** exceed classify_adapt_proto_thresh. The list is ordered by increasing @@ -546,56 +484,39 @@ int FindGoodProtos(INT_CLASS ClassTemplate, ** Exceptions: none ** History: Tue Mar 12 17:09:26 MST 1991, RWM, Created */ - static uinT8 FeatureEvidence[MAX_NUM_CONFIGS]; - static int SumOfFeatureEvidence[MAX_NUM_CONFIGS]; - static uinT8 ProtoEvidence[MAX_NUM_PROTOS][MAX_PROTO_INDEX]; - int Feature; - register uinT8 *UINT8Pointer; - register int ProtoIndex; - int NumProtos; - int NumGoodProtos; - uinT16 ActualProtoNum; - register int Temp; + ScratchEvidence *tables = new ScratchEvidence(); + int NumGoodProtos = 0; /* DEBUG opening heading */ if (MatchDebuggingOn (Debug)) cprintf ("Find Good Protos -------------------------------------------\n"); - IMClearTables(ClassTemplate, SumOfFeatureEvidence, ProtoEvidence); + tables->Clear(ClassTemplate); - for (Feature = 0; Feature < NumFeatures; Feature++) - IMUpdateTablesForFeature (ClassTemplate, ProtoMask, ConfigMask, Feature, - &(Features[Feature]), FeatureEvidence, - SumOfFeatureEvidence, ProtoEvidence, Debug); + for (int Feature = 0; Feature < NumFeatures; Feature++) + UpdateTablesForFeature( + ClassTemplate, ProtoMask, ConfigMask, Feature, &(Features[Feature]), + tables, Debug); #ifndef GRAPHICS_DISABLED if (PrintProtoMatchesOn (Debug) || PrintMatchSummaryOn (Debug)) - IMDebugFeatureProtoError(ClassTemplate, - ProtoMask, - ConfigMask, - SumOfFeatureEvidence, - ProtoEvidence, - NumFeatures, - Debug); + DebugFeatureProtoError(ClassTemplate, ProtoMask, ConfigMask, *tables, + NumFeatures, Debug); #endif /* Average Proto Evidences & Find Good Protos */ - NumProtos = ClassTemplate->NumProtos; - NumGoodProtos = 0; - for (ActualProtoNum = 0; ActualProtoNum < NumProtos; ActualProtoNum++) { + for (int proto = 
0; proto < ClassTemplate->NumProtos; proto++) { /* Compute Average for Actual Proto */ - Temp = 0; - UINT8Pointer = &(ProtoEvidence[ActualProtoNum][0]); - for (ProtoIndex = ClassTemplate->ProtoLengths[ActualProtoNum]; - ProtoIndex > 0; ProtoIndex--, UINT8Pointer++) - Temp += *UINT8Pointer; + int Temp = 0; + for (int i = 0; i < ClassTemplate->ProtoLengths[proto]; i++) + Temp += tables->proto_evidence_[proto][i]; - Temp /= ClassTemplate->ProtoLengths[ActualProtoNum]; + Temp /= ClassTemplate->ProtoLengths[proto]; /* Find Good Protos */ - if (Temp >= classify_adapt_proto_thresh) { - *ProtoArray = ActualProtoNum; + if (Temp >= AdaptProtoThreshold) { + *ProtoArray = proto; ProtoArray++; NumGoodProtos++; } @@ -603,77 +524,63 @@ int FindGoodProtos(INT_CLASS ClassTemplate, if (MatchDebuggingOn (Debug)) cprintf ("Match Complete --------------------------------------------\n"); - return NumGoodProtos; + delete tables; + return NumGoodProtos; } /*---------------------------------------------------------------------------*/ -int FindBadFeatures(INT_CLASS ClassTemplate, - BIT_VECTOR ProtoMask, - BIT_VECTOR ConfigMask, - uinT16 BlobLength, - inT16 NumFeatures, - INT_FEATURE_ARRAY Features, - FEATURE_ID *FeatureArray, - int Debug) { +int IntegerMatcher::FindBadFeatures( + INT_CLASS ClassTemplate, + BIT_VECTOR ProtoMask, + BIT_VECTOR ConfigMask, + uinT16 BlobLength, + inT16 NumFeatures, + INT_FEATURE_ARRAY Features, + FEATURE_ID *FeatureArray, + int AdaptFeatureThreshold, + int Debug) { /* - ** Parameters: - ** ClassTemplate Prototypes & tables for a class - ** ProtoMask AND Mask for proto word - ** ConfigMask AND Mask for config word - ** BlobLength Length of unormalized blob - ** NumFeatures Number of features in blob - ** Features Array of features - ** FeatureArray Array of bad features - ** Debug Debugger flag: 1=debugger on - ** Globals: - ** LocalMatcherMultiplier Normalization factor multiplier - ** classify_int_theta_fudge Theta fudge factor used for - ** evidence 
calculation - ** classify_adapt_feature_thresh Threshold for bad features - ** Operation: - ** FindBadFeatures finds all features whose maximum feature-evidence - ** was less than classify_adapt_feature_thresh. The list is ordered by increasing - ** feature number. - ** Return: - ** Number of bad features in FeatureArray. - ** Exceptions: none - ** History: Tue Mar 12 17:09:26 MST 1991, RWM, Created + ** Parameters: + ** ClassTemplate Prototypes & tables for a class + ** ProtoMask AND Mask for proto word + ** ConfigMask AND Mask for config word + ** BlobLength Length of unormalized blob + ** NumFeatures Number of features in blob + ** Features Array of features + ** FeatureArray Array of bad features + ** AdaptFeatureThreshold Threshold for bad features + ** Debug Debugger flag: 1=debugger on + ** Operation: + ** FindBadFeatures finds all features with maximum feature-evidence < + ** AdaptFeatureThresh. The list is ordered by increasing feature number. + ** Return: + ** Number of bad features in FeatureArray. 
+ ** History: Tue Mar 12 17:09:26 MST 1991, RWM, Created */ - static uinT8 FeatureEvidence[MAX_NUM_CONFIGS]; - static int SumOfFeatureEvidence[MAX_NUM_CONFIGS]; - static uinT8 ProtoEvidence[MAX_NUM_PROTOS][MAX_PROTO_INDEX]; - int Feature; - register uinT8 *UINT8Pointer; - register int ConfigNum; - int NumConfigs; - int NumBadFeatures; - register int Temp; + ScratchEvidence *tables = new ScratchEvidence(); + int NumBadFeatures = 0; /* DEBUG opening heading */ - if (MatchDebuggingOn (Debug)) - cprintf - ("Find Bad Features -------------------------------------------\n"); + if (MatchDebuggingOn(Debug)) + cprintf("Find Bad Features -------------------------------------------\n"); - IMClearTables(ClassTemplate, SumOfFeatureEvidence, ProtoEvidence); + tables->Clear(ClassTemplate); - NumBadFeatures = 0; - NumConfigs = ClassTemplate->NumConfigs; - for (Feature = 0; Feature < NumFeatures; Feature++) { - IMUpdateTablesForFeature (ClassTemplate, ProtoMask, ConfigMask, Feature, - &(Features[Feature]), FeatureEvidence, - SumOfFeatureEvidence, ProtoEvidence, Debug); + for (int Feature = 0; Feature < NumFeatures; Feature++) { + UpdateTablesForFeature( + ClassTemplate, ProtoMask, ConfigMask, Feature, &Features[Feature], + tables, Debug); /* Find Best Evidence for Current Feature */ - Temp = 0; - UINT8Pointer = FeatureEvidence; - for (ConfigNum = 0; ConfigNum < NumConfigs; ConfigNum++, UINT8Pointer++) - if (*UINT8Pointer > Temp) - Temp = *UINT8Pointer; + int best = 0; + for (int i = 0; i < ClassTemplate->NumConfigs; i++) + if (tables->feature_evidence_[i] > best) + best = tables->feature_evidence_[i]; /* Find Bad Features */ - if (Temp < classify_adapt_feature_thresh) { + if (best < AdaptFeatureThreshold) { *FeatureArray = Feature; FeatureArray++; NumBadFeatures++; @@ -681,158 +588,80 @@ int FindBadFeatures(INT_CLASS ClassTemplate, } #ifndef GRAPHICS_DISABLED - if (PrintProtoMatchesOn (Debug) || PrintMatchSummaryOn (Debug)) - IMDebugFeatureProtoError(ClassTemplate, - ProtoMask, - 
ConfigMask, - SumOfFeatureEvidence, - ProtoEvidence, - NumFeatures, - Debug); + if (PrintProtoMatchesOn(Debug) || PrintMatchSummaryOn(Debug)) + DebugFeatureProtoError(ClassTemplate, ProtoMask, ConfigMask, *tables, + NumFeatures, Debug); #endif - if (MatchDebuggingOn (Debug)) - cprintf ("Match Complete --------------------------------------------\n"); + if (MatchDebuggingOn(Debug)) + cprintf("Match Complete --------------------------------------------\n"); + delete tables; return NumBadFeatures; - } /*---------------------------------------------------------------------------*/ -void InitIntegerMatcher() { - int i; - uinT32 IntSimilarity; - double Similarity; - double Evidence; - double ScaleFactor; +void IntegerMatcher::Init(tesseract::IntParam *classify_debug_level, + int classify_integer_matcher_multiplier) { + classify_debug_level_ = classify_debug_level; /* Set default mode of operation of IntegerMatcher */ - SetCharNormMatch(); + SetCharNormMatch(classify_integer_matcher_multiplier); /* Initialize table for evidence to similarity lookup */ - for (i = 0; i < SE_TABLE_SIZE; i++) { - IntSimilarity = i << (27 - SE_TABLE_BITS); - Similarity = ((double) IntSimilarity) / 65536.0 / 65536.0; - Evidence = Similarity / classify_similarity_center; - Evidence *= Evidence; - Evidence += 1.0; - Evidence = 1.0 / Evidence; - Evidence *= 255.0; - - if (classify_se_exponential_multiplier > 0.0) { - ScaleFactor = 1.0 - exp (-classify_se_exponential_multiplier) * - exp (classify_se_exponential_multiplier * ((double) i / SE_TABLE_SIZE)); - if (ScaleFactor > 1.0) - ScaleFactor = 1.0; - if (ScaleFactor < 0.0) - ScaleFactor = 0.0; - Evidence *= ScaleFactor; + for (int i = 0; i < SE_TABLE_SIZE; i++) { + uinT32 IntSimilarity = i << (27 - SE_TABLE_BITS); + double Similarity = ((double) IntSimilarity) / 65536.0 / 65536.0; + double evidence = Similarity / kSimilarityCenter; + evidence = 255.0 / (evidence * evidence + 1.0); + + if (kSEExponentialMultiplier > 0.0) { + double scale = 1.0 - 
exp(-kSEExponentialMultiplier) * + exp(kSEExponentialMultiplier * ((double) i / SE_TABLE_SIZE)); + evidence *= ClipToRange(scale, 0.0, 1.0); } - SimilarityEvidenceTable[i] = (uinT8) (Evidence + 0.5); + similarity_evidence_table_[i] = (uinT8) (evidence + 0.5); } /* Initialize evidence computation variables */ - EvidenceTableMask = - ((1 << classify_evidence_table_bits) - 1) << (9 - classify_evidence_table_bits); - MultTruncShiftBits = (14 - classify_int_evidence_trunc_bits); - TableTruncShiftBits = (27 - SE_TABLE_BITS - (MultTruncShiftBits << 1)); - EvidenceMultMask = ((1 << classify_int_evidence_trunc_bits) - 1); - -} - -/*-------------------------------------------------------------------------*/ -void PrintIntMatcherStats(FILE *f) { - fprintf (f, "protoword_lookups=%d, zero_protowords=%d, proto_shifts=%d\n", - protoword_lookups, zero_protowords, proto_shifts); - fprintf (f, "set_proto_bits=%d, config_shifts=%d, set_config_bits=%d\n", - set_proto_bits, config_shifts, set_config_bits); -} - - -/*-------------------------------------------------------------------------*/ -void SetProtoThresh(FLOAT32 Threshold) { - classify_adapt_proto_thresh.set_value(255 * Threshold); - if (classify_adapt_proto_thresh < 0) - classify_adapt_proto_thresh.set_value(0); - if (classify_adapt_proto_thresh > 255) - classify_adapt_proto_thresh.set_value(255); -} - - -/*---------------------------------------------------------------------------*/ -void SetFeatureThresh(FLOAT32 Threshold) { - classify_adapt_feature_thresh.set_value(255 * Threshold); - if (classify_adapt_feature_thresh < 0) - classify_adapt_feature_thresh.set_value(0); - if (classify_adapt_feature_thresh > 255) - classify_adapt_feature_thresh.set_value(255); + evidence_table_mask_ = + ((1 << kEvidenceTableBits) - 1) << (9 - kEvidenceTableBits); + mult_trunc_shift_bits_ = (14 - kIntEvidenceTruncBits); + table_trunc_shift_bits_ = (27 - SE_TABLE_BITS - (mult_trunc_shift_bits_ << 1)); + evidence_mult_mask_ = ((1 << 
kIntEvidenceTruncBits) - 1); } - /*--------------------------------------------------------------------------*/ -void SetBaseLineMatch() { - LocalMatcherMultiplier = 0; +void IntegerMatcher::SetBaseLineMatch() { + local_matcher_multiplier_ = 0; } /*--------------------------------------------------------------------------*/ -void SetCharNormMatch() { - LocalMatcherMultiplier = classify_integer_matcher_multiplier; +void IntegerMatcher::SetCharNormMatch(int integer_matcher_multiplier) { + local_matcher_multiplier_ = integer_matcher_multiplier; } /**---------------------------------------------------------------------------- Private Code ----------------------------------------------------------------------------**/ -/*---------------------------------------------------------------------------*/ -void -IMClearTables (INT_CLASS ClassTemplate, -int SumOfFeatureEvidence[MAX_NUM_CONFIGS], -uinT8 ProtoEvidence[MAX_NUM_PROTOS][MAX_PROTO_INDEX]) { -/* - ** Parameters: - ** SumOfFeatureEvidence Sum of Feature Evidence Table - ** NumConfigs Number of Configurations - ** ProtoEvidence Prototype Evidence Table - ** NumProtos Number of Prototypes - ** Globals: - ** Operation: - ** Clear SumOfFeatureEvidence and ProtoEvidence tables. - ** Return: - ** Exceptions: none - ** History: Wed Feb 27 14:12:28 MST 1991, RWM, Created. 
- */ - int NumProtos = ClassTemplate->NumProtos; - int NumConfigs = ClassTemplate->NumConfigs; - - memset(SumOfFeatureEvidence, 0, - NumConfigs * sizeof(SumOfFeatureEvidence[0])); - memset(ProtoEvidence, 0, - NumProtos * sizeof(ProtoEvidence[0])); +void ScratchEvidence::Clear(const INT_CLASS class_template) { + memset(sum_feature_evidence_, 0, + class_template->NumConfigs * sizeof(sum_feature_evidence_[0])); + memset(proto_evidence_, 0, + class_template->NumProtos * sizeof(proto_evidence_[0])); } - -/*---------------------------------------------------------------------------*/ -void -IMClearFeatureEvidenceTable (uinT8 FeatureEvidence[MAX_NUM_CONFIGS], -int NumConfigs) { -/* - ** Parameters: - ** FeatureEvidence Feature Evidence Table - ** NumConfigs Number of Configurations - ** Globals: - ** Operation: - ** Clear FeatureEvidence table. - ** Return: - ** Exceptions: none - ** History: Wed Feb 27 14:12:28 MST 1991, RWM, Created. - */ - memset(FeatureEvidence, 0, NumConfigs * sizeof(*FeatureEvidence)); +void ScratchEvidence::ClearFeatureEvidence(const INT_CLASS class_template) { + memset(feature_evidence_, 0, + class_template->NumConfigs * sizeof(feature_evidence_[0])); } + /*---------------------------------------------------------------------------*/ void IMDebugConfiguration(int FeatureNum, uinT16 ActualProtoNum, @@ -874,47 +703,36 @@ void IMDebugConfigurationSum(int FeatureNum, ** Exceptions: none ** History: Wed Feb 27 14:12:28 MST 1991, RWM, Created. 
*/ - int ConfigNum; - - cprintf ("F=%3d, C=", (int) FeatureNum); - - for (ConfigNum = 0; ConfigNum < ConfigCount; ConfigNum++) { - cprintf ("%4d", FeatureEvidence[ConfigNum]); + cprintf("F=%3d, C=", FeatureNum); + for (int ConfigNum = 0; ConfigNum < ConfigCount; ConfigNum++) { + cprintf("%4d", FeatureEvidence[ConfigNum]); } - cprintf ("\n"); - + cprintf("\n"); } /*---------------------------------------------------------------------------*/ -int -IMUpdateTablesForFeature (INT_CLASS ClassTemplate, -BIT_VECTOR ProtoMask, -BIT_VECTOR ConfigMask, -int FeatureNum, -INT_FEATURE Feature, -uinT8 FeatureEvidence[MAX_NUM_CONFIGS], -int SumOfFeatureEvidence[MAX_NUM_CONFIGS], -uinT8 -ProtoEvidence[MAX_NUM_PROTOS][MAX_PROTO_INDEX], -int Debug) { +int IntegerMatcher::UpdateTablesForFeature( + INT_CLASS ClassTemplate, + BIT_VECTOR ProtoMask, + BIT_VECTOR ConfigMask, + int FeatureNum, + INT_FEATURE Feature, + ScratchEvidence *tables, + int Debug) { /* - ** Parameters: - ** ClassTemplate Prototypes & tables for a class - ** FeatureNum Current feature number (for DEBUG only) - ** Feature Pointer to a feature struct - ** FeatureEvidence Feature Evidence Table - ** SumOfFeatureEvidence Sum of Feature Evidence Table - ** ProtoEvidence Prototype Evidence Table - ** Debug Debugger flag: 1=debugger on - ** Globals: - ** Operation: - ** For the given feature: prune protos, compute evidence, update Feature Evidence, - ** Proto Evidence, and Sum of Feature Evidence tables. - ** Return: - ** Exceptions: none - ** History: Wed Feb 27 14:12:28 MST 1991, RWM, Created. + ** Parameters: + ** ClassTemplate Prototypes & tables for a class + ** FeatureNum Current feature number (for DEBUG only) + ** Feature Pointer to a feature struct + ** tables Evidence tables + ** Debug Debugger flag: 1=debugger on + ** Operation: + ** For the given feature: prune protos, compute evidence, + ** update Feature Evidence, Proto Evidence, and Sum of Feature + ** Evidence tables. 
+ ** Return: */ register uinT32 ConfigWord; register uinT32 ProtoWord; @@ -942,7 +760,7 @@ int Debug) { register inT32 A3; register uinT32 A4; - IMClearFeatureEvidenceTable(FeatureEvidence, ClassTemplate->NumConfigs); + tables->ClearFeatureEvidence(ClassTemplate); /* Precompute Feature Address offset for Proto Pruning */ XFeatureAddress = ((Feature->X >> 2) << 1); @@ -979,26 +797,25 @@ int Debug) { A3 = (((Proto->A * (Feature->X - 128)) << 1) - (Proto->B * (Feature->Y - 128)) + (Proto->C << 9)); M3 = - (((inT8) (Feature->Theta - Proto->Angle)) * - classify_int_theta_fudge) << 1; + (((inT8) (Feature->Theta - Proto->Angle)) * kIntThetaFudge) << 1; if (A3 < 0) A3 = ~A3; if (M3 < 0) M3 = ~M3; - A3 >>= MultTruncShiftBits; - M3 >>= MultTruncShiftBits; - if (A3 > EvidenceMultMask) - A3 = EvidenceMultMask; - if (M3 > EvidenceMultMask) - M3 = EvidenceMultMask; + A3 >>= mult_trunc_shift_bits_; + M3 >>= mult_trunc_shift_bits_; + if (A3 > evidence_mult_mask_) + A3 = evidence_mult_mask_; + if (M3 > evidence_mult_mask_) + M3 = evidence_mult_mask_; A4 = (A3 * A3) + (M3 * M3); - A4 >>= TableTruncShiftBits; - if (A4 > EvidenceTableMask) + A4 >>= table_trunc_shift_bits_; + if (A4 > evidence_table_mask_) Evidence = 0; else - Evidence = SimilarityEvidenceTable[A4]; + Evidence = similarity_evidence_table_[A4]; if (PrintFeatureMatchesOn (Debug)) IMDebugConfiguration (FeatureNum, @@ -1007,14 +824,13 @@ int Debug) { ConfigWord &= *ConfigMask; - UINT8Pointer = FeatureEvidence - 8; + UINT8Pointer = tables->feature_evidence_ - 8; config_byte = 0; while (ConfigWord != 0 || config_byte != 0) { while (config_byte == 0) { config_byte = ConfigWord & 0xff; ConfigWord >>= 8; UINT8Pointer += 8; - // config_shifts++; } config_offset = offset_table[config_byte]; config_byte = next_table[config_byte]; @@ -1023,7 +839,7 @@ int Debug) { } UINT8Pointer = - &(ProtoEvidence[ActualProtoNum + proto_offset][0]); + &(tables->proto_evidence_[ActualProtoNum + proto_offset][0]); for (ProtoIndex = 
ClassTemplate->ProtoLengths[ActualProtoNum + proto_offset]; ProtoIndex > 0; ProtoIndex--, UINT8Pointer++) { @@ -1040,11 +856,13 @@ int Debug) { } } - if (PrintFeatureMatchesOn (Debug)) - IMDebugConfigurationSum (FeatureNum, FeatureEvidence, - ClassTemplate->NumConfigs); - IntPointer = SumOfFeatureEvidence; - UINT8Pointer = FeatureEvidence; + if (PrintFeatureMatchesOn(Debug)) { + IMDebugConfigurationSum(FeatureNum, tables->feature_evidence_, + ClassTemplate->NumConfigs); + } + + IntPointer = tables->sum_feature_evidence_; + UINT8Pointer = tables->feature_evidence_; int SumOverConfigs = 0; for (ConfigNum = ClassTemplate->NumConfigs; ConfigNum > 0; ConfigNum--) { int evidence = *UINT8Pointer++; @@ -1057,14 +875,13 @@ int Debug) { /*---------------------------------------------------------------------------*/ #ifndef GRAPHICS_DISABLED -void -IMDebugFeatureProtoError (INT_CLASS ClassTemplate, -BIT_VECTOR ProtoMask, -BIT_VECTOR ConfigMask, -int SumOfFeatureEvidence[MAX_NUM_CONFIGS], -uinT8 -ProtoEvidence[MAX_NUM_PROTOS][MAX_PROTO_INDEX], -inT16 NumFeatures, int Debug) { +void IntegerMatcher::DebugFeatureProtoError( + INT_CLASS ClassTemplate, + BIT_VECTOR ProtoMask, + BIT_VECTOR ConfigMask, + const ScratchEvidence& tables, + inT16 NumFeatures, + int Debug) { /* ** Parameters: ** Globals: @@ -1074,8 +891,6 @@ inT16 NumFeatures, int Debug) { ** Exceptions: none ** History: Wed Feb 27 14:12:28 MST 1991, RWM, Created. 
*/ - uinT8 *UINT8Pointer; - int *IntPointer; FLOAT32 ProtoConfigs[MAX_NUM_CONFIGS]; int ConfigNum; uinT32 ConfigWord; @@ -1083,28 +898,23 @@ inT16 NumFeatures, int Debug) { uinT16 ProtoNum; uinT8 ProtoWordNum; PROTO_SET ProtoSet; - int ProtoIndex; - int NumProtos; uinT16 ActualProtoNum; - int Temp; - int NumConfigs; - NumProtos = ClassTemplate->NumProtos; - NumConfigs = ClassTemplate->NumConfigs; - - if (PrintMatchSummaryOn (Debug)) { - cprintf ("Configuration Mask:\n"); - for (ConfigNum = 0; ConfigNum < NumConfigs; ConfigNum++) - cprintf ("%1d", (((*ConfigMask) >> ConfigNum) & 1)); - cprintf ("\n"); - - cprintf ("Feature Error for Configurations:\n"); - for (ConfigNum = 0; ConfigNum < NumConfigs; ConfigNum++) - cprintf (" %5.1f", - 100.0 * (1.0 - - (FLOAT32) SumOfFeatureEvidence[ConfigNum] / - NumFeatures / 256.0)); - cprintf ("\n\n\n"); + if (PrintMatchSummaryOn(Debug)) { + cprintf("Configuration Mask:\n"); + for (ConfigNum = 0; ConfigNum < ClassTemplate->NumConfigs; ConfigNum++) + cprintf("%1d", (((*ConfigMask) >> ConfigNum) & 1)); + cprintf("\n"); + + cprintf("Feature Error for Configurations:\n"); + for (ConfigNum = 0; ConfigNum < ClassTemplate->NumConfigs; ConfigNum++) { + cprintf( + " %5.1f", + 100.0 * (1.0 - + (FLOAT32) tables.sum_feature_evidence_[ConfigNum] + / NumFeatures / 256.0)); + } + cprintf("\n\n\n"); } if (PrintMatchSummaryOn (Debug)) { @@ -1117,7 +927,7 @@ inT16 NumFeatures, int Debug) { ActualProtoNum = (ProtoSetIndex * PROTOS_PER_PROTO_SET); for (ProtoNum = 0; ((ProtoNum < (PROTOS_PER_PROTO_SET >> 1)) - && (ActualProtoNum < NumProtos)); + && (ActualProtoNum < ClassTemplate->NumProtos)); ProtoNum++, ActualProtoNum++) cprintf ("%1d", (((*ProtoMask) >> ProtoNum) & 1)); cprintf ("\n"); @@ -1126,8 +936,8 @@ inT16 NumFeatures, int Debug) { cprintf ("\n"); } - for (ConfigNum = 0; ConfigNum < NumConfigs; ConfigNum++) - ProtoConfigs[ConfigNum] = 0; + for (int i = 0; i < ClassTemplate->NumConfigs; i++) + ProtoConfigs[i] = 0; if (PrintProtoMatchesOn 
(Debug)) { cprintf ("Proto Evidence:\n"); @@ -1136,41 +946,37 @@ inT16 NumFeatures, int Debug) { ProtoSet = ClassTemplate->ProtoSets[ProtoSetIndex]; ActualProtoNum = (ProtoSetIndex * PROTOS_PER_PROTO_SET); for (ProtoNum = 0; - ((ProtoNum < PROTOS_PER_PROTO_SET) - && (ActualProtoNum < NumProtos)); - ProtoNum++, ActualProtoNum++) { + ((ProtoNum < PROTOS_PER_PROTO_SET) && + (ActualProtoNum < ClassTemplate->NumProtos)); + ProtoNum++, ActualProtoNum++) { cprintf ("P %3d =", ActualProtoNum); - Temp = 0; - UINT8Pointer = &(ProtoEvidence[ActualProtoNum][0]); - for (ProtoIndex = 0; - ProtoIndex < ClassTemplate->ProtoLengths[ActualProtoNum]; - ProtoIndex++, UINT8Pointer++) { - cprintf (" %d", *UINT8Pointer); - Temp += *UINT8Pointer; + int temp = 0; + for (int j = 0; j < ClassTemplate->ProtoLengths[ActualProtoNum]; j++) { + uinT8 data = tables.proto_evidence_[ActualProtoNum][j]; + cprintf(" %d", data); + temp += data; } - cprintf (" = %6.4f%%\n", Temp / - 256.0 / ClassTemplate->ProtoLengths[ActualProtoNum]); + cprintf(" = %6.4f%%\n", + temp / 256.0 / ClassTemplate->ProtoLengths[ActualProtoNum]); - ConfigWord = (ProtoSet->Protos[ProtoNum]).Configs[0]; - IntPointer = SumOfFeatureEvidence; + ConfigWord = ProtoSet->Protos[ProtoNum].Configs[0]; ConfigNum = 0; while (ConfigWord) { - cprintf ("%5d", ConfigWord & 1 ? Temp : 0); + cprintf ("%5d", ConfigWord & 1 ? 
temp : 0); if (ConfigWord & 1) - ProtoConfigs[ConfigNum] += Temp; - IntPointer++; + ProtoConfigs[ConfigNum] += temp; ConfigNum++; ConfigWord >>= 1; } - cprintf ("\n"); + cprintf("\n"); } } } if (PrintMatchSummaryOn (Debug)) { cprintf ("Proto Error for Configurations:\n"); - for (ConfigNum = 0; ConfigNum < NumConfigs; ConfigNum++) + for (ConfigNum = 0; ConfigNum < ClassTemplate->NumConfigs; ConfigNum++) cprintf (" %5.1f", 100.0 * (1.0 - ProtoConfigs[ConfigNum] / @@ -1180,12 +986,12 @@ inT16 NumFeatures, int Debug) { if (PrintProtoMatchesOn (Debug)) { cprintf ("Proto Sum for Configurations:\n"); - for (ConfigNum = 0; ConfigNum < NumConfigs; ConfigNum++) + for (ConfigNum = 0; ConfigNum < ClassTemplate->NumConfigs; ConfigNum++) cprintf (" %4.1f", ProtoConfigs[ConfigNum] / 256.0); cprintf ("\n\n"); cprintf ("Proto Length for Configurations:\n"); - for (ConfigNum = 0; ConfigNum < NumConfigs; ConfigNum++) + for (ConfigNum = 0; ConfigNum < ClassTemplate->NumConfigs; ConfigNum++) cprintf (" %4.1f", (float) ClassTemplate->ConfigLengths[ConfigNum]); cprintf ("\n\n"); @@ -1195,62 +1001,41 @@ inT16 NumFeatures, int Debug) { /*---------------------------------------------------------------------------*/ -void -IMDisplayProtoDebugInfo (INT_CLASS ClassTemplate, -BIT_VECTOR ProtoMask, -BIT_VECTOR ConfigMask, -uinT8 ProtoEvidence[MAX_NUM_PROTOS][MAX_PROTO_INDEX], -int Debug) { - register uinT8 *UINT8Pointer; - register uinT32 ConfigWord; - register uinT16 ProtoNum; - register uinT16 ActualProtoNum; +void IntegerMatcher::DisplayProtoDebugInfo( + INT_CLASS ClassTemplate, + BIT_VECTOR ProtoMask, + BIT_VECTOR ConfigMask, + const ScratchEvidence& tables, + bool SeparateDebugWindows) { + uinT16 ProtoNum; + uinT16 ActualProtoNum; PROTO_SET ProtoSet; int ProtoSetIndex; - int ProtoIndex; - int NumProtos; - register int Temp; InitIntMatchWindowIfReqd(); - if (matcher_debug_separate_windows) { + if (SeparateDebugWindows) { InitFeatureDisplayWindowIfReqd(); InitProtoDisplayWindowIfReqd(); } - 
NumProtos = ClassTemplate->NumProtos; for (ProtoSetIndex = 0; ProtoSetIndex < ClassTemplate->NumProtoSets; - ProtoSetIndex++) { + ProtoSetIndex++) { ProtoSet = ClassTemplate->ProtoSets[ProtoSetIndex]; - ActualProtoNum = (ProtoSetIndex * PROTOS_PER_PROTO_SET); + ActualProtoNum = ProtoSetIndex * PROTOS_PER_PROTO_SET; for (ProtoNum = 0; - ((ProtoNum < PROTOS_PER_PROTO_SET) - && (ActualProtoNum < NumProtos)); ProtoNum++, ActualProtoNum++) { + ((ProtoNum < PROTOS_PER_PROTO_SET) && + (ActualProtoNum < ClassTemplate->NumProtos)); + ProtoNum++, ActualProtoNum++) { /* Compute Average for Actual Proto */ - Temp = 0; - UINT8Pointer = &(ProtoEvidence[ActualProtoNum][0]); - for (ProtoIndex = ClassTemplate->ProtoLengths[ActualProtoNum]; - ProtoIndex > 0; ProtoIndex--, UINT8Pointer++) - Temp += *UINT8Pointer; + int temp = 0; + for (int i = 0; i < ClassTemplate->ProtoLengths[ActualProtoNum]; i++) + temp += tables.proto_evidence_[ActualProtoNum][i]; - Temp /= ClassTemplate->ProtoLengths[ActualProtoNum]; + temp /= ClassTemplate->ProtoLengths[ActualProtoNum]; - ConfigWord = (ProtoSet->Protos[ProtoNum]).Configs[0]; - ConfigWord &= *ConfigMask; - if (ConfigWord) { - /* Update display for current proto */ - if (ClipMatchEvidenceOn (Debug)) { - if (Temp < classify_adapt_proto_thresh) - DisplayIntProto (ClassTemplate, ActualProtoNum, - (Temp / 255.0)); - else - DisplayIntProto (ClassTemplate, ActualProtoNum, - (Temp / 255.0)); - } - else { - DisplayIntProto (ClassTemplate, ActualProtoNum, - (Temp / 255.0)); - } + if ((ProtoSet->Protos[ProtoNum]).Configs[0] & (*ConfigMask)) { + DisplayIntProto(ClassTemplate, ActualProtoNum, temp / 255.0); } } } @@ -1258,105 +1043,83 @@ int Debug) { /*---------------------------------------------------------------------------*/ -void IMDisplayFeatureDebugInfo(INT_CLASS ClassTemplate, - BIT_VECTOR ProtoMask, - BIT_VECTOR ConfigMask, - inT16 NumFeatures, - INT_FEATURE_ARRAY Features, - int Debug) { - static uinT8 FeatureEvidence[MAX_NUM_CONFIGS]; - static int 
SumOfFeatureEvidence[MAX_NUM_CONFIGS]; - static uinT8 ProtoEvidence[MAX_NUM_PROTOS][MAX_PROTO_INDEX]; - int Feature; - register uinT8 *UINT8Pointer; - register int ConfigNum; - int NumConfigs; - register int Temp; - - IMClearTables(ClassTemplate, SumOfFeatureEvidence, ProtoEvidence); +void IntegerMatcher::DisplayFeatureDebugInfo( + INT_CLASS ClassTemplate, + BIT_VECTOR ProtoMask, + BIT_VECTOR ConfigMask, + inT16 NumFeatures, + INT_FEATURE_ARRAY Features, + int AdaptFeatureThreshold, + int Debug, + bool SeparateDebugWindows) { + ScratchEvidence *tables = new ScratchEvidence(); + + tables->Clear(ClassTemplate); InitIntMatchWindowIfReqd(); - if (matcher_debug_separate_windows) { + if (SeparateDebugWindows) { InitFeatureDisplayWindowIfReqd(); InitProtoDisplayWindowIfReqd(); } - NumConfigs = ClassTemplate->NumConfigs; - for (Feature = 0; Feature < NumFeatures; Feature++) { - IMUpdateTablesForFeature (ClassTemplate, ProtoMask, ConfigMask, Feature, - &(Features[Feature]), FeatureEvidence, - SumOfFeatureEvidence, ProtoEvidence, 0); + for (int Feature = 0; Feature < NumFeatures; Feature++) { + UpdateTablesForFeature( + ClassTemplate, ProtoMask, ConfigMask, Feature, &Features[Feature], + tables, 0); /* Find Best Evidence for Current Feature */ - Temp = 0; - UINT8Pointer = FeatureEvidence; - for (ConfigNum = 0; ConfigNum < NumConfigs; ConfigNum++, UINT8Pointer++) - if (*UINT8Pointer > Temp) - Temp = *UINT8Pointer; + int best = 0; + for (int i = 0; i < ClassTemplate->NumConfigs; i++) + if (tables->feature_evidence_[i] > best) + best = tables->feature_evidence_[i]; /* Update display for current feature */ - if (ClipMatchEvidenceOn (Debug)) { - if (Temp < classify_adapt_feature_thresh) - DisplayIntFeature (&(Features[Feature]), 0.0); + if (ClipMatchEvidenceOn(Debug)) { + if (best < AdaptFeatureThreshold) + DisplayIntFeature(&Features[Feature], 0.0); else - DisplayIntFeature (&(Features[Feature]), 1.0); - } - else { - DisplayIntFeature (&(Features[Feature]), (Temp / 255.0)); + 
DisplayIntFeature(&Features[Feature], 1.0); + } else { + DisplayIntFeature(&Features[Feature], best / 255.0); } } + + delete tables; } #endif /*---------------------------------------------------------------------------*/ -void -IMUpdateSumOfProtoEvidences (INT_CLASS ClassTemplate, -BIT_VECTOR ConfigMask, -int SumOfFeatureEvidence[MAX_NUM_CONFIGS], -uinT8 -ProtoEvidence[MAX_NUM_PROTOS][MAX_PROTO_INDEX], -inT16 NumFeatures) { -/* - ** Parameters: - ** Globals: - ** Operation: - ** Add sum of Proto Evidences into Sum Of Feature Evidence Array - ** Return: - ** Exceptions: none - ** History: Wed Feb 27 14:12:28 MST 1991, RWM, Created. - */ - register uinT8 *UINT8Pointer; - register int *IntPointer; - register uinT32 ConfigWord; +// Add sum of Proto Evidences into Sum Of Feature Evidence Array +void ScratchEvidence::UpdateSumOfProtoEvidences( + INT_CLASS ClassTemplate, BIT_VECTOR ConfigMask, inT16 NumFeatures) { + + int *IntPointer; + uinT32 ConfigWord; int ProtoSetIndex; - register uinT16 ProtoNum; + uinT16 ProtoNum; PROTO_SET ProtoSet; - register int ProtoIndex; int NumProtos; uinT16 ActualProtoNum; - int Temp; NumProtos = ClassTemplate->NumProtos; for (ProtoSetIndex = 0; ProtoSetIndex < ClassTemplate->NumProtoSets; - ProtoSetIndex++) { + ProtoSetIndex++) { ProtoSet = ClassTemplate->ProtoSets[ProtoSetIndex]; ActualProtoNum = (ProtoSetIndex * PROTOS_PER_PROTO_SET); for (ProtoNum = 0; - ((ProtoNum < PROTOS_PER_PROTO_SET) - && (ActualProtoNum < NumProtos)); ProtoNum++, ActualProtoNum++) { - Temp = 0; - UINT8Pointer = &(ProtoEvidence[ActualProtoNum][0]); - for (ProtoIndex = ClassTemplate->ProtoLengths[ActualProtoNum]; - ProtoIndex > 0; ProtoIndex--, UINT8Pointer++) - Temp += *UINT8Pointer; - - ConfigWord = (ProtoSet->Protos[ProtoNum]).Configs[0]; + ((ProtoNum < PROTOS_PER_PROTO_SET) && (ActualProtoNum < NumProtos)); + ProtoNum++, ActualProtoNum++) { + int temp = 0; + for (int i = 0; i < ClassTemplate->ProtoLengths[ActualProtoNum]; i++) + temp += 
proto_evidence_[ActualProtoNum] [i]; + + ConfigWord = ProtoSet->Protos[ProtoNum].Configs[0]; ConfigWord &= *ConfigMask; - IntPointer = SumOfFeatureEvidence; + IntPointer = sum_feature_evidence_; while (ConfigWord) { if (ConfigWord & 1) - *IntPointer += Temp; + *IntPointer += temp; IntPointer++; ConfigWord >>= 1; } @@ -1367,40 +1130,25 @@ inT16 NumFeatures) { /*---------------------------------------------------------------------------*/ -void -IMNormalizeSumOfEvidences (INT_CLASS ClassTemplate, -int SumOfFeatureEvidence[MAX_NUM_CONFIGS], -inT16 NumFeatures, inT32 used_features) { -/* - ** Parameters: - ** Globals: - ** Operation: - ** Normalize Sum of Proto and Feature Evidence by dividing by - ** the sum of the Feature Lengths and the Proto Lengths for each - ** configuration. - ** Return: - ** Exceptions: none - ** History: Wed Feb 27 14:12:28 MST 1991, RWM, Created. - */ - register int *IntPointer; - register int ConfigNum; - int NumConfigs; - - NumConfigs = ClassTemplate->NumConfigs; - - IntPointer = SumOfFeatureEvidence; - for (ConfigNum = 0; ConfigNum < NumConfigs; ConfigNum++, IntPointer++) - *IntPointer = (*IntPointer << 8) / - (NumFeatures + ClassTemplate->ConfigLengths[ConfigNum]); +// Normalize Sum of Proto and Feature Evidence by dividing by the sum of +// the Feature Lengths and the Proto Lengths for each configuration. 
+void ScratchEvidence::NormalizeSums( + INT_CLASS ClassTemplate, inT16 NumFeatures, inT32 used_features) { + + for (int i = 0; i < ClassTemplate->NumConfigs; i++) { + sum_feature_evidence_[i] = (sum_feature_evidence_[i] << 8) / + (NumFeatures + ClassTemplate->ConfigLengths[i]); + } } /*---------------------------------------------------------------------------*/ -int -IMFindBestMatch (INT_CLASS ClassTemplate, -int SumOfFeatureEvidence[MAX_NUM_CONFIGS], -uinT16 BlobLength, -uinT8 NormalizationFactor, INT_RESULT Result) { +int IntegerMatcher::FindBestMatch( + INT_CLASS ClassTemplate, + const ScratchEvidence &tables, + uinT16 BlobLength, + uinT8 NormalizationFactor, + INT_RESULT Result) { /* ** Parameters: ** Globals: @@ -1412,71 +1160,55 @@ uinT8 NormalizationFactor, INT_RESULT Result) { ** Exceptions: none ** History: Wed Feb 27 14:12:28 MST 1991, RWM, Created. */ - register int *IntPointer; - register int ConfigNum; - register int NumConfigs; - register int BestMatch; - register int Best2Match; - - NumConfigs = ClassTemplate->NumConfigs; + int BestMatch = 0; + int Best2Match = 0; /* Find best match */ - BestMatch = 0; - Best2Match = 0; - IntPointer = SumOfFeatureEvidence; - for (ConfigNum = 0; ConfigNum < NumConfigs; ConfigNum++, IntPointer++) { - if (tord_display_ratings > 1) - cprintf ("Config %d, rating=%d\n", ConfigNum, *IntPointer); - if (*IntPointer > BestMatch) { + for (int ConfigNum = 0; ConfigNum < ClassTemplate->NumConfigs; ConfigNum++) { + int rating = tables.sum_feature_evidence_[ConfigNum]; + if (*classify_debug_level_ > 1) + cprintf("Config %d, rating=%d\n", ConfigNum, rating); + if (rating > BestMatch) { if (BestMatch > 0) { Result->Config2 = Result->Config; Best2Match = BestMatch; - } - else + } else { Result->Config2 = ConfigNum; + } Result->Config = ConfigNum; - BestMatch = *IntPointer; - } - else if (*IntPointer > Best2Match) { + BestMatch = rating; + } else if (rating > Best2Match) { Result->Config2 = ConfigNum; - Best2Match = *IntPointer; + 
Best2Match = rating; } } /* Compute Certainty Rating */ - (*Result).Rating = ((65536.0 - BestMatch) / 65536.0 * BlobLength + - LocalMatcherMultiplier * NormalizationFactor / 256.0) / - (BlobLength + LocalMatcherMultiplier); + Result->Rating = ((65536.0 - BestMatch) / 65536.0 * BlobLength + + local_matcher_multiplier_ * NormalizationFactor / 256.0) / + (BlobLength + local_matcher_multiplier_); return BestMatch; } - /*---------------------------------------------------------------------------*/ #ifndef GRAPHICS_DISABLED -void IMDebugBestMatch(int BestMatch, - INT_RESULT Result, - uinT16 BlobLength, - uinT8 NormalizationFactor) { -/* - ** Parameters: - ** Globals: - ** Operation: - ** Find the best match for the current class and update the Result - ** Return: - ** Exceptions: none - ** History: Wed Feb 27 14:12:28 MST 1991, RWM, Created. - */ - cprintf ("Rating = %5.1f%% Best Config = %3d\n", - 100.0 * ((*Result).Rating), (int) ((*Result).Config)); +// Print debug information about the best match for the current class. 
+void IntegerMatcher::DebugBestMatch( + int BestMatch, INT_RESULT Result, uinT16 BlobLength, + uinT8 NormalizationFactor) { + cprintf("Rating = %5.1f%% Best Config = %3d\n", + 100.0 * ((*Result).Rating), (int) ((*Result).Config)); cprintf ("Matcher Error = %5.1f%% Blob Length = %3d Weight = %4.1f%%\n", 100.0 * (65536.0 - BestMatch) / 65536.0, (int) BlobLength, - 100.0 * BlobLength / (BlobLength + LocalMatcherMultiplier)); + 100.0 * BlobLength / (BlobLength + local_matcher_multiplier_)); cprintf ("Char Norm Error = %5.1f%% Norm Strength = %3d Weight = %4.1f%%\n", - 100.0 * NormalizationFactor / 256.0, LocalMatcherMultiplier, - 100.0 * LocalMatcherMultiplier / (BlobLength + LocalMatcherMultiplier)); + 100.0 * NormalizationFactor / 256.0, + local_matcher_multiplier_, + 100.0 * local_matcher_multiplier_ / + (BlobLength + local_matcher_multiplier_)); } #endif diff --git a/classify/intmatcher.h b/classify/intmatcher.h index 2516b3dc80..0d28988fe8 100644 --- a/classify/intmatcher.h +++ b/classify/intmatcher.h @@ -18,7 +18,7 @@ #ifndef INTMATCHER_H #define INTMATCHER_H -#include "varable.h" +#include "params.h" // Character fragments could be present in the trained templaes // but turned on/off on the language-by-language basis or depending @@ -38,28 +38,22 @@ extern INT_VAR_H(classify_integer_matcher_multiplier, 14, #include "intproto.h" #include "cutoffs.h" -typedef struct -{ +struct INT_RESULT_STRUCT { FLOAT32 Rating; uinT8 Config; uinT8 Config2; uinT16 FeatureMisses; -} +}; +typedef INT_RESULT_STRUCT *INT_RESULT; -INT_RESULT_STRUCT, *INT_RESULT; -typedef struct -{ +struct CP_RESULT_STRUCT { FLOAT32 Rating; INT_RESULT_STRUCT IMResult; CLASS_ID Class; -} +}; - -CP_RESULT_STRUCT; - -/*typedef CLASS_ID CLASS_PRUNER_RESULTS [MAX_NUM_CLASSES]; */ typedef CP_RESULT_STRUCT CLASS_PRUNER_RESULTS[MAX_NUM_CLASSES]; typedef uinT8 CLASS_NORMALIZATION_ARRAY[MAX_NUM_CLASSES]; @@ -78,56 +72,138 @@ extern INT_VAR_H(classify_adapt_feature_thresh, 230, Public Function Prototypes 
----------------------------------------------------------------------------**/ -void IntegerMatcher(INT_CLASS ClassTemplate, - BIT_VECTOR ProtoMask, - BIT_VECTOR ConfigMask, +#define SE_TABLE_BITS 9 +#define SE_TABLE_SIZE 512 + +struct ScratchEvidence { + uinT8 feature_evidence_[MAX_NUM_CONFIGS]; + int sum_feature_evidence_[MAX_NUM_CONFIGS]; + uinT8 proto_evidence_[MAX_NUM_PROTOS][MAX_PROTO_INDEX]; + + void Clear(const INT_CLASS class_template); + void ClearFeatureEvidence(const INT_CLASS class_template); + void NormalizeSums(INT_CLASS ClassTemplate, inT16 NumFeatures, + inT32 used_features); + void UpdateSumOfProtoEvidences( + INT_CLASS ClassTemplate, BIT_VECTOR ConfigMask, inT16 NumFeatures); +}; + + +class IntegerMatcher { + public: + // Integer Matcher Theta Fudge (0-255). + static const int kIntThetaFudge = 128; + // Bits in Similarity to Evidence Lookup (8-9). + static const int kEvidenceTableBits = 9; + // Integer Evidence Truncation Bits (8-14). + static const int kIntEvidenceTruncBits = 14; + // Similarity to Evidence Table Exponential Multiplier. + static const float kSEExponentialMultiplier; + // Center of Similarity Curve. 
+ static const float kSimilarityCenter; + + IntegerMatcher() : classify_debug_level_(0) {} + + void Init(tesseract::IntParam *classify_debug_level, + int classify_integer_matcher_multiplier); + + void SetBaseLineMatch(); + void SetCharNormMatch(int integer_matcher_multiplier); + + void Match(INT_CLASS ClassTemplate, + BIT_VECTOR ProtoMask, + BIT_VECTOR ConfigMask, + uinT16 BlobLength, + inT16 NumFeatures, + INT_FEATURE_ARRAY Features, + uinT8 NormalizationFactor, + INT_RESULT Result, + int AdaptFeatureThreshold, + int Debug, + bool SeparateDebugWindows); + + int FindGoodProtos(INT_CLASS ClassTemplate, + BIT_VECTOR ProtoMask, + BIT_VECTOR ConfigMask, + uinT16 BlobLength, + inT16 NumFeatures, + INT_FEATURE_ARRAY Features, + PROTO_ID *ProtoArray, + int AdaptProtoThreshold, + int Debug); + + int FindBadFeatures(INT_CLASS ClassTemplate, + BIT_VECTOR ProtoMask, + BIT_VECTOR ConfigMask, + uinT16 BlobLength, + inT16 NumFeatures, + INT_FEATURE_ARRAY Features, + FEATURE_ID *FeatureArray, + int AdaptFeatureThreshold, + int Debug); + + private: + int UpdateTablesForFeature( + INT_CLASS ClassTemplate, + BIT_VECTOR ProtoMask, + BIT_VECTOR ConfigMask, + int FeatureNum, + INT_FEATURE Feature, + ScratchEvidence *evidence, + int Debug); + + int FindBestMatch(INT_CLASS ClassTemplate, + const ScratchEvidence &tables, uinT16 BlobLength, - inT16 NumFeatures, - INT_FEATURE_ARRAY Features, uinT8 NormalizationFactor, - INT_RESULT Result, - int Debug); - -int FindGoodProtos(INT_CLASS ClassTemplate, - BIT_VECTOR ProtoMask, - BIT_VECTOR ConfigMask, - uinT16 BlobLength, - inT16 NumFeatures, - INT_FEATURE_ARRAY Features, - PROTO_ID *ProtoArray, - int Debug); - -int FindBadFeatures(INT_CLASS ClassTemplate, - BIT_VECTOR ProtoMask, - BIT_VECTOR ConfigMask, - uinT16 BlobLength, - inT16 NumFeatures, - INT_FEATURE_ARRAY Features, - FEATURE_ID *FeatureArray, - int Debug); - -void InitIntegerMatcher(); - -void PrintIntMatcherStats(FILE *f); - -void SetProtoThresh(FLOAT32 Threshold); + INT_RESULT 
Result); -void SetFeatureThresh(FLOAT32 Threshold); +#ifndef GRAPHICS_DISABLED + void DebugFeatureProtoError( + INT_CLASS ClassTemplate, + BIT_VECTOR ProtoMask, + BIT_VECTOR ConfigMask, + const ScratchEvidence &tables, + inT16 NumFeatures, + int Debug); + + void DisplayProtoDebugInfo( + INT_CLASS ClassTemplate, + BIT_VECTOR ProtoMask, + BIT_VECTOR ConfigMask, + const ScratchEvidence &tables, + bool SeparateDebugWindows); + + void DisplayFeatureDebugInfo( + INT_CLASS ClassTemplate, + BIT_VECTOR ProtoMask, + BIT_VECTOR ConfigMask, + inT16 NumFeatures, + INT_FEATURE_ARRAY Features, + int AdaptFeatureThreshold, + int Debug, + bool SeparateDebugWindows); + + void DebugBestMatch(int BestMatch, + INT_RESULT Result, + uinT16 BlobLength, + uinT8 NormalizationFactor); +#endif -void SetBaseLineMatch(); -void SetCharNormMatch(); + private: + uinT8 similarity_evidence_table_[SE_TABLE_SIZE]; + uinT32 evidence_table_mask_; + uinT32 mult_trunc_shift_bits_; + uinT32 table_trunc_shift_bits_; + inT16 local_matcher_multiplier_; + tesseract::IntParam *classify_debug_level_; + uinT32 evidence_mult_mask_; +}; /**---------------------------------------------------------------------------- Private Function Prototypes ----------------------------------------------------------------------------**/ -void IMClearTables (INT_CLASS ClassTemplate, -int SumOfFeatureEvidence[MAX_NUM_CONFIGS], -uinT8 ProtoEvidence[MAX_NUM_PROTOS][MAX_PROTO_INDEX]); - -void IMClearFeatureEvidenceTable (uinT8 FeatureEvidence[MAX_NUM_CONFIGS], -int NumConfigs); - void IMDebugConfiguration(INT_FEATURE FeatureNum, uinT16 ActualProtoNum, uinT8 Evidence, @@ -138,68 +214,9 @@ void IMDebugConfigurationSum(INT_FEATURE FeatureNum, uinT8 *FeatureEvidence, inT32 ConfigCount); -int IMUpdateTablesForFeature (INT_CLASS ClassTemplate, -BIT_VECTOR ProtoMask, -BIT_VECTOR ConfigMask, -int FeatureNum, -INT_FEATURE Feature, -uinT8 FeatureEvidence[MAX_NUM_CONFIGS], -int SumOfFeatureEvidence[MAX_NUM_CONFIGS], -uinT8 
-ProtoEvidence[MAX_NUM_PROTOS][MAX_PROTO_INDEX], -int Debug); - -#ifndef GRAPHICS_DISABLED -void IMDebugFeatureProtoError (INT_CLASS ClassTemplate, -BIT_VECTOR ProtoMask, -BIT_VECTOR ConfigMask, -int SumOfFeatureEvidence[MAX_NUM_CONFIGS], -uinT8 -ProtoEvidence[MAX_NUM_PROTOS][MAX_PROTO_INDEX], -inT16 NumFeatures, int Debug); - -void IMDisplayProtoDebugInfo (INT_CLASS ClassTemplate, -BIT_VECTOR ProtoMask, -BIT_VECTOR ConfigMask, -uinT8 -ProtoEvidence[MAX_NUM_PROTOS][MAX_PROTO_INDEX], -int Debug); - -void IMDisplayFeatureDebugInfo(INT_CLASS ClassTemplate, - BIT_VECTOR ProtoMask, - BIT_VECTOR ConfigMask, - inT16 NumFeatures, - INT_FEATURE_ARRAY Features, - int Debug); -#endif - -void IMUpdateSumOfProtoEvidences (INT_CLASS ClassTemplate, -BIT_VECTOR ConfigMask, -int SumOfFeatureEvidence[MAX_NUM_CONFIGS], -uinT8 -ProtoEvidence[MAX_NUM_PROTOS] -[MAX_PROTO_INDEX], inT16 NumFeatures); - -void IMNormalizeSumOfEvidences (INT_CLASS ClassTemplate, -int SumOfFeatureEvidence[MAX_NUM_CONFIGS], -inT16 NumFeatures, inT32 used_features); - -int IMFindBestMatch (INT_CLASS ClassTemplate, -int SumOfFeatureEvidence[MAX_NUM_CONFIGS], -uinT16 BlobLength, -uinT8 NormalizationFactor, INT_RESULT Result); - -#ifndef GRAPHICS_DISABLED -void IMDebugBestMatch(int BestMatch, - INT_RESULT Result, - uinT16 BlobLength, - uinT8 NormalizationFactor); -#endif - void HeapSort (int n, register int ra[], register int rb[]); /**---------------------------------------------------------------------------- Global Data Definitions and Declarations ----------------------------------------------------------------------------**/ -extern uinT32 EvidenceMultMask; #endif diff --git a/classify/intproto.cpp b/classify/intproto.cpp index c27e9e5d58..dbda9478ea 100644 --- a/classify/intproto.cpp +++ b/classify/intproto.cpp @@ -26,7 +26,6 @@ #include "const.h" #include "ndminx.h" #include "svmnode.h" -#include "adaptmatch.h" #include "globals.h" #include "classify.h" #include "genericvector.h" @@ -136,12 +135,12 @@ void 
DoFill(FILL_SPEC *FillSpec, BOOL8 FillerDone(TABLE_FILLER *Filler); -void FillPPCircularBits (uinT32 - ParamTable[NUM_PP_BUCKETS][WERDS_PER_PP_VECTOR], - int Bit, FLOAT32 Center, FLOAT32 Spread); +void FillPPCircularBits(uinT32 + ParamTable[NUM_PP_BUCKETS][WERDS_PER_PP_VECTOR], + int Bit, FLOAT32 Center, FLOAT32 Spread, bool debug); -void FillPPLinearBits (uinT32 ParamTable[NUM_PP_BUCKETS][WERDS_PER_PP_VECTOR], - int Bit, FLOAT32 Center, FLOAT32 Spread); +void FillPPLinearBits(uinT32 ParamTable[NUM_PP_BUCKETS][WERDS_PER_PP_VECTOR], + int Bit, FLOAT32 Center, FLOAT32 Spread, bool debug); #ifndef GRAPHICS_DISABLED CLASS_ID GetClassToDebug(const char *Prompt); @@ -368,7 +367,8 @@ void AddProtoToClassPruner (PROTO Proto, CLASS_ID ClassId, /*---------------------------------------------------------------------------*/ -void AddProtoToProtoPruner(PROTO Proto, int ProtoId, INT_CLASS Class) { +void AddProtoToProtoPruner(PROTO Proto, int ProtoId, + INT_CLASS Class, bool debug) { /* ** Parameters: ** Proto floating-pt proto to be added to proto pruner @@ -401,7 +401,8 @@ void AddProtoToProtoPruner(PROTO Proto, int ProtoId, INT_CLASS Class) { #endif FillPPCircularBits (ProtoSet->ProtoPruner[PRUNER_ANGLE], Index, - Angle + ANGLE_SHIFT, classify_pp_angle_pad / 360.0); + Angle + ANGLE_SHIFT, classify_pp_angle_pad / 360.0, + debug); Angle *= 2.0 * PI; Length = Proto->Length; @@ -413,7 +414,7 @@ void AddProtoToProtoPruner(PROTO Proto, int ProtoId, INT_CLASS Class) { fabs (sin (Angle)) * (classify_pp_side_pad * GetPicoFeatureLength ())); - FillPPLinearBits (ProtoSet->ProtoPruner[PRUNER_X], Index, X, Pad); + FillPPLinearBits(ProtoSet->ProtoPruner[PRUNER_X], Index, X, Pad, debug); Y = Proto->Y + Y_SHIFT; Pad = MAX (fabs (sin (Angle)) * (Length / 2.0 + @@ -422,7 +423,7 @@ void AddProtoToProtoPruner(PROTO Proto, int ProtoId, INT_CLASS Class) { fabs (cos (Angle)) * (classify_pp_side_pad * GetPicoFeatureLength ())); - FillPPLinearBits(ProtoSet->ProtoPruner[PRUNER_Y], Index, Y, Pad); + 
FillPPLinearBits(ProtoSet->ProtoPruner[PRUNER_Y], Index, Y, Pad, debug); } /* AddProtoToProtoPruner */ @@ -528,8 +529,9 @@ void ConvertConfig(BIT_VECTOR Config, int ConfigId, INT_CLASS Class) { } /* ConvertConfig */ +namespace tesseract { /*---------------------------------------------------------------------------*/ -void ConvertProto(PROTO Proto, int ProtoId, INT_CLASS Class) { +void Classify::ConvertProto(PROTO Proto, int ProtoId, INT_CLASS Class) { /* ** Parameters: ** Proto floating-pt proto to be converted to integer format @@ -574,7 +576,6 @@ void ConvertProto(PROTO Proto, int ProtoId, INT_CLASS Class) { /*---------------------------------------------------------------------------*/ -namespace tesseract { INT_TEMPLATES Classify::CreateIntTemplates(CLASSES FloatProtos, const UNICHARSET& target_unicharset) { @@ -623,7 +624,8 @@ INT_TEMPLATES Classify::CreateIntTemplates(CLASSES FloatProtos, for (ProtoId = 0; ProtoId < FClass->NumProtos; ProtoId++) { AddIntProto(IClass); ConvertProto(ProtoIn(FClass, ProtoId), ProtoId, IClass); - AddProtoToProtoPruner(ProtoIn(FClass, ProtoId), ProtoId, IClass); + AddProtoToProtoPruner(ProtoIn(FClass, ProtoId), ProtoId, IClass, + classify_learning_debug_level >= 2); AddProtoToClassPruner(ProtoIn(FClass, ProtoId), ClassId, IntTemplates); } @@ -1131,8 +1133,8 @@ INT_TEMPLATES Classify::ReadIntTemplates(FILE *File) { } } if (version_id >= 4) { - this->fontinfo_table_.read(File, NewPermanentCallback(read_info), swap); - this->fontset_table_.read(File, NewPermanentCallback(read_set), swap); + this->fontinfo_table_.read(File, NewPermanentTessCallback(read_info), swap); + this->fontset_table_.read(File, NewPermanentTessCallback(read_set), swap); } // Clean up. 
@@ -1143,12 +1145,10 @@ INT_TEMPLATES Classify::ReadIntTemplates(FILE *File) { return (Templates); } /* ReadIntTemplates */ -} // namespace tesseract - /*---------------------------------------------------------------------------*/ #ifndef GRAPHICS_DISABLED -void ShowMatchDisplay() { +void Classify::ShowMatchDisplay() { /* ** Parameters: none ** Globals: @@ -1220,7 +1220,6 @@ void ShowMatchDisplay() { #endif /*---------------------------------------------------------------------------*/ -namespace tesseract { void Classify::WriteIntTemplates(FILE *File, INT_TEMPLATES Templates, const UNICHARSET& target_unicharset) { /* @@ -1287,8 +1286,8 @@ void Classify::WriteIntTemplates(FILE *File, INT_TEMPLATES Templates, } /* Write the fonts info tables */ - this->fontinfo_table_.write(File, NewPermanentCallback(write_info)); - this->fontset_table_.write(File, NewPermanentCallback(write_set)); + this->fontinfo_table_.write(File, NewPermanentTessCallback(write_info)); + this->fontset_table_.write(File, NewPermanentTessCallback(write_set)); } /* WriteIntTemplates */ } // namespace tesseract @@ -1412,9 +1411,8 @@ BOOL8 FillerDone(TABLE_FILLER *Filler) { /*---------------------------------------------------------------------------*/ -void -FillPPCircularBits (uinT32 ParamTable[NUM_PP_BUCKETS][WERDS_PER_PP_VECTOR], - int Bit, FLOAT32 Center, FLOAT32 Spread) { +void FillPPCircularBits(uinT32 ParamTable[NUM_PP_BUCKETS][WERDS_PER_PP_VECTOR], + int Bit, FLOAT32 Center, FLOAT32 Spread, bool debug) { /* ** Parameters: ** ParamTable table of bit vectors, one per param bucket @@ -1444,8 +1442,7 @@ FillPPCircularBits (uinT32 ParamTable[NUM_PP_BUCKETS][WERDS_PER_PP_VECTOR], LastBucket = (int) floor ((Center + Spread) * NUM_PP_BUCKETS); if (LastBucket >= NUM_PP_BUCKETS) LastBucket -= NUM_PP_BUCKETS; - if (classify_learning_debug_level >= 2) - cprintf ("Circular fill from %d to %d", FirstBucket, LastBucket); + if (debug) tprintf("Circular fill from %d to %d", FirstBucket, LastBucket); for (i = 
FirstBucket; TRUE; CircularIncrement (i, NUM_PP_BUCKETS)) { SET_BIT (ParamTable[i], Bit); @@ -1458,9 +1455,8 @@ FillPPCircularBits (uinT32 ParamTable[NUM_PP_BUCKETS][WERDS_PER_PP_VECTOR], /*---------------------------------------------------------------------------*/ -void -FillPPLinearBits (uinT32 ParamTable[NUM_PP_BUCKETS][WERDS_PER_PP_VECTOR], - int Bit, FLOAT32 Center, FLOAT32 Spread) { +void FillPPLinearBits(uinT32 ParamTable[NUM_PP_BUCKETS][WERDS_PER_PP_VECTOR], + int Bit, FLOAT32 Center, FLOAT32 Spread, bool debug) { /* ** Parameters: ** ParamTable table of bit vectors, one per param bucket @@ -1489,8 +1485,7 @@ FillPPLinearBits (uinT32 ParamTable[NUM_PP_BUCKETS][WERDS_PER_PP_VECTOR], if (LastBucket >= NUM_PP_BUCKETS) LastBucket = NUM_PP_BUCKETS - 1; - if (classify_learning_debug_level >= 2) - cprintf ("Linear fill from %d to %d", FirstBucket, LastBucket); + if (debug) tprintf("Linear fill from %d to %d", FirstBucket, LastBucket); for (i = FirstBucket; i <= LastBucket; i++) SET_BIT (ParamTable[i], Bit); diff --git a/classify/intproto.h b/classify/intproto.h index 04abd97276..be1b2b0bde 100644 --- a/classify/intproto.h +++ b/classify/intproto.h @@ -48,6 +48,7 @@ #define NUM_CP_BUCKETS 24 #define CLASSES_PER_CP 32 #define NUM_BITS_PER_CLASS 2 +#define CLASS_PRUNER_CLASS_MASK (~(~0 << NUM_BITS_PER_CLASS)) #define CLASSES_PER_CP_WERD (CLASSES_PER_CP / NUM_BITS_PER_CLASS) #define PROTOS_PER_PP_WERD BITS_PER_WERD #define BITS_PER_CP_VECTOR (CLASSES_PER_CP * NUM_BITS_PER_CLASS) @@ -229,7 +230,8 @@ void AddProtoToClassPruner(PROTO Proto, CLASS_ID ClassId, INT_TEMPLATES Templates); -void AddProtoToProtoPruner(PROTO Proto, int ProtoId, INT_CLASS Class); +void AddProtoToProtoPruner(PROTO Proto, int ProtoId, + INT_CLASS Class, bool debug); int BucketFor(FLOAT32 Param, FLOAT32 Offset, int NumBuckets); @@ -239,8 +241,6 @@ void UpdateMatchDisplay(); void ConvertConfig(BIT_VECTOR Config, int ConfigId, INT_CLASS Class); -void ConvertProto(PROTO Proto, int ProtoId, INT_CLASS 
Class); - void DisplayIntFeature(INT_FEATURE Feature, FLOAT32 Evidence); void DisplayIntProto(INT_CLASS Class, PROTO_ID ProtoId, FLOAT32 Evidence); diff --git a/classify/kdtree.cpp b/classify/kdtree.cpp index 04ebc9e52d..cc81f06272 100644 --- a/classify/kdtree.cpp +++ b/classify/kdtree.cpp @@ -1,12 +1,12 @@ /****************************************************************************** - ** Filename: kdtree.c - ** Purpose: Routines for managing K-D search trees - ** Author: Dan Johnson - ** History: 3/10/89, DSJ, Created. - ** 5/23/89, DSJ, Added circular feature capability. - ** 7/13/89, DSJ, Made tree nodes invisible to outside. + ** Filename: kdtree.cpp + ** Purpose: Routines for managing K-D search trees + ** Author: Dan Johnson + ** History: 3/10/89, DSJ, Created. + ** 5/23/89, DSJ, Added circular feature capability. + ** 7/13/89, DSJ, Made tree nodes invisible to outside. ** - ** (c) Copyright Hewlett-Packard Company, 1988. + ** (c) Copyright Hewlett-Packard Company, 1988. ** Licensed under the Apache License, Version 2.0 (the "License"); ** you may not use this file except in compliance with the License. ** You may obtain a copy of the License at @@ -27,10 +27,8 @@ #include "freelist.h" #include #include -#include #define Magnitude(X) ((X) < 0 ? -(X) : (X)) -#define MIN(A,B) ((A) < (B) ? (A) : (B)) #define NodeFound(N,K,D) (( (N)->Key == (K) ) && ( (N)->Data == (D) )) /*----------------------------------------------------------------------------- @@ -39,99 +37,154 @@ #define MINSEARCH -MAX_FLOAT32 #define MAXSEARCH MAX_FLOAT32 -static int NumberOfNeighbors; -static inT16 N; /* number of dimensions in the kd tree */ +// Helper function to find the next essential dimension in a cycle. 
+static int NextLevel(KDTREE *tree, int level) { + do { + ++level; + if (level >= tree->KeySize) + level = 0; + } while (tree->KeyDesc[level].NonEssential); + return level; +} -static FLOAT32 *QueryPoint; -static int MaxNeighbors; -static FLOAT32 Radius; -static int Furthest; -static char **Neighbor; -static FLOAT32 *Distance; +//----------------------------------------------------------------------------- +// Store the k smallest-keyed key-value pairs. +template +class MinK { + public: + MinK(Key max_key, int k); + ~MinK(); + + struct Element { + Element() {} + Element(const Key& k, const Value& v) : key(k), value(v) {} + + Key key; + Value value; + }; + + bool insert(Key k, Value v); + const Key& max_insertable_key(); + + int elements_count() { return elements_count_; } + const Element* elements() { return elements_; } + + private: + const Key max_key_; // the maximum possible Key + Element* elements_; // unsorted array of elements + int elements_count_; // the number of results collected so far + int k_; // the number of results we want from the search + int max_index_; // the index of the result with the largest key +}; + +template +MinK::MinK(Key max_key, int k) : + max_key_(max_key), elements_count_(0), k_(k < 1 ? 1 : k), max_index_(0) { + elements_ = new Element[k_]; +} -static int MaxDimension = 0; -static FLOAT32 *SBMin; -static FLOAT32 *SBMax; -static FLOAT32 *LBMin; -static FLOAT32 *LBMax; +template +MinK::~MinK() { + delete []elements_; +} -static PARAM_DESC *KeyDesc; +template +const Key& MinK::max_insertable_key() { + if (elements_count_ < k_) + return max_key_; + return elements_[max_index_].key; +} -static jmp_buf QuickExit; +template +bool MinK::insert(Key key, Value value) { + if (elements_count_ < k_) { + elements_[elements_count_++] = Element(key, value); + if (key > elements_[max_index_].key) + max_index_ = elements_count_ - 1; + return true; + } else if (key < elements_[max_index_].key) { + // evict the largest element. 
+ elements_[max_index_] = Element(key, value); + // recompute max_index_ + for (int i = 0; i < elements_count_; i++) { + if (elements_[i].key > elements_[max_index_].key) + max_index_ = i; + } + return true; + } + return false; +} -static void_proc WalkAction; -// Helper function to find the next essential dimension in a cycle. -static int NextLevel(int level) { - do { - ++level; - if (level >= N) - level = 0; - } while (KeyDesc[level].NonEssential); - return level; +//----------------------------------------------------------------------------- +// Helper class for searching for the k closest points to query_point in tree. +class KDTreeSearch { + public: + KDTreeSearch(KDTREE* tree, FLOAT32 *query_point, int k_closest); + ~KDTreeSearch(); + + // Return the k nearest points' data. + void Search(int *result_count, FLOAT32 *distances, void **results); + + private: + void SearchRec(int Level, KDNODE *SubTree); + bool BoxIntersectsSearch(FLOAT32 *lower, FLOAT32 *upper); + + KDTREE *tree_; + FLOAT32 *query_point_; + MinK* results_; + FLOAT32 *sb_min_; // search box minimum + FLOAT32 *sb_max_; // search box maximum +}; + +KDTreeSearch::KDTreeSearch(KDTREE* tree, FLOAT32 *query_point, int k_closest) : + tree_(tree), + query_point_(query_point) { + results_ = new MinK(MAXSEARCH, k_closest); + sb_min_ = new FLOAT32[tree->KeySize]; + sb_max_ = new FLOAT32[tree->KeySize]; } -/// Helper function to find the previous essential dimension in a cycle. -static int PrevLevel(int level) { - do { - --level; - if (level < 0) - level = N - 1; - } while (KeyDesc[level].NonEssential); - return level; +KDTreeSearch::~KDTreeSearch() { + delete results_; + delete[] sb_min_; + delete[] sb_max_; +} + +// Locate the k_closest points to query_point_, and return their distances and +// data into the given buffers. 
+void KDTreeSearch::Search(int *result_count, + FLOAT32 *distances, + void **results) { + if (tree_->Root.Left == NULL) { + *result_count = 0; + } else { + for (int i = 0; i < tree_->KeySize; i++) { + sb_min_[i] = tree_->KeyDesc[i].Min; + sb_max_[i] = tree_->KeyDesc[i].Max; + } + SearchRec(0, tree_->Root.Left); + int count = results_->elements_count(); + *result_count = count; + for (int j = 0; j < count; j++) { + distances[j] = (FLOAT32) sqrt((FLOAT64)results_->elements()[j].key); + results[j] = results_->elements()[j].value; + } + } } /*----------------------------------------------------------------------------- Public Code -----------------------------------------------------------------------------*/ /*---------------------------------------------------------------------------*/ -/** - * This routine allocates and returns a new K-D tree data - * structure. It also reallocates the small and large - * search region boxes if they are not large enough to - * accomodate the size of the new K-D tree. KeyDesc is - * an array of key descriptors that indicate which dimensions - * are circular and, if they are circular, what the range is. - * - * Globals: - * - MaxDimension largest # of dimensions in any K-D tree - * - SBMin small search region box - * - SBMax - * - LBMin large search region box - * - LBMax - * - Key description of key dimensions - * - * @param KeySize # of dimensions in the K-D tree - * @param KeyDesc array of params to describe key dimensions - * - * @return Pointer to new K-D tree - * @note Exceptions: None - * @note History: 3/13/89, DSJ, Created. 
- */ -KDTREE * -MakeKDTree (inT16 KeySize, PARAM_DESC KeyDesc[]) { - int i; - void *NewMemory; - KDTREE *KDTree; - - if (KeySize > MaxDimension) { - NewMemory = Emalloc (KeySize * 4 * sizeof (FLOAT32)); - if (MaxDimension > 0) { - memfree ((char *) SBMin); - memfree ((char *) SBMax); - memfree ((char *) LBMin); - memfree ((char *) LBMax); - } - SBMin = (FLOAT32 *) NewMemory; - SBMax = SBMin + KeySize; - LBMin = SBMax + KeySize; - LBMax = LBMin + KeySize; - } - - KDTree = - (KDTREE *) Emalloc (sizeof (KDTREE) + - (KeySize - 1) * sizeof (PARAM_DESC)); - for (i = 0; i < KeySize; i++) { +/// Return a new KDTREE based on the specified parameters. +/// Parameters: +/// KeySize # of dimensions in the K-D tree +/// KeyDesc array of params to describe key dimensions +KDTREE *MakeKDTree(inT16 KeySize, const PARAM_DESC KeyDesc[]) { + KDTREE *KDTree = (KDTREE *) Emalloc( + sizeof(KDTREE) + (KeySize - 1) * sizeof(PARAM_DESC)); + for (int i = 0; i < KeySize; i++) { KDTree->KeyDesc[i].NonEssential = KeyDesc[i].NonEssential; KDTree->KeyDesc[i].Circular = KeyDesc[i].Circular; if (KeyDesc[i].Circular) { @@ -140,8 +193,7 @@ MakeKDTree (inT16 KeySize, PARAM_DESC KeyDesc[]) { KDTree->KeyDesc[i].Range = KeyDesc[i].Max - KeyDesc[i].Min; KDTree->KeyDesc[i].HalfRange = KDTree->KeyDesc[i].Range / 2; KDTree->KeyDesc[i].MidRange = (KeyDesc[i].Max + KeyDesc[i].Min) / 2; - } - else { + } else { KDTree->KeyDesc[i].Min = MINSEARCH; KDTree->KeyDesc[i].Max = MAXSEARCH; } @@ -149,8 +201,8 @@ MakeKDTree (inT16 KeySize, PARAM_DESC KeyDesc[]) { KDTree->KeySize = KeySize; KDTree->Root.Left = NULL; KDTree->Root.Right = NULL; - return (KDTree); -} /* MakeKDTree */ + return KDTree; +} /*---------------------------------------------------------------------------*/ @@ -159,30 +211,21 @@ void KDStore(KDTREE *Tree, FLOAT32 *Key, void *Data) { * This routine stores Data in the K-D tree specified by Tree * using Key as an access key. 
* - * @param Tree K-D tree in which data is to be stored - * @param Key ptr to key by which data can be retrieved - * @param Data ptr to data to be stored in the tree - * - * Globals: - * - N dimension of the K-D tree - * - KeyDesc descriptions of tree dimensions - * - StoreCount debug variables for performance tests - * - StoreUniqueCount - * - StoreProbeCount + * @param Tree K-D tree in which data is to be stored + * @param Key ptr to key by which data can be retrieved + * @param Data ptr to data to be stored in the tree * * @note Exceptions: none - * @note History: 3/10/89, DSJ, Created. - * 7/13/89, DSJ, Changed return to void. + * @note History: 3/10/89, DSJ, Created. + * 7/13/89, DSJ, Changed return to void. */ int Level; KDNODE *Node; KDNODE **PtrToNode; - N = Tree->KeySize; - KeyDesc = &(Tree->KeyDesc[0]); PtrToNode = &(Tree->Root.Left); Node = *PtrToNode; - Level = NextLevel(-1); + Level = NextLevel(Tree, -1); while (Node != NULL) { if (Key[Level] < Node->BranchPoint) { PtrToNode = &(Node->Left); @@ -194,43 +237,24 @@ void KDStore(KDTREE *Tree, FLOAT32 *Key, void *Data) { if (Key[Level] < Node->RightBranch) Node->RightBranch = Key[Level]; } - Level = NextLevel(Level); + Level = NextLevel(Tree, Level); Node = *PtrToNode; } - *PtrToNode = MakeKDNode (Key, (char *) Data, Level); + *PtrToNode = MakeKDNode(Tree, Key, (void *) Data, Level); } /* KDStore */ /*---------------------------------------------------------------------------*/ /** - * This routine deletes a node from Tree. The node to be - * deleted is specified by the Key for the node and the Data - * contents of the node. These two pointers must be identical - * to the pointers that were used for the node when it was - * originally stored in the tree. A node will be deleted from - * the tree only if its key and data pointers are identical - * to Key and Data respectively. 
The empty space left in the tree - * is filled by pulling a leaf up from the bottom of one of - * the subtrees of the node being deleted. The leaf node will - * be pulled from left subtrees whenever possible (this was - * an arbitrary decision). No attempt is made to pull the leaf - * from the deepest subtree (to minimize length). The branch - * point for the replacement node is changed to be the same as - * the branch point of the deleted node. This keeps us from - * having to rearrange the tree every time we delete a node. - * Also, the LeftBranch and RightBranch numbers of the - * replacement node are set to be the same as the deleted node. - * The makes the delete easier and more efficient, but it may - * make searches in the tree less efficient after many nodes are - * deleted. If the node specified by Key and Data does not - * exist in the tree, then nothing is done. - * - * Globals: - * - N dimension of the K-D tree - * - KeyDesc description of each dimension - * - DeleteCount debug variables for performance tests - * - DeleteProbeCount + * This routine deletes a node from Tree. The node to be + * deleted is specified by the Key for the node and the Data + * contents of the node. These two pointers must be identical + * to the pointers that were used for the node when it was + * originally stored in the tree. A node will be deleted from + * the tree only if its key and data pointers are identical + * to Key and Data respectively. The tree is re-formed by removing + * the affected subtree and inserting all elements but the root. * * @param Tree K-D tree to delete node from * @param Key key of node to be deleted @@ -238,23 +262,19 @@ void KDStore(KDTREE *Tree, FLOAT32 *Key, void *Data) { * * @note Exceptions: none * - * @note History: 3/13/89, DSJ, Created. - * 7/13/89, DSJ, Specify node indirectly by key and data. + * @note History: 3/13/89, DSJ, Created. + * 7/13/89, DSJ, Specify node indirectly by key and data. 
*/ void KDDelete (KDTREE * Tree, FLOAT32 Key[], void *Data) { int Level; KDNODE *Current; KDNODE *Father; - KDNODE *Replacement; - KDNODE *FatherReplacement; /* initialize search at root of tree */ - N = Tree->KeySize; - KeyDesc = &(Tree->KeyDesc[0]); Father = &(Tree->Root); Current = Father->Left; - Level = NextLevel(-1); + Level = NextLevel(Tree, -1); /* search tree for node to be deleted */ while ((Current != NULL) && (!NodeFound (Current, Key, Data))) { @@ -264,175 +284,82 @@ KDDelete (KDTREE * Tree, FLOAT32 Key[], void *Data) { else Current = Current->Right; - Level = NextLevel(Level); + Level = NextLevel(Tree, Level); } if (Current != NULL) { /* if node to be deleted was found */ - Replacement = Current; - FatherReplacement = Father; - - /* search for replacement node (a leaf under node to be deleted */ - while (TRUE) { - if (Replacement->Left != NULL) { - FatherReplacement = Replacement; - Replacement = Replacement->Left; - } - else if (Replacement->Right != NULL) { - FatherReplacement = Replacement; - Replacement = Replacement->Right; - } - else - break; - - Level = NextLevel(Level); + if (Current == Father->Left) { + Father->Left = NULL; + Father->LeftBranch = Tree->KeyDesc[Level].Min; + } else { + Father->Right = NULL; + Father->RightBranch = Tree->KeyDesc[Level].Max; } - /* compute level of replacement node's father */ - Level = PrevLevel(Level); - - /* disconnect replacement node from it's father */ - if (FatherReplacement->Left == Replacement) { - FatherReplacement->Left = NULL; - FatherReplacement->LeftBranch = KeyDesc[Level].Min; - } - else { - FatherReplacement->Right = NULL; - FatherReplacement->RightBranch = KeyDesc[Level].Max; - } - - /* replace deleted node with replacement (unless they are the same) */ - if (Replacement != Current) { - Replacement->BranchPoint = Current->BranchPoint; - Replacement->LeftBranch = Current->LeftBranch; - Replacement->RightBranch = Current->RightBranch; - Replacement->Left = Current->Left; - Replacement->Right = 
Current->Right; - - if (Father->Left == Current) - Father->Left = Replacement; - else - Father->Right = Replacement; - } - FreeKDNode(Current); + InsertNodes(Tree, Current->Left); + InsertNodes(Tree, Current->Right); + FreeSubTree(Current); } } /* KDDelete */ /*---------------------------------------------------------------------------*/ -int -KDNearestNeighborSearch (KDTREE * Tree, -FLOAT32 Query[], -int QuerySize, -FLOAT32 MaxDistance, -void *NBuffer, FLOAT32 DBuffer[]) { +void KDNearestNeighborSearch( + KDTREE *Tree, FLOAT32 Query[], int QuerySize, FLOAT32 MaxDistance, + int *NumberOfResults, void **NBuffer, FLOAT32 DBuffer[]) { /* - ** Parameters: - ** Tree ptr to K-D tree to be searched - ** Query ptr to query key (point in D-space) - ** QuerySize number of nearest neighbors to be found - ** MaxDistance all neighbors must be within this distance - ** NBuffer ptr to QuerySize buffer to hold nearest neighbors - ** DBuffer ptr to QuerySize buffer to hold distances - ** from nearest neighbor to query point - ** Globals: - ** NumberOfNeighbors # of neighbors found so far - ** N # of features in each key - ** KeyDesc description of tree dimensions - ** QueryPoint point in D-space to find neighbors of - ** MaxNeighbors maximum # of neighbors to find - ** Radius current distance of furthest neighbor - ** Furthest index of furthest neighbor - ** Neighbor buffer of current neighbors - ** Distance buffer of neighbor distances - ** SBMin lower extent of small search region - ** SBMax upper extent of small search region - ** LBMin lower extent of large search region - ** LBMax upper extent of large search region - ** QuickExit quick exit from recursive search - ** Operation: - ** This routine searches the K-D tree specified by Tree and - ** finds the QuerySize nearest neighbors of Query. All neighbors - ** must be within MaxDistance of Query. 
The data contents of - ** the nearest neighbors - ** are placed in NBuffer and their distances from Query are - ** placed in DBuffer. - ** Return: Number of nearest neighbors actually found - ** Exceptions: none - ** History: - ** 3/10/89, DSJ, Created. - ** 7/13/89, DSJ, Return contents of node instead of node itself. + ** Parameters: + ** Tree ptr to K-D tree to be searched + ** Query ptr to query key (point in D-space) + ** QuerySize number of nearest neighbors to be found + ** MaxDistance all neighbors must be within this distance + ** NBuffer ptr to QuerySize buffer to hold nearest neighbors + ** DBuffer ptr to QuerySize buffer to hold distances + ** from nearest neighbor to query point + ** Operation: + ** This routine searches the K-D tree specified by Tree and + ** finds the QuerySize nearest neighbors of Query. All neighbors + ** must be within MaxDistance of Query. The data contents of + ** the nearest neighbors + ** are placed in NBuffer and their distances from Query are + ** placed in DBuffer. + ** Return: Number of nearest neighbors actually found + ** Exceptions: none + ** History: + ** 3/10/89, DSJ, Created. + ** 7/13/89, DSJ, Return contents of node instead of node itself. 
*/ - int i; - - NumberOfNeighbors = 0; - N = Tree->KeySize; - KeyDesc = &(Tree->KeyDesc[0]); - QueryPoint = Query; - MaxNeighbors = QuerySize; - Radius = MaxDistance; - Furthest = 0; - Neighbor = (char **) NBuffer; - Distance = DBuffer; - - for (i = 0; i < N; i++) { - SBMin[i] = KeyDesc[i].Min; - SBMax[i] = KeyDesc[i].Max; - LBMin[i] = KeyDesc[i].Min; - LBMax[i] = KeyDesc[i].Max; - } - - if (Tree->Root.Left != NULL) { - if (setjmp (QuickExit) == 0) - Search (0, Tree->Root.Left); - } - return (NumberOfNeighbors); -} /* KDNearestNeighborSearch */ + KDTreeSearch search(Tree, Query, QuerySize); + search.Search(NumberOfResults, DBuffer, NBuffer); +} /*---------------------------------------------------------------------------*/ -void KDWalk(KDTREE *Tree, void_proc Action) { -/* - ** Parameters: - ** Tree ptr to K-D tree to be walked - ** Action ptr to function to be executed at each node - ** Globals: - ** WalkAction action to be performed at every node - ** Operation: - ** This routine stores the desired action in a global - ** variable and starts a recursive walk of Tree. The walk - ** is started at the root node. - ** Return: - ** None - ** Exceptions: - ** None - ** History: - ** 3/13/89, DSJ, Created. - */ - WalkAction = Action; +// Walk a given Tree with action. +void KDWalk(KDTREE *Tree, void_proc action, void *context) { if (Tree->Root.Left != NULL) - Walk (Tree->Root.Left, NextLevel(-1)); -} /* KDWalk */ + Walk(Tree, action, context, Tree->Root.Left, NextLevel(Tree, -1)); +} /*---------------------------------------------------------------------------*/ void FreeKDTree(KDTREE *Tree) { /* - ** Parameters: - ** Tree tree data structure to be released - ** Globals: none - ** Operation: - ** This routine frees all memory which is allocated to the - ** specified KD-tree. This includes the data structure for - ** the kd-tree itself plus the data structures for each node - ** in the tree. 
It does not include the Key and Data items - ** which are pointed to by the nodes. This memory is left - ** untouched. - ** Return: none - ** Exceptions: none - ** History: - ** 5/26/89, DSJ, Created. + ** Parameters: + ** Tree tree data structure to be released + ** Operation: + ** This routine frees all memory which is allocated to the + ** specified KD-tree. This includes the data structure for + ** the kd-tree itself plus the data structures for each node + ** in the tree. It does not include the Key and Data items + ** which are pointed to by the nodes. This memory is left + ** untouched. + ** Return: none + ** Exceptions: none + ** History: + ** 5/26/89, DSJ, Created. */ - FreeSubTree (Tree->Root.Left); + FreeSubTree(Tree->Root.Left); memfree(Tree); } /* FreeKDTree */ @@ -441,52 +368,24 @@ void FreeKDTree(KDTREE *Tree) { Private Code -----------------------------------------------------------------------------*/ /*---------------------------------------------------------------------------*/ -int -Equal (FLOAT32 Key1[], FLOAT32 Key2[]) { -/* - ** Parameters: - ** Key1,Key2 search keys to be compared for equality - ** Globals: - ** N number of parameters per key - ** Operation: - ** This routine returns TRUE if Key1 = Key2. - ** Return: - ** TRUE if Key1 = Key2, else FALSE. - ** Exceptions: - ** None - ** History: - ** 3/11/89, DSJ, Created. 
- */ - int i; - - for (i = N; i > 0; i--, Key1++, Key2++) - if (*Key1 != *Key2) - return (FALSE); - return (TRUE); -} /* Equal */ - - -/*---------------------------------------------------------------------------*/ -KDNODE * -MakeKDNode (FLOAT32 Key[], char *Data, int Index) { +KDNODE *MakeKDNode(KDTREE *tree, FLOAT32 Key[], void *Data, int Index) { /* - ** Parameters: - ** Key Access key for new node in KD tree - ** Data ptr to data to be stored in new node - ** Index index of Key to branch on - ** Globals: - ** KeyDesc descriptions of key dimensions - ** Operation: - ** This routine allocates memory for a new K-D tree node - ** and places the specified Key and Data into it. The - ** left and right subtree pointers for the node are - ** initialized to empty subtrees. - ** Return: - ** pointer to new K-D tree node - ** Exceptions: - ** None - ** History: - ** 3/11/89, DSJ, Created. + ** Parameters: + ** tree The tree to create the node for + ** Key Access key for new node in KD tree + ** Data ptr to data to be stored in new node + ** Index index of Key to branch on + ** Operation: + ** This routine allocates memory for a new K-D tree node + ** and places the specified Key and Data into it. The + ** left and right subtree pointers for the node are + ** initialized to empty subtrees. + ** Return: + ** pointer to new K-D tree node + ** Exceptions: + ** None + ** History: + ** 3/11/89, DSJ, Created. 
*/ KDNODE *NewNode; @@ -495,387 +394,181 @@ MakeKDNode (FLOAT32 Key[], char *Data, int Index) { NewNode->Key = Key; NewNode->Data = Data; NewNode->BranchPoint = Key[Index]; - NewNode->LeftBranch = KeyDesc[Index].Min; - NewNode->RightBranch = KeyDesc[Index].Max; + NewNode->LeftBranch = tree->KeyDesc[Index].Min; + NewNode->RightBranch = tree->KeyDesc[Index].Max; NewNode->Left = NULL; NewNode->Right = NULL; - return (NewNode); + return NewNode; } /* MakeKDNode */ /*---------------------------------------------------------------------------*/ void FreeKDNode(KDNODE *Node) { -/* - ** Parameters: - ** Node ptr to node data structure to be freed - ** Globals: - ** None - ** Operation: - ** This routine frees up the memory allocated to Node. - ** Return: - ** None - ** Exceptions: - ** None - ** History: - ** 3/13/89, DSJ, Created. - */ - memfree ((char *) Node); -} /* FreeKDNode */ + memfree ((char *)Node); +} /*---------------------------------------------------------------------------*/ -void Search(int Level, KDNODE *SubTree) { -/* - ** Parameters: - ** Level level in tree of sub-tree to be searched - ** SubTree sub-tree to be searched - ** Globals: - ** NumberOfNeighbors # of neighbors found so far - ** N # of features in each key - ** KeyDesc description of key dimensions - ** QueryPoint point in D-space to find neighbors of - ** MaxNeighbors maximum # of neighbors to find - ** Radius current distance of furthest neighbor - ** Furthest index of furthest neighbor - ** Neighbor buffer of current neighbors - ** Distance buffer of neighbor distances - ** SBMin lower extent of small search region - ** SBMax upper extent of small search region - ** LBMin lower extent of large search region - ** LBMax upper extent of large search region - ** QuickExit quick exit from recursive search - ** Operation: - ** This routine searches SubTree for those entries which are - ** possibly among the MaxNeighbors nearest neighbors of the - ** QueryPoint and places their data in the 
Neighbor buffer and - ** their distances from QueryPoint in the Distance buffer. - ** Return: none - ** Exceptions: none - ** History: - ** 3/11/89, DSJ, Created. - ** 7/13/89, DSJ, Save node contents, not node, in neighbor buffer - */ - FLOAT32 d; - FLOAT32 OldSBoxEdge; - FLOAT32 OldLBoxEdge; - - if (Level >= N) - Level = 0; - - d = ComputeDistance (N, KeyDesc, QueryPoint, SubTree->Key); - if (d < Radius) { - if (NumberOfNeighbors < MaxNeighbors) { - Neighbor[NumberOfNeighbors] = SubTree->Data; - Distance[NumberOfNeighbors] = d; - NumberOfNeighbors++; - if (NumberOfNeighbors == MaxNeighbors) - FindMaxDistance(); +// Recursively accumulate the k_closest points to query_point_ into results_. +// Parameters: +// Level level in tree of sub-tree to be searched +// SubTree sub-tree to be searched +void KDTreeSearch::SearchRec(int level, KDNODE *sub_tree) { + if (level >= tree_->KeySize) + level = 0; + + if (!BoxIntersectsSearch(sb_min_, sb_max_)) + return; + + results_->insert(DistanceSquared(tree_->KeySize, tree_->KeyDesc, + query_point_, sub_tree->Key), + sub_tree->Data); + + if (query_point_[level] < sub_tree->BranchPoint) { + if (sub_tree->Left != NULL) { + FLOAT32 tmp = sb_max_[level]; + sb_max_[level] = sub_tree->LeftBranch; + SearchRec(NextLevel(tree_, level), sub_tree->Left); + sb_max_[level] = tmp; } - else { - Neighbor[Furthest] = SubTree->Data; - Distance[Furthest] = d; - FindMaxDistance(); + if (sub_tree->Right != NULL) { + FLOAT32 tmp = sb_min_[level]; + sb_min_[level] = sub_tree->RightBranch; + SearchRec(NextLevel(tree_, level), sub_tree->Right); + sb_min_[level] = tmp; + } + } else { + if (sub_tree->Right != NULL) { + FLOAT32 tmp = sb_min_[level]; + sb_min_[level] = sub_tree->RightBranch; + SearchRec(NextLevel(tree_, level), sub_tree->Right); + sb_min_[level] = tmp; + } + if (sub_tree->Left != NULL) { + FLOAT32 tmp = sb_max_[level]; + sb_max_[level] = sub_tree->LeftBranch; + SearchRec(NextLevel(tree_, level), sub_tree->Left); + sb_max_[level] = tmp; } } - 
if (QueryPoint[Level] < SubTree->BranchPoint) { - OldSBoxEdge = SBMax[Level]; - SBMax[Level] = SubTree->LeftBranch; - OldLBoxEdge = LBMax[Level]; - LBMax[Level] = SubTree->RightBranch; - if (SubTree->Left != NULL) - Search (NextLevel(Level), SubTree->Left); - SBMax[Level] = OldSBoxEdge; - LBMax[Level] = OldLBoxEdge; - OldSBoxEdge = SBMin[Level]; - SBMin[Level] = SubTree->RightBranch; - OldLBoxEdge = LBMin[Level]; - LBMin[Level] = SubTree->LeftBranch; - if ((SubTree->Right != NULL) && QueryIntersectsSearch ()) - Search (NextLevel(Level), SubTree->Right); - SBMin[Level] = OldSBoxEdge; - LBMin[Level] = OldLBoxEdge; - } - else { - OldSBoxEdge = SBMin[Level]; - SBMin[Level] = SubTree->RightBranch; - OldLBoxEdge = LBMin[Level]; - LBMin[Level] = SubTree->LeftBranch; - if (SubTree->Right != NULL) - Search (NextLevel(Level), SubTree->Right); - SBMin[Level] = OldSBoxEdge; - LBMin[Level] = OldLBoxEdge; - OldSBoxEdge = SBMax[Level]; - SBMax[Level] = SubTree->LeftBranch; - OldLBoxEdge = LBMax[Level]; - LBMax[Level] = SubTree->RightBranch; - if ((SubTree->Left != NULL) && QueryIntersectsSearch ()) - Search (NextLevel(Level), SubTree->Left); - SBMax[Level] = OldSBoxEdge; - LBMax[Level] = OldLBoxEdge; - } - if (QueryInSearch ()) - longjmp (QuickExit, 1); -} /* Search */ +} /*---------------------------------------------------------------------------*/ -FLOAT32 -ComputeDistance (register int N, -register PARAM_DESC Dim[], -register FLOAT32 p1[], register FLOAT32 p2[]) { -/* - ** Parameters: - ** N number of dimensions in K-D space - ** Dim descriptions of each dimension - ** p1,p2 two different points in K-D space - ** Globals: - ** None - ** Operation: - ** This routine computes the euclidian distance - ** between p1 and p2 in K-D space (an N dimensional space). - ** Return: - ** Distance between p1 and p2. - ** Exceptions: - ** None - ** History: - ** 3/11/89, DSJ, Created. 
- */ - register FLOAT32 TotalDistance; - register FLOAT32 DimensionDistance; - FLOAT32 WrapDistance; - - TotalDistance = 0; - for (; N > 0; N--, p1++, p2++, Dim++) { - if (Dim->NonEssential) +// Returns the Euclidean distance squared between p1 and p2 for all essential +// dimensions. +// Parameters: +// k keys are in k-space +// dim dimension descriptions (essential, circular, etc) +// p1,p2 two different points in K-D space +FLOAT32 DistanceSquared(int k, PARAM_DESC *dim, FLOAT32 p1[], FLOAT32 p2[]) { + FLOAT32 total_distance = 0; + + for (; k > 0; k--, p1++, p2++, dim++) { + if (dim->NonEssential) continue; - DimensionDistance = *p1 - *p2; + FLOAT32 dimension_distance = *p1 - *p2; /* if this dimension is circular - check wraparound distance */ - if (Dim->Circular) { - DimensionDistance = Magnitude (DimensionDistance); - WrapDistance = Dim->Max - Dim->Min - DimensionDistance; - DimensionDistance = MIN (DimensionDistance, WrapDistance); + if (dim->Circular) { + dimension_distance = Magnitude(dimension_distance); + FLOAT32 wrap_distance = dim->Max - dim->Min - dimension_distance; + dimension_distance = MIN(dimension_distance, wrap_distance); } - TotalDistance += DimensionDistance * DimensionDistance; - } - return ((FLOAT32) sqrt ((FLOAT64) TotalDistance)); -} /* ComputeDistance */ - - -/*---------------------------------------------------------------------------*/ -void FindMaxDistance() { -/* - ** Parameters: - ** None - ** Globals: - ** MaxNeighbors maximum # of neighbors to find - ** Radius current distance of furthest neighbor - ** Furthest index of furthest neighbor - ** Distance buffer of neighbor distances - ** Operation: - ** This routine searches the Distance buffer for the maximum - ** distance, places this distance in Radius, and places the - ** index of this distance in Furthest. - ** Return: - ** None - ** Exceptions: - ** None - ** History: - ** 3/11/89, DSJ, Created. 
- */ - int i; - - Radius = Distance[Furthest]; - for (i = 0; i < MaxNeighbors; i++) { - if (Distance[i] > Radius) { - Radius = Distance[i]; - Furthest = i; - } + total_distance += dimension_distance * dimension_distance; } -} /* FindMaxDistance */ + return total_distance; +} +FLOAT32 ComputeDistance(int k, PARAM_DESC *dim, FLOAT32 p1[], FLOAT32 p2[]) { + return sqrt(DistanceSquared(k, dim, p1, p2)); +} /*---------------------------------------------------------------------------*/ -int QueryIntersectsSearch() { -/* - ** Parameters: - ** None - ** Globals: - ** N # of features in each key - ** KeyDesc descriptions of each dimension - ** QueryPoint point in D-space to find neighbors of - ** Radius current distance of furthest neighbor - ** SBMin lower extent of small search region - ** SBMax upper extent of small search region - ** Operation: - ** This routine returns TRUE if the query region intersects - ** the current smallest search region. The query region is - ** the circle of radius Radius centered at QueryPoint. - ** The smallest search region is the box (in N dimensions) - ** whose edges in each dimension are specified by SBMin and SBMax. - ** In the case of circular dimensions, we must also check the - ** point which is one wrap-distance away from the query to - ** see if it would intersect the search region. - ** Return: - ** TRUE if query region intersects search region, else FALSE - ** Exceptions: - ** None - ** History: - ** 3/11/89, DSJ, Created. 
- */ - register int i; - register FLOAT32 *Query; - register FLOAT32 *Lower; - register FLOAT32 *Upper; - register FLOAT64 TotalDistance; - register FLOAT32 DimensionDistance; - register FLOAT64 RadiusSquared; - register PARAM_DESC *Dim; - register FLOAT32 WrapDistance; - - RadiusSquared = Radius * Radius; - Query = QueryPoint; - Lower = SBMin; - Upper = SBMax; - TotalDistance = 0.0; - Dim = KeyDesc; - for (i = N; i > 0; i--, Dim++, Query++, Lower++, Upper++) { - if (Dim->NonEssential) +// Return whether the query region (the smallest known circle about +// query_point_ containing results->k_ points) intersects the box specified +// between lower and upper. For circular dimensions, we also check the point +// one wrap distance away from the query. +bool KDTreeSearch::BoxIntersectsSearch(FLOAT32 *lower, FLOAT32 *upper) { + FLOAT32 *query = query_point_; + FLOAT64 total_distance = 0.0; + FLOAT64 radius_squared = + results_->max_insertable_key() * results_->max_insertable_key(); + PARAM_DESC *dim = tree_->KeyDesc; + + for (int i = tree_->KeySize; i > 0; i--, dim++, query++, lower++, upper++) { + if (dim->NonEssential) continue; - if (*Query < *Lower) - DimensionDistance = *Lower - *Query; - else if (*Query > *Upper) - DimensionDistance = *Query - *Upper; + FLOAT32 dimension_distance; + if (*query < *lower) + dimension_distance = *lower - *query; + else if (*query > *upper) + dimension_distance = *query - *upper; else - DimensionDistance = 0; + dimension_distance = 0; /* if this dimension is circular - check wraparound distance */ - if (Dim->Circular) { - if (*Query < *Lower) - WrapDistance = *Query + Dim->Max - Dim->Min - *Upper; - else if (*Query > *Upper) - WrapDistance = *Lower - (*Query - (Dim->Max - Dim->Min)); - else - WrapDistance = MAX_FLOAT32; - - DimensionDistance = MIN (DimensionDistance, WrapDistance); + if (dim->Circular) { + FLOAT32 wrap_distance = MAX_FLOAT32; + if (*query < *lower) + wrap_distance = *query + dim->Max - dim->Min - *upper; + else if 
(*query > *upper) + wrap_distance = *lower - (*query - (dim->Max - dim->Min)); + dimension_distance = MIN(dimension_distance, wrap_distance); } - TotalDistance += DimensionDistance * DimensionDistance; - if (TotalDistance >= RadiusSquared) - return (FALSE); + total_distance += dimension_distance * dimension_distance; + if (total_distance >= radius_squared) + return FALSE; } - return (TRUE); -} /* QueryIntersectsSearch */ + return TRUE; +} /*---------------------------------------------------------------------------*/ -int QueryInSearch() { -/* - ** Parameters: - ** None - ** Globals: - ** N # of features in each key - ** KeyDesc descriptions of each dimension - ** QueryPoint point in D-space to find neighbors of - ** Radius current distance of furthest neighbor - ** LBMin lower extent of large search region - ** LBMax upper extent of large search region - ** Operation: - ** This routine returns TRUE if the current query region is - ** totally contained in the current largest search region. - ** The query region is the circle of - ** radius Radius centered at QueryPoint. The search region is - ** the box (in N dimensions) whose edges in each - ** dimension are specified by LBMin and LBMax. - ** Return: - ** TRUE if query region is inside search region, else FALSE - ** Exceptions: - ** None - ** History: - ** 3/11/89, DSJ, Created. - */ - register int i; - register FLOAT32 *Query; - register FLOAT32 *Lower; - register FLOAT32 *Upper; - register PARAM_DESC *Dim; - - Query = QueryPoint; - Lower = LBMin; - Upper = LBMax; - Dim = KeyDesc; - - for (i = N - 1; i >= 0; i--, Dim++, Query++, Lower++, Upper++) { - if (Dim->NonEssential) - continue; - - if ((*Query < *Lower + Radius) || (*Query > *Upper - Radius)) - return (FALSE); - } - return (TRUE); -} /* QueryInSearch */ +// Walk a tree, calling action once on each node. +// +// Parameters: +// tree root of the tree being walked. 
+// action action to be performed at every node +// context action's context +// sub_tree ptr to root of subtree to be walked +// level current level in the tree for this node +// Operation: +// This routine walks thru the specified sub_tree and invokes action +// action at each node as follows: +// action(context, data, level) +// data the data contents of the node being visited, +// level is the level of the node in the tree with the root being level 0. +void Walk(KDTREE *tree, void_proc action, void *context, + KDNODE *sub_tree, inT32 level) { + (*action)(context, sub_tree->Data, level); + if (sub_tree->Left != NULL) + Walk(tree, action, context, sub_tree->Left, NextLevel(tree, level)); + if (sub_tree->Right != NULL) + Walk(tree, action, context, sub_tree->Right, NextLevel(tree, level)); +} -/*---------------------------------------------------------------------------*/ -void Walk(KDNODE *SubTree, inT32 Level) { -/* - ** Parameters: - ** SubTree ptr to root of subtree to be walked - ** Level current level in the tree for this node - ** Globals: - ** WalkAction action to be performed at every node - ** Operation: - ** This routine walks thru the specified SubTree and invokes - ** WalkAction at each node. WalkAction is invoked with three - ** arguments as follows: - ** WalkAction( NodeData, Order, Level ) - ** Data is the data contents of the node being visited, - ** Order is either preorder, - ** postorder, endorder, or leaf depending on whether this is - ** the 1st, 2nd, or 3rd time a node has been visited, or - ** whether the node is a leaf. Level is the level of the node in - ** the tree with the root being level 0. - ** Return: none - ** Exceptions: none - ** History: - ** 3/13/89, DSJ, Created. - ** 7/13/89, DSJ, Pass node contents, not node, to WalkAction(). 
- */ - if ((SubTree->Left == NULL) && (SubTree->Right == NULL)) - (*WalkAction) (SubTree->Data, leaf, Level); - else { - (*WalkAction) (SubTree->Data, preorder, Level); - if (SubTree->Left != NULL) - Walk (SubTree->Left, NextLevel(Level)); - (*WalkAction) (SubTree->Data, postorder, Level); - if (SubTree->Right != NULL) - Walk (SubTree->Right, NextLevel(Level)); - (*WalkAction) (SubTree->Data, endorder, Level); - } -} /* Walk */ +// Given a subtree nodes, insert all of its elements into tree. +void InsertNodes(KDTREE *tree, KDNODE *nodes) { + if (nodes == NULL) + return; + KDStore(tree, nodes->Key, nodes->Data); + InsertNodes(tree, nodes->Left); + InsertNodes(tree, nodes->Right); +} -/*---------------------------------------------------------------------------*/ -void FreeSubTree(KDNODE *SubTree) { -/* - ** Parameters: - ** SubTree ptr to root node of sub-tree to be freed - ** Globals: none - ** Operation: - ** This routine recursively frees the memory allocated to - ** to the specified subtree. - ** Return: none - ** Exceptions: none - ** History: 7/13/89, DSJ, Created. - */ - if (SubTree != NULL) { - FreeSubTree (SubTree->Left); - FreeSubTree (SubTree->Right); - memfree(SubTree); +// Free all of the nodes of a sub tree. +void FreeSubTree(KDNODE *sub_tree) { + if (sub_tree != NULL) { + FreeSubTree(sub_tree->Left); + FreeSubTree(sub_tree->Right); + memfree(sub_tree); } } /* FreeSubTree */ diff --git a/classify/kdtree.h b/classify/kdtree.h index 2f4f237a58..1294ea3c6a 100644 --- a/classify/kdtree.h +++ b/classify/kdtree.h @@ -22,7 +22,7 @@ /*----------------------------------------------------------------------------- Include Files and Type Defines -----------------------------------------------------------------------------*/ -#include "general.h" +#include "host.h" #include "cutil.h" #include "ocrfeatures.h" @@ -36,37 +36,21 @@ MakeKDTree. 
All KD routines assume that this is true and will not operate correctly if circular parameters outside the specified range are used. */ -typedef struct kdnode -{ +struct KDNODE { FLOAT32 *Key; /**< search key */ - char *Data; /**< data that corresponds to key */ + void *Data; /**< data that corresponds to key */ FLOAT32 BranchPoint; /**< needed to make deletes work efficiently */ FLOAT32 LeftBranch; /**< used to optimize search pruning */ FLOAT32 RightBranch; /**< used to optimize search pruning */ - struct kdnode *Left; /**< ptr for KD tree structure */ - struct kdnode *Right; /**< ptr for KD tree structure */ -} + struct KDNODE *Left; /**< ptrs for KD tree structure */ + struct KDNODE *Right; +}; - -KDNODE; - -typedef struct -{ +struct KDTREE { inT16 KeySize; /* number of dimensions in the tree */ KDNODE Root; /* Root.Left points to actual root node */ PARAM_DESC KeyDesc[1]; /* description of each dimension */ -} - - -KDTREE; - -/** used for walking thru KD trees */ -typedef enum { - preorder, postorder, endorder, leaf -} - - -VISIT; +}; /*---------------------------------------------------------------------------- Macros @@ -76,44 +60,37 @@ VISIT; /*----------------------------------------------------------------------------- Public Function Prototypes -----------------------------------------------------------------------------*/ -KDTREE *MakeKDTree (inT16 KeySize, PARAM_DESC KeyDesc[]); +KDTREE *MakeKDTree(inT16 KeySize, const PARAM_DESC KeyDesc[]); void KDStore(KDTREE *Tree, FLOAT32 *Key, void *Data); -void KDDelete (KDTREE * Tree, FLOAT32 Key[], void *Data); +void KDDelete(KDTREE * Tree, FLOAT32 Key[], void *Data); -int KDNearestNeighborSearch (KDTREE * Tree, -FLOAT32 Query[], -int QuerySize, -FLOAT32 MaxDistance, -void *NBuffer, FLOAT32 DBuffer[]); +void KDNearestNeighborSearch( + KDTREE *Tree, FLOAT32 Query[], int QuerySize, FLOAT32 MaxDistance, + int *NumberOfResults, void **NBuffer, FLOAT32 DBuffer[]); -void KDWalk(KDTREE *Tree, void_proc Action); +void 
KDWalk(KDTREE *Tree, void_proc Action, void *context); void FreeKDTree(KDTREE *Tree); /*----------------------------------------------------------------------------- Private Function Prototypes -----------------------------------------------------------------------------*/ -int Equal (FLOAT32 Key1[], FLOAT32 Key2[]); - -KDNODE *MakeKDNode (FLOAT32 Key[], char *Data, int Index); +KDNODE *MakeKDNode(KDTREE *tree, FLOAT32 Key[], void *Data, int Index); void FreeKDNode(KDNODE *Node); -void Search(int Level, KDNODE *SubTree); - -FLOAT32 ComputeDistance (register int N, -register PARAM_DESC Dim[], -register FLOAT32 p1[], register FLOAT32 p2[]); +FLOAT32 DistanceSquared(int k, PARAM_DESC *dim, FLOAT32 p1[], FLOAT32 p2[]); -void FindMaxDistance(); +FLOAT32 ComputeDistance(int k, PARAM_DESC *dim, FLOAT32 p1[], FLOAT32 p2[]); -int QueryIntersectsSearch(); +int QueryInSearch(KDTREE *tree); -int QueryInSearch(); +void Walk(KDTREE *tree, void_proc action, void *context, + KDNODE *SubTree, inT32 Level); -void Walk(KDNODE *SubTree, inT32 Level); +void InsertNodes(KDTREE *tree, KDNODE *nodes); void FreeSubTree(KDNODE *SubTree); #endif diff --git a/classify/mf.cpp b/classify/mf.cpp index 7a0483d79f..decc0b2dd5 100644 --- a/classify/mf.cpp +++ b/classify/mf.cpp @@ -18,10 +18,12 @@ /**---------------------------------------------------------------------------- Include Files and Type Defines ----------------------------------------------------------------------------**/ -#include "mfdefs.h" #include "mf.h" -#include "fxdefs.h" + +#include "featdefs.h" +#include "mfdefs.h" #include "mfx.h" + #include /**---------------------------------------------------------------------------- @@ -31,11 +33,11 @@ Private Code ----------------------------------------------------------------------------**/ /*---------------------------------------------------------------------------*/ -FEATURE_SET ExtractMicros(TBLOB *Blob, LINE_STATS *LineStats) { +FEATURE_SET ExtractMicros(TBLOB *Blob, const DENORM& 
denorm) { /* ** Parameters: ** Blob blob to extract micro-features from - ** LineStats statistics on text row blob is in + ** denorm control parameter to feature extractor. ** Globals: none ** Operation: Call the old micro-feature extractor and then copy ** the features into the new format. Then deallocate the @@ -50,7 +52,7 @@ FEATURE_SET ExtractMicros(TBLOB *Blob, LINE_STATS *LineStats) { FEATURE Feature; MICROFEATURE OldFeature; - OldFeatures = (MICROFEATURES) BlobMicroFeatures (Blob, LineStats); + OldFeatures = (MICROFEATURES)BlobMicroFeatures(Blob, denorm); if (OldFeatures == NULL) return NULL; NumFeatures = count (OldFeatures); @@ -65,19 +67,18 @@ FEATURE_SET ExtractMicros(TBLOB *Blob, LINE_STATS *LineStats) { Feature->Params[MFYPosition] = OldFeature[YPOSITION]; Feature->Params[MFLength] = OldFeature[MFLENGTH]; - // Bulge features should not be used - // anymore and are therefore set to 0. -// ParamOf (Feature, MFBulge1) = FirstBulgeOf (OldFeature); -// ParamOf (Feature, MFBulge2) = SecondBulgeOf (OldFeature); + // Bulge features are deprecated and should not be used. Set to 0. Feature->Params[MFBulge1] = 0.0f; Feature->Params[MFBulge2] = 0.0f; -#ifndef __MSW32__ + +#ifndef WIN32 // Assert that feature parameters are well defined. 
int i; for (i = 0; i < Feature->Type->NumParams; i++) { - assert(!isnan(Feature->Params[i])); + assert (!isnan(Feature->Params[i])); } #endif + AddFeature(FeatureSet, Feature); } FreeMicroFeatures(OldFeatures); diff --git a/classify/mf.h b/classify/mf.h index a41b159e64..d65bc9c189 100644 --- a/classify/mf.h +++ b/classify/mf.h @@ -22,8 +22,7 @@ Include Files and Type Defines ----------------------------------------------------------------------------**/ #include "ocrfeatures.h" -#include "tessclas.h" -#include "fxdefs.h" +#include "blobs.h" typedef enum { MFXPosition, MFYPosition, @@ -32,10 +31,6 @@ typedef enum { /*---------------------------------------------------------------------------- Private Function Prototypes -----------------------------------------------------------------------------*/ -FEATURE_SET ExtractMicros(TBLOB *Blob, LINE_STATS *LineStats); +FEATURE_SET ExtractMicros(TBLOB *Blob, const DENORM& denorm); -/**---------------------------------------------------------------------------- - Global Data Definitions and Declarations -----------------------------------------------------------------------------**/ -extern FEATURE_DESC_STRUCT MicroFeatureDesc; #endif diff --git a/classify/mfdefs.h b/classify/mfdefs.h index d1f36d64fc..6202a17424 100644 --- a/classify/mfdefs.h +++ b/classify/mfdefs.h @@ -25,9 +25,6 @@ #include "matchdefs.h" #include "xform2d.h" -/* maximum size of a bulge for length=1 is sqrt(2)/3 */ -#define BULGENORMALIZER 0.942809041 - /* definition of a list of micro-features */ typedef LIST MICROFEATURES; diff --git a/classify/mfoutline.cpp b/classify/mfoutline.cpp index a66a3278d1..e70ddf1599 100644 --- a/classify/mfoutline.cpp +++ b/classify/mfoutline.cpp @@ -21,246 +21,85 @@ #include "clusttool.h" //If remove you get cought in a loop somewhere #include "emalloc.h" #include "mfoutline.h" -#include "hideedge.h" #include "blobs.h" #include "const.h" #include "mfx.h" -#include "varable.h" +#include "params.h" +#include "classify.h" 
#include #include #define MIN_INERTIA (0.00001) -/*--------------------------------------------------------------------------- - Global Data Definitions and Declarations -----------------------------------------------------------------------------*/ -/* center of current blob being processed - used when "unexpanding" - expanded blobs */ -static TPOINT BlobCenter; - -/*---------------------------------------------------------------------------- - Variables -----------------------------------------------------------------------------*/ - -/* control knobs used to control normalization of outlines */ -INT_VAR(classify_norm_method, character, "Normalization Method ..."); -/* PREV DEFAULT "baseline" */ -double_VAR(classify_char_norm_range, 0.2, "Character Normalization Range ..."); -double_VAR(classify_min_norm_scale_x, 0.0, "Min char x-norm scale ..."); -/* PREV DEFAULT 0.1 */ -double_VAR(classify_max_norm_scale_x, 0.325, "Max char x-norm scale ..."); -/* PREV DEFAULT 0.3 */ -double_VAR(classify_min_norm_scale_y, 0.0, "Min char y-norm scale ..."); -/* PREV DEFAULT 0.1 */ -double_VAR(classify_max_norm_scale_y, 0.325, "Max char y-norm scale ..."); -/* PREV DEFAULT 0.3 */ /*---------------------------------------------------------------------------- Public Code ----------------------------------------------------------------------------*/ -/*---------------------------------------------------------------------------*/ -void ComputeBlobCenter(TBLOB *Blob, TPOINT *BlobCenter) { -/* - ** Parameters: - ** Blob blob to compute centerpoint of - ** BlobCenter data struct to place results in - ** Globals: none - ** Operation: - ** This routine computes the center point of the specified - ** blob using the bounding box of all top level outlines in the - ** blob. The center point is computed in a coordinate system - ** which is scaled up by VECSCALE from the page coordinate - ** system. - ** Return: none - ** Exceptions: none - ** History: Fri Sep 8 10:45:39 1989, DSJ, Created. 
- */ - TPOINT TopLeft; - TPOINT BottomRight; - - blob_bounding_box(Blob, &TopLeft, &BottomRight); - - BlobCenter->x = ((TopLeft.x << VECSCALE) + (BottomRight.x << VECSCALE)) / 2; - BlobCenter->y = ((TopLeft.y << VECSCALE) + (BottomRight.y << VECSCALE)) / 2; - -} /* ComputeBlobCenter */ - /*---------------------------------------------------------------------------*/ -LIST ConvertBlob(TBLOB *Blob) { -/* - ** Parameters: - ** Blob blob to be converted - ** Globals: none - ** Operation: Convert Blob into a list of outlines. - ** Return: List of outlines representing blob. - ** Exceptions: none - ** History: Thu Dec 13 15:40:17 1990, DSJ, Created. - */ - LIST ConvertedOutlines = NIL; - - if (Blob != NULL) { - SettupBlobConversion(Blob); //ComputeBlobCenter (Blob, &BlobCenter); - ConvertedOutlines = ConvertOutlines (Blob->outlines, - ConvertedOutlines, outer); - } - - return (ConvertedOutlines); -} /* ConvertBlob */ +// Convert a blob into a list of MFOUTLINEs (float-based microfeature format). +LIST ConvertBlob(TBLOB *blob) { + LIST outlines = NIL_LIST; + return (blob == NULL) + ? NIL_LIST + : ConvertOutlines(blob->outlines, outlines, outer); +} /*---------------------------------------------------------------------------*/ -MFOUTLINE ConvertOutline(TESSLINE *Outline) { -/* - ** Parameters: - ** Outline outline to be converted - ** Globals: - ** BlobCenter pre-computed center of current blob - ** Operation: - ** This routine converts the specified outline into a special - ** data structure which is used for extracting micro-features. - ** If the outline has been pre-normalized by the splitter, - ** then it is assumed to be in expanded form and all we must - ** do is copy the points. Otherwise, - ** if the outline is expanded, then the expanded form is used - ** and the coordinates of the points are returned to page - ** coordinates using the global variable BlobCenter and the - ** scaling factor REALSCALE. 
If the outline is not expanded, - ** then the compressed form is used. - ** Return: Outline converted into special micro-features format. - ** Exceptions: none - ** History: 8/2/89, DSJ, Created. - ** 9/8/89, DSJ, Added ability to convert expanded blobs. - ** 1/11/90, DSJ, Changed to use REALSCALE instead of VECSCALE - ** to eliminate round-off problems. - ** 2/21/91, DSJ, Added ability to work with pre-normalized - ** blobs. - ** 4/30/91, DSJ, Added concept of "hidden" segments. - */ - register BYTEVEC *Vector; - TPOINT Position; - TPOINT StartPosition; +// Convert a TESSLINE into the float-based MFOUTLINE micro-feature format. +MFOUTLINE ConvertOutline(TESSLINE *outline) { MFEDGEPT *NewPoint; - MFOUTLINE MFOutline = NIL; + MFOUTLINE MFOutline = NIL_LIST; EDGEPT *EdgePoint; EDGEPT *StartPoint; EDGEPT *NextPoint; - if (Outline == NULL || - (Outline->compactloop == NULL && Outline->loop == NULL)) - return (MFOutline); + if (outline == NULL || outline->loop == NULL) + return MFOutline; - /* have outlines been prenormalized */ - if (classify_baseline_normalized) { - StartPoint = Outline->loop; - EdgePoint = StartPoint; - do { - NextPoint = EdgePoint->next; - - /* filter out duplicate points */ - if (EdgePoint->pos.x != NextPoint->pos.x || - EdgePoint->pos.y != NextPoint->pos.y) { - NewPoint = NewEdgePoint (); - ClearMark(NewPoint); - NewPoint->Hidden = is_hidden_edge (EdgePoint) ? 
TRUE : FALSE; - NewPoint->Point.x = EdgePoint->pos.x; - NewPoint->Point.y = EdgePoint->pos.y; - MFOutline = push (MFOutline, NewPoint); - } - EdgePoint = NextPoint; - } - while (EdgePoint != StartPoint); - } - /* use compressed version of outline */ - else if (Outline->loop == NULL) { - Position.x = StartPosition.x = Outline->start.x; - Position.y = StartPosition.y = Outline->start.y; - Vector = Outline->compactloop; - do { - if (Vector->dx != 0 || Vector->dy != 0) { - NewPoint = NewEdgePoint (); - ClearMark(NewPoint); - /* all edges are visible */ - NewPoint->Hidden = FALSE; - NewPoint->Point.x = Position.x; - NewPoint->Point.y = Position.y; - MFOutline = push (MFOutline, NewPoint); - } - Position.x += Vector->dx; - Position.y += Vector->dy; - Vector++; - } - while (Position.x != StartPosition.x || - (Position.y != StartPosition.y)); - } - else { /* use expanded version of outline */ - StartPoint = Outline->loop; - EdgePoint = StartPoint; - do { - NextPoint = EdgePoint->next; - - /* filter out duplicate points */ - if (EdgePoint->pos.x != NextPoint->pos.x || - EdgePoint->pos.y != NextPoint->pos.y) { - NewPoint = NewEdgePoint (); - ClearMark(NewPoint); - NewPoint->Hidden = is_hidden_edge (EdgePoint) ? 
TRUE : FALSE; - NewPoint->Point.x = - (EdgePoint->pos.x + BlobCenter.x) / REALSCALE; - NewPoint->Point.y = - (EdgePoint->pos.y + BlobCenter.y) / REALSCALE; - MFOutline = push (MFOutline, NewPoint); - } - EdgePoint = NextPoint; + StartPoint = outline->loop; + EdgePoint = StartPoint; + do { + NextPoint = EdgePoint->next; + + /* filter out duplicate points */ + if (EdgePoint->pos.x != NextPoint->pos.x || + EdgePoint->pos.y != NextPoint->pos.y) { + NewPoint = NewEdgePoint(); + ClearMark(NewPoint); + NewPoint->Hidden = EdgePoint->IsHidden(); + NewPoint->Point.x = EdgePoint->pos.x; + NewPoint->Point.y = EdgePoint->pos.y; + MFOutline = push(MFOutline, NewPoint); } - while (EdgePoint != StartPoint); - } + EdgePoint = NextPoint; + } while (EdgePoint != StartPoint); MakeOutlineCircular(MFOutline); - return (MFOutline); - -} /* ConvertOutline */ + return MFOutline; +} /*---------------------------------------------------------------------------*/ -LIST ConvertOutlines(TESSLINE *Outline, - LIST ConvertedOutlines, - OUTLINETYPE OutlineType) { -/* - ** Parameters: - ** Outline first outline to be converted - ** ConvertedOutlines list to add converted outlines to - ** OutlineType are the outlines outer or holes? - ** Globals: none - ** Operation: - ** This routine converts all given outlines into a new format. - ** of outlines. Outline points to a list of the top level - ** outlines to be converted. The children of these outlines - ** are also recursively converted. All converted outlines - ** are added to ConvertedOutlines. This is a list of outlines, - ** one for each outline that was converted. - ** Return: Updated list of converted outlines. - ** Exceptions: none - ** History: Thu Dec 13 15:57:38 1990, DSJ, Created. 
- */ - MFOUTLINE MFOutline; - - while (Outline != NULL) { - if (Outline->child != NULL) { - if (OutlineType == outer) - ConvertedOutlines = ConvertOutlines (Outline->child, - ConvertedOutlines, hole); - else - ConvertedOutlines = ConvertOutlines (Outline->child, - ConvertedOutlines, outer); - } - - MFOutline = ConvertOutline (Outline); - ConvertedOutlines = push (ConvertedOutlines, MFOutline); - Outline = Outline->next; +// Convert a tree of outlines to a list of MFOUTLINEs (lists of MFEDGEPTs). +// +// Parameters: +// outline first outline to be converted +// mf_outlines list to add converted outlines to +// outline_type are the outlines outer or holes? +LIST ConvertOutlines(TESSLINE *outline, + LIST mf_outlines, + OUTLINETYPE outline_type) { + MFOUTLINE mf_outline; + + while (outline != NULL) { + mf_outline = ConvertOutline(outline); + mf_outlines = push(mf_outlines, mf_outline); + outline = outline->next; } - return (ConvertedOutlines); -} /* ConvertOutlines */ + return mf_outlines; +} /*---------------------------------------------------------------------------*/ @@ -312,71 +151,6 @@ void ComputeOutlineStats(LIST Outlines, OUTLINE_STATS *OutlineStats) { } /* ComputeOutlineStats */ -/*---------------------------------------------------------------------------*/ -void FilterEdgeNoise(MFOUTLINE Outline, FLOAT32 NoiseSegmentLength) { -/* - ** Parameters: - ** Outline outline to be filtered - ** NoiseSegmentLength maximum length of a "noise" segment - ** Globals: none - ** Operation: Filter out noise from the specified outline. This is - ** done by changing the direction of short segments of the - ** outline to the same direction as the preceding outline - ** segment. - ** Return: none - ** Exceptions: none - ** History: Fri May 4 10:23:45 1990, DSJ, Created. 
- */ - MFOUTLINE Current; - MFOUTLINE Last; - MFOUTLINE First; - FLOAT32 Length; - int NumFound = 0; - DIRECTION DirectionOfFirst = north; - - if (DegenerateOutline (Outline)) - return; - - /* find 2 segments of different orientation which are long enough to - not be filtered. If two cannot be found, leave the outline unchanged. */ - First = NextDirectionChange (Outline); - Last = First; - do { - Current = NextDirectionChange (Last); - Length = DistanceBetween ((PointAt (Current)->Point), - PointAt (Last)->Point); - if (Length >= NoiseSegmentLength) { - if (NumFound == 0) { - NumFound = 1; - DirectionOfFirst = PointAt (Last)->Direction; - } - else if (DirectionOfFirst != PointAt (Last)->Direction) - break; - } - Last = Current; - } - while (Last != First); - if (Current == Last) - return; - - /* find each segment and filter it out if it is too short. Note that - the above code guarantees that the initial direction change will - not be removed, therefore the loop will terminate. */ - First = Last; - do { - Current = NextDirectionChange (Last); - Length = DistanceBetween (PointAt (Current)->Point, - PointAt (Last)->Point); - if (Length < NoiseSegmentLength) - ChangeDirection (Last, Current, PointAt (Last)->PreviousDirection); - - Last = Current; - } - while (Last != First); - -} /* FilterEdgeNoise */ - - /*---------------------------------------------------------------------------*/ void FindDirectionChanges(MFOUTLINE Outline, FLOAT32 MinSlope, @@ -437,8 +211,8 @@ void FreeMFOutline(void *arg) { //MFOUTLINE Outline MFOUTLINE Outline = (MFOUTLINE) arg; /* break the circular outline so we can use std. 
techniques to deallocate */ - Start = rest (Outline); - set_rest(Outline, NIL); + Start = list_rest (Outline); + set_rest(Outline, NIL_LIST); while (Start != NULL) { free_struct (first_node (Start), sizeof (MFEDGEPT), "MFEDGEPT"); Start = pop (Start); @@ -501,20 +275,10 @@ void MarkDirectionChanges(MFOUTLINE Outline) { /*---------------------------------------------------------------------------*/ +// Return a new edge point for a micro-feature outline. MFEDGEPT *NewEdgePoint() { -/* - ** Parameters: none - ** Globals: none - ** Operation: - ** This routine allocates and returns a new edge point for - ** a micro-feature outline. - ** Return: New edge point. - ** Exceptions: none - ** History: 7/21/89, DSJ, Created. - */ - return ((MFEDGEPT *) alloc_struct (sizeof (MFEDGEPT), "MFEDGEPT")); - -} /* NewEdgePoint */ + return ((MFEDGEPT *) alloc_struct(sizeof(MFEDGEPT), "MFEDGEPT")); +} /*---------------------------------------------------------------------------*/ @@ -533,9 +297,9 @@ MFOUTLINE NextExtremity(MFOUTLINE EdgePoint) { ** Exceptions: none ** History: 7/26/89, DSJ, Created. */ - EdgePoint = NextPointAfter (EdgePoint); - while (!PointAt (EdgePoint)->ExtremityMark) - EdgePoint = NextPointAfter (EdgePoint); + EdgePoint = NextPointAfter(EdgePoint); + while (!PointAt(EdgePoint)->ExtremityMark) + EdgePoint = NextPointAfter(EdgePoint); return (EdgePoint); @@ -544,12 +308,10 @@ MFOUTLINE NextExtremity(MFOUTLINE EdgePoint) { /*---------------------------------------------------------------------------*/ void NormalizeOutline(MFOUTLINE Outline, - LINE_STATS *LineStats, FLOAT32 XOrigin) { /* ** Parameters: ** Outline outline to be normalized - ** LineStats statistics for text line normalization ** XOrigin x-origin of text ** Globals: none ** Operation: @@ -563,55 +325,28 @@ void NormalizeOutline(MFOUTLINE Outline, ** Return: none ** Exceptions: none ** History: 8/2/89, DSJ, Created. - ** 10/23/89, DSJ, Added ascender/descender stretching. 
- ** 11/89, DSJ, Removed ascender/descender stretching. */ - MFEDGEPT *Current; - MFOUTLINE EdgePoint; - FLOAT32 ScaleFactor; - FLOAT32 AscStretch; - FLOAT32 DescStretch; - - if (Outline != NIL) { - ScaleFactor = ComputeScaleFactor (LineStats); - AscStretch = 1.0; - DescStretch = 1.0; - - EdgePoint = Outline; - do { - Current = PointAt (EdgePoint); - - Current->Point.y = ScaleFactor * - (Current->Point.y - - BaselineAt (LineStats, XPositionOf (Current))); - - if (Current->Point.y > NORMAL_X_HEIGHT) - Current->Point.y = NORMAL_X_HEIGHT + - (Current->Point.y - NORMAL_X_HEIGHT) / AscStretch; - - else if (Current->Point.y < NORMAL_BASELINE) - Current->Point.y = NORMAL_BASELINE + - (Current->Point.y - NORMAL_BASELINE) / DescStretch; - - Current->Point.x = ScaleFactor * - (Current->Point.x - XOrigin); + if (Outline == NIL_LIST) + return; - EdgePoint = NextPointAfter (EdgePoint); - } - while (EdgePoint != Outline); - } + MFOUTLINE EdgePoint = Outline; + do { + MFEDGEPT *Current = PointAt(EdgePoint); + Current->Point.y = MF_SCALE_FACTOR * (Current->Point.y - BASELINE_OFFSET); + Current->Point.x = MF_SCALE_FACTOR * (Current->Point.x - XOrigin); + EdgePoint = NextPointAfter(EdgePoint); + } while (EdgePoint != Outline); } /* NormalizeOutline */ /*---------------------------------------------------------------------------*/ -void NormalizeOutlines(LIST Outlines, - LINE_STATS *LineStats, - FLOAT32 *XScale, - FLOAT32 *YScale) { +namespace tesseract { +void Classify::NormalizeOutlines(LIST Outlines, + FLOAT32 *XScale, + FLOAT32 *YScale) { /* ** Parameters: ** Outlines list of outlines to be normalized - ** LineStats statistics for text line normalization ** XScale x-direction scale factor used by routine ** YScale y-direction scale factor used by routine ** Globals: @@ -637,7 +372,7 @@ void NormalizeOutlines(LIST Outlines, /* limit scale factor to avoid overscaling small blobs (.,`'), thin blobs (l1ift), and merged blobs */ - *XScale = *YScale = BaselineScale = ComputeScaleFactor 
(LineStats); + *XScale = *YScale = BaselineScale = MF_SCALE_FACTOR; *XScale *= OutlineStats.Ry; *YScale *= OutlineStats.Rx; if (*XScale < classify_min_norm_scale_x) @@ -660,33 +395,14 @@ void NormalizeOutlines(LIST Outlines, case baseline: iterate(Outlines) { - Outline = (MFOUTLINE) first_node (Outlines); - NormalizeOutline (Outline, LineStats, 0.0); + Outline = (MFOUTLINE) first_node(Outlines); + NormalizeOutline(Outline, 0.0); } - *XScale = *YScale = ComputeScaleFactor (LineStats); + *XScale = *YScale = MF_SCALE_FACTOR; break; } } /* NormalizeOutlines */ - - -/*---------------------------------------------------------------------------*/ -void SettupBlobConversion(TBLOB *Blob) { -/* - ** Parameters: - ** Blob blob that is to be converted - ** Globals: - ** BlobCenter center of blob to be converted - ** Operation: Compute the center of the blob's bounding box and save - ** it in a global variable. This routine must be called before - ** any calls to ConvertOutline. It must be called once per - ** blob. - ** Return: none - ** Exceptions: none - ** History: Thu May 17 11:06:17 1990, DSJ, Created. 
- */ - ComputeBlobCenter(Blob, &BlobCenter); -} /* SettupBlobConversion */ - +} // namespace tesseract /*---------------------------------------------------------------------------*/ void SmearExtremities(MFOUTLINE Outline, FLOAT32 XScale, FLOAT32 YScale) { @@ -714,7 +430,7 @@ void SmearExtremities(MFOUTLINE Outline, FLOAT32 XScale, FLOAT32 YScale) { FLOAT32 MinYSmear; FLOAT32 MaxYSmear; - if (Outline != NIL) { + if (Outline != NIL_LIST) { MinXSmear = -0.5 * XScale; MaxXSmear = 0.5 * XScale; MinYSmear = -0.5 * YScale; @@ -787,7 +503,7 @@ void CharNormalizeOutline(MFOUTLINE Outline, MFOUTLINE First, Current; MFEDGEPT *CurrentPoint; - if (Outline == NIL) + if (Outline == NIL_LIST) return; First = Outline; @@ -966,9 +682,13 @@ MFOUTLINE NextDirectionChange(MFOUTLINE EdgePoint) { InitialDirection = PointAt (EdgePoint)->Direction; - do - EdgePoint = NextPointAfter (EdgePoint); - while (PointAt (EdgePoint)->Direction == InitialDirection); + MFOUTLINE next_pt = NULL; + do { + EdgePoint = NextPointAfter(EdgePoint); + next_pt = NextPointAfter(EdgePoint); + } while (PointAt(EdgePoint)->Direction == InitialDirection && + !PointAt(EdgePoint)->Hidden && + next_pt != NULL && !PointAt(next_pt)->Hidden); return (EdgePoint); } /* NextDirectionChange */ diff --git a/classify/mfoutline.h b/classify/mfoutline.h index acee5db869..3efacc8eb8 100644 --- a/classify/mfoutline.h +++ b/classify/mfoutline.h @@ -21,12 +21,11 @@ /**---------------------------------------------------------------------------- Include Files and Type Defines ----------------------------------------------------------------------------**/ -#include "general.h" +#include "host.h" #include "oldlist.h" #include "fpoint.h" -#include "fxdefs.h" #include "baseline.h" -#include "varable.h" +#include "params.h" #define NORMAL_X_HEIGHT (0.5) #define NORMAL_BASELINE (0.0) @@ -35,13 +34,9 @@ typedef LIST MFOUTLINE; typedef enum { north, south, east, west, northeast, northwest, southeast, southwest -} +} DIRECTION; - -DIRECTION; 
- -typedef struct -{ +typedef struct { FPOINT Point; FLOAT32 Slope; unsigned Padding:20; @@ -49,77 +44,36 @@ typedef struct BOOL8 ExtremityMark:TRUE; DIRECTION Direction:4; DIRECTION PreviousDirection:4; -} - - -MFEDGEPT; +} MFEDGEPT; typedef enum { outer, hole -} +} OUTLINETYPE; - -OUTLINETYPE; - -typedef struct -{ +typedef struct { FLOAT64 Mx, My; /* first moment of all outlines */ FLOAT64 L; /* total length of all outlines */ FLOAT64 x, y; /* center of mass of all outlines */ FLOAT64 Ix, Iy; /* second moments about center of mass axes */ FLOAT64 Rx, Ry; /* radius of gyration about center of mass axes */ -} - - -OUTLINE_STATS; +} OUTLINE_STATS; typedef enum { baseline, character -} - - -NORM_METHOD; - -/*---------------------------------------------------------------------------- - Variables -------------------------------------------------------------------------------*/ -/* control knobs used to control normalization of outlines */ -extern INT_VAR_H(classify_norm_method, character, - "Normalization Method ..."); -/* PREV DEFAULT "baseline" */ -extern double_VAR_H(classify_char_norm_range, 0.2, - "Character Normalization Range ..."); -extern double_VAR_H(classify_min_norm_scale_x, 0.0, - "Min char x-norm scale ..."); -/* PREV DEFAULT 0.1 */ -extern double_VAR_H(classify_max_norm_scale_x, 0.325, - "Max char x-norm scale ..."); -/* PREV DEFAULT 0.3 */ -extern double_VAR_H(classify_min_norm_scale_y, 0.0, - "Min char y-norm scale ..."); -/* PREV DEFAULT 0.1 */ -extern double_VAR_H(classify_max_norm_scale_y, 0.325, - "Max char y-norm scale ..."); -/* PREV DEFAULT 0.3 */ +} NORM_METHOD; /**---------------------------------------------------------------------------- Macros ----------------------------------------------------------------------------**/ #define AverageOf(A,B) (((A) + (B)) / 2) -/* macro for computing the baseline of a row of text at an x position */ -#define BaselineAt(L,X) (BASELINE_OFFSET) - /* macro for computing the scale factor to use to normalize 
characters */ -#define ComputeScaleFactor(L) \ -(NORMAL_X_HEIGHT / ((classify_baseline_normalized)? \ - (BASELINE_SCALE): \ - ((L)->xheight))) +#define MF_SCALE_FACTOR (NORMAL_X_HEIGHT / BASELINE_SCALE) /* macros for manipulating micro-feature outlines */ -#define DegenerateOutline(O) (((O) == NIL) || ((O) == rest(O))) +#define DegenerateOutline(O) (((O) == NIL_LIST) || ((O) == list_rest(O))) #define PointAt(O) ((MFEDGEPT *) first_node (O)) -#define NextPointAfter(E) (rest (E)) +#define NextPointAfter(E) (list_rest (E)) #define MakeOutlineCircular(O) (set_rest (last (O), (O))) /* macros for manipulating micro-feature outline edge points */ @@ -158,16 +112,8 @@ MFEDGEPT *NewEdgePoint(); MFOUTLINE NextExtremity(MFOUTLINE EdgePoint); void NormalizeOutline(MFOUTLINE Outline, - LINE_STATS *LineStats, FLOAT32 XOrigin); -void NormalizeOutlines(LIST Outlines, - LINE_STATS *LineStats, - FLOAT32 *XScale, - FLOAT32 *YScale); - -void SettupBlobConversion(TBLOB *Blob); - void SmearExtremities(MFOUTLINE Outline, FLOAT32 XScale, FLOAT32 YScale); /*---------------------------------------------------------------------------- diff --git a/classify/mfx.cpp b/classify/mfx.cpp index afa3abfe9a..85029ffef8 100644 --- a/classify/mfx.cpp +++ b/classify/mfx.cpp @@ -23,7 +23,8 @@ #include "clusttool.h" //NEEDED #include "const.h" #include "intfx.h" -#include "varable.h" +#include "normalis.h" +#include "params.h" #include @@ -36,8 +37,6 @@ double_VAR(classify_min_slope, 0.414213562, "Slope below which lines are called horizontal"); double_VAR(classify_max_slope, 2.414213562, "Slope above which lines are called vertical"); -double_VAR(classify_noise_segment_length, 0.00, - "Length below which outline segments are treated as noise"); /**---------------------------------------------------------------------------- Macros @@ -48,8 +47,6 @@ double_VAR(classify_noise_segment_length, 0.00, /*---------------------------------------------------------------------------- Private Function Prototypes 
-----------------------------------------------------------------------------*/ -void ComputeBulges(MFOUTLINE Start, MFOUTLINE End, MICROFEATURE MicroFeature); - FLOAT32 ComputeOrientation(MFEDGEPT *Start, MFEDGEPT *End); MICROFEATURES ConvertToMicroFeatures(MFOUTLINE Outline, @@ -57,18 +54,16 @@ MICROFEATURES ConvertToMicroFeatures(MFOUTLINE Outline, MICROFEATURE ExtractMicroFeature(MFOUTLINE Start, MFOUTLINE End); -void SmearBulges(MICROFEATURES MicroFeatures, FLOAT32 XScale, FLOAT32 YScale); - /**---------------------------------------------------------------------------- Public Code ----------------------------------------------------------------------------**/ /*---------------------------------------------------------------------------*/ -CHAR_FEATURES BlobMicroFeatures(TBLOB *Blob, LINE_STATS *LineStats) { +CHAR_FEATURES BlobMicroFeatures(TBLOB *Blob, const DENORM& denorm) { /* ** Parameters: ** Blob blob to extract micro-features from - ** LineStats statistics for text line normalization + ** denorm control parameter to feature extractor ** Operation: ** This routine extracts micro-features from the specified ** blob and returns a list of the micro-features. All @@ -78,7 +73,7 @@ CHAR_FEATURES BlobMicroFeatures(TBLOB *Blob, LINE_STATS *LineStats) { ** Exceptions: none ** History: 7/21/89, DSJ, Created. 
*/ - MICROFEATURES MicroFeatures = NIL; + MICROFEATURES MicroFeatures = NIL_LIST; FLOAT32 XScale, YScale; LIST Outlines; LIST RemainingOutlines; @@ -89,8 +84,7 @@ CHAR_FEATURES BlobMicroFeatures(TBLOB *Blob, LINE_STATS *LineStats) { if (Blob != NULL) { Outlines = ConvertBlob (Blob); -// NormalizeOutlines(Outlines, LineStats, &XScale, &YScale); - if (!ExtractIntFeat(Blob, blfeatures, cnfeatures, &results)) + if (!ExtractIntFeat(Blob, denorm, blfeatures, cnfeatures, &results)) return NULL; XScale = 0.2f / results.Ry; YScale = 0.2f / results.Rx; @@ -107,123 +101,19 @@ CHAR_FEATURES BlobMicroFeatures(TBLOB *Blob, LINE_STATS *LineStats) { iterate(RemainingOutlines) { Outline = (MFOUTLINE) first_node (RemainingOutlines); FindDirectionChanges(Outline, classify_min_slope, classify_max_slope); - FilterEdgeNoise(Outline, classify_noise_segment_length); MarkDirectionChanges(Outline); SmearExtremities(Outline, XScale, YScale); MicroFeatures = ConvertToMicroFeatures (Outline, MicroFeatures); } - SmearBulges(MicroFeatures, XScale, YScale); FreeOutlines(Outlines); } return ((CHAR_FEATURES) MicroFeatures); } /* BlobMicroFeatures */ -/**---------------------------------------------------------------------------- - Private Macros -----------------------------------------------------------------------------**/ -/********************************************************************** - * angle_of - * - * Return the angle of the line between two points. - **********************************************************************/ -#define angle_of(x1,y1,x2,y2) \ -((x2-x1) ? 
\ - (atan2 (y2-y1, x2-x1)) : \ - ((y2Point.x, -Origin->Point.y); - - SegmentEnd = Start; - CurrentPoint.x = 0.0f; - CurrentPoint.y = 0.0f; - BulgePosition = MicroFeature[MFLENGTH] / 3; - LastPoint = CurrentPoint; - while (CurrentPoint.x < BulgePosition) { - SegmentStart = SegmentEnd; - SegmentEnd = NextPointAfter (SegmentStart); - LastPoint = CurrentPoint; - - MapPoint(&Matrix, PointAt(SegmentEnd)->Point, &CurrentPoint); - } - MicroFeature[FIRSTBULGE] = - XIntersectionOf(LastPoint, CurrentPoint, BulgePosition); - - BulgePosition *= 2; - - // Prevents from copying the points before computing the bulge if - // CurrentPoint will not change. (Which would cause to output nan - // for the SecondBulge.) - if (CurrentPoint.x < BulgePosition) - LastPoint = CurrentPoint; - while (CurrentPoint.x < BulgePosition) { - SegmentStart = SegmentEnd; - SegmentEnd = NextPointAfter (SegmentStart); - LastPoint = CurrentPoint; - MapPoint(&Matrix, PointAt(SegmentEnd)->Point, &CurrentPoint); - } - MicroFeature[SECONDBULGE] = - XIntersectionOf(LastPoint, CurrentPoint, BulgePosition); - - MicroFeature[FIRSTBULGE] /= BULGENORMALIZER * MicroFeature[MFLENGTH]; - MicroFeature[SECONDBULGE] /= BULGENORMALIZER * MicroFeature[MFLENGTH]; - } -} /* ComputeBulges */ - /*---------------------------------------------------------------------------*/ FLOAT32 ComputeOrientation(MFEDGEPT *Start, MFEDGEPT *End) { @@ -282,9 +172,11 @@ MICROFEATURES ConvertToMicroFeatures(MFOUTLINE Outline, Last = First; do { Current = NextExtremity (Last); - NewFeature = ExtractMicroFeature (Last, Current); - if (NewFeature != NULL) - MicroFeatures = push (MicroFeatures, NewFeature); + if (!PointAt(Current)->Hidden) { + NewFeature = ExtractMicroFeature (Last, Current); + if (NewFeature != NULL) + MicroFeatures = push (MicroFeatures, NewFeature); + } Last = Current; } while (Last != First); @@ -316,52 +208,16 @@ MICROFEATURE ExtractMicroFeature(MFOUTLINE Start, MFOUTLINE End) { MICROFEATURE NewFeature; MFEDGEPT *P1, *P2; - P1 = 
PointAt (Start); - P2 = PointAt (End); + P1 = PointAt(Start); + P2 = PointAt(End); NewFeature = NewMicroFeature (); - NewFeature[XPOSITION] = AverageOf (P1->Point.x, P2->Point.x); - NewFeature[YPOSITION] = AverageOf (P1->Point.y, P2->Point.y); - NewFeature[MFLENGTH] = DistanceBetween (P1->Point, P2->Point); + NewFeature[XPOSITION] = AverageOf(P1->Point.x, P2->Point.x); + NewFeature[YPOSITION] = AverageOf(P1->Point.y, P2->Point.y); + NewFeature[MFLENGTH] = DistanceBetween(P1->Point, P2->Point); NewFeature[ORIENTATION] = NormalizedAngleFrom(&P1->Point, &P2->Point, 1.0); - ComputeBulges(Start, End, NewFeature); - return (NewFeature); -} /* ExtractMicroFeature */ - - -/*---------------------------------------------------------------------------*/ -void SmearBulges(MICROFEATURES MicroFeatures, FLOAT32 XScale, FLOAT32 YScale) { -/* - ** Parameters: - ** MicroFeatures features to be smeared - ** XScale # of normalized units per pixel in x dir - ** YScale # of normalized units per pixel in y dir - ** Globals: none - ** Operation: Add a random amount to each bulge parameter of each - ** feature. The amount added is between -0.5 pixels and - ** 0.5 pixels. This is done to prevent the prototypes - ** generated in training from being unrealistically tight. - ** Return: none - ** Exceptions: none - ** History: Thu Jun 28 18:03:38 1990, DSJ, Created. 
- */ - MICROFEATURE MicroFeature; - FLOAT32 MinSmear; - FLOAT32 MaxSmear; - FLOAT32 Cos, Sin; - FLOAT32 Scale; + NewFeature[FIRSTBULGE] = 0.0f; // deprecated + NewFeature[SECONDBULGE] = 0.0f; // deprecated - iterate(MicroFeatures) { - MicroFeature = NextFeatureOf (MicroFeatures); - - Cos = fabs(cos(2.0 * PI * MicroFeature[ORIENTATION])); - Sin = fabs(sin(2.0 * PI * MicroFeature[ORIENTATION])); - Scale = YScale * Cos + XScale * Sin; - - MinSmear = -0.5 * Scale / (BULGENORMALIZER * MicroFeature[MFLENGTH]); - MaxSmear = 0.5 * Scale / (BULGENORMALIZER * MicroFeature[MFLENGTH]); - - MicroFeature[FIRSTBULGE] += UniformRandomNumber (MinSmear, MaxSmear); - MicroFeature[SECONDBULGE] += UniformRandomNumber (MinSmear, MaxSmear); - } -} /* SmearBulges */ + return NewFeature; +} /* ExtractMicroFeature */ diff --git a/classify/mfx.h b/classify/mfx.h index 310495b3c7..bd31399670 100644 --- a/classify/mfx.h +++ b/classify/mfx.h @@ -21,8 +21,7 @@ /**---------------------------------------------------------------------------- Include Files and Type Defines ----------------------------------------------------------------------------**/ -#include "fxdefs.h" -#include "varable.h" +#include "params.h" /**---------------------------------------------------------------------------- Variables ----------------------------------------------------------------------------**/ @@ -32,13 +31,10 @@ extern double_VAR_H(classify_min_slope, 0.414213562, "Slope below which lines are called horizontal"); extern double_VAR_H(classify_max_slope, 2.414213562, "Slope above which lines are called vertical"); -extern double_VAR_H(classify_noise_segment_length, 0.00, - "Length below which outline segments" - "are treated as noise"); /**---------------------------------------------------------------------------- Public Function Prototypes ----------------------------------------------------------------------------**/ -CHAR_FEATURES BlobMicroFeatures(TBLOB *Blob, LINE_STATS *LineStats); +CHAR_FEATURES 
BlobMicroFeatures(TBLOB *Blob, const DENORM& denorm); #endif diff --git a/classify/normfeat.cpp b/classify/normfeat.cpp index f0e5e9fb78..a377a6b383 100644 --- a/classify/normfeat.cpp +++ b/classify/normfeat.cpp @@ -19,116 +19,64 @@ Include Files and Type Defines ----------------------------------------------------------------------------**/ #include "normfeat.h" -#include "mfoutline.h" -#include "intfx.h" -#include "ocrfeatures.h" //Debug -#include //Debug -#include "efio.h" //Debug -//#include "christydbg.h" +#include "intfx.h" +#include "featdefs.h" +#include "mfoutline.h" -/**---------------------------------------------------------------------------- - Global Data Definitions and Declarations -----------------------------------------------------------------------------**/ /**---------------------------------------------------------------------------- Public Code ----------------------------------------------------------------------------**/ -/*---------------------------------------------------------------------------*/ + +// Return the length of the outline in baseline normalized form. FLOAT32 ActualOutlineLength(FEATURE Feature) { -/* - ** Parameters: - ** Feature normalization feature - ** Globals: none - ** Operation: This routine returns the length that the outline - ** would have been if it were baseline normalized instead - ** of character normalized. - ** Return: Baseline normalized length of outline. - ** Exceptions: none - ** History: Thu Dec 20 14:50:57 1990, DSJ, Created. 
- */ return (Feature->Params[CharNormLength] * LENGTH_COMPRESSION); - -} /* ActualOutlineLength */ +} /*---------------------------------------------------------------------------*/ -FEATURE_SET ExtractCharNormFeatures(TBLOB *Blob, LINE_STATS *LineStats) { -/* - ** Parameters: - ** Blob blob to extract char norm feature from - ** LineStats statistics on text row blob is in - ** Globals: none - ** Operation: Compute a feature whose parameters describe how a - ** character will be affected by the character normalization - ** algorithm. The feature parameters are: - ** y position of center of mass in baseline coordinates - ** total length of outlines in baseline coordinates - ** divided by a scale factor - ** radii of gyration about the center of mass in - ** baseline coordinates - ** Return: Character normalization feature for Blob. - ** Exceptions: none - ** History: Wed May 23 18:06:38 1990, DSJ, Created. - */ - FEATURE_SET FeatureSet; - FEATURE Feature; - FLOAT32 Scale; - FLOAT32 Baseline; - LIST Outlines; +// Return the character normalization feature for a blob. +// +// The features returned are in a scale where the x-height has been +// normalized to live in the region y = [-0.25 .. 0.25]. Example ranges +// for English below are based on the Linux font collection on 2009-12-04: +// +// Params[CharNormY] +// The y coordinate of the grapheme's centroid. +// English: [-0.27, 0.71] +// +// Params[CharNormLength] +// The length of the grapheme's outline (tiny segments discarded), +// divided by 10.0=LENGTH_COMPRESSION. +// English: [0.16, 0.85] +// +// Params[CharNormRx] +// The radius of gyration about the x axis, as measured from CharNormY. +// English: [0.011, 0.34] +// +// Params[CharNormRy] +// The radius of gyration about the y axis, as measured from +// the x center of the grapheme's bounding box. 
+// English: [0.011, 0.31] +// +FEATURE_SET ExtractCharNormFeatures(TBLOB *blob, const DENORM& denorm) { + FEATURE_SET feature_set = NewFeatureSet(1); + FEATURE feature = NewFeature(&CharNormDesc); + INT_FEATURE_ARRAY blfeatures; INT_FEATURE_ARRAY cnfeatures; INT_FX_RESULT_STRUCT FXInfo; - /* allocate the feature and feature set - note that there is always one - and only one char normalization feature for any blob */ - FeatureSet = NewFeatureSet (1); - Feature = NewFeature (&CharNormDesc); - AddFeature(FeatureSet, Feature); + ExtractIntFeat(blob, denorm, blfeatures, cnfeatures, &FXInfo); - /* compute the normalization statistics for this blob */ - Outlines = ConvertBlob (Blob); -#ifdef DEBUG_NORMFEAT - FILE* OFile; - OFile = fopen ("nfOutline.logCPP", "r"); - if (OFile == NULL) - { - OFile = Efopen ("nfOutline.logCPP", "w"); - WriteOutlines(OFile, Outlines); - } - else - { - fclose (OFile); - OFile = Efopen ("nfOutline.logCPP", "a"); - } - WriteOutlines(OFile, Outlines); - fclose (OFile); -#endif + feature->Params[CharNormY] = + MF_SCALE_FACTOR * (FXInfo.Ymean - BASELINE_OFFSET); + feature->Params[CharNormLength] = + MF_SCALE_FACTOR * FXInfo.Length / LENGTH_COMPRESSION; + feature->Params[CharNormRx] = MF_SCALE_FACTOR * FXInfo.Rx; + feature->Params[CharNormRy] = MF_SCALE_FACTOR * FXInfo.Ry; - ExtractIntFeat(Blob, blfeatures, cnfeatures, &FXInfo); - Baseline = BaselineAt (LineStats, FXInfo.Xmean); - Scale = ComputeScaleFactor (LineStats); - Feature->Params[CharNormY] = (FXInfo.Ymean - Baseline) * Scale; - Feature->Params[CharNormLength] = - FXInfo.Length * Scale / LENGTH_COMPRESSION; - Feature->Params[CharNormRx] = FXInfo.Rx * Scale; - Feature->Params[CharNormRy] = FXInfo.Ry * Scale; + AddFeature(feature_set, feature); -#ifdef DEBUG_NORMFEAT - FILE* File; - File = fopen ("nfFeatSet.logCPP", "r"); - if (File == NULL) - { - File = Efopen ("nfFeatSet.logCPP", "w"); - WriteFeatureSet(File, FeatureSet); - } - else - { - fclose (File); - File = Efopen ("nfFeatSet.logCPP", 
"a"); - } - WriteFeatureSet(File, FeatureSet); - fclose (File); -#endif - FreeOutlines(Outlines); - return (FeatureSet); + return feature_set; } /* ExtractCharNormFeatures */ diff --git a/classify/normfeat.h b/classify/normfeat.h index ce2ec70322..54bf6ae576 100644 --- a/classify/normfeat.h +++ b/classify/normfeat.h @@ -22,42 +22,18 @@ Include Files and Type Defines ----------------------------------------------------------------------------**/ #include "ocrfeatures.h" -#include "tessclas.h" -#include "fxdefs.h" #define LENGTH_COMPRESSION (10.0) -typedef enum -{ CharNormY, CharNormLength, CharNormRx, CharNormRy } -NORM_PARAM_NAME; +typedef enum { + CharNormY, CharNormLength, CharNormRx, CharNormRy +} NORM_PARAM_NAME; /**---------------------------------------------------------------------------- Public Function Prototypes ----------------------------------------------------------------------------**/ -FLOAT32 ActualOutlineLength(FEATURE Feature); +FLOAT32 ActualOutlineLength(FEATURE Feature); -FEATURE_SET ExtractCharNormFeatures(TBLOB *Blob, LINE_STATS *LineStats); +FEATURE_SET ExtractCharNormFeatures(TBLOB *Blob, const DENORM& denorm); -/* -#if defined(__STDC__) || defined(__cplusplus) -# define _ARGS(s) s -#else -# define _ARGS(s) () -#endif*/ - -/* normfeat.c -FLOAT32 ActualOutlineLength - _ARGS((FEATURE Feature)); - -FEATURE_SET ExtractCharNormFeatures - _ARGS((BLOB *Blob, - LINE_STATS *LineStats)); - -#undef _ARGS -*/ - -/**---------------------------------------------------------------------------- - Global Data Definitions and Declarations -----------------------------------------------------------------------------**/ -extern FEATURE_DESC_STRUCT CharNormDesc; #endif diff --git a/classify/normmatch.cpp b/classify/normmatch.cpp index b573ee7d82..a693a7348e 100644 --- a/classify/normmatch.cpp +++ b/classify/normmatch.cpp @@ -33,7 +33,7 @@ #include "normfeat.h" #include "scanutils.h" #include "unicharset.h" -#include "varable.h" +#include "params.h" struct 
NORM_PROTOS { @@ -46,7 +46,7 @@ struct NORM_PROTOS /**---------------------------------------------------------------------------- Private Function Prototypes ----------------------------------------------------------------------------**/ -FLOAT32 NormEvidenceOf(register FLOAT32 NormAdj); +double NormEvidenceOf(register double NormAdj); void PrintNormMatch(FILE *File, int NumParams, @@ -62,6 +62,8 @@ NORM_PROTOS *ReadNormProtos(FILE *File); /* control knobs used to control the normalization adjustment process */ double_VAR(classify_norm_adj_midpoint, 32.0, "Norm adjust midpoint ..."); double_VAR(classify_norm_adj_curl, 2.0, "Norm adjust curl ..."); +// Weight of width variance against height and vertical position. +const double kWidthErrorWeighting = 0.125; /**---------------------------------------------------------------------------- Public Code @@ -118,6 +120,11 @@ FLOAT32 Classify::ComputeNormMatch(CLASS_ID ClassId, FEATURE Feature, Match = Delta * Delta * Proto->Weight.Elliptical[CharNormY]; Delta = Feature->Params[CharNormRx] - Proto->Mean[CharNormRx]; Match += Delta * Delta * Proto->Weight.Elliptical[CharNormRx]; + // Ry is width! See intfx.cpp. + Delta = Feature->Params[CharNormRy] - Proto->Mean[CharNormRy]; + Delta = Delta * Delta * Proto->Weight.Elliptical[CharNormRy]; + Delta *= kWidthErrorWeighting; + Match += Delta; if (Match < BestMatch) BestMatch = Match; @@ -158,7 +165,7 @@ void Classify::FreeNormProtos() { * normalization adjustment. 
The equation that represents the transform is: * 1 / (1 + (NormAdj / midpoint) ^ curl) **********************************************************************/ -FLOAT32 NormEvidenceOf(register FLOAT32 NormAdj) { +double NormEvidenceOf(register double NormAdj) { NormAdj /= classify_norm_adj_midpoint; if (classify_norm_adj_curl == 3) @@ -166,7 +173,7 @@ FLOAT32 NormEvidenceOf(register FLOAT32 NormAdj) { else if (classify_norm_adj_curl == 2) NormAdj = NormAdj * NormAdj; else - NormAdj = pow(static_cast(NormAdj), classify_norm_adj_curl); + NormAdj = pow (NormAdj, classify_norm_adj_curl); return (1.0 / (1.0 + NormAdj)); } @@ -233,7 +240,7 @@ NORM_PROTOS *Classify::ReadNormProtos(FILE *File, inT64 end_offset) { NormProtos->NumProtos = unicharset.size(); NormProtos->Protos = (LIST *) Emalloc (NormProtos->NumProtos * sizeof(LIST)); for (i = 0; i < NormProtos->NumProtos; i++) - NormProtos->Protos[i] = NIL; + NormProtos->Protos[i] = NIL_LIST; /* read file header and save in data structure */ NormProtos->NumParams = ReadSampleSize (File); diff --git a/classify/normmatch.h b/classify/normmatch.h index df928f8cf8..cdec0fd966 100644 --- a/classify/normmatch.h +++ b/classify/normmatch.h @@ -23,7 +23,7 @@ ----------------------------------------------------------------------------**/ #include "matchdefs.h" #include "ocrfeatures.h" -#include "varable.h" +#include "params.h" /**---------------------------------------------------------------------------- Variables diff --git a/classify/ocrfeatures.cpp b/classify/ocrfeatures.cpp index 5098f0a6d1..d2f5a70c0a 100644 --- a/classify/ocrfeatures.cpp +++ b/classify/ocrfeatures.cpp @@ -99,7 +99,7 @@ void FreeFeatureSet(FEATURE_SET FeatureSet) { /*---------------------------------------------------------------------------*/ -FEATURE NewFeature(FEATURE_DESC FeatureDesc) { +FEATURE NewFeature(const FEATURE_DESC_STRUCT* FeatureDesc) { /* ** Parameters: ** FeatureDesc description of feature to be created. 
@@ -146,7 +146,7 @@ FEATURE_SET NewFeatureSet(int NumFeatures) { /*---------------------------------------------------------------------------*/ -FEATURE ReadFeature(FILE *File, FEATURE_DESC FeatureDesc) { +FEATURE ReadFeature(FILE *File, const FEATURE_DESC_STRUCT* FeatureDesc) { /* ** Parameters: ** File open text file to read feature from @@ -167,13 +167,9 @@ FEATURE ReadFeature(FILE *File, FEATURE_DESC FeatureDesc) { Feature = NewFeature (FeatureDesc); for (i = 0; i < Feature->Type->NumParams; i++) { -#ifndef _MSC_VER - if (tess_fscanf (File, "%f", &(Feature->Params[i])) != 1) -#else if (fscanf (File, "%f", &(Feature->Params[i])) != 1) -#endif DoError (ILLEGAL_FEATURE_PARAM, "Illegal feature parameter spec"); -#ifndef __MSW32__ +#ifndef WIN32 assert (!isnan(Feature->Params[i])); #endif } @@ -183,7 +179,7 @@ FEATURE ReadFeature(FILE *File, FEATURE_DESC FeatureDesc) { /*---------------------------------------------------------------------------*/ -FEATURE_SET ReadFeatureSet(FILE *File, FEATURE_DESC FeatureDesc) { +FEATURE_SET ReadFeatureSet(FILE *File, const FEATURE_DESC_STRUCT* FeatureDesc) { /* ** Parameters: ** File open text file to read new feature set from @@ -234,10 +230,10 @@ void WriteFeature(FILE *File, FEATURE Feature) { int i; for (i = 0; i < Feature->Type->NumParams; i++) { -#ifndef __MSW32__ +#ifndef WIN32 assert (!isnan(Feature->Params[i])); #endif - fprintf (File, " %12g", Feature->Params[i]); + fprintf (File, " %12g", Feature->Params[i]); } fprintf (File, "\n"); @@ -270,7 +266,7 @@ void WriteFeatureSet(FILE *File, FEATURE_SET FeatureSet) { /*---------------------------------------------------------------------------*/ -void WriteOldParamDesc(FILE *File, FEATURE_DESC FeatureDesc) { +void WriteOldParamDesc(FILE *File, const FEATURE_DESC_STRUCT* FeatureDesc) { /* ** Parameters: ** File open text file to write FeatureDesc to diff --git a/classify/ocrfeatures.h b/classify/ocrfeatures.h index 3b23fdf205..8b5c3f43be 100644 --- a/classify/ocrfeatures.h 
+++ b/classify/ocrfeatures.h @@ -21,82 +21,74 @@ /**---------------------------------------------------------------------------- Include Files and Type Defines ----------------------------------------------------------------------------**/ -#include "funcdefs.h" -#include "tessclas.h" -#include "fxdefs.h" +#include "blobs.h" #include +class DENORM; + #undef Min #undef Max #define FEAT_NAME_SIZE 80 -/*define trap errors which can be caused by this module*/ +// define trap errors which can be caused by this module #define ILLEGAL_FEATURE_PARAM 1000 #define ILLEGAL_NUM_FEATURES 1001 -/* A character is described by multiple sets of extracted features. Each - set contains a number of features of a particular type, for example, a - set of bays, or a set of closures, or a set of microfeatures. Each - feature consists of a number of parameters. All features within a - feature set contain the same number of parameters. All circular - parameters are required to be the first parameters in the feature.*/ - -typedef struct -{ - struct fds *Type; /* points to description of feature type */ - FLOAT32 Params[1]; /* variable size array - params for feature */ -} FEATURE_STRUCT; +// A character is described by multiple sets of extracted features. Each +// set contains a number of features of a particular type, for example, a +// set of bays, or a set of closures, or a set of microfeatures. Each +// feature consists of a number of parameters. All features within a +// feature set contain the same number of parameters. All circular +// parameters are required to be the first parameters in the feature. 
+ +struct PARAM_DESC { + inT8 Circular; // TRUE if dimension wraps around + inT8 NonEssential; // TRUE if dimension not used in searches + FLOAT32 Min; // low end of range for circular dimensions + FLOAT32 Max; // high end of range for circular dimensions + FLOAT32 Range; // Max - Min + FLOAT32 HalfRange; // (Max - Min)/2 + FLOAT32 MidRange; // (Max + Min)/2 +}; + +struct FEATURE_DESC_STRUCT { + uinT16 NumParams; // total # of params + const char *ShortName; // short name for feature + const PARAM_DESC *ParamDesc; // array - one per param +}; +typedef FEATURE_DESC_STRUCT *FEATURE_DESC; + +struct FEATURE_STRUCT { + const FEATURE_DESC_STRUCT *Type; // points to description of feature type + FLOAT32 Params[1]; // variable size array - params for feature +}; typedef FEATURE_STRUCT *FEATURE; -typedef struct -{ - uinT16 NumFeatures; /* number of features in set */ - uinT16 MaxNumFeatures; /* maximum size of feature set */ - FEATURE Features[1]; /* variable size array of features */ -} FEATURE_SET_STRUCT; +struct FEATURE_SET_STRUCT { + uinT16 NumFeatures; // number of features in set + uinT16 MaxNumFeatures; // maximum size of feature set + FEATURE Features[1]; // variable size array of features +}; typedef FEATURE_SET_STRUCT *FEATURE_SET; -/* Define various function types which will be needed for "class methods"*/ -typedef FEATURE (*FEAT_FUNC) (); -typedef FEATURE_SET (*FX_FUNC) (TBLOB *, LINE_STATS *); -typedef FLOAT32 (*PENALTY_FUNC) (); - -typedef struct -{ - inT8 Circular; /* TRUE if dimension wraps around */ - inT8 NonEssential; /* TRUE if dimension not used in searches */ - FLOAT32 Min; /* low end of range for circular dimensions */ - FLOAT32 Max; /* high end of range for circular dimensions */ - FLOAT32 Range; /* Max - Min */ - FLOAT32 HalfRange; /* (Max - Min)/2 */ - FLOAT32 MidRange; /* (Max + Min)/2 */ -} PARAM_DESC; - -typedef struct fds -{ - uinT16 NumParams; /* total # of params */ - uinT8 NumLinearParams; /* # of linear params */ - uinT8 NumCircularParams; 
/* # of linear params */ - uinT8 MinFeatPerChar; /* min # of feats allowed */ - uinT8 MaxFeatPerChar; /* max # of feats allowed */ - char LongName[FEAT_NAME_SIZE]; /* long name for feature */ - char ShortName[FEAT_NAME_SIZE];/* short name for feature */ - PARAM_DESC *ParamDesc; /* array - one per param */ -} FEATURE_DESC_STRUCT; - /* one per feature type */ -typedef FEATURE_DESC_STRUCT *FEATURE_DESC; +// A generic character description as a char pointer. In reality, it will be +// a pointer to some data structure. Paired feature extractors/matchers need +// to agree on the data structure to be used, however, the high level +// classifier does not need to know the details of this data structure. +typedef char *CHAR_FEATURES; -typedef struct fxs -{ - FX_FUNC Extractor; /* func to extract features */ -} FEATURE_EXT_STRUCT; +typedef FEATURE_SET (*FX_FUNC) (TBLOB *, const DENORM&); + +struct FEATURE_EXT_STRUCT { + FX_FUNC Extractor; // func to extract features +}; /*---------------------------------------------------------------------- Macros for defining the parameters of a new features ----------------------------------------------------------------------*/ #define StartParamDesc(Name) \ -static PARAM_DESC Name[] = { +const PARAM_DESC Name[] = { #define DefineParam(Circular, NonEssential, Min, Max) \ {Circular, NonEssential, Min, Max, \ @@ -113,10 +105,8 @@ DefineFeature (Name, NumLinear, NumCircular, LongName, ShortName, ParamName) ----------------------------------------------------------------------*/ #define DefineFeature(Name, NL, NC, Min, Max, LN, SN, PN) \ -FEATURE_DESC_STRUCT Name = { \ - ((NL) + (NC)), NL, NC, Min, Max, LN, SN, PN}; - -#define DefineFeatureExt(Name, E) FEATURE_EXT_STRUCT Name = {E}; +const FEATURE_DESC_STRUCT Name = { \ + ((NL) + (NC)), SN, PN}; /*---------------------------------------------------------------------- Generic routines that work for all feature types @@ -127,17 +117,18 @@ void FreeFeature(FEATURE Feature); void 
FreeFeatureSet(FEATURE_SET FeatureSet); -FEATURE NewFeature(FEATURE_DESC FeatureDesc); +FEATURE NewFeature(const FEATURE_DESC_STRUCT *FeatureDesc); FEATURE_SET NewFeatureSet(int NumFeatures); -FEATURE ReadFeature(FILE *File, FEATURE_DESC FeatureDesc); +FEATURE ReadFeature(FILE *File, const FEATURE_DESC_STRUCT *FeatureDesc); -FEATURE_SET ReadFeatureSet(FILE *File, FEATURE_DESC FeatureDesc); +FEATURE_SET ReadFeatureSet(FILE *File, const FEATURE_DESC_STRUCT *FeatureDesc); void WriteFeature(FILE *File, FEATURE Feature); void WriteFeatureSet(FILE *File, FEATURE_SET FeatureSet); -void WriteOldParamDesc(FILE *File, FEATURE_DESC FeatureDesc); +void WriteOldParamDesc(FILE *File, const FEATURE_DESC_STRUCT *FeatureDesc); + #endif diff --git a/classify/outfeat.cpp b/classify/outfeat.cpp index a383976c5d..c391c00bf8 100644 --- a/classify/outfeat.cpp +++ b/classify/outfeat.cpp @@ -19,17 +19,21 @@ Include Files and Type Defines ----------------------------------------------------------------------------**/ #include "outfeat.h" + +#include "classify.h" +#include "efio.h" +#include "featdefs.h" #include "mfoutline.h" +#include "ocrfeatures.h" -#include "ocrfeatures.h" //Debug -#include //Debug -#include "efio.h" //Debug +#include /**---------------------------------------------------------------------------- Public Code ----------------------------------------------------------------------------**/ /*---------------------------------------------------------------------------*/ -FEATURE_SET ExtractOutlineFeatures(TBLOB *Blob, LINE_STATS *LineStats) { +namespace tesseract { +FEATURE_SET Classify::ExtractOutlineFeatures(TBLOB *Blob) { /* ** Parameters: ** Blob blob to extract pico-features from @@ -54,7 +58,7 @@ FEATURE_SET ExtractOutlineFeatures(TBLOB *Blob, LINE_STATS *LineStats) { Outlines = ConvertBlob (Blob); - NormalizeOutlines(Outlines, LineStats, &XScale, &YScale); + NormalizeOutlines(Outlines, &XScale, &YScale); RemainingOutlines = Outlines; iterate(RemainingOutlines) { 
Outline = (MFOUTLINE) first_node (RemainingOutlines); @@ -65,6 +69,7 @@ FEATURE_SET ExtractOutlineFeatures(TBLOB *Blob, LINE_STATS *LineStats) { FreeOutlines(Outlines); return (FeatureSet); } /* ExtractOutlineFeatures */ +} // namespace tesseract /**---------------------------------------------------------------------------- Private Code diff --git a/classify/outfeat.h b/classify/outfeat.h index 5a930d7bb9..e7a36476ef 100644 --- a/classify/outfeat.h +++ b/classify/outfeat.h @@ -33,10 +33,6 @@ typedef enum { } OUTLINE_FEAT_PARAM_NAME; #define MAX_OUTLINE_FEATURES (100) -/**---------------------------------------------------------------------------- - Public Function Prototypes -----------------------------------------------------------------------------**/ -FEATURE_SET ExtractOutlineFeatures(TBLOB *Blob, LINE_STATS *LineStats); /*--------------------------------------------------------------------------- Privat Function Prototypes @@ -49,8 +45,4 @@ void ConvertToOutlineFeatures(MFOUTLINE Outline, FEATURE_SET FeatureSet); void NormalizeOutlineX(FEATURE_SET FeatureSet); -/**---------------------------------------------------------------------------- - Global Data Definitions and Declarations -----------------------------------------------------------------------------**/ -extern FEATURE_DESC_STRUCT OutlineFeatDesc; #endif diff --git a/classify/picofeat.cpp b/classify/picofeat.cpp index 60c0a96d1d..24ac068c48 100644 --- a/classify/picofeat.cpp +++ b/classify/picofeat.cpp @@ -19,16 +19,17 @@ Include Files and Type Defines ----------------------------------------------------------------------------**/ #include "picofeat.h" -#include "mfoutline.h" -#include "hideedge.h" + +#include "classify.h" +#include "efio.h" +#include "featdefs.h" #include "fpoint.h" -#include "varable.h" +#include "mfoutline.h" +#include "ocrfeatures.h" +#include "params.h" #include - -#include "ocrfeatures.h" //Debug -#include //Debug -#include "efio.h" //Debug +#include 
/*--------------------------------------------------------------------------- Variables @@ -51,7 +52,8 @@ void NormalizePicoX(FEATURE_SET FeatureSet); Public Code ----------------------------------------------------------------------------**/ /*---------------------------------------------------------------------------*/ -FEATURE_SET ExtractPicoFeatures(TBLOB *Blob, LINE_STATS *LineStats) { +namespace tesseract { +FEATURE_SET Classify::ExtractPicoFeatures(TBLOB *Blob) { /* ** Parameters: ** Blob blob to extract pico-features from @@ -69,11 +71,9 @@ FEATURE_SET ExtractPicoFeatures(TBLOB *Blob, LINE_STATS *LineStats) { FEATURE_SET FeatureSet; FLOAT32 XScale, YScale; - FeatureSet = NewFeatureSet (MAX_PICO_FEATURES); - - Outlines = ConvertBlob (Blob); - - NormalizeOutlines(Outlines, LineStats, &XScale, &YScale); + FeatureSet = NewFeatureSet(MAX_PICO_FEATURES); + Outlines = ConvertBlob(Blob); + NormalizeOutlines(Outlines, &XScale, &YScale); RemainingOutlines = Outlines; iterate(RemainingOutlines) { Outline = (MFOUTLINE) first_node (RemainingOutlines); @@ -115,6 +115,7 @@ FEATURE_SET ExtractPicoFeatures(TBLOB *Blob, LINE_STATS *LineStats) { return (FeatureSet); } /* ExtractPicoFeatures */ +} // namespace tesseract /**---------------------------------------------------------------------------- Private Code diff --git a/classify/picofeat.h b/classify/picofeat.h index f06543d460..773b472265 100644 --- a/classify/picofeat.h +++ b/classify/picofeat.h @@ -22,9 +22,7 @@ Include Files and Type Defines ----------------------------------------------------------------------------**/ #include "ocrfeatures.h" -#include "tessclas.h" -#include "fxdefs.h" -#include "varable.h" +#include "params.h" typedef enum { PicoFeatY, PicoFeatDir, PicoFeatX } @@ -44,11 +42,8 @@ extern double_VAR_H(classify_pico_feature_length, 0.05, "Pico Feature Length"); ----------------------------------------------------------------------------**/ #define GetPicoFeatureLength() (PicoFeatureLength) -FEATURE_SET 
ExtractPicoFeatures(TBLOB *Blob, LINE_STATS *LineStats); - /**---------------------------------------------------------------------------- Global Data Definitions and Declarations ----------------------------------------------------------------------------**/ -extern FEATURE_DESC_STRUCT PicoFeatDesc; extern FLOAT32 PicoFeatureLength; #endif diff --git a/classify/protos.cpp b/classify/protos.cpp index 70cfd87a3e..45fd107454 100644 --- a/classify/protos.cpp +++ b/classify/protos.cpp @@ -31,11 +31,10 @@ #include "freelist.h" #include "callcpp.h" #include "tprintf.h" -#include "adaptmatch.h" #include "scanutils.h" #include "globals.h" #include "classify.h" -#include "varable.h" +#include "params.h" #include #include @@ -377,11 +376,7 @@ void ReadProtos(register FILE *File, CLASS_TYPE Class) { for (Pid = 0; Pid < NumProtos; Pid++) { Proto = ProtoIn (Class, Pid); -#ifndef _MSC_VER - tess_fscanf (File, "%f %f %f %f %f %f %f\n", -#else fscanf (File, "%f %f %f %f %f %f %f\n", -#endif &Proto->X, &Proto->Y, &Proto->Length, diff --git a/classify/protos.h b/classify/protos.h index 80bf655a7e..1c61997d7c 100644 --- a/classify/protos.h +++ b/classify/protos.h @@ -32,7 +32,7 @@ #include "cutil.h" #include "unichar.h" #include "unicity_table.h" -#include "varable.h" +#include "params.h" /*---------------------------------------------------------------------- T y p e s diff --git a/classify/speckle.cpp b/classify/speckle.cpp index cd79af803f..0d35d689bc 100644 --- a/classify/speckle.cpp +++ b/classify/speckle.cpp @@ -22,7 +22,7 @@ #include "blobs.h" #include "ratngs.h" -#include "varable.h" +#include "params.h" /*----------------------------------------------------------------------------- Global Data Definitions and Declarations @@ -70,7 +70,7 @@ void AddLargeSpeckleTo(BLOB_CHOICE_LIST *Choices) { if (Choices->length() == 0) { blob_choice = new BLOB_CHOICE(0, speckle_small_certainty + speckle_large_penalty, - speckle_small_certainty, -1, NULL); + speckle_small_certainty, -1, -1, 
NULL); temp_it.add_to_end(blob_choice); return; } @@ -81,7 +81,7 @@ void AddLargeSpeckleTo(BLOB_CHOICE_LIST *Choices) { blob_choice = temp_it.data(); // pick the worst choice temp_it.add_to_end( new BLOB_CHOICE(0, blob_choice->rating() + speckle_large_penalty, - blob_choice->certainty(), -1, NULL)); + blob_choice->certainty(), -1, -1, NULL)); } /* AddLargeSpeckleTo */ @@ -97,16 +97,15 @@ void AddLargeSpeckleTo(BLOB_CHOICE_LIST *Choices) { * History: Mon Mar 11 10:06:49 1991, DSJ, Created. * * @param Blob blob to test against speckle criteria - * @param Row text row that blob is in * * @return TRUE if Blob is speckle, FALSE otherwise. */ -BOOL8 LargeSpeckle(TBLOB *Blob, TEXTROW *Row) { +BOOL8 LargeSpeckle(TBLOB *Blob) { double speckle_size; TPOINT TopLeft; TPOINT BottomRight; - speckle_size = RowHeight (Row) * speckle_large_max_size; + speckle_size = BASELINE_SCALE * speckle_large_max_size; blob_bounding_box(Blob, &TopLeft, &BottomRight); if (TopLeft.y - BottomRight.y < speckle_size && diff --git a/classify/speckle.h b/classify/speckle.h index f3b01bd514..9676dc0a43 100644 --- a/classify/speckle.h +++ b/classify/speckle.h @@ -25,19 +25,11 @@ #include "baseline.h" #include "ratngs.h" -/*----------------------------------------------------------------------------- - Macros ------------------------------------------------------------------------------*/ -/** macro for getting the height of a row of text */ -#define RowHeight(R) ((classify_baseline_normalized)? \ - (BASELINE_SCALE): \ - ((R)->lineheight)) - /*----------------------------------------------------------------------------- Public Function Prototypes -----------------------------------------------------------------------------*/ void AddLargeSpeckleTo(BLOB_CHOICE_LIST *Choices); -BOOL8 LargeSpeckle(TBLOB *Blob, TEXTROW *Row); +BOOL8 LargeSpeckle(TBLOB *Blob); #endif diff --git a/configure b/configure index 042ab0776a..fe534391a8 100755 --- a/configure +++ b/configure @@ -1,7 +1,7 @@ #! 
/bin/sh # From configure.ac Id: configure.ac. # Guess values for system-dependent variables and create Makefiles. -# Generated by GNU Autoconf 2.65 for tesseract 3.00. +# Generated by GNU Autoconf 2.65 for tesseract 3.01. # # Report bugs to . # @@ -702,8 +702,8 @@ MAKEFLAGS= # Identity of this package. PACKAGE_NAME='tesseract' PACKAGE_TARNAME='tesseract' -PACKAGE_VERSION='3.00' -PACKAGE_STRING='tesseract 3.00' +PACKAGE_VERSION='3.01' +PACKAGE_STRING='tesseract 3.01' PACKAGE_BUGREPORT='theraysmith@gmail.com' PACKAGE_URL='' @@ -1452,7 +1452,7 @@ if test "$ac_init_help" = "long"; then # Omit some internal or obsolete options to make the list less imposing. # This message is too long to be a string in the A/UX 3.1 sh. cat <<_ACEOF -\`configure' configures tesseract 3.00 to adapt to many kinds of systems. +\`configure' configures tesseract 3.01 to adapt to many kinds of systems. Usage: $0 [OPTION]... [VAR=VALUE]... @@ -1522,7 +1522,7 @@ fi if test -n "$ac_init_help"; then case $ac_init_help in - short | recursive ) echo "Configuration of tesseract 3.00:";; + short | recursive ) echo "Configuration of tesseract 3.01:";; esac cat <<\_ACEOF @@ -1637,7 +1637,7 @@ fi test -n "$ac_init_help" && exit $ac_status if $ac_init_version; then cat <<\_ACEOF -tesseract configure 3.00 +tesseract configure 3.01 generated by GNU Autoconf 2.65 Copyright (C) 2009 Free Software Foundation, Inc. @@ -2323,7 +2323,7 @@ cat >config.log <<_ACEOF This file contains any messages produced by compilers while running configure, to aid debugging if configure makes a mistake. -It was created by tesseract $as_me 3.00, which was +It was created by tesseract $as_me 3.01, which was generated by GNU Autoconf 2.65. Invocation command line was $ $0 $@ @@ -14272,7 +14272,7 @@ fi # Define the identity of the package. 
PACKAGE='tesseract' - VERSION='3.00' + VERSION='3.01' cat >>confdefs.h <<_ACEOF @@ -16516,12 +16516,16 @@ ac_config_files="$ac_config_files ccutil/Makefile" ac_config_files="$ac_config_files classify/Makefile" +ac_config_files="$ac_config_files cube/Makefile" + ac_config_files="$ac_config_files cutil/Makefile" ac_config_files="$ac_config_files dict/Makefile" ac_config_files="$ac_config_files image/Makefile" +ac_config_files="$ac_config_files neural_networks/runtime/Makefile" + ac_config_files="$ac_config_files textord/Makefile" ac_config_files="$ac_config_files viewer/Makefile" @@ -17104,7 +17108,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 # report actual input values of CONFIG_FILES etc. instead of their # values after options handling. ac_log=" -This file was extended by tesseract $as_me 3.00, which was +This file was extended by tesseract $as_me 3.01, which was generated by GNU Autoconf 2.65. Invocation command line was CONFIG_FILES = $CONFIG_FILES @@ -17170,7 +17174,7 @@ _ACEOF cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`" ac_cs_version="\\ -tesseract config.status 3.00 +tesseract config.status 3.01 configured by $0, generated by GNU Autoconf 2.65, with options \\"\$ac_cs_config\\" @@ -17653,9 +17657,11 @@ do "ccstruct/Makefile") CONFIG_FILES="$CONFIG_FILES ccstruct/Makefile" ;; "ccutil/Makefile") CONFIG_FILES="$CONFIG_FILES ccutil/Makefile" ;; "classify/Makefile") CONFIG_FILES="$CONFIG_FILES classify/Makefile" ;; + "cube/Makefile") CONFIG_FILES="$CONFIG_FILES cube/Makefile" ;; "cutil/Makefile") CONFIG_FILES="$CONFIG_FILES cutil/Makefile" ;; "dict/Makefile") CONFIG_FILES="$CONFIG_FILES dict/Makefile" ;; "image/Makefile") CONFIG_FILES="$CONFIG_FILES image/Makefile" ;; + "neural_networks/runtime/Makefile") CONFIG_FILES="$CONFIG_FILES neural_networks/runtime/Makefile" ;; "textord/Makefile") CONFIG_FILES="$CONFIG_FILES textord/Makefile" ;; "viewer/Makefile") 
CONFIG_FILES="$CONFIG_FILES viewer/Makefile" ;; "wordrec/Makefile") CONFIG_FILES="$CONFIG_FILES wordrec/Makefile" ;; diff --git a/configure.ac b/configure.ac index 7129f8a810..9925bd6c15 100644 --- a/configure.ac +++ b/configure.ac @@ -8,7 +8,7 @@ AC_PREREQ(2.50) AC_CONFIG_MACRO_DIR([m4]) -AC_INIT(tesseract, 3.00, theraysmith@gmail.com) +AC_INIT(tesseract, 3.01, theraysmith@gmail.com) AC_REVISION($Id: configure.ac,v 1.4 2007/02/02 22:38:17 theraysmith Exp $) AC_CONFIG_AUX_DIR(config) AC_CONFIG_SRCDIR(api/tesseractmain.cpp) @@ -363,9 +363,11 @@ AC_CONFIG_FILES(ccmain/Makefile) AC_CONFIG_FILES(ccstruct/Makefile) AC_CONFIG_FILES(ccutil/Makefile) AC_CONFIG_FILES(classify/Makefile) +AC_CONFIG_FILES(cube/Makefile) AC_CONFIG_FILES(cutil/Makefile) AC_CONFIG_FILES(dict/Makefile) AC_CONFIG_FILES(image/Makefile) +AC_CONFIG_FILES(neural_networks/runtime/Makefile) AC_CONFIG_FILES(textord/Makefile) AC_CONFIG_FILES(viewer/Makefile) AC_CONFIG_FILES(wordrec/Makefile) @@ -375,7 +377,6 @@ AC_CONFIG_FILES(tessdata/configs/Makefile) AC_CONFIG_FILES(tessdata/tessconfigs/Makefile) AC_CONFIG_FILES(testing/Makefile) AC_CONFIG_FILES(vs2008/Makefile) -AC_CONFIG_FILES(vs2008/dlltest/Makefile) AC_CONFIG_FILES(vs2008/include/Makefile) AC_CONFIG_FILES(vs2008/include/leptonica/Makefile) if test "$enable_graphics" = "yes"; then diff --git a/cube/Makefile.am b/cube/Makefile.am new file mode 100644 index 0000000000..57c3a5350f --- /dev/null +++ b/cube/Makefile.am @@ -0,0 +1,38 @@ +SUBDIRS = +AM_CPPFLAGS = \ + -DUSE_STD_NAMESPACE \ + -I$(top_srcdir)/cutil -I$(top_srcdir)/ccutil \ + -I$(top_srcdir)/ccstruct -I$(top_srcdir)/dict \ + -I$(top_srcdir)/ccmain -I$(top_srcdir)/classify \ + -I$(top_srcdir)/textord -I$(top_srcdir)/wordrec \ + -I$(top_srcdir)/neural_networks/runtime \ + -I$(top_srcdir)/image -I$(top_srcdir)/viewer + +include_HEADERS = \ + altlist.h beam_search.h bmp_8.h cached_file.h \ + char_altlist.h char_bigrams.h char_samp.h char_samp_enum.h \ + char_samp_set.h char_set.h classifier_base.h 
classifier_factory.h \ + con_comp.h const.h conv_net_classifier.h cube_line_object.h \ + cube_line_segmenter.h cube_object.h cube_search_object.h \ + cube_tuning_params.h cube_utils.h feature_base.h feature_bmp.h \ + feature_chebyshev.h feature_hybrid.h hybrid_neural_net_classifier.h \ + lang_mod_edge.h lang_model.h search_column.h search_node.h \ + search_object.h string_32.h tess_lang_mod_edge.h tess_lang_model.h \ + tuning_params.h word_altlist.h word_list_lang_model.h word_size_model.h \ + word_unigrams.h + +lib_LTLIBRARIES = libtesseract_cube.la +libtesseract_cube_la_SOURCES = \ + altlist.cpp beam_search.cpp bmp_8.cpp cached_file.cpp \ + char_altlist.cpp char_bigrams.cpp char_samp.cpp char_samp_enum.cpp \ + char_samp_set.cpp char_set.cpp classifier_factory.cpp \ + con_comp.cpp conv_net_classifier.cpp cube_line_object.cpp \ + cube_line_segmenter.cpp cube_object.cpp cube_search_object.cpp \ + cube_tuning_params.cpp cube_utils.cpp feature_bmp.cpp \ + feature_chebyshev.cpp feature_hybrid.cpp hybrid_neural_net_classifier.cpp \ + search_column.cpp search_node.cpp \ + tess_lang_mod_edge.cpp tess_lang_model.cpp \ + word_altlist.cpp word_list_lang_model.cpp word_size_model.cpp \ + word_unigrams.cpp + +libtesseract_cube_la_LDFLAGS = -version-info $(GENERIC_LIBRARY_VERSION) diff --git a/cube/Makefile.in b/cube/Makefile.in new file mode 100644 index 0000000000..d93970dabb --- /dev/null +++ b/cube/Makefile.in @@ -0,0 +1,801 @@ +# Makefile.in generated by automake 1.11.1 from Makefile.am. +# @configure_input@ + +# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, +# 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation, +# Inc. +# This Makefile.in is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. 
+ +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY, to the extent permitted by law; without +# even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. + +@SET_MAKE@ + + +VPATH = @srcdir@ +pkgdatadir = $(datadir)/@PACKAGE@ +pkgincludedir = $(includedir)/@PACKAGE@ +pkglibdir = $(libdir)/@PACKAGE@ +pkglibexecdir = $(libexecdir)/@PACKAGE@ +am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd +install_sh_DATA = $(install_sh) -c -m 644 +install_sh_PROGRAM = $(install_sh) -c +install_sh_SCRIPT = $(install_sh) -c +INSTALL_HEADER = $(INSTALL_DATA) +transform = $(program_transform_name) +NORMAL_INSTALL = : +PRE_INSTALL = : +POST_INSTALL = : +NORMAL_UNINSTALL = : +PRE_UNINSTALL = : +POST_UNINSTALL = : +build_triplet = @build@ +host_triplet = @host@ +subdir = cube +DIST_COMMON = $(include_HEADERS) $(srcdir)/Makefile.am \ + $(srcdir)/Makefile.in +ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 +am__aclocal_m4_deps = $(top_srcdir)/acinclude.m4 \ + $(top_srcdir)/configure.ac +am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ + $(ACLOCAL_M4) +mkinstalldirs = $(SHELL) $(top_srcdir)/config/mkinstalldirs +CONFIG_HEADER = $(top_builddir)/config_auto.h +CONFIG_CLEAN_FILES = +CONFIG_CLEAN_VPATH_FILES = +am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`; +am__vpath_adj = case $$p in \ + $(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \ + *) f=$$p;; \ + esac; +am__strip_dir = f=`echo $$p | sed -e 's|^.*/||'`; +am__install_max = 40 +am__nobase_strip_setup = \ + srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*|]/\\\\&/g'` +am__nobase_strip = \ + for p in $$list; do echo "$$p"; done | sed -e "s|$$srcdirstrip/||" +am__nobase_list = $(am__nobase_strip_setup); \ + for p in $$list; do echo "$$p $$p"; done | \ + sed "s| $$srcdirstrip/| |;"' / .*\//!s/ .*/ ./; s,\( .*\)/[^/]*$$,\1,' | \ + $(AWK) 'BEGIN { files["."] = "" } { files[$$2] = files[$$2] " " $$1; \ + if (++n[$$2] == 
$(am__install_max)) \ + { print $$2, files[$$2]; n[$$2] = 0; files[$$2] = "" } } \ + END { for (dir in files) print dir, files[dir] }' +am__base_list = \ + sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \ + sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g' +am__installdirs = "$(DESTDIR)$(libdir)" "$(DESTDIR)$(includedir)" +LTLIBRARIES = $(lib_LTLIBRARIES) +libtesseract_cube_la_LIBADD = +am_libtesseract_cube_la_OBJECTS = altlist.lo beam_search.lo bmp_8.lo \ + cached_file.lo char_altlist.lo char_bigrams.lo char_samp.lo \ + char_samp_enum.lo char_samp_set.lo char_set.lo \ + classifier_factory.lo con_comp.lo conv_net_classifier.lo \ + cube_line_object.lo cube_line_segmenter.lo cube_object.lo \ + cube_search_object.lo cube_tuning_params.lo cube_utils.lo \ + feature_bmp.lo feature_chebyshev.lo feature_hybrid.lo \ + hybrid_neural_net_classifier.lo search_column.lo \ + search_node.lo tess_lang_mod_edge.lo tess_lang_model.lo \ + word_altlist.lo word_list_lang_model.lo word_size_model.lo \ + word_unigrams.lo +libtesseract_cube_la_OBJECTS = $(am_libtesseract_cube_la_OBJECTS) +libtesseract_cube_la_LINK = $(LIBTOOL) --tag=CXX $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=link $(CXXLD) $(AM_CXXFLAGS) \ + $(CXXFLAGS) $(libtesseract_cube_la_LDFLAGS) $(LDFLAGS) -o $@ +DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir) +depcomp = $(SHELL) $(top_srcdir)/config/depcomp +am__depfiles_maybe = depfiles +am__mv = mv -f +CXXCOMPILE = $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \ + $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CXXFLAGS) $(CXXFLAGS) +LTCXXCOMPILE = $(LIBTOOL) --tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \ + --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \ + $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CXXFLAGS) $(CXXFLAGS) +CXXLD = $(CXX) +CXXLINK = $(LIBTOOL) --tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \ + --mode=link $(CXXLD) $(AM_CXXFLAGS) $(CXXFLAGS) $(AM_LDFLAGS) \ + $(LDFLAGS) -o $@ +SOURCES = $(libtesseract_cube_la_SOURCES) +DIST_SOURCES = $(libtesseract_cube_la_SOURCES) 
+RECURSIVE_TARGETS = all-recursive check-recursive dvi-recursive \ + html-recursive info-recursive install-data-recursive \ + install-dvi-recursive install-exec-recursive \ + install-html-recursive install-info-recursive \ + install-pdf-recursive install-ps-recursive install-recursive \ + installcheck-recursive installdirs-recursive pdf-recursive \ + ps-recursive uninstall-recursive +HEADERS = $(include_HEADERS) +RECURSIVE_CLEAN_TARGETS = mostlyclean-recursive clean-recursive \ + distclean-recursive maintainer-clean-recursive +AM_RECURSIVE_TARGETS = $(RECURSIVE_TARGETS:-recursive=) \ + $(RECURSIVE_CLEAN_TARGETS:-recursive=) tags TAGS ctags CTAGS \ + distdir +ETAGS = etags +CTAGS = ctags +DIST_SUBDIRS = $(SUBDIRS) +DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) +am__relativize = \ + dir0=`pwd`; \ + sed_first='s,^\([^/]*\)/.*$$,\1,'; \ + sed_rest='s,^[^/]*/*,,'; \ + sed_last='s,^.*/\([^/]*\)$$,\1,'; \ + sed_butlast='s,/*[^/]*$$,,'; \ + while test -n "$$dir1"; do \ + first=`echo "$$dir1" | sed -e "$$sed_first"`; \ + if test "$$first" != "."; then \ + if test "$$first" = ".."; then \ + dir2=`echo "$$dir0" | sed -e "$$sed_last"`/"$$dir2"; \ + dir0=`echo "$$dir0" | sed -e "$$sed_butlast"`; \ + else \ + first2=`echo "$$dir2" | sed -e "$$sed_first"`; \ + if test "$$first2" = "$$first"; then \ + dir2=`echo "$$dir2" | sed -e "$$sed_rest"`; \ + else \ + dir2="../$$dir2"; \ + fi; \ + dir0="$$dir0"/"$$first"; \ + fi; \ + fi; \ + dir1=`echo "$$dir1" | sed -e "$$sed_rest"`; \ + done; \ + reldir="$$dir2" +ACLOCAL = @ACLOCAL@ +AMTAR = @AMTAR@ +AR = @AR@ +AUTOCONF = @AUTOCONF@ +AUTOHEADER = @AUTOHEADER@ +AUTOMAKE = @AUTOMAKE@ +AWK = @AWK@ +CC = @CC@ +CCDEPMODE = @CCDEPMODE@ +CFLAGS = @CFLAGS@ +CPP = @CPP@ +CPPFLAGS = @CPPFLAGS@ +CXX = @CXX@ +CXXCPP = @CXXCPP@ +CXXDEPMODE = @CXXDEPMODE@ +CXXFLAGS = @CXXFLAGS@ +CYGPATH_W = @CYGPATH_W@ +DEFS = @DEFS@ +DEPDIR = @DEPDIR@ +DSYMUTIL = @DSYMUTIL@ +DUMPBIN = @DUMPBIN@ +ECHO_C = @ECHO_C@ +ECHO_N = @ECHO_N@ +ECHO_T = 
@ECHO_T@ +EGREP = @EGREP@ +EXEEXT = @EXEEXT@ +FGREP = @FGREP@ +GENERIC_API_VERSION = @GENERIC_API_VERSION@ +GENERIC_LIBRARY_NAME = @GENERIC_LIBRARY_NAME@ +GENERIC_LIBRARY_VERSION = @GENERIC_LIBRARY_VERSION@ +GENERIC_MAJOR_VERSION = @GENERIC_MAJOR_VERSION@ +GENERIC_RELEASE = @GENERIC_RELEASE@ +GENERIC_VERSION = @GENERIC_VERSION@ +GETTEXT_MACRO_VERSION = @GETTEXT_MACRO_VERSION@ +GMSGFMT = @GMSGFMT@ +GMSGFMT_015 = @GMSGFMT_015@ +GREP = @GREP@ +INSTALL = @INSTALL@ +INSTALL_DATA = @INSTALL_DATA@ +INSTALL_PROGRAM = @INSTALL_PROGRAM@ +INSTALL_SCRIPT = @INSTALL_SCRIPT@ +INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +INTLLIBS = @INTLLIBS@ +INTL_MACOSX_LIBS = @INTL_MACOSX_LIBS@ +LD = @LD@ +LDFLAGS = @LDFLAGS@ +LIBICONV = @LIBICONV@ +LIBINTL = @LIBINTL@ +LIBOBJS = @LIBOBJS@ +LIBS = @LIBS@ +LIBTIFF_CFLAGS = @LIBTIFF_CFLAGS@ +LIBTIFF_LIBS = @LIBTIFF_LIBS@ +LIBTOOL = @LIBTOOL@ +LIPO = @LIPO@ +LN_S = @LN_S@ +LTLIBICONV = @LTLIBICONV@ +LTLIBINTL = @LTLIBINTL@ +LTLIBOBJS = @LTLIBOBJS@ +MAINT = @MAINT@ +MAKEINFO = @MAKEINFO@ +MKDIR_P = @MKDIR_P@ +MSGFMT = @MSGFMT@ +MSGFMT_015 = @MSGFMT_015@ +MSGMERGE = @MSGMERGE@ +NM = @NM@ +NMEDIT = @NMEDIT@ +OBJDUMP = @OBJDUMP@ +OBJEXT = @OBJEXT@ +OTOOL = @OTOOL@ +OTOOL64 = @OTOOL64@ +PACKAGE = @PACKAGE@ +PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ +PACKAGE_DATE = @PACKAGE_DATE@ +PACKAGE_NAME = @PACKAGE_NAME@ +PACKAGE_STRING = @PACKAGE_STRING@ +PACKAGE_TARNAME = @PACKAGE_TARNAME@ +PACKAGE_URL = @PACKAGE_URL@ +PACKAGE_VERSION = @PACKAGE_VERSION@ +PACKAGE_YEAR = @PACKAGE_YEAR@ +PATH_SEPARATOR = @PATH_SEPARATOR@ +POSUB = @POSUB@ +RANLIB = @RANLIB@ +SED = @SED@ +SET_MAKE = @SET_MAKE@ +SHELL = @SHELL@ +STRIP = @STRIP@ +USE_NLS = @USE_NLS@ +VERSION = @VERSION@ +XGETTEXT = @XGETTEXT@ +XGETTEXT_015 = @XGETTEXT_015@ +XGETTEXT_EXTRA_OPTIONS = @XGETTEXT_EXTRA_OPTIONS@ +abs_builddir = @abs_builddir@ +abs_srcdir = @abs_srcdir@ +abs_top_builddir = @abs_top_builddir@ +abs_top_srcdir = @abs_top_srcdir@ +ac_ct_CC = @ac_ct_CC@ +ac_ct_CXX = @ac_ct_CXX@ +ac_ct_DUMPBIN 
= @ac_ct_DUMPBIN@ +am__include = @am__include@ +am__leading_dot = @am__leading_dot@ +am__quote = @am__quote@ +am__tar = @am__tar@ +am__untar = @am__untar@ +bindir = @bindir@ +build = @build@ +build_alias = @build_alias@ +build_cpu = @build_cpu@ +build_os = @build_os@ +build_vendor = @build_vendor@ +builddir = @builddir@ +datadir = @datadir@ +datarootdir = @datarootdir@ +docdir = @docdir@ +dvidir = @dvidir@ +exec_prefix = @exec_prefix@ +host = @host@ +host_alias = @host_alias@ +host_cpu = @host_cpu@ +host_os = @host_os@ +host_vendor = @host_vendor@ +htmldir = @htmldir@ +includedir = @includedir@ +infodir = @infodir@ +install_sh = @install_sh@ +libdir = @libdir@ +libexecdir = @libexecdir@ +localedir = @localedir@ +localstatedir = @localstatedir@ +lt_ECHO = @lt_ECHO@ +mandir = @mandir@ +mkdir_p = @mkdir_p@ +oldincludedir = @oldincludedir@ +pdfdir = @pdfdir@ +prefix = @prefix@ +program_transform_name = @program_transform_name@ +psdir = @psdir@ +sbindir = @sbindir@ +sharedstatedir = @sharedstatedir@ +srcdir = @srcdir@ +sysconfdir = @sysconfdir@ +target_alias = @target_alias@ +top_build_prefix = @top_build_prefix@ +top_builddir = @top_builddir@ +top_srcdir = @top_srcdir@ +SUBDIRS = +AM_CPPFLAGS = \ + -DUSE_STD_NAMESPACE \ + -I$(top_srcdir)/cutil -I$(top_srcdir)/ccutil \ + -I$(top_srcdir)/ccstruct -I$(top_srcdir)/dict \ + -I$(top_srcdir)/ccmain -I$(top_srcdir)/classify \ + -I$(top_srcdir)/textord -I$(top_srcdir)/wordrec \ + -I$(top_srcdir)/neural_networks/runtime \ + -I$(top_srcdir)/image -I$(top_srcdir)/viewer + +include_HEADERS = \ + altlist.h beam_search.h bmp_8.h cached_file.h \ + char_altlist.h char_bigrams.h char_samp.h char_samp_enum.h \ + char_samp_set.h char_set.h classifier_base.h classifier_factory.h \ + con_comp.h const.h conv_net_classifier.h cube_line_object.h \ + cube_line_segmenter.h cube_object.h cube_search_object.h \ + cube_tuning_params.h cube_utils.h feature_base.h feature_bmp.h \ + feature_chebyshev.h feature_hybrid.h hybrid_neural_net_classifier.h \ 
+ lang_mod_edge.h lang_model.h search_column.h search_node.h \ + search_object.h string_32.h tess_lang_mod_edge.h tess_lang_model.h \ + tuning_params.h word_altlist.h word_list_lang_model.h word_size_model.h \ + word_unigrams.h + +lib_LTLIBRARIES = libtesseract_cube.la +libtesseract_cube_la_SOURCES = \ + altlist.cpp beam_search.cpp bmp_8.cpp cached_file.cpp \ + char_altlist.cpp char_bigrams.cpp char_samp.cpp char_samp_enum.cpp \ + char_samp_set.cpp char_set.cpp classifier_factory.cpp \ + con_comp.cpp conv_net_classifier.cpp cube_line_object.cpp \ + cube_line_segmenter.cpp cube_object.cpp cube_search_object.cpp \ + cube_tuning_params.cpp cube_utils.cpp feature_bmp.cpp \ + feature_chebyshev.cpp feature_hybrid.cpp hybrid_neural_net_classifier.cpp \ + search_column.cpp search_node.cpp \ + tess_lang_mod_edge.cpp tess_lang_model.cpp \ + word_altlist.cpp word_list_lang_model.cpp word_size_model.cpp \ + word_unigrams.cpp + +libtesseract_cube_la_LDFLAGS = -version-info $(GENERIC_LIBRARY_VERSION) +all: all-recursive + +.SUFFIXES: +.SUFFIXES: .cpp .lo .o .obj +$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(am__configure_deps) + @for dep in $?; do \ + case '$(am__configure_deps)' in \ + *$$dep*) \ + ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ + && { if test -f $@; then exit 0; else break; fi; }; \ + exit 1;; \ + esac; \ + done; \ + echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu cube/Makefile'; \ + $(am__cd) $(top_srcdir) && \ + $(AUTOMAKE) --gnu cube/Makefile +.PRECIOUS: Makefile +Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status + @case '$?' 
in \ + *config.status*) \ + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ + *) \ + echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \ + cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \ + esac; + +$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh + +$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(am__aclocal_m4_deps): +install-libLTLIBRARIES: $(lib_LTLIBRARIES) + @$(NORMAL_INSTALL) + test -z "$(libdir)" || $(MKDIR_P) "$(DESTDIR)$(libdir)" + @list='$(lib_LTLIBRARIES)'; test -n "$(libdir)" || list=; \ + list2=; for p in $$list; do \ + if test -f $$p; then \ + list2="$$list2 $$p"; \ + else :; fi; \ + done; \ + test -z "$$list2" || { \ + echo " $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL) $(INSTALL_STRIP_FLAG) $$list2 '$(DESTDIR)$(libdir)'"; \ + $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL) $(INSTALL_STRIP_FLAG) $$list2 "$(DESTDIR)$(libdir)"; \ + } + +uninstall-libLTLIBRARIES: + @$(NORMAL_UNINSTALL) + @list='$(lib_LTLIBRARIES)'; test -n "$(libdir)" || list=; \ + for p in $$list; do \ + $(am__strip_dir) \ + echo " $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=uninstall rm -f '$(DESTDIR)$(libdir)/$$f'"; \ + $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=uninstall rm -f "$(DESTDIR)$(libdir)/$$f"; \ + done + +clean-libLTLIBRARIES: + -test -z "$(lib_LTLIBRARIES)" || rm -f $(lib_LTLIBRARIES) + @list='$(lib_LTLIBRARIES)'; for p in $$list; do \ + dir="`echo $$p | sed -e 's|/[^/]*$$||'`"; \ + test "$$dir" != "$$p" || dir=.; \ + echo "rm -f \"$${dir}/so_locations\""; \ + rm -f "$${dir}/so_locations"; \ + done 
+libtesseract_cube.la: $(libtesseract_cube_la_OBJECTS) $(libtesseract_cube_la_DEPENDENCIES) + $(libtesseract_cube_la_LINK) -rpath $(libdir) $(libtesseract_cube_la_OBJECTS) $(libtesseract_cube_la_LIBADD) $(LIBS) + +mostlyclean-compile: + -rm -f *.$(OBJEXT) + +distclean-compile: + -rm -f *.tab.c + +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/altlist.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/beam_search.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/bmp_8.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cached_file.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/char_altlist.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/char_bigrams.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/char_samp.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/char_samp_enum.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/char_samp_set.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/char_set.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/classifier_factory.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/con_comp.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/conv_net_classifier.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cube_line_object.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cube_line_segmenter.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cube_object.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cube_search_object.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cube_tuning_params.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cube_utils.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/feature_bmp.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/feature_chebyshev.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ 
@am__quote@./$(DEPDIR)/feature_hybrid.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/hybrid_neural_net_classifier.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/search_column.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/search_node.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/tess_lang_mod_edge.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/tess_lang_model.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/word_altlist.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/word_list_lang_model.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/word_size_model.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/word_unigrams.Plo@am__quote@ + +.cpp.o: +@am__fastdepCXX_TRUE@ $(CXXCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $< +@am__fastdepCXX_TRUE@ $(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(CXXCOMPILE) -c -o $@ $< + +.cpp.obj: +@am__fastdepCXX_TRUE@ $(CXXCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ `$(CYGPATH_W) '$<'` +@am__fastdepCXX_TRUE@ $(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(CXXCOMPILE) -c -o $@ `$(CYGPATH_W) '$<'` + +.cpp.lo: +@am__fastdepCXX_TRUE@ $(LTCXXCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $< +@am__fastdepCXX_TRUE@ $(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Plo +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ 
$(LTCXXCOMPILE) -c -o $@ $< + +mostlyclean-libtool: + -rm -f *.lo + +clean-libtool: + -rm -rf .libs _libs +install-includeHEADERS: $(include_HEADERS) + @$(NORMAL_INSTALL) + test -z "$(includedir)" || $(MKDIR_P) "$(DESTDIR)$(includedir)" + @list='$(include_HEADERS)'; test -n "$(includedir)" || list=; \ + for p in $$list; do \ + if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \ + echo "$$d$$p"; \ + done | $(am__base_list) | \ + while read files; do \ + echo " $(INSTALL_HEADER) $$files '$(DESTDIR)$(includedir)'"; \ + $(INSTALL_HEADER) $$files "$(DESTDIR)$(includedir)" || exit $$?; \ + done + +uninstall-includeHEADERS: + @$(NORMAL_UNINSTALL) + @list='$(include_HEADERS)'; test -n "$(includedir)" || list=; \ + files=`for p in $$list; do echo $$p; done | sed -e 's|^.*/||'`; \ + test -n "$$files" || exit 0; \ + echo " ( cd '$(DESTDIR)$(includedir)' && rm -f" $$files ")"; \ + cd "$(DESTDIR)$(includedir)" && rm -f $$files + +# This directory's subdirectories are mostly independent; you can cd +# into them and run `make' without going through this Makefile. +# To change the values of `make' variables: instead of editing Makefiles, +# (1) if the variable is set in `config.status', edit `config.status' +# (which will cause the Makefiles to be regenerated when you run `make'); +# (2) otherwise, pass the desired values on the `make' command line. 
+$(RECURSIVE_TARGETS): + @fail= failcom='exit 1'; \ + for f in x $$MAKEFLAGS; do \ + case $$f in \ + *=* | --[!k]*);; \ + *k*) failcom='fail=yes';; \ + esac; \ + done; \ + dot_seen=no; \ + target=`echo $@ | sed s/-recursive//`; \ + list='$(SUBDIRS)'; for subdir in $$list; do \ + echo "Making $$target in $$subdir"; \ + if test "$$subdir" = "."; then \ + dot_seen=yes; \ + local_target="$$target-am"; \ + else \ + local_target="$$target"; \ + fi; \ + ($(am__cd) $$subdir && $(MAKE) $(AM_MAKEFLAGS) $$local_target) \ + || eval $$failcom; \ + done; \ + if test "$$dot_seen" = "no"; then \ + $(MAKE) $(AM_MAKEFLAGS) "$$target-am" || exit 1; \ + fi; test -z "$$fail" + +$(RECURSIVE_CLEAN_TARGETS): + @fail= failcom='exit 1'; \ + for f in x $$MAKEFLAGS; do \ + case $$f in \ + *=* | --[!k]*);; \ + *k*) failcom='fail=yes';; \ + esac; \ + done; \ + dot_seen=no; \ + case "$@" in \ + distclean-* | maintainer-clean-*) list='$(DIST_SUBDIRS)' ;; \ + *) list='$(SUBDIRS)' ;; \ + esac; \ + rev=''; for subdir in $$list; do \ + if test "$$subdir" = "."; then :; else \ + rev="$$subdir $$rev"; \ + fi; \ + done; \ + rev="$$rev ."; \ + target=`echo $@ | sed s/-recursive//`; \ + for subdir in $$rev; do \ + echo "Making $$target in $$subdir"; \ + if test "$$subdir" = "."; then \ + local_target="$$target-am"; \ + else \ + local_target="$$target"; \ + fi; \ + ($(am__cd) $$subdir && $(MAKE) $(AM_MAKEFLAGS) $$local_target) \ + || eval $$failcom; \ + done && test -z "$$fail" +tags-recursive: + list='$(SUBDIRS)'; for subdir in $$list; do \ + test "$$subdir" = . || ($(am__cd) $$subdir && $(MAKE) $(AM_MAKEFLAGS) tags); \ + done +ctags-recursive: + list='$(SUBDIRS)'; for subdir in $$list; do \ + test "$$subdir" = . 
|| ($(am__cd) $$subdir && $(MAKE) $(AM_MAKEFLAGS) ctags); \ + done + +ID: $(HEADERS) $(SOURCES) $(LISP) $(TAGS_FILES) + list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | \ + $(AWK) '{ files[$$0] = 1; nonempty = 1; } \ + END { if (nonempty) { for (i in files) print i; }; }'`; \ + mkid -fID $$unique +tags: TAGS + +TAGS: tags-recursive $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \ + $(TAGS_FILES) $(LISP) + set x; \ + here=`pwd`; \ + if ($(ETAGS) --etags-include --version) >/dev/null 2>&1; then \ + include_option=--etags-include; \ + empty_fix=.; \ + else \ + include_option=--include; \ + empty_fix=; \ + fi; \ + list='$(SUBDIRS)'; for subdir in $$list; do \ + if test "$$subdir" = .; then :; else \ + test ! -f $$subdir/TAGS || \ + set "$$@" "$$include_option=$$here/$$subdir/TAGS"; \ + fi; \ + done; \ + list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | \ + $(AWK) '{ files[$$0] = 1; nonempty = 1; } \ + END { if (nonempty) { for (i in files) print i; }; }'`; \ + shift; \ + if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \ + test -n "$$unique" || unique=$$empty_fix; \ + if test $$# -gt 0; then \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + "$$@" $$unique; \ + else \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + $$unique; \ + fi; \ + fi +ctags: CTAGS +CTAGS: ctags-recursive $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \ + $(TAGS_FILES) $(LISP) + list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | \ + $(AWK) '{ files[$$0] = 1; nonempty = 1; } \ + END { if (nonempty) { for (i in files) print i; }; }'`; \ + test -z "$(CTAGS_ARGS)$$unique" \ + || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \ + $$unique + +GTAGS: + 
here=`$(am__cd) $(top_builddir) && pwd` \ + && $(am__cd) $(top_srcdir) \ + && gtags -i $(GTAGS_ARGS) "$$here" + +distclean-tags: + -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags + +distdir: $(DISTFILES) + @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + list='$(DISTFILES)'; \ + dist_files=`for file in $$list; do echo $$file; done | \ + sed -e "s|^$$srcdirstrip/||;t" \ + -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ + case $$dist_files in \ + */*) $(MKDIR_P) `echo "$$dist_files" | \ + sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ + sort -u` ;; \ + esac; \ + for file in $$dist_files; do \ + if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ + if test -d $$d/$$file; then \ + dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ + if test -d "$(distdir)/$$file"; then \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ + cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ + find "$(distdir)/$$file" -type d ! 
-perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ + else \ + test -f "$(distdir)/$$file" \ + || cp -p $$d/$$file "$(distdir)/$$file" \ + || exit 1; \ + fi; \ + done + @list='$(DIST_SUBDIRS)'; for subdir in $$list; do \ + if test "$$subdir" = .; then :; else \ + test -d "$(distdir)/$$subdir" \ + || $(MKDIR_P) "$(distdir)/$$subdir" \ + || exit 1; \ + fi; \ + done + @list='$(DIST_SUBDIRS)'; for subdir in $$list; do \ + if test "$$subdir" = .; then :; else \ + dir1=$$subdir; dir2="$(distdir)/$$subdir"; \ + $(am__relativize); \ + new_distdir=$$reldir; \ + dir1=$$subdir; dir2="$(top_distdir)"; \ + $(am__relativize); \ + new_top_distdir=$$reldir; \ + echo " (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) top_distdir="$$new_top_distdir" distdir="$$new_distdir" \\"; \ + echo " am__remove_distdir=: am__skip_length_check=: am__skip_mode_fix=: distdir)"; \ + ($(am__cd) $$subdir && \ + $(MAKE) $(AM_MAKEFLAGS) \ + top_distdir="$$new_top_distdir" \ + distdir="$$new_distdir" \ + am__remove_distdir=: \ + am__skip_length_check=: \ + am__skip_mode_fix=: \ + distdir) \ + || exit 1; \ + fi; \ + done +check-am: all-am +check: check-recursive +all-am: Makefile $(LTLIBRARIES) $(HEADERS) +installdirs: installdirs-recursive +installdirs-am: + for dir in "$(DESTDIR)$(libdir)" "$(DESTDIR)$(includedir)"; do \ + test -z "$$dir" || $(MKDIR_P) "$$dir"; \ + done +install: install-recursive +install-exec: install-exec-recursive +install-data: install-data-recursive +uninstall: uninstall-recursive + +install-am: all-am + @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am + +installcheck: installcheck-recursive +install-strip: + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + `test -z '$(STRIP)' || \ + echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install +mostlyclean-generic: + +clean-generic: + +distclean-generic: + -test -z "$(CONFIG_CLEAN_FILES)" || rm -f 
$(CONFIG_CLEAN_FILES) + -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) + +maintainer-clean-generic: + @echo "This command is intended for maintainers to use" + @echo "it deletes files that may require special tools to rebuild." +clean: clean-recursive + +clean-am: clean-generic clean-libLTLIBRARIES clean-libtool \ + mostlyclean-am + +distclean: distclean-recursive + -rm -rf ./$(DEPDIR) + -rm -f Makefile +distclean-am: clean-am distclean-compile distclean-generic \ + distclean-tags + +dvi: dvi-recursive + +dvi-am: + +html: html-recursive + +html-am: + +info: info-recursive + +info-am: + +install-data-am: install-includeHEADERS + +install-dvi: install-dvi-recursive + +install-dvi-am: + +install-exec-am: install-libLTLIBRARIES + +install-html: install-html-recursive + +install-html-am: + +install-info: install-info-recursive + +install-info-am: + +install-man: + +install-pdf: install-pdf-recursive + +install-pdf-am: + +install-ps: install-ps-recursive + +install-ps-am: + +installcheck-am: + +maintainer-clean: maintainer-clean-recursive + -rm -rf ./$(DEPDIR) + -rm -f Makefile +maintainer-clean-am: distclean-am maintainer-clean-generic + +mostlyclean: mostlyclean-recursive + +mostlyclean-am: mostlyclean-compile mostlyclean-generic \ + mostlyclean-libtool + +pdf: pdf-recursive + +pdf-am: + +ps: ps-recursive + +ps-am: + +uninstall-am: uninstall-includeHEADERS uninstall-libLTLIBRARIES + +.MAKE: $(RECURSIVE_CLEAN_TARGETS) $(RECURSIVE_TARGETS) ctags-recursive \ + install-am install-strip tags-recursive + +.PHONY: $(RECURSIVE_CLEAN_TARGETS) $(RECURSIVE_TARGETS) CTAGS GTAGS \ + all all-am check check-am clean clean-generic \ + clean-libLTLIBRARIES clean-libtool ctags ctags-recursive \ + distclean distclean-compile distclean-generic \ + distclean-libtool distclean-tags distdir dvi dvi-am html \ + html-am info info-am install install-am install-data \ + install-data-am install-dvi install-dvi-am install-exec \ + 
install-exec-am install-html install-html-am \ + install-includeHEADERS install-info install-info-am \ + install-libLTLIBRARIES install-man install-pdf install-pdf-am \ + install-ps install-ps-am install-strip installcheck \ + installcheck-am installdirs installdirs-am maintainer-clean \ + maintainer-clean-generic mostlyclean mostlyclean-compile \ + mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \ + tags tags-recursive uninstall uninstall-am \ + uninstall-includeHEADERS uninstall-libLTLIBRARIES + + +# Tell versions [3.59,3.63) of GNU make to not export all variables. +# Otherwise a system limit (for SysV at least) may be exceeded. +.NOEXPORT: diff --git a/cube/altlist.cpp b/cube/altlist.cpp new file mode 100644 index 0000000000..b96796eefc --- /dev/null +++ b/cube/altlist.cpp @@ -0,0 +1,60 @@ +/********************************************************************** + * File: alt_list.cpp + * Description: Class to abstract a list of alternate results + * Author: Ahmad Abdulkader + * Created: 2007 + * + * (C) Copyright 2008, Google Inc. + ** Licensed under the Apache License, Version 2.0 (the "License"); + ** you may not use this file except in compliance with the License. + ** You may obtain a copy of the License at + ** http://www.apache.org/licenses/LICENSE-2.0 + ** Unless required by applicable law or agreed to in writing, software + ** distributed under the License is distributed on an "AS IS" BASIS, + ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + ** See the License for the specific language governing permissions and + ** limitations under the License. 
+ * + **********************************************************************/ + +#include "altlist.h" +#include + +namespace tesseract { + +AltList::AltList(int max_alt) { + max_alt_ = max_alt; + alt_cnt_ = 0; + alt_cost_ = NULL; + alt_tag_ = NULL; +} + +AltList::~AltList() { + if (alt_cost_ != NULL) { + delete []alt_cost_; + alt_cost_ = NULL; + } + + if (alt_tag_ != NULL) { + delete []alt_tag_; + alt_tag_ = NULL; + } +} + +// return the best possible cost and index of corresponding alternate +int AltList::BestCost(int *best_alt) const { + if (alt_cnt_ <= 0) { + (*best_alt) = -1; + return -1; + } + + int best_alt_idx = 0; + for (int alt_idx = 1; alt_idx < alt_cnt_; alt_idx++) { + if (alt_cost_[alt_idx] < alt_cost_[best_alt_idx]) { + best_alt_idx = alt_idx; + } + } + (*best_alt) = best_alt_idx; + return alt_cost_[best_alt_idx]; +} +} diff --git a/cube/altlist.h b/cube/altlist.h new file mode 100644 index 0000000000..3aebf313c7 --- /dev/null +++ b/cube/altlist.h @@ -0,0 +1,61 @@ +/********************************************************************** + * File: alt_list.h + * Description: Class to abstract a list of alternate results + * Author: Ahmad Abdulkader + * Created: 2007 + * + * (C) Copyright 2008, Google Inc. + ** Licensed under the Apache License, Version 2.0 (the "License"); + ** you may not use this file except in compliance with the License. + ** You may obtain a copy of the License at + ** http://www.apache.org/licenses/LICENSE-2.0 + ** Unless required by applicable law or agreed to in writing, software + ** distributed under the License is distributed on an "AS IS" BASIS, + ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + ** See the License for the specific language governing permissions and + ** limitations under the License. + * + **********************************************************************/ + +// The AltList class is the base class for the list of alternate recognition +// results. 
Each alternate has a cost and an optional tag associated with it + +#ifndef ALT_LIST_H +#define ALT_LIST_H + +#include +#include "cube_utils.h" + +namespace tesseract { +class AltList { + public: + explicit AltList(int max_alt); + virtual ~AltList(); + // sort the list of alternates based + virtual void Sort() = 0; + // return the best possible cost and index of corresponding alternate + int BestCost (int *best_alt) const; + // return the count of alternates + inline int AltCount() const { return alt_cnt_; } + // returns the cost (-ve log prob) of an alternate + inline int AltCost(int alt_idx) const { return alt_cost_[alt_idx]; } + // returns the prob of an alternate + inline double AltProb(int alt_idx) const { + return CubeUtils::Cost2Prob(AltCost(alt_idx)); + } + // returns the alternate tag + inline void *AltTag(int alt_idx) const { return alt_tag_[alt_idx]; } + + protected: + // max number of alternates the list can hold + int max_alt_; + // actual alternate count + int alt_cnt_; + // array of alternate costs + int *alt_cost_; + // array of alternate tags + void **alt_tag_; +}; +} + +#endif // ALT_LIST_H diff --git a/cube/beam_search.cpp b/cube/beam_search.cpp new file mode 100644 index 0000000000..34e69bf509 --- /dev/null +++ b/cube/beam_search.cpp @@ -0,0 +1,489 @@ +/********************************************************************** + * File: beam_search.cpp + * Description: Class to implement Beam Word Search Algorithm + * Author: Ahmad Abdulkader + * Created: 2007 + * + * (C) Copyright 2008, Google Inc. + ** Licensed under the Apache License, Version 2.0 (the "License"); + ** you may not use this file except in compliance with the License. + ** You may obtain a copy of the License at + ** http://www.apache.org/licenses/LICENSE-2.0 + ** Unless required by applicable law or agreed to in writing, software + ** distributed under the License is distributed on an "AS IS" BASIS, + ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ ** See the License for the specific language governing permissions and + ** limitations under the License. + * + **********************************************************************/ + +#include + +#include "beam_search.h" +#include "tesseractclass.h" + +namespace tesseract { + +BeamSearch::BeamSearch(CubeRecoContext *cntxt, bool word_mode) { + cntxt_ = cntxt; + seg_pt_cnt_ = 0; + col_cnt_ = 1; + col_ = NULL; + word_mode_ = word_mode; +} + +// Cleanup the lattice corresponding to the last search +void BeamSearch::Cleanup() { + if (col_ != NULL) { + for (int col = 0; col < col_cnt_; col++) { + if (col_[col]) + delete col_[col]; + } + delete []col_; + } + col_ = NULL; +} + +BeamSearch::~BeamSearch() { + Cleanup(); +} + +// Creates a set of children nodes emerging from a parent node based on +// the character alternate list and the language model. +void BeamSearch::CreateChildren(SearchColumn *out_col, LangModel *lang_mod, + SearchNode *parent_node, + LangModEdge *lm_parent_edge, + CharAltList *char_alt_list, int extra_cost) { + // get all the edges from this parent + int edge_cnt; + LangModEdge **lm_edges = lang_mod->GetEdges(char_alt_list, + lm_parent_edge, &edge_cnt); + if (lm_edges) { + // add them to the ending column with the appropriate parent + for (int edge = 0; edge < edge_cnt; edge++) { + // add a node to the column if the current column is not the + // last one, or if the lang model edge indicates it is valid EOW + if (!cntxt_->NoisyInput() && out_col->ColIdx() >= seg_pt_cnt_ && + !lm_edges[edge]->IsEOW()) { + // free edge since no object is going to own it + delete lm_edges[edge]; + continue; + } + + // compute the recognition cost of this node + int recognition_cost = MIN_PROB_COST; + if (char_alt_list && char_alt_list->AltCount() > 0) { + recognition_cost = MAX(0, char_alt_list->ClassCost( + lm_edges[edge]->ClassID())); + // Add the no space cost. 
This should zero in word mode + recognition_cost += extra_cost; + } + + // Note that the edge will be freed inside the column if + // AddNode is called + if (recognition_cost >= 0) { + out_col->AddNode(lm_edges[edge], recognition_cost, parent_node, + cntxt_); + } else { + delete lm_edges[edge]; + } + } // edge + // free edge array + delete []lm_edges; + } // lm_edges +} + +// Performs a beam seach in the specified search using the specified +// language model; returns an alternate list of possible words as a result. +WordAltList * BeamSearch::Search(SearchObject *srch_obj, LangModel *lang_mod) { + // verifications + if (!lang_mod) + lang_mod = cntxt_->LangMod(); + if (!lang_mod) { + fprintf(stderr, "Cube ERROR (BeamSearch::Search): could not construct " + "LangModel\n"); + return NULL; + } + + // free existing state + Cleanup(); + + // get seg pt count + seg_pt_cnt_ = srch_obj->SegPtCnt(); + if (seg_pt_cnt_ < 0) { + return NULL; + } + col_cnt_ = seg_pt_cnt_ + 1; + + // disregard suspicious cases + if (seg_pt_cnt_ > 128) { + fprintf(stderr, "Cube ERROR (BeamSearch::Search): segment point count is " + "suspiciously high; bailing out\n"); + return NULL; + } + + // alloc memory for columns + col_ = new SearchColumn *[col_cnt_]; + if (!col_) { + fprintf(stderr, "Cube ERROR (BeamSearch::Search): could not construct " + "SearchColumn array\n"); + return NULL; + } + memset(col_, 0, col_cnt_ * sizeof(*col_)); + + // for all possible segments + for (int end_seg = 1; end_seg <= (seg_pt_cnt_ + 1); end_seg++) { + // create a search column + col_[end_seg - 1] = new SearchColumn(end_seg - 1, + cntxt_->Params()->BeamWidth()); + if (!col_[end_seg - 1]) { + fprintf(stderr, "Cube ERROR (BeamSearch::Search): could not construct " + "SearchColumn for column %d\n", end_seg - 1); + return NULL; + } + + // for all possible start segments + int init_seg = MAX(0, end_seg - cntxt_->Params()->MaxSegPerChar()); + for (int strt_seg = init_seg; strt_seg < end_seg; strt_seg++) { + int 
parent_nodes_cnt; + SearchNode **parent_nodes; + + // for the root segment, we do not have a parent + if (strt_seg == 0) { + parent_nodes_cnt = 1; + parent_nodes = NULL; + } else { + // for all the existing nodes in the starting column + parent_nodes_cnt = col_[strt_seg - 1]->NodeCount(); + parent_nodes = col_[strt_seg - 1]->Nodes(); + } + + // run the shape recognizer + CharAltList *char_alt_list = srch_obj->RecognizeSegment(strt_seg - 1, + end_seg - 1); + // for all the possible parents + for (int parent_idx = 0; parent_idx < parent_nodes_cnt; parent_idx++) { + // point to the parent node + SearchNode *parent_node = !parent_nodes ? NULL + : parent_nodes[parent_idx]; + LangModEdge *lm_parent_edge = !parent_node ? lang_mod->Root() + : parent_node->LangModelEdge(); + + // compute the cost of not having spaces within the segment range + int contig_cost = srch_obj->NoSpaceCost(strt_seg - 1, end_seg - 1); + + // In phrase mode, compute the cost of not having a space before + // this character + int no_space_cost = 0; + if (!word_mode_ && strt_seg > 0) { + no_space_cost = srch_obj->NoSpaceCost(strt_seg - 1); + } + + // if the no space cost is low enough + if ((contig_cost + no_space_cost) < MIN_PROB_COST) { + // Add the children nodes + CreateChildren(col_[end_seg - 1], lang_mod, parent_node, + lm_parent_edge, char_alt_list, + contig_cost + no_space_cost); + } + + // In phrase mode and if not starting at the root + if (!word_mode_ && strt_seg > 0) { // parent_node must be non-NULL + // consider starting a new word for nodes that are valid EOW + if (parent_node->LangModelEdge()->IsEOW()) { + // get the space cost + int space_cost = srch_obj->SpaceCost(strt_seg - 1); + // if the space cost is low enough + if ((contig_cost + space_cost) < MIN_PROB_COST) { + // Restart the language model and add nodes as children to the + // space node. 
+ CreateChildren(col_[end_seg - 1], lang_mod, parent_node, NULL, + char_alt_list, contig_cost + space_cost); + } + } + } + } // parent + } // strt_seg + + // prune the column nodes + col_[end_seg - 1]->Prune(); + + // Free the column hash table. No longer needed + col_[end_seg - 1]->FreeHashTable(); + } // end_seg + + WordAltList *alt_list = CreateWordAltList(srch_obj); + return alt_list; +} + +// Creates a Word alternate list from the results in the lattice. +WordAltList *BeamSearch::CreateWordAltList(SearchObject *srch_obj) { + // create an alternate list of all the nodes in the last column + int node_cnt = col_[col_cnt_ - 1]->NodeCount(); + SearchNode **srch_nodes = col_[col_cnt_ - 1]->Nodes(); + CharBigrams *bigrams = cntxt_->Bigrams(); + WordUnigrams *word_unigrams = cntxt_->WordUnigramsObj(); + + // Save the index of the best-cost node before the alt list is + // sorted, so that we can retrieve it from the node list when backtracking. + best_presorted_node_idx_ = 0; + int best_cost = -1; + + if (node_cnt <= 0) + return NULL; + + // start creating the word alternate list + WordAltList *alt_list = new WordAltList(node_cnt + 1); + for (int node_idx = 0; node_idx < node_cnt; node_idx++) { + // recognition cost + int recognition_cost = srch_nodes[node_idx]->BestCost(); + // compute the size cost of the alternate + char_32 *ch_buff = NULL; + int size_cost = SizeCost(srch_obj, srch_nodes[node_idx], &ch_buff); + // accumulate other costs + if (ch_buff) { + int cost = 0; + // char bigram cost + int bigram_cost = !bigrams ? 0 : + bigrams->Cost(ch_buff, cntxt_->CharacterSet(), + &cntxt_->TesseractObject()->unicharset); + // word unigram cost + int unigram_cost = !word_unigrams ? 
0 : + word_unigrams->Cost(ch_buff, cntxt_->LangMod(), + cntxt_->CharacterSet(), + &cntxt_->TesseractObject()->unicharset); + // overall cost + cost = static_cast( + (size_cost * cntxt_->Params()->SizeWgt()) + + (bigram_cost * cntxt_->Params()->CharBigramWgt()) + + (unigram_cost * cntxt_->Params()->WordUnigramWgt()) + + (recognition_cost * cntxt_->Params()->RecoWgt())); + + // insert into word alt list + alt_list->Insert(ch_buff, cost, + static_cast(srch_nodes[node_idx])); + // Note that strict < is necessary because WordAltList::Sort() + // uses it in a bubble sort to swap entries. + if (best_cost < 0 || cost < best_cost) { + best_presorted_node_idx_ = node_idx; + best_cost = cost; + } + delete []ch_buff; + } + } + + // sort the alternates based on cost + alt_list->Sort(); + return alt_list; +} + +// Returns the lattice column corresponding to the specified column index. +SearchColumn *BeamSearch::Column(int col) const { + if (col < 0 || col >= col_cnt_ || !col_) + return NULL; + return col_[col]; +} + +// Returns the best node in the last column of last performed search. +SearchNode *BeamSearch::BestNode() const { + if (col_cnt_ < 1 || !col_ || !col_[col_cnt_ - 1]) + return NULL; + + int node_cnt = col_[col_cnt_ - 1]->NodeCount(); + SearchNode **srch_nodes = col_[col_cnt_ - 1]->Nodes(); + if (node_cnt < 1 || !srch_nodes || !srch_nodes[0]) + return NULL; + return srch_nodes[0]; +} + +// Returns the string corresponding to the specified alt. 
+char_32 *BeamSearch::Alt(int alt) const { + // get the last column of the lattice + if (col_cnt_ <= 0) + return NULL; + + SearchColumn *srch_col = col_[col_cnt_ - 1]; + if (!srch_col) + return NULL; + + // point to the last node in the selected path + if (alt >= srch_col->NodeCount() || srch_col->Nodes() == NULL) { + return NULL; + } + + SearchNode *srch_node = srch_col->Nodes()[alt]; + if (!srch_node) + return NULL; + + // get string + char_32 *str32 = srch_node->PathString(); + if (!str32) + return NULL; + + return str32; +} + +// Backtracks from the specified node index and returns the corresponding +// character mapped segments and character count. Optional return +// arguments are the char_32 result string and character bounding +// boxes, if non-NULL values are passed in. +CharSamp **BeamSearch::BackTrack(SearchObject *srch_obj, int node_index, + int *char_cnt, char_32 **str32, + Boxa **char_boxes) const { + // get the last column of the lattice + if (col_cnt_ <= 0) + return NULL; + SearchColumn *srch_col = col_[col_cnt_ - 1]; + if (!srch_col) + return NULL; + + // point to the last node in the selected path + if (node_index >= srch_col->NodeCount() || !srch_col->Nodes()) + return NULL; + + SearchNode *srch_node = srch_col->Nodes()[node_index]; + if (!srch_node) + return NULL; + return BackTrack(srch_obj, srch_node, char_cnt, str32, char_boxes); +} + +// Backtracks from the specified node index and returns the corresponding +// character mapped segments and character count. Optional return +// arguments are the char_32 result string and character bounding +// boxes, if non-NULL values are passed in. 
+CharSamp **BeamSearch::BackTrack(SearchObject *srch_obj, SearchNode *srch_node, + int *char_cnt, char_32 **str32, + Boxa **char_boxes) const { + if (!srch_node) + return NULL; + + if (str32) { + if (*str32) + delete [](*str32); // clear existing value + *str32 = srch_node->PathString(); + if (!*str32) + return NULL; + } + + if (char_boxes && *char_boxes) { + boxaDestroy(char_boxes); // clear existing value + } + + CharSamp **chars; + chars = SplitByNode(srch_obj, srch_node, char_cnt, char_boxes); + if (!chars && str32) + delete []*str32; + return chars; +} + +// Backtracks from the given lattice node and return the corresponding +// char mapped segments and character count. The character bounding +// boxes are optional return arguments, if non-NULL values are passed in. +CharSamp **BeamSearch::SplitByNode(SearchObject *srch_obj, + SearchNode *srch_node, + int *char_cnt, + Boxa **char_boxes) const { + // Count the characters (could be less than the path length when in + // phrase mode) + *char_cnt = 0; + SearchNode *node = srch_node; + while (node) { + node = node->ParentNode(); + (*char_cnt)++; + } + + if (*char_cnt == 0) + return NULL; + + // Allocate box array + if (char_boxes) { + if (*char_boxes) + boxaDestroy(char_boxes); // clear existing value + *char_boxes = boxaCreate(*char_cnt); + if (*char_boxes == NULL) + return NULL; + } + + // Allocate memory for CharSamp array. + CharSamp **chars = new CharSamp *[*char_cnt]; + if (!chars) { + if (char_boxes) + boxaDestroy(char_boxes); + return NULL; + } + + int ch_idx = *char_cnt - 1; + int seg_pt_cnt = srch_obj->SegPtCnt(); + bool success=true; + while (srch_node && ch_idx >= 0) { + // Parent node (could be null) + SearchNode *parent_node = srch_node->ParentNode(); + + // Get the seg pts corresponding to the search node + int st_col = !parent_node ? 0 : parent_node->ColIdx() + 1; + int st_seg_pt = st_col <= 0 ? -1 : st_col - 1; + int end_col = srch_node->ColIdx(); + int end_seg_pt = end_col >= seg_pt_cnt ? 
seg_pt_cnt : end_col; + + // Get a char sample corresponding to the segmentation points + CharSamp *samp = srch_obj->CharSample(st_seg_pt, end_seg_pt); + if (!samp) { + success = false; + break; + } + samp->SetLabel(srch_node->NodeString()); + chars[ch_idx] = samp; + if (char_boxes) { + // Create the corresponding character bounding box + Box *char_box = boxCreate(samp->Left(), samp->Top(), + samp->Width(), samp->Height()); + if (!char_box) { + success = false; + break; + } + boxaAddBox(*char_boxes, char_box, L_INSERT); + } + srch_node = parent_node; + ch_idx--; + } + if (!success) { + delete []chars; + if (char_boxes) + boxaDestroy(char_boxes); + return NULL; + } + + // Reverse the order of boxes. + if (char_boxes) { + int char_boxa_size = boxaGetCount(*char_boxes); + int limit = char_boxa_size / 2; + for (int i = 0; i < limit; ++i) { + int box1_idx = i; + int box2_idx = char_boxa_size - 1 - i; + Box *box1 = boxaGetBox(*char_boxes, box1_idx, L_CLONE); + Box *box2 = boxaGetBox(*char_boxes, box2_idx, L_CLONE); + boxaReplaceBox(*char_boxes, box2_idx, box1); + boxaReplaceBox(*char_boxes, box1_idx, box2); + } + } + return chars; +} + +// Returns the size cost of a string for a lattice path that +// ends at the specified lattice node. +int BeamSearch::SizeCost(SearchObject *srch_obj, SearchNode *node, + char_32 **str32) const { + CharSamp **chars = NULL; + int char_cnt = 0; + if (!node) + return 0; + // Backtrack to get string and character segmentation + chars = BackTrack(srch_obj, node, &char_cnt, str32, NULL); + if (!chars) + return WORST_COST; + int size_cost = (cntxt_->SizeModel() == NULL) ? 
0 : + cntxt_->SizeModel()->Cost(chars, char_cnt); + delete []chars; + return size_cost; +} +} // namespace tesseract diff --git a/cube/beam_search.h b/cube/beam_search.h new file mode 100644 index 0000000000..a39f5b1349 --- /dev/null +++ b/cube/beam_search.h @@ -0,0 +1,126 @@ +/********************************************************************** + * File: beam_search.h + * Description: Declaration of Beam Word Search Algorithm Class + * Author: Ahmad Abdulkader + * Created: 2007 + * + * (C) Copyright 2008, Google Inc. + ** Licensed under the Apache License, Version 2.0 (the "License"); + ** you may not use this file except in compliance with the License. + ** You may obtain a copy of the License at + ** http://www.apache.org/licenses/LICENSE-2.0 + ** Unless required by applicable law or agreed to in writing, software + ** distributed under the License is distributed on an "AS IS" BASIS, + ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + ** See the License for the specific language governing permissions and + ** limitations under the License. + * + **********************************************************************/ + +// The Beam Search class implements a Beam Search algorithm for the +// N-best paths through the lattice of a search object using a language model +// The search object is a segmented bitmap of a word image. The language model +// is a state machine that defines valid sequences of characters +// The cost of each path is the combined (product) probabilities of the +// characters along the path. The character probabilities are computed using +// the character classifier member of the RecoContext +// The BeamSearch class itself holds the state of the last search it performed +// using its "Search" method. 
Subsequent class to the Search method erase the +// states of previously done searches + +#ifndef BEAM_SEARCH_H +#define BEAM_SEARCH_H + +#include "search_column.h" +#include "word_altlist.h" +#include "search_object.h" +#include "lang_model.h" +#include "cube_utils.h" +#include "cube_reco_context.h" +#include "allheaders.h" + +namespace tesseract { + +class BeamSearch { + public: + explicit BeamSearch(CubeRecoContext *cntxt, bool word_mode = true); + ~BeamSearch(); + // Performs a beam seach in the specified search using the specified + // language model; returns an alternate list of possible words as a result. + WordAltList *Search(SearchObject *srch_obj, LangModel *lang_mod = NULL); + // Returns the best node in the last column of last performed search. + SearchNode *BestNode() const; + // Returns the string corresponding to the specified alt. + char_32 *Alt(int alt) const; + // Backtracks from the specified lattice node and returns the corresponding + // character-mapped segments, character count, char_32 result string, and + // character bounding boxes (if char_boxes is not NULL). If the segments + // cannot be constructed, returns NULL, and all result arguments + // will be NULL. + CharSamp **BackTrack(SearchObject *srch_obj, int node_index, + int *char_cnt, char_32 **str32, Boxa **char_boxes) const; + // Same as above, except it takes a pointer to a search node object + // instead of node index. + CharSamp **BackTrack(SearchObject *srch_obj, SearchNode *node, + int *char_cnt, char_32 **str32, Boxa **char_boxes) const; + // Returns the size cost of a specified string of a lattice + // path that ends at the specified lattice node. + int SizeCost(SearchObject *srch_obj, SearchNode *node, + char_32 **str32 = NULL) const; + // Returns the word unigram cost of the given string, possibly + // stripping out a single trailing punctuation character. 
+ int WordUnigramCost(char_32 *str32, WordUnigrams* word_unigrams) const; + + // Supplementary functions needed for visualization + // Return column count of the lattice. + inline int ColCnt() const { return col_cnt_; } + // Returns the lattice column corresponding to the specified column index. + SearchColumn *Column(int col_idx) const; + // Return the index of the best node in the last column of the + // best-cost path before the alternates list is sorted. + inline int BestPresortedNodeIndex() const { + return best_presorted_node_idx_; + }; + + private: + // Maximum reasonable segmentation point count + static const int kMaxSegPointCnt = 128; + // Recognition context object; the context holds the character classifier + // and the tuning parameters object + CubeRecoContext *cntxt_; + // Count of segmentation pts + int seg_pt_cnt_; + // Lattice column count; currently redundant with respect to seg_pt_cnt_ + // but that might change in the future + int col_cnt_; + // Array of lattice columns + SearchColumn **col_; + // Run in word or phrase mode + bool word_mode_; + // Node index of best-cost node, before alternates are merged and sorted + int best_presorted_node_idx_; + // Cleans up beam search state + void Cleanup(); + // Creates a Word alternate list from the results in the lattice. + // This function computes a cost for each node in the final column + // of the lattice, which is a weighted average of several costs: + // size cost, character bigram cost, word unigram cost, and + // recognition cost from the beam search. The weights are the + // CubeTuningParams, which are learned together with the character + // classifiers. + WordAltList *CreateWordAltList(SearchObject *srch_obj); + // Creates a set of children nodes emerging from a parent node based on + // the character alternate list and the language model. 
+ void CreateChildren(SearchColumn *out_col, LangModel *lang_mod, + SearchNode *parent_node, LangModEdge *lm_parent_edge, + CharAltList *char_alt_list, int extra_cost); + // Backtracks from the given lattice node and returns the corresponding + // char mapped segments, character count, and character bounding boxes (if + // char_boxes is not NULL). If the segments cannot be constructed, + // returns NULL, and all result arguments will be NULL. + CharSamp **SplitByNode(SearchObject *srch_obj, SearchNode *srch_node, + int* char_cnt, Boxa **char_boxes) const; +}; +} + +#endif // BEAM_SEARCH_H diff --git a/cube/bmp_8.cpp b/cube/bmp_8.cpp new file mode 100644 index 0000000000..460c31eaab --- /dev/null +++ b/cube/bmp_8.cpp @@ -0,0 +1,1150 @@ +/********************************************************************** + * File: bmp_8.cpp + * Description: Implementation of an 8-bit Bitmap class + * Author: Ahmad Abdulkader + * Created: 2007 + * + * (C) Copyright 2008, Google Inc. + ** Licensed under the Apache License, Version 2.0 (the "License"); + ** you may not use this file except in compliance with the License. + ** You may obtain a copy of the License at + ** http://www.apache.org/licenses/LICENSE-2.0 + ** Unless required by applicable law or agreed to in writing, software + ** distributed under the License is distributed on an "AS IS" BASIS, + ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + ** See the License for the specific language governing permissions and + ** limitations under the License. 
+ * + **********************************************************************/ + +#include +#include +#include +#include +#include "bmp_8.h" +#include "con_comp.h" +#ifdef USE_STD_NAMESPACE +using std::min; +using std::max; +#endif + +#ifdef WIN32 +#ifndef M_PI +#define M_PI 3.14159265358979323846 +#endif +#endif + +namespace tesseract { + +const int Bmp8::kDeslantAngleCount = (1 + static_cast(0.5f + + (kMaxDeslantAngle - kMinDeslantAngle) / kDeslantAngleDelta)); +float *Bmp8::tan_table_ = NULL; + +Bmp8::Bmp8(unsigned short wid, unsigned short hgt) + : wid_(wid) + , hgt_(hgt) { + line_buff_ = CreateBmpBuffer(); +} + +Bmp8::~Bmp8() { + FreeBmpBuffer(line_buff_); +} + +// free buffer +void Bmp8::FreeBmpBuffer(unsigned char **buff) { + if (buff != NULL) { + if (buff[0] != NULL) { + delete []buff[0]; + } + delete []buff; + } +} + +void Bmp8::FreeBmpBuffer(unsigned int **buff) { + if (buff != NULL) { + if (buff[0] != NULL) { + delete []buff[0]; + } + delete []buff; + } +} + +// init bmp buffers +unsigned char **Bmp8::CreateBmpBuffer(unsigned char init_val) { + unsigned char **buff; + + // Check valid sizes + if (!hgt_ || !wid_) + return NULL; + + // compute stride (align on 4 byte boundries) + stride_ = ((wid_ % 4) == 0) ? 
wid_ : (4 * (1 + (wid_ / 4))); + + buff = (unsigned char **) new unsigned char *[hgt_ * sizeof(*buff)]; + if (!buff) { + delete []buff; + return NULL; + } + + // alloc and init memory for buffer and line buffer + buff[0] = (unsigned char *) + new unsigned char[stride_ * hgt_ * sizeof(*buff[0])]; + if (!buff[0]) { + return NULL; + } + + memset(buff[0], init_val, stride_ * hgt_ * sizeof(*buff[0])); + + for (int y = 1; y < hgt_; y++) { + buff[y] = buff[y -1] + stride_; + } + + return buff; +} + +// init bmp buffers +unsigned int ** Bmp8::CreateBmpBuffer(int wid, int hgt, + unsigned char init_val) { + unsigned int **buff; + + // compute stride (align on 4 byte boundries) + buff = (unsigned int **) new unsigned int *[hgt * sizeof(*buff)]; + if (!buff) { + delete []buff; + return NULL; + } + + // alloc and init memory for buffer and line buffer + buff[0] = (unsigned int *) new unsigned int[wid * hgt * sizeof(*buff[0])]; + if (!buff[0]) { + return NULL; + } + + memset(buff[0], init_val, wid * hgt * sizeof(*buff[0])); + + for (int y = 1; y < hgt; y++) { + buff[y] = buff[y -1] + wid; + } + + return buff; +} + +// clears the contents of the bmp +bool Bmp8::Clear() { + if (line_buff_ == NULL) { + return false; + } + + memset(line_buff_[0], 0xff, stride_ * hgt_ * sizeof(*line_buff_[0])); + return true; +} + +bool Bmp8::LoadFromCharDumpFile(CachedFile *fp) { + unsigned short wid; + unsigned short hgt; + unsigned short x; + unsigned short y; + int buf_size; + int pix; + int pix_cnt; + unsigned int val32; + unsigned char *buff; + + // read and check 32 bit marker + if (fp->Read(&val32, sizeof(val32)) != sizeof(val32)) { + return false; + } + + if (val32 != kMagicNumber) { + return false; + } + + // read wid and hgt + if (fp->Read(&wid, sizeof(wid)) != sizeof(wid)) { + return false; + } + + if (fp->Read(&hgt, sizeof(hgt)) != sizeof(hgt)) { + return false; + } + + // read buf size + if (fp->Read(&buf_size, sizeof(buf_size)) != sizeof(buf_size)) { + return false; + } + + // validate 
buf size: for now, only 3 channel (RBG) is supported + pix_cnt = wid * hgt; + if (buf_size != (3 * pix_cnt)) { + return false; + } + + // alloc memory & read the 3 channel buffer + buff = new unsigned char[buf_size]; + if (buff == NULL) { + return false; + } + + if (fp->Read(buff, buf_size) != buf_size) { + delete []buff; + return false; + } + + // create internal buffers + wid_ = wid; + hgt_ = hgt; + + line_buff_ = CreateBmpBuffer(); + if (line_buff_ == NULL) { + delete []buff; + return false; + } + + // copy the data + for (y = 0, pix = 0; y < hgt_; y++) { + for (x = 0; x < wid_; x++, pix += 3) { + // for now we only support gray scale, + // so we expect R = G = B, it this is not the case, bail out + if (buff[pix] != buff[pix + 1] || buff[pix] != buff[pix + 2]) { + delete []buff; + return false; + } + line_buff_[y][x] = buff[pix]; + } + } + + // delete temp buffer + delete[]buff; + + return true; +} + +Bmp8 * Bmp8::FromCharDumpFile(CachedFile *fp) { + // create a Bmp8 object + Bmp8 *bmp_obj = new Bmp8(0, 0); + if (bmp_obj == NULL) { + return NULL; + } + + if (bmp_obj->LoadFromCharDumpFile(fp) == false) { + delete bmp_obj; + } + + return bmp_obj; +} + +bool Bmp8::LoadFromCharDumpFile(FILE *fp) { + unsigned short wid; + unsigned short hgt; + unsigned short x; + unsigned short y; + int buf_size; + int pix; + int pix_cnt; + unsigned int val32; + unsigned char *buff; + + // read and check 32 bit marker + if (fread(&val32, 1, sizeof(val32), fp) != sizeof(val32)) { + return false; + } + + if (val32 != kMagicNumber) { + return false; + } + + // read wid and hgt + if (fread(&wid, 1, sizeof(wid), fp) != sizeof(wid)) { + return false; + } + + if (fread(&hgt, 1, sizeof(hgt), fp) != sizeof(hgt)) { + return false; + } + + // read buf size + if (fread(&buf_size, 1, sizeof(buf_size), fp) != sizeof(buf_size)) { + return false; + } + + // validate buf size: for now, only 3 channel (RBG) is supported + pix_cnt = wid * hgt; + if (buf_size != (3 * pix_cnt)) { + return false; + } + + 
// alloc memory & read the 3 channel buffer + buff = new unsigned char[buf_size]; + if (buff == NULL) { + return false; + } + + if (fread(buff, 1, buf_size, fp) != buf_size) { + delete []buff; + return false; + } + + // create internal buffers + wid_ = wid; + hgt_ = hgt; + + line_buff_ = CreateBmpBuffer(); + if (line_buff_ == NULL) { + delete []buff; + return false; + } + + // copy the data + for (y = 0, pix = 0; y < hgt_; y++) { + for (x = 0; x < wid_; x++, pix += 3) { + // for now we only support gray scale, + // so we expect R = G = B, it this is not the case, bail out + if ( buff[pix] != buff[pix + 1] || + buff[pix] != buff[pix + 2] + ) { + delete []buff; + return false; + } + + line_buff_[y][x] = buff[pix]; + } + } + + // delete temp buffer + delete[]buff; + + return true; +} + +Bmp8 * Bmp8::FromCharDumpFile(FILE *fp) { + // create a Bmp8 object + Bmp8 *bmp_obj = new Bmp8(0, 0); + if (bmp_obj == NULL) { + return NULL; + } + + if (bmp_obj->LoadFromCharDumpFile(fp) == false) { + delete bmp_obj; + } + + return bmp_obj; +} + +bool Bmp8::IsBlankColumn(int x) const { + for (int y = 0; y < hgt_; y++) { + if (line_buff_[y][x] != 0xff) { + return false; + } + } + + return true; +} + +bool Bmp8::IsBlankRow(int y) const { + for (int x = 0; x < wid_; x++) { + if (line_buff_[y][x] != 0xff) { + return false; + } + } + + return true; +} + +// crop the bitmap returning new dimensions +void Bmp8::Crop(int *xst, int *yst, int *wid, int *hgt) { + (*xst) = 0; + (*yst) = 0; + + int xend = wid_ - 1; + int yend = hgt_ - 1; + + while ((*xst) < (wid_ - 1) && (*xst) <= xend) { + // column is not empty + if (!IsBlankColumn((*xst))) { + break; + } + (*xst)++; + } + + while (xend > 0 && xend >= (*xst)) { + // column is not empty + if (!IsBlankColumn(xend)) { + break; + } + xend--; + } + + while ((*yst) < (hgt_ - 1) && (*yst) <= yend) { + // column is not empty + if (!IsBlankRow((*yst))) { + break; + } + (*yst)++; + } + + while (yend > 0 && yend >= (*yst)) { + // column is not empty + if 
(!IsBlankRow(yend)) { + break; + } + yend--; + } + + (*wid) = xend - (*xst) + 1; + (*hgt) = yend - (*yst) + 1; +} + +// generates a scaled bitmap with dimensions the new bmp will have the +// same aspect ratio and will be centered in the box +bool Bmp8::ScaleFrom(Bmp8 *bmp, bool isotropic) { + int x_num; + int x_denom; + int y_num; + int y_denom; + int xoff; + int yoff; + int xsrc; + int ysrc; + int xdest; + int ydest; + int xst_src = 0; + int yst_src = 0; + int xend_src = bmp->wid_ - 1; + int yend_src = bmp->hgt_ - 1; + int wid_src; + int hgt_src; + + // src dimensions + wid_src = xend_src - xst_src + 1, + hgt_src = yend_src - yst_src + 1; + + // scale to maintain aspect ratio if required + if (isotropic) { + if ((wid_ * hgt_src) > (hgt_ * wid_src)) { + x_num = y_num = hgt_; + x_denom = y_denom = hgt_src; + } else { + x_num = y_num = wid_; + x_denom = y_denom = wid_src; + } + } else { + x_num = wid_; + y_num = hgt_; + x_denom = wid_src; + y_denom = hgt_src; + } + + // compute offsets needed to center new bmp + xoff = (wid_ - ((x_num * wid_src) / x_denom)) / 2; + yoff = (hgt_ - ((y_num * hgt_src) / y_denom)) / 2; + + // scale up + if (y_num > y_denom) { + for (ydest = yoff; ydest < (hgt_ - yoff); ydest++) { + // compute un-scaled y + ysrc = static_cast(0.5 + (1.0 * (ydest - yoff) * + y_denom / y_num)); + if (ysrc < 0 || ysrc >= hgt_src) { + continue; + } + + for (xdest = xoff; xdest < (wid_ - xoff); xdest++) { + // compute un-scaled y + xsrc = static_cast(0.5 + (1.0 * (xdest - xoff) * + x_denom / x_num)); + if (xsrc < 0 || xsrc >= wid_src) { + continue; + } + + line_buff_[ydest][xdest] = + bmp->line_buff_[ysrc + yst_src][xsrc + xst_src]; + } + } + } else { + // or scale down + // scaling down is a bit tricky: we'll accumulate pixels + // and then compute the means + unsigned int **dest_line_buff = CreateBmpBuffer(wid_, hgt_, 0), + **dest_pix_cnt = CreateBmpBuffer(wid_, hgt_, 0); + + for (ysrc = 0; ysrc < hgt_src; ysrc++) { + // compute scaled y + ydest = yoff + 
static_cast(0.5 + (1.0 * ysrc * y_num / y_denom)); + if (ydest < 0 || ydest >= hgt_) { + continue; + } + + for (xsrc = 0; xsrc < wid_src; xsrc++) { + // compute scaled y + xdest = xoff + static_cast(0.5 + (1.0 * xsrc * x_num / x_denom)); + if (xdest < 0 || xdest >= wid_) { + continue; + } + + dest_line_buff[ydest][xdest] += + bmp->line_buff_[ysrc + yst_src][xsrc + xst_src]; + dest_pix_cnt[ydest][xdest]++; + } + } + + for (ydest = 0; ydest < hgt_; ydest++) { + for (xdest = 0; xdest < wid_; xdest++) { + if (dest_pix_cnt[ydest][xdest] > 0) { + unsigned int pixval = + dest_line_buff[ydest][xdest] / dest_pix_cnt[ydest][xdest]; + + line_buff_[ydest][xdest] = + (unsigned char) min((unsigned int)255, pixval); + } + } + } + + // we no longer need these temp buffers + FreeBmpBuffer(dest_line_buff); + FreeBmpBuffer(dest_pix_cnt); + } + + return true; +} + +bool Bmp8::LoadFromRawData(unsigned char *data) { + unsigned char *pline_data = data; + + // copy the data + for (int y = 0; y < hgt_; y++, pline_data += wid_) { + memcpy(line_buff_[y], pline_data, wid_ * sizeof(*pline_data)); + } + + return true; +} + +bool Bmp8::SaveBmp2CharDumpFile(FILE *fp) const { + unsigned short wid; + unsigned short hgt; + unsigned short x; + unsigned short y; + int buf_size; + int pix; + int pix_cnt; + unsigned int val32; + unsigned char *buff; + + // write and check 32 bit marker + val32 = kMagicNumber; + if (fwrite(&val32, 1, sizeof(val32), fp) != sizeof(val32)) { + return false; + } + + // write wid and hgt + wid = wid_; + if (fwrite(&wid, 1, sizeof(wid), fp) != sizeof(wid)) { + return false; + } + + hgt = hgt_; + if (fwrite(&hgt, 1, sizeof(hgt), fp) != sizeof(hgt)) { + return false; + } + + // write buf size + pix_cnt = wid * hgt; + buf_size = 3 * pix_cnt; + if (fwrite(&buf_size, 1, sizeof(buf_size), fp) != sizeof(buf_size)) { + return false; + } + + // alloc memory & write the 3 channel buffer + buff = new unsigned char[buf_size]; + if (buff == NULL) { + return false; + } + + // copy the data 
+ for (y = 0, pix = 0; y < hgt_; y++) { + for (x = 0; x < wid_; x++, pix += 3) { + buff[pix] = + buff[pix + 1] = + buff[pix + 2] = line_buff_[y][x]; + } + } + + if (fwrite(buff, 1, buf_size, fp) != buf_size) { + delete []buff; + return false; + } + + // delete temp buffer + delete[]buff; + + return true; +} + +// copy part of the specified bitmap to the top of the bitmap +// does any necessary clipping +void Bmp8::Copy(int x_st, int y_st, int wid, int hgt, Bmp8 *bmp_dest) const { + int x_end = min(x_st + wid, static_cast(wid_)), + y_end = min(y_st + hgt, static_cast(hgt_)); + + for (int y = y_st; y < y_end; y++) { + for (int x = x_st; x < x_end; x++) { + bmp_dest->line_buff_[y - y_st][x - x_st] = + line_buff_[y][x]; + } + } +} + +bool Bmp8::IsIdentical(Bmp8 *pBmp) const { + if (wid_ != pBmp->wid_ || hgt_ != pBmp->hgt_) { + return false; + } + + for (int y = 0; y < hgt_; y++) { + if (memcmp(line_buff_[y], pBmp->line_buff_[y], wid_) != 0) { + return false; + } + } + + return true; +} + +// Detect connected components in the bitmap +ConComp ** Bmp8::FindConComps(int *concomp_cnt, int min_size) const { + (*concomp_cnt) = 0; + + unsigned int **out_bmp_array = CreateBmpBuffer(wid_, hgt_, 0); + if (out_bmp_array == NULL) { + fprintf(stderr, "Cube ERROR (Bmp8::FindConComps): could not allocate " + "bitmap array\n"); + return NULL; + } + + // listed of connected components + ConComp **concomp_array = NULL; + + int x; + int y; + int x_nbr; + int y_nbr; + int concomp_id; + int alloc_concomp_cnt = 0; + + // neighbors to check + const int nbr_cnt = 4; + + // relative coordinates of nbrs + int x_del[nbr_cnt] = {-1, 0, 1, -1}, + y_del[nbr_cnt] = {-1, -1, -1, 0}; + + + for (y = 0; y < hgt_; y++) { + for (x = 0; x < wid_; x++) { + // is this a foreground pix + if (line_buff_[y][x] != 0xff) { + int master_concomp_id = 0; + ConComp *master_concomp = NULL; + + // checkout the nbrs + for (int nbr = 0; nbr < nbr_cnt; nbr++) { + x_nbr = x + x_del[nbr]; + y_nbr = y + y_del[nbr]; + + if 
(x_nbr < 0 || y_nbr < 0 || x_nbr >= wid_ || y_nbr >= hgt_) { + continue; + } + + // is this nbr a foreground pix + if (line_buff_[y_nbr][x_nbr] != 0xff) { + // get its concomp ID + concomp_id = out_bmp_array[y_nbr][x_nbr]; + + // this should not happen + if (concomp_id < 1 || concomp_id > alloc_concomp_cnt) { + fprintf(stderr, "Cube ERROR (Bmp8::FindConComps): illegal " + "connected component id: %d\n", concomp_id); + FreeBmpBuffer(out_bmp_array); + delete []concomp_array; + return NULL; + } + + // if we has previously found a component then merge the two + // and delete the latest one + if (master_concomp != NULL && concomp_id != master_concomp_id) { + // relabel all the pts + ConCompPt *pt_ptr = concomp_array[concomp_id - 1]->Head(); + while (pt_ptr != NULL) { + out_bmp_array[pt_ptr->y()][pt_ptr->x()] = master_concomp_id; + pt_ptr = pt_ptr->Next(); + } + + // merge the two concomp + if (!master_concomp->Merge(concomp_array[concomp_id - 1])) { + fprintf(stderr, "Cube ERROR (Bmp8::FindConComps): could not " + "merge connected component: %d\n", concomp_id); + FreeBmpBuffer(out_bmp_array); + delete []concomp_array; + return NULL; + } + + // delete the merged concomp + delete concomp_array[concomp_id - 1]; + concomp_array[concomp_id - 1] = NULL; + } else { + // this is the first concomp we encounter + master_concomp_id = concomp_id; + master_concomp = concomp_array[master_concomp_id - 1]; + + out_bmp_array[y][x] = master_concomp_id; + + if (!master_concomp->Add(x, y)) { + fprintf(stderr, "Cube ERROR (Bmp8::FindConComps): could not " + "add connected component (%d,%d)\n", x, y); + FreeBmpBuffer(out_bmp_array); + delete []concomp_array; + return NULL; + } + } + } // foreground nbr + } // nbrs + + // if there was no foreground pix, then create a new concomp + if (master_concomp == NULL) { + master_concomp = new ConComp(); + if (master_concomp == NULL || master_concomp->Add(x, y) == false) { + fprintf(stderr, "Cube ERROR (Bmp8::FindConComps): could not " + "allocate or 
add a connected component\n"); + FreeBmpBuffer(out_bmp_array); + delete []concomp_array; + return NULL; + } + + // extend the list of concomps if needed + if ((alloc_concomp_cnt % kConCompAllocChunk) == 0) { + ConComp **temp_con_comp = + new ConComp *[alloc_concomp_cnt + kConCompAllocChunk]; + if (temp_con_comp == NULL) { + fprintf(stderr, "Cube ERROR (Bmp8::FindConComps): could not " + "extend array of connected components\n"); + FreeBmpBuffer(out_bmp_array); + delete []concomp_array; + return NULL; + } + + if (alloc_concomp_cnt > 0) { + memcpy(temp_con_comp, concomp_array, + alloc_concomp_cnt * sizeof(*concomp_array)); + + delete []concomp_array; + } + + concomp_array = temp_con_comp; + } + + concomp_array[alloc_concomp_cnt++] = master_concomp; + out_bmp_array[y][x] = alloc_concomp_cnt; + } + } // foreground pix + } // x + } // y + + // free the concomp bmp + FreeBmpBuffer(out_bmp_array); + + if (alloc_concomp_cnt > 0 && concomp_array != NULL) { + // scan the array of connected components and color + // the o/p buffer with the corresponding concomps + (*concomp_cnt) = 0; + ConComp *concomp = NULL; + + for (int concomp_idx = 0; concomp_idx < alloc_concomp_cnt; concomp_idx++) { + concomp = concomp_array[concomp_idx]; + + // found a concomp + if (concomp != NULL) { + // add the connected component if big enough + if (concomp->PtCnt() > min_size) { + concomp->SetLeftMost(true); + concomp->SetRightMost(true); + concomp->SetID((*concomp_cnt)); + concomp_array[(*concomp_cnt)++] = concomp; + } else { + delete concomp; + } + } + } + } + + return concomp_array; +} + +// precompute the tan table to speedup deslanting +bool Bmp8::ComputeTanTable() { + int ang_idx; + float ang_val; + + // alloc memory for tan table + delete []tan_table_; + tan_table_ = new float[kDeslantAngleCount]; + if (tan_table_ == NULL) { + return false; + } + + for (ang_idx = 0, ang_val = kMinDeslantAngle; + ang_idx < kDeslantAngleCount; ang_idx++) { + tan_table_[ang_idx] = tan(ang_val * M_PI / 180.0f); 
+ ang_val += kDeslantAngleDelta; + } + + return true; +} + +// generates a deslanted bitmap from the passed bitmap. +bool Bmp8::Deslant() { + int x; + int y; + int des_x; + int des_y; + int ang_idx; + int best_ang; + int min_des_x; + int max_des_x; + int des_wid; + + // only do deslanting if bitmap is wide enough + // otherwise it slant estimate might not be reliable + if (wid_ < (hgt_ * 2)) { + return true; + } + + // compute tan table if needed + if (tan_table_ == NULL && !ComputeTanTable()) { + return false; + } + + // compute min and max values for x after deslant + min_des_x = static_cast(0.5f + (hgt_ - 1) * tan_table_[0]); + max_des_x = (wid_ - 1) + + static_cast(0.5f + (hgt_ - 1) * tan_table_[kDeslantAngleCount - 1]); + + des_wid = max_des_x - min_des_x + 1; + + // alloc memory for histograms + int **angle_hist = new int*[kDeslantAngleCount]; + for (ang_idx = 0; ang_idx < kDeslantAngleCount; ang_idx++) { + angle_hist[ang_idx] = new int[des_wid]; + if (angle_hist[ang_idx] == NULL) { + delete[] angle_hist; + return false; + } + memset(angle_hist[ang_idx], 0, des_wid * sizeof(*angle_hist[ang_idx])); + } + + // compute histograms + for (y = 0; y < hgt_; y++) { + for (x = 0; x < wid_; x++) { + // find a non-bkgrnd pixel + if (line_buff_[y][x] != 0xff) { + des_y = hgt_ - y - 1; + // stamp all histograms + for (ang_idx = 0; ang_idx < kDeslantAngleCount; ang_idx++) { + des_x = x + static_cast(0.5f + (des_y * tan_table_[ang_idx])); + if (des_x >= min_des_x && des_x <= max_des_x) { + angle_hist[ang_idx][des_x - min_des_x]++; + } + } + } + } + } + + // find the histogram with the lowest entropy + float entropy; + double best_entropy = 0.0f; + double norm_val; + + best_ang = -1; + for (ang_idx = 0; ang_idx < kDeslantAngleCount; ang_idx++) { + entropy = 0.0f; + + for (x = min_des_x; x <= max_des_x; x++) { + if (angle_hist[ang_idx][x - min_des_x] > 0) { + norm_val = (1.0f * angle_hist[ang_idx][x - min_des_x] / hgt_); + entropy += (-1.0f * norm_val * log(norm_val)); + } + 
} + + if (best_ang == -1 || entropy < best_entropy) { + best_ang = ang_idx; + best_entropy = entropy; + } + + // free the histogram + delete[] angle_hist[ang_idx]; + } + delete[] angle_hist; + + // deslant + if (best_ang != -1) { + unsigned char **dest_lines; + int old_wid = wid_; + + // create a new buffer + wid_ = des_wid; + dest_lines = CreateBmpBuffer(); + if (dest_lines == NULL) { + return false; + } + + for (y = 0; y < hgt_; y++) { + for (x = 0; x < old_wid; x++) { + // find a non-bkgrnd pixel + if (line_buff_[y][x] != 0xff) { + des_y = hgt_ - y - 1; + // compute new pos + des_x = x + static_cast(0.5f + (des_y * tan_table_[best_ang])); + dest_lines[y][des_x - min_des_x] = 0; + } + } + } + + // free old buffer + FreeBmpBuffer(line_buff_); + line_buff_ = dest_lines; + } + return true; +} + +// Load dimensions & contents of bitmap from raw data +bool Bmp8::LoadFromCharDumpFile(unsigned char **raw_data_ptr) { + unsigned short wid; + unsigned short hgt; + unsigned short x; + unsigned short y; + unsigned char *raw_data = (*raw_data_ptr); + int buf_size; + int pix; + unsigned int val32; + + // read and check 32 bit marker + memcpy(&val32, raw_data, sizeof(val32)); + raw_data += sizeof(val32); + + if (val32 != kMagicNumber) { + return false; + } + + // read wid and hgt + memcpy(&wid, raw_data, sizeof(wid)); + raw_data += sizeof(wid); + + memcpy(&hgt, raw_data, sizeof(hgt)); + raw_data += sizeof(hgt); + + // read buf size + memcpy(&buf_size, raw_data, sizeof(buf_size)); + raw_data += sizeof(buf_size); + + // validate buf size: for now, only 3 channel (RBG) is supported + if (buf_size != (3 * wid * hgt)) { + return false; + } + + wid_ = wid; + hgt_ = hgt; + + line_buff_ = CreateBmpBuffer(); + if (line_buff_ == NULL) { + return false; + } + + // copy the data + for (y = 0, pix = 0; y < hgt_; y++) { + for (x = 0; x < wid_; x++, pix += 3) { + // for now we only support gray scale, + // so we expect R = G = B, it this is not the case, bail out + if (raw_data[pix] != 
raw_data[pix + 1] || + raw_data[pix] != raw_data[pix + 2]) { + return false; + } + + line_buff_[y][x] = raw_data[pix]; + } + } + + (*raw_data_ptr) = raw_data + buf_size; + return true; +} + +float Bmp8::ForegroundRatio() const { + int fore_cnt = 0; + + if (wid_ <= 0 || hgt_ <= 0) { + return 1.0; + } + + for (int y = 0; y < hgt_; y++) { + for (int x = 0; x < wid_; x++) { + fore_cnt += (line_buff_[y][x] == 0xff ? 0 : 1); + } + } + + return (1.0 * (fore_cnt / hgt_) / wid_); +} + +// generates a deslanted bitmap from the passed bitmap +bool Bmp8::HorizontalDeslant(double *deslant_angle) { + int x; + int y; + int des_y; + int ang_idx; + int best_ang; + int min_des_y; + int max_des_y; + int des_hgt; + + // compute tan table if necess. + if (tan_table_ == NULL && !ComputeTanTable()) { + return false; + } + + // compute min and max values for x after deslant + min_des_y = min(0, static_cast((wid_ - 1) * tan_table_[0])); + max_des_y = (hgt_ - 1) + + max(0, static_cast((wid_ - 1) * tan_table_[kDeslantAngleCount - 1])); + + des_hgt = max_des_y - min_des_y + 1; + + // alloc memory for histograms + int **angle_hist = new int*[kDeslantAngleCount]; + for (ang_idx = 0; ang_idx < kDeslantAngleCount; ang_idx++) { + angle_hist[ang_idx] = new int[des_hgt]; + if (angle_hist[ang_idx] == NULL) { + delete[] angle_hist; + return false; + } + memset(angle_hist[ang_idx], 0, des_hgt * sizeof(*angle_hist[ang_idx])); + } + + // compute histograms + for (y = 0; y < hgt_; y++) { + for (x = 0; x < wid_; x++) { + // find a non-bkgrnd pixel + if (line_buff_[y][x] != 0xff) { + // stamp all histograms + for (ang_idx = 0; ang_idx < kDeslantAngleCount; ang_idx++) { + des_y = y - static_cast(x * tan_table_[ang_idx]); + if (des_y >= min_des_y && des_y <= max_des_y) { + angle_hist[ang_idx][des_y - min_des_y]++; + } + } + } + } + } + + // find the histogram with the lowest entropy + float entropy; + float best_entropy = 0.0f; + float norm_val; + + best_ang = -1; + for (ang_idx = 0; ang_idx < 
kDeslantAngleCount; ang_idx++) { + entropy = 0.0f; + + for (y = min_des_y; y <= max_des_y; y++) { + if (angle_hist[ang_idx][y - min_des_y] > 0) { + norm_val = (1.0f * angle_hist[ang_idx][y - min_des_y] / wid_); + entropy += (-1.0f * norm_val * log(norm_val)); + } + } + + if (best_ang == -1 || entropy < best_entropy) { + best_ang = ang_idx; + best_entropy = entropy; + } + + // free the histogram + delete[] angle_hist[ang_idx]; + } + delete[] angle_hist; + + (*deslant_angle) = 0.0; + + // deslant + if (best_ang != -1) { + unsigned char **dest_lines; + int old_hgt = hgt_; + + // create a new buffer + min_des_y = min(0, static_cast((wid_ - 1) * -tan_table_[best_ang])); + max_des_y = (hgt_ - 1) + + max(0, static_cast((wid_ - 1) * -tan_table_[best_ang])); + hgt_ = max_des_y - min_des_y + 1; + dest_lines = CreateBmpBuffer(); + if (dest_lines == NULL) { + return false; + } + + for (y = 0; y < old_hgt; y++) { + for (x = 0; x < wid_; x++) { + // find a non-bkgrnd pixel + if (line_buff_[y][x] != 0xff) { + // compute new pos + des_y = y - static_cast((x * tan_table_[best_ang])); + dest_lines[des_y - min_des_y][x] = 0; + } + } + } + + // free old buffer + FreeBmpBuffer(line_buff_); + line_buff_ = dest_lines; + + (*deslant_angle) = kMinDeslantAngle + (best_ang * kDeslantAngleDelta); + } + + return true; +} + +float Bmp8::MeanHorizontalHistogramEntropy() const { + float entropy = 0.0f; + + // compute histograms + for (int y = 0; y < hgt_; y++) { + int pix_cnt = 0; + + for (int x = 0; x < wid_; x++) { + // find a non-bkgrnd pixel + if (line_buff_[y][x] != 0xff) { + pix_cnt++; + } + } + + if (pix_cnt > 0) { + float norm_val = (1.0f * pix_cnt / wid_); + entropy += (-1.0f * norm_val * log(norm_val)); + } + } + + return entropy / hgt_; +} + +int *Bmp8::HorizontalHistogram() const { + int *hist = new int[hgt_]; + if (hist == NULL) { + return NULL; + } + + // compute histograms + for (int y = 0; y < hgt_; y++) { + hist[y] = 0; + + for (int x = 0; x < wid_; x++) { + // find a non-bkgrnd 
pixel + if (line_buff_[y][x] != 0xff) { + hist[y]++; + } + } + } + + return hist; +} + +} // namespace tesseract diff --git a/cube/bmp_8.h b/cube/bmp_8.h new file mode 100644 index 0000000000..7200d7da82 --- /dev/null +++ b/cube/bmp_8.h @@ -0,0 +1,122 @@ +/********************************************************************** + * File: bmp_8.h + * Description: Declaration of an 8-bit Bitmap class + * Author: Ahmad Abdulkader + * Created: 2007 + * + * (C) Copyright 2008, Google Inc. + ** Licensed under the Apache License, Version 2.0 (the "License"); + ** you may not use this file except in compliance with the License. + ** You may obtain a copy of the License at + ** http://www.apache.org/licenses/LICENSE-2.0 + ** Unless required by applicable law or agreed to in writing, software + ** distributed under the License is distributed on an "AS IS" BASIS, + ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + ** See the License for the specific language governing permissions and + ** limitations under the License. + * + **********************************************************************/ + +#ifndef BMP8_H +#define BMP8_H + +// The Bmp8 class is an 8-bit bitmap that represents images of +// words, characters and segments throughout Cube +// It is meant to provide fast access to the bitmap bits and provide +// fast scaling, cropping, deslanting, connected components detection, +// loading and saving functionality + +#include +#include +#include "con_comp.h" +#include "cached_file.h" + +namespace tesseract { + +// Non-integral deslanting parameters. 
+static const float kMinDeslantAngle = -30.0f; +static const float kMaxDeslantAngle = 30.0f; +static const float kDeslantAngleDelta = 0.5f; + +class Bmp8 { + public: + Bmp8(unsigned short wid, unsigned short hgt); + ~Bmp8(); + // Clears the bitmap + bool Clear(); + // accessors to bitmap dimensions + inline unsigned short Width() const { return wid_; } + inline unsigned short Stride() const { return stride_; } + inline unsigned short Height() const { return hgt_; } + inline unsigned char *RawData() const { + return (line_buff_ == NULL ? NULL : line_buff_[0]); + } + // creates a scaled version of the specified bitmap + // Optionally, scaling can be isotropic (preserving aspect ratio) or not + bool ScaleFrom(Bmp8 *bmp, bool isotropic = true); + // Deslant the bitmap vertically + bool Deslant(); + // Deslant the bitmap horizontally + bool HorizontalDeslant(double *deslant_angle); + // Create a bitmap object from a file + static Bmp8 *FromCharDumpFile(CachedFile *fp); + static Bmp8 *FromCharDumpFile(FILE *fp); + // are two bitmaps identical + bool IsIdentical(Bmp8 *pBmp) const; + // Detect connected components + ConComp ** FindConComps(int *concomp_cnt, int min_size) const; + // compute the foreground ratio + float ForegroundRatio() const; + // returns the mean horizontal histogram entropy of the bitmap + float MeanHorizontalHistogramEntropy() const; + // returns the horizontal histogram of the bitmap + int *HorizontalHistogram() const; + + private: + // Compute a look up tan table that will be used for fast slant computation + static bool ComputeTanTable(); + // create a bitmap buffer (two flavors char & int) and init contents + unsigned char ** CreateBmpBuffer(unsigned char init_val = 0xff); + static unsigned int ** CreateBmpBuffer(int wid, int hgt, + unsigned char init_val = 0xff); + // Free a bitmap buffer + static void FreeBmpBuffer(unsigned char **buff); + static void FreeBmpBuffer(unsigned int **buff); + + // a static array that holds the tan lookup table + 
static float *tan_table_; + // bitmap 32-bit-aligned stride + unsigned short stride_; + // Bmp8 magic number used to validate saved bitmaps + static const unsigned int kMagicNumber = 0xdeadbeef; + + protected: + // bitmap dimensions + unsigned short wid_; + unsigned short hgt_; + // bitmap contents + unsigned char **line_buff_; + // deslanting parameters + static const int kConCompAllocChunk = 16; + static const int kDeslantAngleCount; + + // Load dimensions & contents of bitmap from file + bool LoadFromCharDumpFile(CachedFile *fp); + bool LoadFromCharDumpFile(FILE *fp); + // Load dimensions & contents of bitmap from raw data + bool LoadFromCharDumpFile(unsigned char **raw_data); + // Load contents of bitmap from raw data + bool LoadFromRawData(unsigned char *data); + // save bitmap to a file + bool SaveBmp2CharDumpFile(FILE *fp) const; + // checks if a row or a column are entirely blank + bool IsBlankColumn(int x) const; + bool IsBlankRow(int y) const; + // crop the bitmap returning new dimensions + void Crop(int *xst_src, int *yst_src, int *wid, int *hgt); + // copy part of the specified bitmap + void Copy(int x, int y, int wid, int hgt, Bmp8 *bmp_dest) const; +}; +} + +#endif // BMP8_H diff --git a/cube/cached_file.cpp b/cube/cached_file.cpp new file mode 100644 index 0000000000..a482d50e7d --- /dev/null +++ b/cube/cached_file.cpp @@ -0,0 +1,150 @@ +/********************************************************************** + * File: cached_file.pp + * Description: Implementation of an Cached File Class + * Author: Ahmad Abdulkader + * Created: 2007 + * + * (C) Copyright 2008, Google Inc. + ** Licensed under the Apache License, Version 2.0 (the "License"); + ** you may not use this file except in compliance with the License. 
+ ** You may obtain a copy of the License at + ** http://www.apache.org/licenses/LICENSE-2.0 + ** Unless required by applicable law or agreed to in writing, software + ** distributed under the License is distributed on an "AS IS" BASIS, + ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + ** See the License for the specific language governing permissions and + ** limitations under the License. + * + **********************************************************************/ + +#include +#include +#include +#include "cached_file.h" + +namespace tesseract { + +CachedFile::CachedFile(string file_name) { + file_name_ = file_name; + buff_ = NULL; + buff_pos_ = 0; + buff_size_ = 0; + file_pos_ = 0; + file_size_ = 0; + fp_ = NULL; +} + +CachedFile::~CachedFile() { + if (fp_ != NULL) { + fclose(fp_); + fp_ = NULL; + } + + if (buff_ != NULL) { + delete []buff_; + buff_ = NULL; + } +} + +// free buffers and init vars +bool CachedFile::Open() { + if (fp_ != NULL) { + return true; + } + + fp_ = fopen(file_name_.c_str(), "r"); + if (fp_ == NULL) { + return false; + } + + // seek to the end + fseek(fp_, 0, SEEK_END); + // get file size + file_size_ = ftell(fp_); + if (file_size_ < 1) { + return false; + } + // rewind again + rewind(fp_); + // alloc memory for buffer + buff_ = new unsigned char[kCacheSize]; + if (buff_ == NULL) { + return false; + } + // init counters + buff_size_ = 0; + buff_pos_ = 0; + file_pos_ = 0; + return true; +} + +// add a new sample +int CachedFile::Read(void *read_buff, int bytes) { + int read_bytes = 0; + unsigned char *buff = (unsigned char *)read_buff; + + // do we need to read beyond the buffer + if ((buff_pos_ + bytes) > buff_size_) { + // copy as much bytes from the current buffer if any + int copy_bytes = buff_size_ - buff_pos_; + + if (copy_bytes > 0) { + memcpy(buff, buff_ + buff_pos_, copy_bytes); + buff += copy_bytes; + bytes -= copy_bytes; + read_bytes += copy_bytes; + } + + // determine how much to read + buff_size_ 
= kCacheSize; + + if ((file_pos_ + buff_size_) > file_size_) { + buff_size_ = static_cast(file_size_ - file_pos_); + } + + // EOF ? + if (buff_size_ <= 0 || bytes > buff_size_) { + return read_bytes; + } + + // read the first chunck + if (fread(buff_, 1, buff_size_, fp_) != buff_size_) { + return read_bytes; + } + + buff_pos_ = 0; + file_pos_ += buff_size_; + } + + memcpy(buff, buff_ + buff_pos_, bytes); + read_bytes += bytes; + buff_pos_ += bytes; + + return read_bytes; +} + +long CachedFile::Size() { + if (fp_ == NULL && Open() == false) { + return 0; + } + + return file_size_; +} + +long CachedFile::Tell() { + if (fp_ == NULL && Open() == false) { + return 0; + } + + return file_pos_ - buff_size_ + buff_pos_; +} + +bool CachedFile::eof() { + if (fp_ == NULL && Open() == false) { + return true; + } + + return (file_pos_ - buff_size_ + buff_pos_) >= file_size_; +} + +} // namespace tesseract diff --git a/cube/cached_file.h b/cube/cached_file.h new file mode 100644 index 0000000000..eb671970dd --- /dev/null +++ b/cube/cached_file.h @@ -0,0 +1,69 @@ +/********************************************************************** + * File: cached_file.h + * Description: Declaration of a Cached File class + * Author: Ahmad Abdulkader + * Created: 2007 + * + * (C) Copyright 2008, Google Inc. + ** Licensed under the Apache License, Version 2.0 (the "License"); + ** you may not use this file except in compliance with the License. + ** You may obtain a copy of the License at + ** http://www.apache.org/licenses/LICENSE-2.0 + ** Unless required by applicable law or agreed to in writing, software + ** distributed under the License is distributed on an "AS IS" BASIS, + ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + ** See the License for the specific language governing permissions and + ** limitations under the License. 
+ * + **********************************************************************/ + +#ifndef CACHED_FILE_H +#define CACHED_FILE_H + +// The CachedFile class provides a large-cache read access to a file +// It is mainly designed for loading large word dump files + +#include +#include +#ifdef USE_STD_NAMESPACE +using std::string; +#endif + +namespace tesseract { +class CachedFile { + public: + explicit CachedFile(string file_name); + ~CachedFile(); + + // reads a specified number of bytes to the specified buffer and + // returns the actual number of bytes read + int Read(void *read_buff, int bytes); + // Returns the file size + long Size(); + // returns the current position in the file + long Tell(); + // End of file flag + bool eof(); + + private: + static const unsigned int kCacheSize = 0x8000000; + // file name + string file_name_; + // internal file buffer + unsigned char *buff_; + // file position + long file_pos_; + // file size + long file_size_; + // position of file within buffer + int buff_pos_; + // buffer size + int buff_size_; + // file handle + FILE *fp_; + // Opens the file + bool Open(); +}; +} + +#endif // CACHED_FILE_H diff --git a/cube/char_altlist.cpp b/cube/char_altlist.cpp new file mode 100644 index 0000000000..c0e7776ef2 --- /dev/null +++ b/cube/char_altlist.cpp @@ -0,0 +1,115 @@ +/********************************************************************** + * File: char_altlist.cpp + * Description: Implementation of a Character Alternate List Class + * Author: Ahmad Abdulkader + * Created: 2007 + * + * (C) Copyright 2008, Google Inc. + ** Licensed under the Apache License, Version 2.0 (the "License"); + ** you may not use this file except in compliance with the License. 
+ ** You may obtain a copy of the License at + ** http://www.apache.org/licenses/LICENSE-2.0 + ** Unless required by applicable law or agreed to in writing, software + ** distributed under the License is distributed on an "AS IS" BASIS, + ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + ** See the License for the specific language governing permissions and + ** limitations under the License. + * + **********************************************************************/ + +#include "char_altlist.h" + +namespace tesseract { + +// The CharSet is not class owned and must exist for +// the life time of this class +CharAltList::CharAltList(const CharSet *char_set, int max_alt) + : AltList(max_alt) { + char_set_ = char_set; + max_alt_ = max_alt; + class_id_alt_ = NULL; + class_id_cost_ = NULL; +} + +CharAltList::~CharAltList() { + if (class_id_alt_ != NULL) { + delete []class_id_alt_; + class_id_alt_ = NULL; + } + + if (class_id_cost_ != NULL) { + delete []class_id_cost_; + class_id_cost_ = NULL; + } +} + +// Insert a new char alternate +bool CharAltList::Insert(int class_id, int cost, void *tag) { + // validate class ID + if (class_id < 0 || class_id >= char_set_->ClassCount()) { + return false; + } + + // allocate buffers if nedded + if (class_id_alt_ == NULL || alt_cost_ == NULL) { + class_id_alt_ = new int[max_alt_]; + alt_cost_ = new int[max_alt_]; + alt_tag_ = new void *[max_alt_]; + + if (class_id_alt_ == NULL || alt_cost_ == NULL || alt_tag_ == NULL) { + return false; + } + + memset(alt_tag_, 0, max_alt_ * sizeof(*alt_tag_)); + } + + if (class_id_cost_ == NULL) { + int class_cnt = char_set_->ClassCount(); + + class_id_cost_ = new int[class_cnt]; + if (class_id_cost_ == NULL) { + return false; + } + + for (int ich = 0; ich < class_cnt; ich++) { + class_id_cost_[ich] = WORST_COST; + } + } + + if (class_id < 0 || class_id >= char_set_->ClassCount()) { + return false; + } + + // insert the alternate + class_id_alt_[alt_cnt_] = class_id; + 
alt_cost_[alt_cnt_] = cost; + alt_tag_[alt_cnt_] = tag; + + alt_cnt_++; + + class_id_cost_[class_id] = cost; + + return true; +} + +// sort the alternate Desc. based on prob +void CharAltList::Sort() { + for (int alt_idx = 0; alt_idx < alt_cnt_; alt_idx++) { + for (int alt = alt_idx + 1; alt < alt_cnt_; alt++) { + if (alt_cost_[alt_idx] > alt_cost_[alt]) { + int temp = class_id_alt_[alt_idx]; + class_id_alt_[alt_idx] = class_id_alt_[alt]; + class_id_alt_[alt] = temp; + + temp = alt_cost_[alt_idx]; + alt_cost_[alt_idx] = alt_cost_[alt]; + alt_cost_[alt] = temp; + + void *tag = alt_tag_[alt_idx]; + alt_tag_[alt_idx] = alt_tag_[alt]; + alt_tag_[alt] = tag; + } + } + } +} +} diff --git a/cube/char_altlist.h b/cube/char_altlist.h new file mode 100644 index 0000000000..ef3a083a63 --- /dev/null +++ b/cube/char_altlist.h @@ -0,0 +1,70 @@ +/********************************************************************** + * File: char_altlist.h + * Description: Declaration of a Character Alternate List Class + * Author: Ahmad Abdulkader + * Created: 2007 + * + * (C) Copyright 2008, Google Inc. + ** Licensed under the Apache License, Version 2.0 (the "License"); + ** you may not use this file except in compliance with the License. + ** You may obtain a copy of the License at + ** http://www.apache.org/licenses/LICENSE-2.0 + ** Unless required by applicable law or agreed to in writing, software + ** distributed under the License is distributed on an "AS IS" BASIS, + ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + ** See the License for the specific language governing permissions and + ** limitations under the License. + * + **********************************************************************/ + +#ifndef CHAR_ALT_LIST_H +#define CHAR_ALT_LIST_H + +// The CharAltList class holds the list of class alternates returned from +// a character classifier. Each alternate represents a class ID. +// It inherits from the AltList class. 
+// The CharAltList owns a CharSet object that maps a class-id to a string. + +#include "altlist.h" +#include "char_set.h" + +namespace tesseract { +class CharAltList : public AltList { + public: + CharAltList(const CharSet *char_set, int max_alt = kMaxCharAlt); + ~CharAltList(); + + // Sort the alternate list based on cost + void Sort(); + // insert a new alternate with the specified class-id, cost and tag + bool Insert(int class_id, int cost, void *tag = NULL); + // returns the cost of a specific class ID + inline int ClassCost(int class_id) const { + if (class_id_cost_ == NULL || + class_id < 0 || + class_id >= char_set_->ClassCount()) { + return WORST_COST; + } + return class_id_cost_[class_id]; + } + // returns the alternate class-id corresponding to an alternate index + inline int Alt(int alt_idx) const { return class_id_alt_[alt_idx]; } + // set the cost of a certain alternate + void SetAltCost(int alt_idx, int cost) { + alt_cost_[alt_idx] = cost; + class_id_cost_[class_id_alt_[alt_idx]] = cost; + } + + private: + // character set object. Passed at construction time + const CharSet *char_set_; + // array of alternate class-ids + int *class_id_alt_; + // array of alternate costs + int *class_id_cost_; + // default max count of alternates + static const int kMaxCharAlt = 256; +}; +} + +#endif // CHAR_ALT_LIST_H diff --git a/cube/char_bigrams.cpp b/cube/char_bigrams.cpp new file mode 100644 index 0000000000..443daf4dd8 --- /dev/null +++ b/cube/char_bigrams.cpp @@ -0,0 +1,206 @@ +/********************************************************************** + * File: char_bigrams.cpp + * Description: Implementation of a Character Bigrams Class + * Author: Ahmad Abdulkader + * Created: 2007 + * + * (C) Copyright 2008, Google Inc. + ** Licensed under the Apache License, Version 2.0 (the "License"); + ** you may not use this file except in compliance with the License. 
+ ** You may obtain a copy of the License at + ** http://www.apache.org/licenses/LICENSE-2.0 + ** Unless required by applicable law or agreed to in writing, software + ** distributed under the License is distributed on an "AS IS" BASIS, + ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + ** See the License for the specific language governing permissions and + ** limitations under the License. + * + **********************************************************************/ + +#include +#include +#include +#include + +#include "char_bigrams.h" +#include "cube_utils.h" +#include "ndminx.h" +#include "unicharset.h" + +namespace tesseract { + +CharBigrams::CharBigrams() { + memset(&bigram_table_, 0, sizeof(bigram_table_)); +} + +CharBigrams::~CharBigrams() { + if (bigram_table_.char_bigram != NULL) { + for (int ch1 = 0; ch1 <= bigram_table_.max_char; ch1++) { + CharBigram *char_bigram = bigram_table_.char_bigram + ch1; + + if (char_bigram->bigram != NULL) { + delete []char_bigram->bigram; + } + } + delete []bigram_table_.char_bigram; + } +} + +CharBigrams *CharBigrams::Create(const string &data_file_path, + const string &lang) { + string file_name; + string str; + + file_name = data_file_path + lang; + file_name += ".cube.bigrams"; + + // load the string into memory + if (!CubeUtils::ReadFileToString(file_name, &str)) { + return NULL; + } + + // construct a new object + CharBigrams *char_bigrams_obj = new CharBigrams(); + if (char_bigrams_obj == NULL) { + fprintf(stderr, "Cube ERROR (CharBigrams::Create): could not create " + "character bigrams object.\n"); + return NULL; + } + CharBigramTable *table = &char_bigrams_obj->bigram_table_; + + table->total_cnt = 0; + table->max_char = -1; + table->char_bigram = NULL; + + // split into lines + vector str_vec; + CubeUtils::SplitStringUsing(str, "\r\n", &str_vec); + + for (int big = 0; big < str_vec.size(); big++) { + char_32 ch1; + char_32 ch2; + int cnt; + if (sscanf(str_vec[big].c_str(), "%d %x %x", 
&cnt, &ch1, &ch2) != 3) { + fprintf(stderr, "Cube ERROR (CharBigrams::Create): invalid format " + "reading line: %s\n", str_vec[big].c_str()); + return NULL; + } + + // expand the bigram table + if (ch1 > table->max_char) { + CharBigram *char_bigram = new CharBigram[ch1 + 1]; + if (char_bigram == NULL) { + fprintf(stderr, "Cube ERROR (CharBigrams::Create): error allocating " + "additional memory for character bigram table.\n"); + return NULL; + } + + if (table->char_bigram != NULL && table->max_char >= 0) { + memcpy(char_bigram, table->char_bigram, + (table->max_char + 1) * sizeof(*char_bigram)); + + delete []table->char_bigram; + } + table->char_bigram = char_bigram; + + // init + for (int new_big = table->max_char + 1; new_big <= ch1; new_big++) { + table->char_bigram[new_big].total_cnt = 0; + table->char_bigram[new_big].max_char = -1; + table->char_bigram[new_big].bigram = NULL; + } + table->max_char = ch1; + } + + if (ch2 > table->char_bigram[ch1].max_char) { + Bigram *bigram = new Bigram[ch2 + 1]; + if (bigram == NULL) { + fprintf(stderr, "Cube ERROR (CharBigrams::Create): error allocating " + "memory for bigram.\n"); + return NULL; + } + + if (table->char_bigram[ch1].bigram != NULL && + table->char_bigram[ch1].max_char >= 0) { + memcpy(bigram, table->char_bigram[ch1].bigram, + (table->char_bigram[ch1].max_char + 1) * sizeof(*bigram)); + delete []table->char_bigram[ch1].bigram; + } + table->char_bigram[ch1].bigram = bigram; + + // init + for (int new_big = table->char_bigram[ch1].max_char + 1; + new_big <= ch2; new_big++) { + table->char_bigram[ch1].bigram[new_big].cnt = 0; + } + table->char_bigram[ch1].max_char = ch2; + } + + table->char_bigram[ch1].bigram[ch2].cnt = cnt; + table->char_bigram[ch1].total_cnt += cnt; + table->total_cnt += cnt; + } + + // compute costs (-log probs) + table->worst_cost = static_cast( + -PROB2COST_SCALE * log(0.5 / table->total_cnt)); + for (char_32 ch1 = 0; ch1 <= table->max_char; ch1++) { + for (char_32 ch2 = 0; ch2 <= 
table->char_bigram[ch1].max_char; ch2++) { + int cnt = table->char_bigram[ch1].bigram[ch2].cnt; + table->char_bigram[ch1].bigram[ch2].cost = + static_cast(-PROB2COST_SCALE * + log(MAX(0.5, static_cast(cnt)) / + table->total_cnt)); + } + } + return char_bigrams_obj; +} + +int CharBigrams::PairCost(char_32 ch1, char_32 ch2) const { + if (ch1 > bigram_table_.max_char) { + return bigram_table_.worst_cost; + } + if (ch2 > bigram_table_.char_bigram[ch1].max_char) { + return bigram_table_.worst_cost; + } + return bigram_table_.char_bigram[ch1].bigram[ch2].cost; +} + +int CharBigrams::Cost(const char_32 *char_32_ptr, CharSet *char_set, + UNICHARSET *unicharset) const { + if (!char_32_ptr || char_32_ptr[0] == 0) { + return bigram_table_.worst_cost; + } + int cost = MeanCostWithSpaces(char_32_ptr); + if (CubeUtils::StrLen(char_32_ptr) >= kMinLengthCaseInvariant && + CubeUtils::IsCaseInvariant(char_32_ptr, char_set, unicharset)) { + char_32 *lower_32 = CubeUtils::ToLower(char_32_ptr, char_set, unicharset); + if (lower_32 && lower_32[0] != 0) { + int cost_lower = MeanCostWithSpaces(lower_32); + cost = MIN(cost, cost_lower); + delete [] lower_32; + } + char_32 *upper_32 = CubeUtils::ToUpper(char_32_ptr, char_set, unicharset); + if (upper_32 && upper_32[0] != 0) { + int cost_upper = MeanCostWithSpaces(upper_32); + cost = MIN(cost, cost_upper); + delete [] upper_32; + } + } + return cost; +} + +int CharBigrams::MeanCostWithSpaces(const char_32 *char_32_ptr) const { + if (!char_32_ptr) + return bigram_table_.worst_cost; + int len = CubeUtils::StrLen(char_32_ptr); + int cost = 0; + int c = 0; + cost = PairCost(' ', char_32_ptr[0]); + for (c = 1; c < len; c++) { + cost += PairCost(char_32_ptr[c - 1], char_32_ptr[c]); + } + cost += PairCost(char_32_ptr[len - 1], ' '); + return static_cast(cost / static_cast(len + 1)); +} +} // namespace tesseract diff --git a/cube/char_bigrams.h b/cube/char_bigrams.h new file mode 100644 index 0000000000..ae6ba8e21f --- /dev/null +++ 
b/cube/char_bigrams.h @@ -0,0 +1,90 @@ +/********************************************************************** + * File: char_bigrams.h + * Description: Declaration of a Character Bigrams Class + * Author: Ahmad Abdulkader + * Created: 2007 + * + * (C) Copyright 2008, Google Inc. + ** Licensed under the Apache License, Version 2.0 (the "License"); + ** you may not use this file except in compliance with the License. + ** You may obtain a copy of the License at + ** http://www.apache.org/licenses/LICENSE-2.0 + ** Unless required by applicable law or agreed to in writing, software + ** distributed under the License is distributed on an "AS IS" BASIS, + ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + ** See the License for the specific language governing permissions and + ** limitations under the License. + * + **********************************************************************/ + +// The CharBigram class represents the interface to the character bigram +// table used by Cube +// A CharBigram object can be constructed from the Char Bigrams file +// Given a sequence of characters, the "Cost" method returns the Char Bigram +// cost of the string according to the table + +#ifndef CHAR_BIGRAMS_H +#define CHAR_BIGRAMS_H + +#include +#include "char_set.h" + +namespace tesseract { + +// structure representing a single bigram value +struct Bigram { + int cnt; + int cost; +}; + +// structure representing the char bigram array of characters +// following a specific character +struct CharBigram { + int total_cnt; + char_32 max_char; + Bigram *bigram; +}; + +// structure representing the whole bigram table +struct CharBigramTable { + int total_cnt; + int worst_cost; + char_32 max_char; + CharBigram *char_bigram; +}; + +class CharBigrams { + public: + CharBigrams(); + ~CharBigrams(); + // Construct the CharBigrams class from a file + static CharBigrams *Create(const string &data_file_path, + const string &lang); + // Top-level function to return 
the mean character bigram cost of a + // sequence of characters. If char_set and unicharset are not NULL + // and cube and tesseract share the same unicharset, use + // tesseract functions to return a case-invariant cost. + // This avoids unnecessarily penalizing all-one-case words or + // capitalized words (first-letter upper-case and remaining letters + // lower-case). + int Cost(const char_32 *str, CharSet *char_set, UNICHARSET *unicharset) const; + + protected: + // Returns the character bigram cost of two characters. + int PairCost(char_32 ch1, char_32 ch2) const; + // Returns the mean character bigram cost of a sequence of + // characters. Adds a space at the beginning and end to account for + // cost of starting and ending characters. + int MeanCostWithSpaces(const char_32 *char_32_ptr) const; + + private: + // Only words this length or greater qualify for case-invariant character + // bigram cost. + static const int kMinLengthCaseInvariant = 4; + + + CharBigramTable bigram_table_; +}; +} + +#endif // CHAR_BIGRAMS_H diff --git a/cube/char_samp.cpp b/cube/char_samp.cpp new file mode 100644 index 0000000000..80e29b74bd --- /dev/null +++ b/cube/char_samp.cpp @@ -0,0 +1,679 @@ +/********************************************************************** + * File: char_samp.cpp + * Description: Implementation of a Character Bitmap Sample Class + * Author: Ahmad Abdulkader + * Created: 2007 + * + * (C) Copyright 2008, Google Inc. + ** Licensed under the Apache License, Version 2.0 (the "License"); + ** you may not use this file except in compliance with the License. + ** You may obtain a copy of the License at + ** http://www.apache.org/licenses/LICENSE-2.0 + ** Unless required by applicable law or agreed to in writing, software + ** distributed under the License is distributed on an "AS IS" BASIS, + ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ ** See the License for the specific language governing permissions and + ** limitations under the License. + * + **********************************************************************/ + +#include +#include +#include "char_samp.h" +#include "cube_utils.h" + +namespace tesseract { + +#define MAX_LINE_LEN 1024 + +CharSamp::CharSamp() + : Bmp8(0, 0) { + left_ = 0; + top_ = 0; + label32_ = NULL; + page_ = -1; +} + +CharSamp::CharSamp(int wid, int hgt) + : Bmp8(wid, hgt) { + left_ = 0; + top_ = 0; + label32_ = NULL; + page_ = -1; +} + +CharSamp::CharSamp(int left, int top, int wid, int hgt) + : Bmp8(wid, hgt) + , left_(left) + , top_(top) { + label32_ = NULL; + page_ = -1; +} + +CharSamp::~CharSamp() { + if (label32_ != NULL) { + delete []label32_; + label32_ = NULL; + } +} + +// returns a UTF-8 version of the string label +string CharSamp::stringLabel() const { + string str = ""; + if (label32_ != NULL) { + string_32 str32(label32_); + CubeUtils::UTF32ToUTF8(str32.c_str(), &str); + } + return str; +} + +// set a the string label using a UTF encoded string +void CharSamp::SetLabel(string str) { + if (label32_ != NULL) { + delete []label32_; + label32_ = NULL; + } + string_32 str32; + CubeUtils::UTF8ToUTF32(str.c_str(), &str32); + SetLabel(reinterpret_cast(str32.c_str())); +} + +// creates a CharSamp object from file +CharSamp *CharSamp::FromCharDumpFile(CachedFile *fp) { + unsigned short left; + unsigned short top; + unsigned short page; + unsigned short first_char; + unsigned short last_char; + unsigned short norm_top; + unsigned short norm_bottom; + unsigned short norm_aspect_ratio; + unsigned int val32; + + char_32 *label32; + + // read and check 32 bit marker + if (fp->Read(&val32, sizeof(val32)) != sizeof(val32)) { + return NULL; + } + if (val32 != 0xabd0fefe) { + return NULL; + } + // read label length, + if (fp->Read(&val32, sizeof(val32)) != sizeof(val32)) { + return NULL; + } + // the label is not null terminated in the file + if (val32 > 0) { + label32 = new 
char_32[val32 + 1]; + if (label32 == NULL) { + return NULL; + } + // read label + if (fp->Read(label32, val32 * sizeof(*label32)) != + (val32 * sizeof(*label32))) { + return NULL; + } + // null terminate + label32[val32] = 0; + } else { + label32 = NULL; + } + // read coordinates + if (fp->Read(&page, sizeof(page)) != sizeof(page)) { + return NULL; + } + if (fp->Read(&left, sizeof(left)) != sizeof(left)) { + return NULL; + } + if (fp->Read(&top, sizeof(top)) != sizeof(top)) { + return NULL; + } + if (fp->Read(&first_char, sizeof(first_char)) != sizeof(first_char)) { + return NULL; + } + if (fp->Read(&last_char, sizeof(last_char)) != sizeof(last_char)) { + return NULL; + } + if (fp->Read(&norm_top, sizeof(norm_top)) != sizeof(norm_top)) { + return NULL; + } + if (fp->Read(&norm_bottom, sizeof(norm_bottom)) != sizeof(norm_bottom)) { + return NULL; + } + if (fp->Read(&norm_aspect_ratio, sizeof(norm_aspect_ratio)) != + sizeof(norm_aspect_ratio)) { + return NULL; + } + // create the object + CharSamp *char_samp = new CharSamp(); + if (char_samp == NULL) { + return NULL; + } + // init + char_samp->label32_ = label32; + char_samp->page_ = page; + char_samp->left_ = left; + char_samp->top_ = top; + char_samp->first_char_ = first_char; + char_samp->last_char_ = last_char; + char_samp->norm_top_ = norm_top; + char_samp->norm_bottom_ = norm_bottom; + char_samp->norm_aspect_ratio_ = norm_aspect_ratio; + // load the Bmp8 part + if (char_samp->LoadFromCharDumpFile(fp) == false) { + delete char_samp; + return NULL; + } + return char_samp; +} + +// Load a Char Samp from a dump file +CharSamp *CharSamp::FromCharDumpFile(FILE *fp) { + unsigned short left; + unsigned short top; + unsigned short page; + unsigned short first_char; + unsigned short last_char; + unsigned short norm_top; + unsigned short norm_bottom; + unsigned short norm_aspect_ratio; + unsigned int val32; + char_32 *label32; + + // read and check 32 bit marker + if (fread(&val32, 1, sizeof(val32), fp) != sizeof(val32)) 
{ + return NULL; + } + if (val32 != 0xabd0fefe) { + return NULL; + } + // read label length, + if (fread(&val32, 1, sizeof(val32), fp) != sizeof(val32)) { + return NULL; + } + // the label is not null terminated in the file + if (val32 > 0) { + label32 = new char_32[val32 + 1]; + if (label32 == NULL) { + return NULL; + } + // read label + if (fread(label32, 1, val32 * sizeof(*label32), fp) != + (val32 * sizeof(*label32))) { + return NULL; + } + // null terminate + label32[val32] = 0; + } else { + label32 = NULL; + } + // read coordinates + if (fread(&page, 1, sizeof(page), fp) != sizeof(page)) { + return NULL; + } + if (fread(&left, 1, sizeof(left), fp) != sizeof(left)) { + return NULL; + } + if (fread(&top, 1, sizeof(top), fp) != sizeof(top)) { + return NULL; + } + if (fread(&first_char, 1, sizeof(first_char), fp) != sizeof(first_char)) { + return NULL; + } + if (fread(&last_char, 1, sizeof(last_char), fp) != sizeof(last_char)) { + return NULL; + } + if (fread(&norm_top, 1, sizeof(norm_top), fp) != sizeof(norm_top)) { + return NULL; + } + if (fread(&norm_bottom, 1, sizeof(norm_bottom), fp) != sizeof(norm_bottom)) { + return NULL; + } + if (fread(&norm_aspect_ratio, 1, sizeof(norm_aspect_ratio), fp) != + sizeof(norm_aspect_ratio)) { + return NULL; + } + // create the object + CharSamp *char_samp = new CharSamp(); + if (char_samp == NULL) { + return NULL; + } + // init + char_samp->label32_ = label32; + char_samp->page_ = page; + char_samp->left_ = left; + char_samp->top_ = top; + char_samp->first_char_ = first_char; + char_samp->last_char_ = last_char; + char_samp->norm_top_ = norm_top; + char_samp->norm_bottom_ = norm_bottom; + char_samp->norm_aspect_ratio_ = norm_aspect_ratio; + // load the Bmp8 part + if (char_samp->LoadFromCharDumpFile(fp) == false) { + return NULL; + } + return char_samp; +} + +// returns a copy of the charsamp that is scaled to the +// specified width and height +CharSamp *CharSamp::Scale(int wid, int hgt, bool isotropic) { + CharSamp 
*scaled_samp = new CharSamp(wid, hgt); + if (scaled_samp == NULL) { + return NULL; + } + if (scaled_samp->ScaleFrom(this, isotropic) == false) { + delete scaled_samp; + return NULL; + } + scaled_samp->left_ = left_; + scaled_samp->top_ = top_; + scaled_samp->page_ = page_; + scaled_samp->SetLabel(label32_); + scaled_samp->first_char_ = first_char_; + scaled_samp->last_char_ = last_char_; + scaled_samp->norm_top_ = norm_top_; + scaled_samp->norm_bottom_ = norm_bottom_; + scaled_samp->norm_aspect_ratio_ = norm_aspect_ratio_; + return scaled_samp; +} + +// Load a Char Samp from a dump file +CharSamp *CharSamp::FromRawData(int left, int top, int wid, int hgt, + unsigned char *data) { + // create the object + CharSamp *char_samp = new CharSamp(left, top, wid, hgt); + if (char_samp == NULL) { + return NULL; + } + if (char_samp->LoadFromRawData(data) == false) { + delete char_samp; + return NULL; + } + return char_samp; +} + +// Saves the charsamp to a dump file +bool CharSamp::Save2CharDumpFile(FILE *fp) const { + unsigned int val32; + // write and check 32 bit marker + val32 = 0xabd0fefe; + if (fwrite(&val32, 1, sizeof(val32), fp) != sizeof(val32)) { + return false; + } + // write label length + val32 = (label32_ == NULL) ? 
0 : LabelLen(label32_); + if (fwrite(&val32, 1, sizeof(val32), fp) != sizeof(val32)) { + return false; + } + // write label + if (label32_ != NULL) { + if (fwrite(label32_, 1, val32 * sizeof(*label32_), fp) != + (val32 * sizeof(*label32_))) { + return false; + } + } + // write coordinates + if (fwrite(&page_, 1, sizeof(page_), fp) != sizeof(page_)) { + return false; + } + if (fwrite(&left_, 1, sizeof(left_), fp) != sizeof(left_)) { + return false; + } + if (fwrite(&top_, 1, sizeof(top_), fp) != sizeof(top_)) { + return false; + } + if (fwrite(&first_char_, 1, sizeof(first_char_), fp) != + sizeof(first_char_)) { + return false; + } + if (fwrite(&last_char_, 1, sizeof(last_char_), fp) != sizeof(last_char_)) { + return false; + } + if (fwrite(&norm_top_, 1, sizeof(norm_top_), fp) != sizeof(norm_top_)) { + return false; + } + if (fwrite(&norm_bottom_, 1, sizeof(norm_bottom_), fp) != + sizeof(norm_bottom_)) { + return false; + } + if (fwrite(&norm_aspect_ratio_, 1, sizeof(norm_aspect_ratio_), fp) != + sizeof(norm_aspect_ratio_)) { + return false; + } + if (SaveBmp2CharDumpFile(fp) == false) { + return false; + } + return true; +} + +// Crop the char samp such that there are no white spaces on any side. +// The norm_top_ and norm_bottom_ fields are the character top/bottom +// with respect to whatever context the character is being recognized +// in (e.g. word bounding box) normalized to a standard size of +// 255. Here they default to 0 and 255 (word box boundaries), but +// since they are context dependent, they may need to be reset by the +// calling function. 
+CharSamp *CharSamp::Crop() { + // get the dimesions of the cropped img + int cropped_left = 0; + int cropped_top = 0; + int cropped_wid = wid_; + int cropped_hgt = hgt_; + Bmp8::Crop(&cropped_left, &cropped_top, + &cropped_wid, &cropped_hgt); + + if (cropped_wid == 0 || cropped_hgt == 0) { + return NULL; + } + // create the cropped char samp + CharSamp *cropped_samp = new CharSamp(left_ + cropped_left, + top_ + cropped_top, + cropped_wid, cropped_hgt); + cropped_samp->SetLabel(label32_); + cropped_samp->SetFirstChar(first_char_); + cropped_samp->SetLastChar(last_char_); + // the following 3 fields may/should be reset by the calling function + // using context information, i.e., location of character box + // w.r.t. the word bounding box + cropped_samp->SetNormAspectRatio(255 * + cropped_wid / (cropped_wid + cropped_hgt)); + cropped_samp->SetNormTop(0); + cropped_samp->SetNormBottom(255); + + // copy the bitmap to the cropped img + Copy(cropped_left, cropped_top, cropped_wid, cropped_hgt, cropped_samp); + return cropped_samp; +} + +// segment the char samp to connected components +// based on contiguity and vertical pixel density histogram +ConComp **CharSamp::Segment(int *segment_cnt, bool right_2_left, + int max_hist_wnd, int min_con_comp_size) const { + // init + (*segment_cnt) = 0; + int concomp_cnt = 0; + int seg_cnt = 0; + // find the concomps of the image + ConComp **concomp_array = FindConComps(&concomp_cnt, min_con_comp_size); + if (concomp_cnt <= 0 || !concomp_array) { + if (concomp_array) + delete []concomp_array; + return NULL; + } + ConComp **seg_array = NULL; + // segment each concomp further using vertical histogram + for (int concomp = 0; concomp < concomp_cnt; concomp++) { + int concomp_seg_cnt = 0; + // segment the concomp + ConComp **concomp_seg_array = NULL; + ConComp **concomp_alloc_seg = + concomp_array[concomp]->Segment(max_hist_wnd, &concomp_seg_cnt); + // no segments, add the whole concomp + if (concomp_alloc_seg == NULL) { + 
concomp_seg_cnt = 1; + concomp_seg_array = concomp_array + concomp; + } else { + // delete the original concomp, we no longer need it + concomp_seg_array = concomp_alloc_seg; + delete concomp_array[concomp]; + } + // add the resulting segments + for (int seg_idx = 0; seg_idx < concomp_seg_cnt; seg_idx++) { + // too small of a segment: ignore + if (concomp_seg_array[seg_idx]->Width() < 2 && + concomp_seg_array[seg_idx]->Height() < 2) { + delete concomp_seg_array[seg_idx]; + } else { + // add the new segment + // extend the segment array + if ((seg_cnt % kConCompAllocChunk) == 0) { + ConComp **temp_segm_array = + new ConComp *[seg_cnt + kConCompAllocChunk]; + if (temp_segm_array == NULL) { + fprintf(stderr, "Cube ERROR (CharSamp::Segment): could not " + "allocate additional connected components\n"); + delete []concomp_seg_array; + delete []concomp_array; + delete []seg_array; + return NULL; + } + if (seg_cnt > 0) { + memcpy(temp_segm_array, seg_array, seg_cnt * sizeof(*seg_array)); + delete []seg_array; + } + seg_array = temp_segm_array; + } + seg_array[seg_cnt++] = concomp_seg_array[seg_idx]; + } + } // segment + if (concomp_alloc_seg != NULL) { + delete []concomp_alloc_seg; + } + } // concomp + delete []concomp_array; + + // sort the concomps from Left2Right or Right2Left, based on the reading order + if (seg_cnt > 0 && seg_array != NULL) { + qsort(seg_array, seg_cnt, sizeof(*seg_array), right_2_left ? 
+ ConComp::Right2LeftComparer : ConComp::Left2RightComparer); + } + (*segment_cnt) = seg_cnt; + return seg_array; +} + +// builds a char samp from a set of connected components +CharSamp *CharSamp::FromConComps(ConComp **concomp_array, int strt_concomp, + int seg_flags_size, int *seg_flags, + bool *left_most, bool *right_most, + int word_hgt) { + int concomp; + int end_concomp; + int concomp_cnt = 0; + end_concomp = strt_concomp + seg_flags_size; + // determine ID range + bool once = false; + int min_id = -1; + int max_id = -1; + for (concomp = strt_concomp; concomp < end_concomp; concomp++) { + if (!seg_flags || seg_flags[concomp - strt_concomp] != 0) { + if (!once) { + min_id = concomp_array[concomp]->ID(); + max_id = concomp_array[concomp]->ID(); + once = true; + } else { + UpdateRange(concomp_array[concomp]->ID(), &min_id, &max_id); + } + concomp_cnt++; + } + } + if (concomp_cnt < 1 || !once || min_id == -1 || max_id == -1) { + return NULL; + } + // alloc memo for computing leftmost and right most attributes + int id_cnt = max_id - min_id + 1; + bool *id_exist = new bool[id_cnt]; + bool *left_most_exist = new bool[id_cnt]; + bool *right_most_exist = new bool[id_cnt]; + if (!id_exist || !left_most_exist || !right_most_exist) + return NULL; + memset(id_exist, 0, id_cnt * sizeof(*id_exist)); + memset(left_most_exist, 0, id_cnt * sizeof(*left_most_exist)); + memset(right_most_exist, 0, id_cnt * sizeof(*right_most_exist)); + // find the dimensions of the charsamp + once = false; + int left = -1; + int right = -1; + int top = -1; + int bottom = -1; + int unq_ids = 0; + int unq_left_most = 0; + int unq_right_most = 0; + for (concomp = strt_concomp; concomp < end_concomp; concomp++) { + if (!seg_flags || seg_flags[concomp - strt_concomp] != 0) { + if (!once) { + left = concomp_array[concomp]->Left(); + right = concomp_array[concomp]->Right(); + top = concomp_array[concomp]->Top(); + bottom = concomp_array[concomp]->Bottom(); + once = true; + } else { + 
UpdateRange(concomp_array[concomp]->Left(), + concomp_array[concomp]->Right(), &left, &right); + UpdateRange(concomp_array[concomp]->Top(), + concomp_array[concomp]->Bottom(), &top, &bottom); + } + // count unq ids, unq left most and right mosts ids + int concomp_id = concomp_array[concomp]->ID() - min_id; + if (!id_exist[concomp_id]) { + id_exist[concomp_id] = true; + unq_ids++; + } + if (concomp_array[concomp]->LeftMost()) { + if (left_most_exist[concomp_id] == false) { + left_most_exist[concomp_id] = true; + unq_left_most++; + } + } + if (concomp_array[concomp]->RightMost()) { + if (right_most_exist[concomp_id] == false) { + right_most_exist[concomp_id] = true; + unq_right_most++; + } + } + } + } + delete []id_exist; + delete []left_most_exist; + delete []right_most_exist; + if (!once || left == -1 || top == -1 || right == -1 || bottom == -1) { + return NULL; + } + (*left_most) = (unq_left_most >= unq_ids); + (*right_most) = (unq_right_most >= unq_ids); + // create the char sample object + CharSamp *samp = new CharSamp(left, top, right - left + 1, bottom - top + 1); + if (!samp) { + return NULL; + } + + // set the foreground pixels + for (concomp = strt_concomp; concomp < end_concomp; concomp++) { + if (!seg_flags || seg_flags[concomp - strt_concomp] != 0) { + ConCompPt *pt_ptr = concomp_array[concomp]->Head(); + while (pt_ptr) { + samp->line_buff_[pt_ptr->y() - top][pt_ptr->x() - left] = 0; + pt_ptr = pt_ptr->Next(); + } + } + } + return samp; +} + +// clones the object +CharSamp *CharSamp::Clone() const { + // create the cropped char samp + CharSamp *samp = new CharSamp(left_, top_, wid_, hgt_); + samp->SetLabel(label32_); + samp->SetFirstChar(first_char_); + samp->SetLastChar(last_char_); + samp->SetNormTop(norm_top_); + samp->SetNormBottom(norm_bottom_); + samp->SetNormAspectRatio(norm_aspect_ratio_); + // copy the bitmap to the cropped img + Copy(0, 0, wid_, hgt_, samp); + return samp; +} + +// Load a Char Samp from a dump file +CharSamp 
*CharSamp::FromCharDumpFile(unsigned char **raw_data_ptr) { + unsigned int val32; + char_32 *label32; + unsigned char *raw_data = *raw_data_ptr; + + // read and check 32 bit marker + memcpy(&val32, raw_data, sizeof(val32)); + raw_data += sizeof(val32); + if (val32 != 0xabd0fefe) { + return NULL; + } + // read label length, + memcpy(&val32, raw_data, sizeof(val32)); + raw_data += sizeof(val32); + // the label is not null terminated in the file + if (val32 > 0) { + label32 = new char_32[val32 + 1]; + if (label32 == NULL) { + return NULL; + } + // read label + memcpy(label32, raw_data, val32 * sizeof(*label32)); + raw_data += (val32 * sizeof(*label32)); + // null terminate + label32[val32] = 0; + } else { + label32 = NULL; + } + + // create the object + CharSamp *char_samp = new CharSamp(); + if (char_samp == NULL) { + return NULL; + } + + // read coordinates + char_samp->label32_ = label32; + memcpy(&char_samp->page_, raw_data, sizeof(char_samp->page_)); + raw_data += sizeof(char_samp->page_); + memcpy(&char_samp->left_, raw_data, sizeof(char_samp->left_)); + raw_data += sizeof(char_samp->left_); + memcpy(&char_samp->top_, raw_data, sizeof(char_samp->top_)); + raw_data += sizeof(char_samp->top_); + memcpy(&char_samp->first_char_, raw_data, sizeof(char_samp->first_char_)); + raw_data += sizeof(char_samp->first_char_); + memcpy(&char_samp->last_char_, raw_data, sizeof(char_samp->last_char_)); + raw_data += sizeof(char_samp->last_char_); + memcpy(&char_samp->norm_top_, raw_data, sizeof(char_samp->norm_top_)); + raw_data += sizeof(char_samp->norm_top_); + memcpy(&char_samp->norm_bottom_, raw_data, sizeof(char_samp->norm_bottom_)); + raw_data += sizeof(char_samp->norm_bottom_); + memcpy(&char_samp->norm_aspect_ratio_, raw_data, + sizeof(char_samp->norm_aspect_ratio_)); + raw_data += sizeof(char_samp->norm_aspect_ratio_); + + // load the Bmp8 part + if (char_samp->LoadFromCharDumpFile(&raw_data) == false) { + delete char_samp; + return NULL; + } + + (*raw_data_ptr) = 
raw_data; + return char_samp; +} + +// computes the features corresponding to the char sample +bool CharSamp::ComputeFeatures(int conv_grid_size, float *features) { + // Create a scaled BMP + CharSamp *scaled_bmp = Scale(conv_grid_size, conv_grid_size); + if (!scaled_bmp) { + return false; + } + // prepare input + unsigned char *buff = scaled_bmp->RawData(); + // bitmap features + int input; + int bmp_size = conv_grid_size * conv_grid_size; + for (input = 0; input < bmp_size; input++) { + features[input] = 255.0f - (1.0f * buff[input]); + } + // word context features + features[input++] = FirstChar(); + features[input++] = LastChar(); + features[input++] = NormTop(); + features[input++] = NormBottom(); + features[input++] = NormAspectRatio(); + delete scaled_bmp; + return true; +} +} // namespace tesseract diff --git a/cube/char_samp.h b/cube/char_samp.h new file mode 100644 index 0000000000..700a11d079 --- /dev/null +++ b/cube/char_samp.h @@ -0,0 +1,166 @@ +/********************************************************************** + * File: char_samp.h + * Description: Declaration of a Character Bitmap Sample Class + * Author: Ahmad Abdulkader + * Created: 2007 + * + * (C) Copyright 2008, Google Inc. + ** Licensed under the Apache License, Version 2.0 (the "License"); + ** you may not use this file except in compliance with the License. + ** You may obtain a copy of the License at + ** http://www.apache.org/licenses/LICENSE-2.0 + ** Unless required by applicable law or agreed to in writing, software + ** distributed under the License is distributed on an "AS IS" BASIS, + ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + ** See the License for the specific language governing permissions and + ** limitations under the License. 
+ * + **********************************************************************/ + +// The CharSamp inherits the Bmp8 class that represents images of +// words, characters and segments throughout Cube +// CharSamp adds more data members to hold the physical location of the image +// in a page, page number in a book if available. +// It also holds the label (GT) of the image that might correspond to a single +// character or a word +// It also provides methods for segmenting, scaling and cropping of the sample + +#ifndef CHAR_SAMP_H +#define CHAR_SAMP_H + +#include +#include +#include +#include "bmp_8.h" +#include "string_32.h" + +namespace tesseract { + +class CharSamp : public Bmp8 { + public: + CharSamp(); + CharSamp(int wid, int hgt); + CharSamp(int left, int top, int wid, int hgt); + ~CharSamp(); + // accessor methods + unsigned short Left() const { return left_; } + unsigned short Right() const { return left_ + wid_; } + unsigned short Top() const { return top_; } + unsigned short Bottom() const { return top_ + hgt_; } + unsigned short Page() const { return page_; } + unsigned short NormTop() const { return norm_top_; } + unsigned short NormBottom() const { return norm_bottom_; } + unsigned short NormAspectRatio() const { return norm_aspect_ratio_; } + unsigned short FirstChar() const { return first_char_; } + unsigned short LastChar() const { return last_char_; } + char_32 Label() const { + if (label32_ == NULL || LabelLen() != 1) { + return 0; + } + return label32_[0]; + } + char_32 * StrLabel() const { return label32_; } + string stringLabel() const; + + void SetLeft(unsigned short left) { left_ = left; } + void SetTop(unsigned short top) { top_ = top; } + void SetPage(unsigned short page) { page_ = page; } + void SetLabel(char_32 label) { + if (label32_ != NULL) { + delete []label32_; + } + label32_ = new char_32[2]; + if (label32_ != NULL) { + label32_[0] = label; + label32_[1] = 0; + } + } + void SetLabel(const char_32 *label32) { + if (label32_ != NULL) { 
+ delete []label32_; + label32_ = NULL; + } + if (label32 != NULL) { + // remove any byte order markes if any + if (label32[0] == 0xfeff) { + label32++; + } + int len = LabelLen(label32); + label32_ = new char_32[len + 1]; + if (label32_ != NULL) { + memcpy(label32_, label32, len * sizeof(*label32)); + label32_[len] = 0; + } + } + } + void SetLabel(string str); + void SetNormTop(unsigned short norm_top) { norm_top_ = norm_top; } + void SetNormBottom(unsigned short norm_bottom) { + norm_bottom_ = norm_bottom; + } + void SetNormAspectRatio(unsigned short norm_aspect_ratio) { + norm_aspect_ratio_ = norm_aspect_ratio; + } + void SetFirstChar(unsigned short first_char) { + first_char_ = first_char; + } + void SetLastChar(unsigned short last_char) { + last_char_ = last_char; + } + + // Saves the charsamp to a dump file + bool Save2CharDumpFile(FILE *fp) const; + // Crops the underlying image and returns a new CharSamp with the + // same character information but new dimensions. Warning: does not + // necessarily set the normalized top and bottom correctly since + // those depend on its location within the word (or CubeSearchObject). 
+ CharSamp *Crop(); + // Computes the connected components of the char sample + ConComp **Segment(int *seg_cnt, bool right_2_left, int max_hist_wnd, + int min_con_comp_size) const; + // returns a copy of the charsamp that is scaled to the + // specified width and height + CharSamp *Scale(int wid, int hgt, bool isotropic = true); + // returns a Clone of the charsample + CharSamp *Clone() const; + // computes the features corresponding to the char sample + bool ComputeFeatures(int conv_grid_size, float *features); + // Load a Char Samp from a dump file + static CharSamp *FromCharDumpFile(CachedFile *fp); + static CharSamp *FromCharDumpFile(FILE *fp); + static CharSamp *FromCharDumpFile(unsigned char **raw_data); + static CharSamp *FromRawData(int left, int top, int wid, int hgt, + unsigned char *data); + static CharSamp *FromConComps(ConComp **concomp_array, + int strt_concomp, int seg_flags_size, + int *seg_flags, bool *left_most, + bool *right_most, int word_hgt); + static int AuxFeatureCnt() { return (5); } + // Return the length of the label string + int LabelLen() const { return LabelLen(label32_); } + static int LabelLen(const char_32 *label32) { + if (label32 == NULL) { + return 0; + } + int len = 0; + while (label32[++len] != 0); + return len; + } + private: + char_32 * label32_; + unsigned short page_; + unsigned short left_; + unsigned short top_; + // top of sample normalized to a word height of 255 + unsigned short norm_top_; + // bottom of sample normalized to a word height of 255 + unsigned short norm_bottom_; + // 255 * ratio of character width to (width + height) + unsigned short norm_aspect_ratio_; + unsigned short first_char_; + unsigned short last_char_; +}; + +} + +#endif // CHAR_SAMP_H diff --git a/ccmain/blobcmp.h b/cube/char_samp_enum.cpp similarity index 68% rename from ccmain/blobcmp.h rename to cube/char_samp_enum.cpp index ad73106acd..46d9b209e3 100644 --- a/ccmain/blobcmp.h +++ b/cube/char_samp_enum.cpp @@ -1,10 +1,10 @@ 
/********************************************************************** - * File: blobcmp.c - * Description: Code to compare blobs using the adaptive matcher. - * Author: Ray Smith - * Created: Wed Apr 21 09:28:51 BST 1993 + * File: char_samp_enum.cpp + * Description: Implementation of a Character Sample Enumerator Class + * Author: Ahmad Abdulkader + * Created: 2007 * - * (C) Copyright 1993, Hewlett-Packard Ltd. + * (C) Copyright 2008, Google Inc. ** Licensed under the Apache License, Version 2.0 (the "License"); ** you may not use this file except in compliance with the License. ** You may obtain a copy of the License at @@ -17,9 +17,14 @@ * **********************************************************************/ -#ifndef BLOBCMP_H -#define BLOBCMP_H +#include "char_samp_enum.h" -#include "tstruct.h" +namespace tesseract { -#endif +CharSampEnum::CharSampEnum() { +} + +CharSampEnum::~CharSampEnum() { +} + +} // namespace ocrlib diff --git a/ccmain/callnet.h b/cube/char_samp_enum.h similarity index 54% rename from ccmain/callnet.h rename to cube/char_samp_enum.h index ea5ab51d64..93768e0f38 100644 --- a/ccmain/callnet.h +++ b/cube/char_samp_enum.h @@ -1,10 +1,10 @@ /********************************************************************** - * File: callnet.h (Formerly callnet.h) - * Description: Interface to Neural Net matcher - * Author: Phil Cheatle - * Created: Wed Nov 18 10:35:00 GMT 1992 + * File: char_samp_enum.h + * Description: Declaration of a Character Sample Enumerator Class + * Author: Ahmad Abdulkader + * Created: 2007 * - * (C) Copyright 1992, Hewlett-Packard Ltd. + * (C) Copyright 2008, Google Inc. ** Licensed under the Apache License, Version 2.0 (the "License"); ** you may not use this file except in compliance with the License. 
** You may obtain a copy of the License at @@ -17,19 +17,22 @@ * **********************************************************************/ -#ifndef CALLNET_H -#define CALLNET_H +// The CharSampEnum class provides the base class for CharSamp class +// Enumerators. This is typically used to implement dump file readers -// extern "C" { -/** Initialise net */ -void init_net(); +#ifndef CHARSAMP_ENUM_H +#define CHARSAMP_ENUM_H -/** Apply image to net */ -void callnet( - float *input_vector, - char *top, - float *top_score, - char *next, - float *next_score); -// }; -#endif +#include "char_samp.h" + +namespace tesseract { + +class CharSampEnum { + public: + CharSampEnum(); + virtual ~CharSampEnum(); + virtual bool EnumCharSamp(CharSamp *char_samp, float progress) = 0; +}; +} + +#endif // CHARSAMP_ENUM_H diff --git a/cube/char_samp_set.cpp b/cube/char_samp_set.cpp new file mode 100644 index 0000000000..ad749c9ae0 --- /dev/null +++ b/cube/char_samp_set.cpp @@ -0,0 +1,178 @@ +/********************************************************************** + * File: char_samp_enum.cpp + * Description: Implementation of a Character Sample Set Class + * Author: Ahmad Abdulkader + * Created: 2007 + * + * (C) Copyright 2008, Google Inc. + ** Licensed under the Apache License, Version 2.0 (the "License"); + ** you may not use this file except in compliance with the License. + ** You may obtain a copy of the License at + ** http://www.apache.org/licenses/LICENSE-2.0 + ** Unless required by applicable law or agreed to in writing, software + ** distributed under the License is distributed on an "AS IS" BASIS, + ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + ** See the License for the specific language governing permissions and + ** limitations under the License. 
+ * + **********************************************************************/ + +#include +#include +#include "char_samp_set.h" +#include "cached_file.h" + +namespace tesseract { + +CharSampSet::CharSampSet() { + cnt_ = 0; + samp_buff_ = NULL; + own_samples_ = false; +} + +CharSampSet::~CharSampSet() { + Cleanup(); +} + +// free buffers and init vars +void CharSampSet::Cleanup() { + if (samp_buff_ != NULL) { + // only free samples if owned by class + if (own_samples_ == true) { + for (int samp_idx = 0; samp_idx < cnt_; samp_idx++) { + if (samp_buff_[samp_idx] != NULL) { + delete samp_buff_[samp_idx]; + } + } + } + delete []samp_buff_; + } + cnt_ = 0; + samp_buff_ = NULL; +} + +// add a new sample +bool CharSampSet::Add(CharSamp *char_samp) { + if ((cnt_ % SAMP_ALLOC_BLOCK) == 0) { + // create an extended buffer + CharSamp **new_samp_buff = + reinterpret_cast(new CharSamp *[cnt_ + SAMP_ALLOC_BLOCK]); + if (new_samp_buff == NULL) { + return false; + } + // copy old contents + if (cnt_ > 0) { + memcpy(new_samp_buff, samp_buff_, cnt_ * sizeof(*samp_buff_)); + delete []samp_buff_; + } + samp_buff_ = new_samp_buff; + } + samp_buff_[cnt_++] = char_samp; + return true; +} + +// load char samples from file +bool CharSampSet::LoadCharSamples(FILE *fp) { + // free existing + Cleanup(); + // samples are created here and owned by the class + own_samples_ = true; + // start loading char samples + while (feof(fp) == 0) { + CharSamp *new_samp = CharSamp::FromCharDumpFile(fp); + if (new_samp != NULL) { + if (Add(new_samp) == false) { + return false; + } + } + } + return true; +} + +// creates a CharSampSet object from file +CharSampSet * CharSampSet::FromCharDumpFile(string file_name) { + FILE *fp; + unsigned int val32; + // open the file + fp = fopen(file_name.c_str(), "r"); + if (fp == NULL) { + return NULL; + } + // read and verify marker + if (fread(&val32, 1, sizeof(val32), fp) != sizeof(val32)) { + return NULL; + } + if (val32 != 0xfefeabd0) { + return NULL; + } + // create 
an object + CharSampSet *samp_set = new CharSampSet(); + if (samp_set == NULL) { + return NULL; + } + if (samp_set->LoadCharSamples(fp) == false) { + delete samp_set; + samp_set = NULL; + } + fclose(fp); + return samp_set; +} + +// Create a new Char Dump file +FILE *CharSampSet::CreateCharDumpFile(string file_name) { + FILE *fp; + unsigned int val32; + // create the file + fp = fopen(file_name.c_str(), "w"); + if (!fp) { + return NULL; + } + // read and verify marker + val32 = 0xfefeabd0; + if (fwrite(&val32, 1, sizeof(val32), fp) != sizeof(val32)) { + return NULL; + } + return fp; +} + +// Enumerate the Samples in the set one-by-one calling the enumertor's + // EnumCharSamp method for each sample +bool CharSampSet::EnumSamples(string file_name, CharSampEnum *enum_obj) { + CachedFile *fp_in; + unsigned int val32; + long i64_size, + i64_pos; + // open the file + fp_in = new CachedFile(file_name); + if (fp_in == NULL) { + return false; + } + i64_size = fp_in->Size(); + if (i64_size < 1) { + return false; + } + // read and verify marker + if (fp_in->Read(&val32, sizeof(val32)) != sizeof(val32)) { + return false; + } + if (val32 != 0xfefeabd0) { + return false; + } + // start loading char samples + while (fp_in->eof() == false) { + CharSamp *new_samp = CharSamp::FromCharDumpFile(fp_in); + i64_pos = fp_in->Tell(); + if (new_samp != NULL) { + bool ret_flag = (enum_obj)->EnumCharSamp(new_samp, + (100.0f * i64_pos / i64_size)); + delete new_samp; + if (ret_flag == false) { + break; + } + } + } + delete fp_in; + return true; +} + +} // namespace ocrlib diff --git a/cube/char_samp_set.h b/cube/char_samp_set.h new file mode 100644 index 0000000000..d524282514 --- /dev/null +++ b/cube/char_samp_set.h @@ -0,0 +1,73 @@ +/********************************************************************** + * File: char_samp_set.h + * Description: Declaration of a Character Sample Set Class + * Author: Ahmad Abdulkader + * Created: 2007 + * + * (C) Copyright 2008, Google Inc. 
+ ** Licensed under the Apache License, Version 2.0 (the "License"); + ** you may not use this file except in compliance with the License. + ** You may obtain a copy of the License at + ** http://www.apache.org/licenses/LICENSE-2.0 + ** Unless required by applicable law or agreed to in writing, software + ** distributed under the License is distributed on an "AS IS" BASIS, + ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + ** See the License for the specific language governing permissions and + ** limitations under the License. + * + **********************************************************************/ + +// The CharSampSet set encapsulates a set of CharSet objects typically +// but not necessarily loaded from a file +// It provides methods to load samples from File, Create a new file and +// Add new char samples to the set + +#ifndef CHAR_SAMP_SET_H +#define CHAR_SAMP_SET_H + +#include +#include +#include +#include "char_samp.h" +#include "char_samp_enum.h" +#include "char_set.h" + +namespace tesseract { + +// chunks of samp pointers to allocate +#define SAMP_ALLOC_BLOCK 10000 + +class CharSampSet { + public: + CharSampSet(); + ~CharSampSet(); + // return sample count + int SampleCount() const { return cnt_; } + // returns samples buffer + CharSamp ** Samples() const { return samp_buff_; } + // Create a CharSampSet set object from a file + static CharSampSet *FromCharDumpFile(string file_name); + // Enumerate the Samples in the set one-by-one calling the enumertor's + // EnumCharSamp method for each sample + static bool EnumSamples(string file_name, CharSampEnum *enumerator); + // Create a new Char Dump file + static FILE *CreateCharDumpFile(string file_name); + // Add a new sample to the set + bool Add(CharSamp *char_samp); + + private: + // sample count + int cnt_; + // the char samp array + CharSamp **samp_buff_; + // Are the samples owned by the set or not. 
+ // Determines whether we should cleanup in the end + bool own_samples_; + // Cleanup + void Cleanup(); + // Load character samples from a file + bool LoadCharSamples(FILE *fp); +}; +} + +#endif // CHAR_SAMP_SET_H diff --git a/cube/char_set.cpp b/cube/char_set.cpp new file mode 100644 index 0000000000..3cf47989ea --- /dev/null +++ b/cube/char_set.cpp @@ -0,0 +1,180 @@ +/********************************************************************** + * File: char_samp_enum.cpp + * Description: Implementation of a Character Set Class + * Author: Ahmad Abdulkader + * Created: 2007 + * + * (C) Copyright 2008, Google Inc. + ** Licensed under the Apache License, Version 2.0 (the "License"); + ** you may not use this file except in compliance with the License. + ** You may obtain a copy of the License at + ** http://www.apache.org/licenses/LICENSE-2.0 + ** Unless required by applicable law or agreed to in writing, software + ** distributed under the License is distributed on an "AS IS" BASIS, + ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + ** See the License for the specific language governing permissions and + ** limitations under the License. + * + **********************************************************************/ + +#include + +#include "char_set.h" +#include "cube_utils.h" +#include "tessdatamanager.h" + +namespace tesseract { + +CharSet::CharSet() { + class_cnt_ = 0; + class_strings_ = NULL; + unicharset_map_ = NULL; + init_ = false; + + // init hash table + memset(hash_bin_size_, 0, sizeof(hash_bin_size_)); +} + +CharSet::~CharSet() { + if (class_strings_ != NULL) { + for (int cls = 0; cls < class_cnt_; cls++) { + if (class_strings_[cls] != NULL) { + delete class_strings_[cls]; + } + } + delete []class_strings_; + class_strings_ = NULL; + } + delete []unicharset_map_; +} + +// Creates CharSet object by reading the unicharset from the +// TessDatamanager, and mapping Cube's unicharset to Tesseract's if +// they differ. 
+CharSet *CharSet::Create(TessdataManager *tessdata_manager, + UNICHARSET *tess_unicharset) { + CharSet *char_set = new CharSet(); + if (char_set == NULL) { + return NULL; + } + + // First look for Cube's unicharset; if not there, use tesseract's + bool cube_unicharset_exists; + if (!(cube_unicharset_exists = + tessdata_manager->SeekToStart(TESSDATA_CUBE_UNICHARSET)) && + !tessdata_manager->SeekToStart(TESSDATA_UNICHARSET)) { + fprintf(stderr, "Cube ERROR (CharSet::Create): could not find " + "either cube or tesseract unicharset\n"); + return false; + } + FILE *charset_fp = tessdata_manager->GetDataFilePtr(); + if (!charset_fp) { + fprintf(stderr, "Cube ERROR (CharSet::Create): could not load " + "a unicharset\n"); + return false; + } + + // If we found a cube unicharset separate from tesseract's, load it and + // map its unichars to tesseract's; if only one unicharset exists, + // just load it. + bool loaded; + if (cube_unicharset_exists) + loaded = char_set->LoadSupportedCharList(charset_fp, tess_unicharset); + else + loaded = char_set->LoadSupportedCharList(charset_fp, NULL); + if (!loaded) { + delete char_set; + return false; + } + + char_set->init_ = true; + return char_set; +} + +// Load the list of supported chars from the given data file pointer. 
+bool CharSet::LoadSupportedCharList(FILE *fp, UNICHARSET *tess_unicharset) { + if (init_) + return true; + + char str_line[256]; + // init hash table + memset(hash_bin_size_, 0, sizeof(hash_bin_size_)); + // read the char count + if (fgets(str_line, sizeof(str_line), fp) == NULL) { + fprintf(stderr, "Cube ERROR (CharSet::InitMemory): could not " + "read char count.\n"); + return false; + } + class_cnt_ = atoi(str_line); + if (class_cnt_ < 2) { + fprintf(stderr, "Cube ERROR (CharSet::InitMemory): invalid " + "class count: %d\n", class_cnt_); + return false; + } + // memory for class strings + class_strings_ = new string_32*[class_cnt_]; + if (class_strings_ == NULL) { + fprintf(stderr, "Cube ERROR (CharSet::InitMemory): could not " + "allocate memory for class strings.\n"); + return false; + } + // memory for unicharset map + if (tess_unicharset) { + unicharset_map_ = new int[class_cnt_]; + if (unicharset_map_ == NULL) { + fprintf(stderr, "Cube ERROR (CharSet::InitMemory): could not " + "allocate memory for unicharset map.\n"); + return false; + } + } + + // Read in character strings and add to hash table + for (int class_id = 0; class_id < class_cnt_; class_id++) { + // Read the class string + if (fgets(str_line, sizeof(str_line), fp) == NULL) { + fprintf(stderr, "Cube ERROR (CharSet::ReadAndHashStrings): " + "could not read class string with class_id=%d.\n", class_id); + return false; + } + // Terminate at space if any + char *p = strchr(str_line, ' '); + if (p != NULL) + *p = '\0'; + // Convert to UTF32 and store + string_32 str32; + // Convert NULL to a space + if (strcmp(str_line, "NULL") == 0) { + strcpy(str_line, " "); + } + CubeUtils::UTF8ToUTF32(str_line, &str32); + class_strings_[class_id] = new string_32(str32); + if (class_strings_[class_id] == NULL) { + fprintf(stderr, "Cube ERROR (CharSet::ReadAndHashStrings): could not " + "allocate memory for class string with class_id=%d.\n", class_id); + return false; + } + + // Add to hash-table + int hash_val = 
Hash(reinterpret_cast(str32.c_str())); + if (hash_bin_size_[hash_val] >= kMaxHashSize) { + fprintf(stderr, "Cube ERROR (CharSet::LoadSupportedCharList): hash " + "table is full.\n"); + return false; + } + hash_bins_[hash_val][hash_bin_size_[hash_val]++] = class_id; + + if (tess_unicharset != NULL) { + // Add class id to unicharset map + UNICHAR_ID tess_id = tess_unicharset->unichar_to_id(str_line); + if (tess_id == INVALID_UNICHAR_ID) { + tess_unicharset->unichar_insert(str_line); + tess_id = tess_unicharset->unichar_to_id(str_line); + } + ASSERT_HOST(tess_id != INVALID_UNICHAR_ID); + unicharset_map_[class_id] = tess_id; + } + } + return true; +} + +} // tesseract diff --git a/cube/char_set.h b/cube/char_set.h new file mode 100644 index 0000000000..baa77a1a5d --- /dev/null +++ b/cube/char_set.h @@ -0,0 +1,162 @@ +/********************************************************************** + * File: char_samp_enum.h + * Description: Declaration of a Character Set Class + * Author: Ahmad Abdulkader + * Created: 2007 + * + * (C) Copyright 2008, Google Inc. + ** Licensed under the Apache License, Version 2.0 (the "License"); + ** you may not use this file except in compliance with the License. + ** You may obtain a copy of the License at + ** http://www.apache.org/licenses/LICENSE-2.0 + ** Unless required by applicable law or agreed to in writing, software + ** distributed under the License is distributed on an "AS IS" BASIS, + ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + ** See the License for the specific language governing permissions and + ** limitations under the License. + * + **********************************************************************/ + +// The CharSet class encapsulates the list of 32-bit strings/characters that +// Cube supports for a specific language. 
The char set is loaded from the +// .unicharset file corresponding to a specific language +// Each string has a corresponding int class-id that gets used throughout Cube +// The class provides pass back and forth conversion between the class-id +// and its corresponding 32-bit string. This is done using a hash table that +// maps the string to the class id. + +#ifndef CHAR_SET_H +#define CHAR_SET_H + +#include +#include +#include + +#include "string_32.h" +#include "tessdatamanager.h" +#include "unicharset.h" + +namespace tesseract { + +class CharSet { + public: + CharSet(); + ~CharSet(); + + // Returns true if Cube is sharing Tesseract's unicharset. + inline bool SharedUnicharset() { return (unicharset_map_ == NULL); } + + // Returns the class id corresponding to a 32-bit string. Returns -1 + // if the string is not supported. This is done by hashing the + // string and then looking up the string in the hash-bin if there + // are collisions. + inline int ClassID(const char_32 *str) const { + int hash_val = Hash(str); + if (hash_bin_size_[hash_val] == 0) + return -1; + for (int bin = 0; bin < hash_bin_size_[hash_val]; bin++) { + if (class_strings_[hash_bins_[hash_val][bin]]->compare(str) == 0) + return hash_bins_[hash_val][bin]; + } + return -1; + } + // Same as above but using a 32-bit char instead of a string + inline int ClassID(char_32 ch) const { + int hash_val = Hash(ch); + if (hash_bin_size_[hash_val] == 0) + return -1; + for (int bin = 0; bin < hash_bin_size_[hash_val]; bin++) { + if ((*class_strings_[hash_bins_[hash_val][bin]])[0] == ch && + class_strings_[hash_bins_[hash_val][bin]]->length() == 1) { + return hash_bins_[hash_val][bin]; + } + } + return -1; + } + // Retrieve the unicharid in Tesseract's unicharset corresponding + // to a 32-bit string. When Tesseract and Cube share the same + // unicharset, this will just be the class id. 
+ inline int UnicharID(const char_32 *str) const { + int class_id = ClassID(str); + if (class_id == INVALID_UNICHAR_ID) + return INVALID_UNICHAR_ID; + int unichar_id; + if (unicharset_map_) + unichar_id = unicharset_map_[class_id]; + else + unichar_id = class_id; + return unichar_id; + } + // Same as above but using a 32-bit char instead of a string + inline int UnicharID(char_32 ch) const { + int class_id = ClassID(ch); + if (class_id == INVALID_UNICHAR_ID) + return INVALID_UNICHAR_ID; + int unichar_id; + if (unicharset_map_) + unichar_id = unicharset_map_[class_id]; + else + unichar_id = class_id; + return unichar_id; + } + // Returns the 32-bit string corresponding to a class id + inline const char_32 * ClassString(int class_id) const { + if (class_id < 0 || class_id >= class_cnt_) { + return NULL; + } + return reinterpret_cast(class_strings_[class_id]->c_str()); + } + // Returns the count of supported strings + inline int ClassCount() const { return class_cnt_; } + + // Creates CharSet object by reading the unicharset from the + // TessDatamanager, and mapping Cube's unicharset to Tesseract's if + // they differ. + static CharSet *Create(TessdataManager *tessdata_manager, + UNICHARSET *tess_unicharset); + + private: + // Hash table configuration params. Determined emperically on + // the supported languages so far (Eng, Ara, Hin). 
Might need to be + // tuned for speed when more languages are supported + static const int kHashBins = 3001; + static const int kMaxHashSize = 16; + + // Using djb2 hashing function to hash a 32-bit string + // introduced in http://www.cse.yorku.ca/~oz/hash.html + static inline int Hash(const char_32 *str) { + unsigned long hash = 5381; + int c; + while ((c = *str++)) + hash = ((hash << 5) + hash) + c; + return (hash%kHashBins); + } + // Same as above but for a single char + static inline int Hash(char_32 ch) { + char_32 b[2]; + b[0] = ch; + b[1] = 0; + return Hash(b); + } + + // Load the list of supported chars from the given data file + // pointer. If tess_unicharset is non-NULL, mapping each Cube class + // id to a tesseract unicharid. + bool LoadSupportedCharList(FILE *fp, UNICHARSET *tess_unicharset); + + // class count + int class_cnt_; + // hash-bin sizes array + int hash_bin_size_[kHashBins]; + // hash bins + int hash_bins_[kHashBins][kMaxHashSize]; + // supported strings array + string_32 **class_strings_; + // map from class id to secondary (tesseract's) unicharset's ids + int *unicharset_map_; + // has the char set been initialized flag + bool init_; +}; +} + +#endif // CHAR_SET_H diff --git a/cube/classifier_base.h b/cube/classifier_base.h new file mode 100644 index 0000000000..8c2b1bbf9a --- /dev/null +++ b/cube/classifier_base.h @@ -0,0 +1,100 @@ +/********************************************************************** + * File: classifier_base.h + * Description: Declaration of the Base Character Classifier + * Author: Ahmad Abdulkader + * Created: 2007 + * + * (C) Copyright 2008, Google Inc. + ** Licensed under the Apache License, Version 2.0 (the "License"); + ** you may not use this file except in compliance with the License. 
+ ** You may obtain a copy of the License at + ** http://www.apache.org/licenses/LICENSE-2.0 + ** Unless required by applicable law or agreed to in writing, software + ** distributed under the License is distributed on an "AS IS" BASIS, + ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + ** See the License for the specific language governing permissions and + ** limitations under the License. + * + **********************************************************************/ + +// The CharClassifier class is the abstract class for any character/grapheme +// classifier. + +#ifndef CHAR_CLASSIFIER_BASE_H +#define CHAR_CLASSIFIER_BASE_H + +#include +#include "char_samp.h" +#include "char_altlist.h" +#include "char_set.h" +#include "feature_base.h" +#include "lang_model.h" +#include "tuning_params.h" + +namespace tesseract { +class CharClassifier { + public: + CharClassifier(CharSet *char_set, TuningParams *params, + FeatureBase *feat_extract) { + char_set_ = char_set; + params_ = params; + feat_extract_ = feat_extract; + fold_sets_ = NULL; + fold_set_cnt_ = 0; + fold_set_len_ = NULL; + init_ = false; + case_sensitive_ = true; + } + + virtual ~CharClassifier() { + if (fold_sets_ != NULL) { + for (int fold_set = 0; fold_set < fold_set_cnt_; fold_set++) { + if (fold_sets_[fold_set] != NULL) { + delete []fold_sets_[fold_set]; + } + } + delete []fold_sets_; + fold_sets_ = NULL; + } + if (fold_set_len_ != NULL) { + delete []fold_set_len_; + fold_set_len_ = NULL; + } + if (feat_extract_ != NULL) { + delete feat_extract_; + feat_extract_ = NULL; + } + } + + // pure virtual functions that need to be implemented by any inheriting class + virtual CharAltList * Classify(CharSamp *char_samp) = 0; + virtual int CharCost(CharSamp *char_samp) = 0; + virtual bool Train(CharSamp *char_samp, int ClassID) = 0; + virtual bool SetLearnParam(char *var_name, float val) = 0; + virtual bool Init(const string &data_file_path, const string &lang, + LangModel *lang_mod) = 0; 
+ + // accessors + FeatureBase *FeatureExtractor() {return feat_extract_;} + inline bool CaseSensitive() const { return case_sensitive_; } + inline void SetCaseSensitive(bool case_sensitive) { + case_sensitive_ = case_sensitive; + } + + protected: + virtual void Fold() = 0; + virtual bool LoadFoldingSets(const string &data_file_path, + const string &lang, + LangModel *lang_mod) = 0; + FeatureBase *feat_extract_; + CharSet *char_set_; + TuningParams *params_; + int **fold_sets_; + int *fold_set_len_; + int fold_set_cnt_; + bool init_; + bool case_sensitive_; +}; +} // tesseract + +#endif // CHAR_CLASSIFIER_BASE_H diff --git a/cube/classifier_factory.cpp b/cube/classifier_factory.cpp new file mode 100644 index 0000000000..a22f0d4ea8 --- /dev/null +++ b/cube/classifier_factory.cpp @@ -0,0 +1,97 @@ +/********************************************************************** + * File: classifier_factory.cpp + * Description: Implementation of the Base Character Classifier + * Author: Ahmad Abdulkader + * Created: 2007 + * + * (C) Copyright 2008, Google Inc. + ** Licensed under the Apache License, Version 2.0 (the "License"); + ** you may not use this file except in compliance with the License. + ** You may obtain a copy of the License at + ** http://www.apache.org/licenses/LICENSE-2.0 + ** Unless required by applicable law or agreed to in writing, software + ** distributed under the License is distributed on an "AS IS" BASIS, + ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + ** See the License for the specific language governing permissions and + ** limitations under the License. 
+ * + **********************************************************************/ + +#include +#include +#include +#include "classifier_factory.h" +#include "conv_net_classifier.h" +#include "feature_base.h" +#include "feature_bmp.h" +#include "feature_chebyshev.h" +#include "feature_hybrid.h" +#include "hybrid_neural_net_classifier.h" + +namespace tesseract { + +// Creates a CharClassifier object of the appropriate type depending on the +// classifier type in the settings file +CharClassifier *CharClassifierFactory::Create(const string &data_file_path, + const string &lang, + LangModel *lang_mod, + CharSet *char_set, + TuningParams *params) { + // create the feature extraction object + FeatureBase *feat_extract; + + switch (params->TypeFeature()) { + case TuningParams::BMP: + feat_extract = new FeatureBmp(params); + break; + case TuningParams::CHEBYSHEV: + feat_extract = new FeatureChebyshev(params); + break; + case TuningParams::HYBRID: + feat_extract = new FeatureHybrid(params); + break; + default: + fprintf(stderr, "Cube ERROR (CharClassifierFactory::Create): invalid " + "feature type.\n"); + return NULL; + } + + if (feat_extract == NULL) { + fprintf(stderr, "Cube ERROR (CharClassifierFactory::Create): unable " + "to instantiate feature extraction object.\n"); + return NULL; + } + + // create the classifier object + CharClassifier *classifier_obj; + switch (params->TypeClassifier()) { + case TuningParams::NN: + classifier_obj = new ConvNetCharClassifier(char_set, params, + feat_extract); + break; + case TuningParams::HYBRID_NN: + classifier_obj = new HybridNeuralNetCharClassifier(char_set, params, + feat_extract); + break; + default: + fprintf(stderr, "Cube ERROR (CharClassifierFactory::Create): invalid " + "classifier type.\n"); + return NULL; + } + + if (classifier_obj == NULL) { + fprintf(stderr, "Cube ERROR (CharClassifierFactory::Create): error " + "allocating memory for character classifier object.\n"); + return NULL; + } + + // Init the classifier + if 
(!classifier_obj->Init(data_file_path, lang, lang_mod)) { + delete classifier_obj; + fprintf(stderr, "Cube ERROR (CharClassifierFactory::Create): unable " + "to Init() character classifier object.\n"); + return NULL; + } + return classifier_obj; +} +} diff --git a/cube/classifier_factory.h b/cube/classifier_factory.h new file mode 100644 index 0000000000..f7254d58cf --- /dev/null +++ b/cube/classifier_factory.h @@ -0,0 +1,43 @@ +/********************************************************************** + * File: classifier_factory.h + * Description: Declaration of the Base Character Classifier + * Author: Ahmad Abdulkader + * Created: 2007 + * + * (C) Copyright 2008, Google Inc. + ** Licensed under the Apache License, Version 2.0 (the "License"); + ** you may not use this file except in compliance with the License. + ** You may obtain a copy of the License at + ** http://www.apache.org/licenses/LICENSE-2.0 + ** Unless required by applicable law or agreed to in writing, software + ** distributed under the License is distributed on an "AS IS" BASIS, + ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + ** See the License for the specific language governing permissions and + ** limitations under the License. 
+ * + **********************************************************************/ + +// The CharClassifierFactory provides a single static method to create an +// instance of the desired classifier + +#ifndef CHAR_CLASSIFIER_FACTORY_H +#define CHAR_CLASSIFIER_FACTORY_H + +#include +#include "classifier_base.h" +#include "lang_model.h" + +namespace tesseract { +class CharClassifierFactory { + public: + // Creates a CharClassifier object of the appropriate type depending on the + // classifier type in the settings file + static CharClassifier *Create(const string &data_file_path, + const string &lang, + LangModel *lang_mod, + CharSet *char_set, + TuningParams *params); +}; +} // tesseract + +#endif // CHAR_CLASSIFIER_FACTORY_H diff --git a/cube/con_comp.cpp b/cube/con_comp.cpp new file mode 100644 index 0000000000..0df05f37fd --- /dev/null +++ b/cube/con_comp.cpp @@ -0,0 +1,285 @@ +/********************************************************************** + * File: con_comp.cpp + * Description: Implementation of a Connected Component class + * Author: Ahmad Abdulkader + * Created: 2007 + * + * (C) Copyright 2008, Google Inc. + ** Licensed under the Apache License, Version 2.0 (the "License"); + ** you may not use this file except in compliance with the License. + ** You may obtain a copy of the License at + ** http://www.apache.org/licenses/LICENSE-2.0 + ** Unless required by applicable law or agreed to in writing, software + ** distributed under the License is distributed on an "AS IS" BASIS, + ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + ** See the License for the specific language governing permissions and + ** limitations under the License. 
+ * + **********************************************************************/ + +#include +#include +#include "con_comp.h" +#include "const.h" + +namespace tesseract { + +ConComp::ConComp() { + head_ = NULL; + tail_ = NULL; + left_ = 0; + top_ = 0; + right_ = 0; + bottom_ = 0; + left_most_ = false; + right_most_ = false; + id_ = -1; + pt_cnt_ = 0; +} + +ConComp::~ConComp() { + if (head_ != NULL) { + ConCompPt *pt_ptr = head_; + while (pt_ptr != NULL) { + ConCompPt *pptNext = pt_ptr->Next(); + delete pt_ptr; + pt_ptr = pptNext; + } + head_ = NULL; + } +} + +// adds a pt to the conn comp and updates its boundaries +bool ConComp::Add(int x, int y) { + ConCompPt *pt_ptr = new ConCompPt(x, y); + if (pt_ptr == NULL) { + return false; + } + + if (head_ == NULL) { + left_ = x; + right_ = x; + top_ = y; + bottom_ = y; + + head_ = pt_ptr; + } else { + left_ = left_ <= x ? left_ : x; + top_ = top_ <= y ? top_ : y; + right_ = right_ >= x ? right_ : x; + bottom_ = bottom_ >= y ? bottom_ : y; + } + + if (tail_ != NULL) { + tail_->SetNext(pt_ptr); + } + + tail_ = pt_ptr; + pt_cnt_++; + return true; +} + +// merges two connected components +bool ConComp::Merge(ConComp *concomp) { + if (head_ == NULL || tail_ == NULL || + concomp->head_ == NULL || concomp->tail_ == NULL) { + return false; + } + + tail_->SetNext(concomp->head_); + tail_ = concomp->tail_; + left_ = left_ <= concomp->left_ ? left_ : concomp->left_; + top_ = top_ <= concomp->top_ ? top_ : concomp->top_; + right_ = right_ >= concomp->right_ ? right_ : concomp->right_; + bottom_ = bottom_ >= concomp->bottom_ ? 
bottom_ : concomp->bottom_; + pt_cnt_ += concomp->pt_cnt_; + + concomp->head_ = NULL; + concomp->tail_ = NULL; + + return true; +} + +// Creates the x-coord density histogram after spreading +// each x-coord position by the HIST_WND_RATIO fraction of the +// height of the ConComp, but limited to max_hist_wnd +int *ConComp::CreateHistogram(int max_hist_wnd) { + int wid = right_ - left_ + 1, + hgt = bottom_ - top_ + 1, + hist_wnd = static_cast(hgt * HIST_WND_RATIO); + + if (hist_wnd > max_hist_wnd) { + hist_wnd = max_hist_wnd; + } + + // alloc memo for histogram + int *hist_array = new int[wid]; + if (hist_array == NULL) { + return NULL; + } + + memset(hist_array, 0, wid * sizeof(*hist_array)); + + // compute windowed histogram + ConCompPt *pt_ptr = head_; + + while (pt_ptr != NULL) { + int x = pt_ptr->x() - left_, + xw = x - hist_wnd; + + for (int xdel = -hist_wnd; xdel <= hist_wnd; xdel++, xw++) { + if (xw >= 0 && xw < wid) { + hist_array[xw]++; + } + } + + pt_ptr = pt_ptr->Next(); + } + + return hist_array; +} + +// find out the seg pts by looking for local minima in the histogram +int *ConComp::SegmentHistogram(int *hist_array, int *seg_pt_cnt) { + // init + (*seg_pt_cnt) = 0; + + int wid = right_ - left_ + 1, + hgt = bottom_ - top_ + 1; + + int *x_seg_pt = new int[wid]; + if (x_seg_pt == NULL) { + return NULL; + } + + int seg_pt_wnd = static_cast(hgt * SEG_PT_WND_RATIO); + + if (seg_pt_wnd > 1) { + seg_pt_wnd = 1; + } + + for (int x = 2; x < (wid - 2); x++) { + if (hist_array[x] < hist_array[x - 1] && + hist_array[x] < hist_array[x - 2] && + hist_array[x] <= hist_array[x + 1] && + hist_array[x] <= hist_array[x + 2]) { + x_seg_pt[(*seg_pt_cnt)++] = x; + x += seg_pt_wnd; + } else if (hist_array[x] <= hist_array[x - 1] && + hist_array[x] <= hist_array[x - 2] && + hist_array[x] < hist_array[x + 1] && + hist_array[x] < hist_array[x + 2]) { + x_seg_pt[(*seg_pt_cnt)++] = x; + x += seg_pt_wnd; + } + } + + // no segments, nothing to do + if ((*seg_pt_cnt) == 0) { + 
delete []x_seg_pt; + return NULL; + } + + return x_seg_pt; +} + +// segments a concomp based on pixel density histogram local minima +// if there were none found, it returns NULL +// this is more useful than creating a clone of itself +ConComp **ConComp::Segment(int max_hist_wnd, int *concomp_cnt) { + // init + (*concomp_cnt) = 0; + + // No pts + if (head_ == NULL) { + return NULL; + } + + int seg_pt_cnt = 0; + + // create the histogram + int *hist_array = CreateHistogram(max_hist_wnd); + if (hist_array == NULL) { + return NULL; + } + + int *x_seg_pt = SegmentHistogram(hist_array, &seg_pt_cnt); + + // free histogram + delete []hist_array; + + // no segments, nothing to do + if (seg_pt_cnt == 0) { + return NULL; + } + + // create concomp array + ConComp **concomp_array = new ConComp *[seg_pt_cnt + 1]; + if (concomp_array == NULL) { + delete []x_seg_pt; + return NULL; + } + + for (int concomp = 0; concomp <= seg_pt_cnt; concomp++) { + concomp_array[concomp] = new ConComp(); + if (concomp_array[concomp] == NULL) { + delete []x_seg_pt; + delete []concomp_array; + return NULL; + } + + // split concomps inherit the ID this concomp + concomp_array[concomp]->SetID(id_); + } + + // set the left and right most attributes of the + // appropriate concomps + concomp_array[0]->left_most_ = true; + concomp_array[seg_pt_cnt]->right_most_ = true; + + // assign pts to concomps + ConCompPt *pt_ptr = head_; + while (pt_ptr != NULL) { + int seg_pt; + + // find the first seg-pt that exceeds the x value + // of the pt + for (seg_pt = 0; seg_pt < seg_pt_cnt; seg_pt++) { + if ((x_seg_pt[seg_pt] + left_) > pt_ptr->x()) { + break; + } + } + + // add the pt to the proper concomp + if (concomp_array[seg_pt]->Add(pt_ptr->x(), pt_ptr->y()) == false) { + delete []x_seg_pt; + delete []concomp_array; + return NULL; + } + + pt_ptr = pt_ptr->Next(); + } + + delete []x_seg_pt; + + (*concomp_cnt) = (seg_pt_cnt + 1); + + return concomp_array; +} + +// Shifts the co-ordinates of all points by the 
specified x & y deltas +void ConComp::Shift(int dx, int dy) { + ConCompPt *pt_ptr = head_; + + while (pt_ptr != NULL) { + pt_ptr->Shift(dx, dy); + pt_ptr = pt_ptr->Next(); + } + + left_ += dx; + right_ += dx; + top_ += dy; + bottom_ += dy; +} + +} // namespace tesseract diff --git a/cube/con_comp.h b/cube/con_comp.h new file mode 100644 index 0000000000..4d0001547f --- /dev/null +++ b/cube/con_comp.h @@ -0,0 +1,124 @@ +/********************************************************************** + * File: con_comp.h + * Description: Declaration of a Connected Component class + * Author: Ahmad Abdulkader + * Created: 2007 + * + * (C) Copyright 2008, Google Inc. + ** Licensed under the Apache License, Version 2.0 (the "License"); + ** you may not use this file except in compliance with the License. + ** You may obtain a copy of the License at + ** http://www.apache.org/licenses/LICENSE-2.0 + ** Unless required by applicable law or agreed to in writing, software + ** distributed under the License is distributed on an "AS IS" BASIS, + ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + ** See the License for the specific language governing permissions and + ** limitations under the License. + * + **********************************************************************/ + +#ifndef CONCOMP_H +#define CONCOMP_H + +// The ConComp class implements the functionality needed for a +// Connected Component object and Connected Component (ConComp) points. +// The points consituting a connected component are kept in a linked-list +// The Concomp class provided methods to: +// 1- Compare components in L2R and R2L reading orders. 
+// 2- Merge ConComps +// 3- Compute the windowed vertical pixel density histogram for a specific +// windows size +// 4- Segment a ConComp based on the local windowed vertical pixel +// density histogram local minima + +namespace tesseract { + +// Implments a ConComp point in a linked list of points +class ConCompPt { + public: + ConCompPt(int x, int y) { + x_ = x; + y_ = y; + next_pt_ = NULL; + } + inline int x() { return x_; } + inline int y() { return y_; } + inline void Shift(int dx, int dy) { + x_ += dx; + y_ += dy; + } + inline ConCompPt * Next() { return next_pt_; } + inline void SetNext(ConCompPt *pt) { next_pt_ = pt; } + + private: + int x_; + int y_; + ConCompPt *next_pt_; +}; + +class ConComp { + public: + ConComp(); + virtual ~ConComp(); + // accessors + inline ConCompPt *Head() { return head_; } + inline int Left() const { return left_; } + inline int Top() const { return top_; } + inline int Right() const { return right_; } + inline int Bottom() const { return bottom_; } + inline int Width() const { return right_ - left_ + 1; } + inline int Height() const { return bottom_ - top_ + 1; } + + // Comparer used for sorting L2R reading order + inline static int Left2RightComparer(const void *comp1, + const void *comp2) { + return (*(reinterpret_cast(comp1)))->left_ + + (*(reinterpret_cast(comp1)))->right_ - + (*(reinterpret_cast(comp2)))->left_ - + (*(reinterpret_cast(comp2)))->right_; + } + + // Comparer used for sorting R2L reading order + inline static int Right2LeftComparer(const void *comp1, + const void *comp2) { + return (*(reinterpret_cast(comp2)))->right_ - + (*(reinterpret_cast(comp1)))->right_; + } + + // accessors for attribues of a ConComp + inline bool LeftMost() const { return left_most_; } + inline bool RightMost() const { return right_most_; } + inline void SetLeftMost(bool left_most) { left_most_ = left_most; } + inline void SetRightMost(bool right_most) { right_most_ = right_most; + } + inline int ID () const { return id_; } + inline 
void SetID(int id) { id_ = id; } + inline int PtCnt () const { return pt_cnt_; } + // Add a new pt + bool Add(int x, int y); + // Merge two connected components in-place + bool Merge(ConComp *con_comp); + // Shifts the co-ordinates of all points by the specified x & y deltas + void Shift(int dx, int dy); + // segments a concomp based on pixel density histogram local minima + ConComp **Segment(int max_hist_wnd, int *concomp_cnt); + // creates the vertical pixel density histogram of the concomp + int *CreateHistogram(int max_hist_wnd); + // find out the seg pts by looking for local minima in the histogram + int *SegmentHistogram(int *hist_array, int *seg_pt_cnt); + + private: + int id_; + bool left_most_; + bool right_most_; + int left_; + int top_; + int right_; + int bottom_; + ConCompPt *head_; + ConCompPt *tail_; + int pt_cnt_; +}; +} + +#endif // CONCOMP_H diff --git a/textord/blobcmpl.h b/cube/const.h similarity index 51% rename from textord/blobcmpl.h rename to cube/const.h index 2ce7350bbd..0154b8a6ed 100644 --- a/textord/blobcmpl.h +++ b/cube/const.h @@ -1,10 +1,10 @@ /********************************************************************** - * File: blobcmpl.h (Formerly paircmpl.h) - * Description: Code to compare two blobs using the adaptive matcher - * Author: Ray Smith - * Created: Wed Apr 21 09:31:02 BST 1993 + * File: const.h + * Description: Defintions of constants used by Cube + * Author: Ahmad Abdulkader + * Created: 2007 * - * (C) Copyright 1993, Hewlett-Packard Ltd. + * (C) Copyright 2008, Google Inc. ** Licensed under the Apache License, Version 2.0 (the "License"); ** you may not use this file except in compliance with the License. 
** You may obtain a copy of the License at @@ -17,15 +17,19 @@ * **********************************************************************/ -#ifndef BLOBCMPL_H -#define BLOBCMPL_H +#ifndef CUBE_CONST_H +#define CUBE_CONST_H -class PBLOB; -class ROW; +// Scale used to normalize a log-prob to a cost +#define PROB2COST_SCALE 4096.0 +// Maximum possible cost (-log prob of MIN_PROB) +#define MIN_PROB_COST 65536 +// Probability corresponding to the max cost MIN_PROB_COST +#define MIN_PROB 0.000000113 +// Worst possible cost (returned on failure) +#define WORST_COST 0x40000 +// Oversegmentation hysteresis thresholds +#define HIST_WND_RATIO 0.1f +#define SEG_PT_WND_RATIO 0.1f -float compare_blobs( //match 2 blobs - PBLOB *blob1, //first blob - ROW *row1, //row it came from - PBLOB *blob2, //other blob - ROW *row2); -#endif +#endif // CUBE_CONST_H diff --git a/cube/conv_net_classifier.cpp b/cube/conv_net_classifier.cpp new file mode 100644 index 0000000000..8800b6b91e --- /dev/null +++ b/cube/conv_net_classifier.cpp @@ -0,0 +1,370 @@ +/********************************************************************** + * File: charclassifier.cpp + * Description: Implementation of Convolutional-NeuralNet Character Classifier + * Author: Ahmad Abdulkader + * Created: 2007 + * + * (C) Copyright 2008, Google Inc. + ** Licensed under the Apache License, Version 2.0 (the "License"); + ** you may not use this file except in compliance with the License. + ** You may obtain a copy of the License at + ** http://www.apache.org/licenses/LICENSE-2.0 + ** Unless required by applicable law or agreed to in writing, software + ** distributed under the License is distributed on an "AS IS" BASIS, + ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + ** See the License for the specific language governing permissions and + ** limitations under the License. 
+ * + **********************************************************************/ + +#include +#include +#include +#include +#include +#include + +#include "char_set.h" +#include "classifier_base.h" +#include "const.h" +#include "conv_net_classifier.h" +#include "cube_utils.h" +#include "feature_base.h" +#include "feature_bmp.h" +#include "tess_lang_model.h" + +namespace tesseract { + +ConvNetCharClassifier::ConvNetCharClassifier(CharSet *char_set, + TuningParams *params, + FeatureBase *feat_extract) + : CharClassifier(char_set, params, feat_extract) { + char_net_ = NULL; + net_input_ = NULL; + net_output_ = NULL; +} + +ConvNetCharClassifier::~ConvNetCharClassifier() { + if (char_net_ != NULL) { + delete char_net_; + char_net_ = NULL; + } + + if (net_input_ != NULL) { + delete []net_input_; + net_input_ = NULL; + } + + if (net_output_ != NULL) { + delete []net_output_; + net_output_ = NULL; + } +} + +// The main training function. Given a sample and a class ID the classifier +// updates its parameters according to its learning algorithm. This function +// is currently not implemented. TODO(ahmadab): implement end-2-end training +bool ConvNetCharClassifier::Train(CharSamp *char_samp, int ClassID) { + return false; +} + +// A secondary function needed for training. Allows the trainer to set the +// value of any train-time paramter. This function is currently not +// implemented. TODO(ahmadab): implement end-2-end training +bool ConvNetCharClassifier::SetLearnParam(char *var_name, float val) { + // TODO(ahmadab): implementation of parameter initializing. 
+ return false; +} + +// Folds the output of the NeuralNet using the loaded folding sets +void ConvNetCharClassifier::Fold() { + // in case insensitive mode + if (case_sensitive_ == false) { + int class_cnt = char_set_->ClassCount(); + // fold case + for (int class_id = 0; class_id < class_cnt; class_id++) { + // get class string + const char_32 *str32 = char_set_->ClassString(class_id); + // get the upper case form of the string + string_32 upper_form32 = str32; + for (int ch = 0; ch < upper_form32.length(); ch++) { + if (iswalpha(static_cast(upper_form32[ch])) != 0) { + upper_form32[ch] = towupper(upper_form32[ch]); + } + } + + // find out the upperform class-id if any + int upper_class_id = + char_set_->ClassID(reinterpret_cast( + upper_form32.c_str())); + if (upper_class_id != -1 && class_id != upper_class_id) { + float max_out = MAX(net_output_[class_id], net_output_[upper_class_id]); + net_output_[class_id] = max_out; + net_output_[upper_class_id] = max_out; + } + } + } + + // The folding sets specify how groups of classes should be folded + // Folding involved assigning a min-activation to all the members + // of the folding set. 
The min-activation is a fraction of the max-activation + // of the members of the folding set + for (int fold_set = 0; fold_set < fold_set_cnt_; fold_set++) { + if (fold_set_len_[fold_set] == 0) + continue; + float max_prob = net_output_[fold_sets_[fold_set][0]]; + for (int ch = 1; ch < fold_set_len_[fold_set]; ch++) { + if (net_output_[fold_sets_[fold_set][ch]] > max_prob) { + max_prob = net_output_[fold_sets_[fold_set][ch]]; + } + } + for (int ch = 0; ch < fold_set_len_[fold_set]; ch++) { + net_output_[fold_sets_[fold_set][ch]] = MAX(max_prob * kFoldingRatio, + net_output_[fold_sets_[fold_set][ch]]); + } + } +} + +// Compute the features of specified charsamp and feedforward the +// specified nets +bool ConvNetCharClassifier::RunNets(CharSamp *char_samp) { + if (char_net_ == NULL) { + fprintf(stderr, "Cube ERROR (ConvNetCharClassifier::RunNets): " + "NeuralNet is NULL\n"); + return false; + } + int feat_cnt = char_net_->in_cnt(); + int class_cnt = char_set_->ClassCount(); + + // allocate i/p and o/p buffers if needed + if (net_input_ == NULL) { + net_input_ = new float[feat_cnt]; + if (net_input_ == NULL) { + fprintf(stderr, "Cube ERROR (ConvNetCharClassifier::RunNets): " + "unable to allocate memory for input nodes\n"); + return false; + } + + net_output_ = new float[class_cnt]; + if (net_output_ == NULL) { + fprintf(stderr, "Cube ERROR (ConvNetCharClassifier::RunNets): " + "unable to allocate memory for output nodes\n"); + return false; + } + } + + // compute input features + if (feat_extract_->ComputeFeatures(char_samp, net_input_) == false) { + fprintf(stderr, "Cube ERROR (ConvNetCharClassifier::RunNets): " + "unable to compute features\n"); + return false; + } + + if (char_net_ != NULL) { + if (char_net_->FeedForward(net_input_, net_output_) == false) { + fprintf(stderr, "Cube ERROR (ConvNetCharClassifier::RunNets): " + "unable to run feed-forward\n"); + return false; + } + } else { + return false; + } + Fold(); + return true; +} + +// return the cost of 
being a char +int ConvNetCharClassifier::CharCost(CharSamp *char_samp) { + if (RunNets(char_samp) == false) { + return 0; + } + return CubeUtils::Prob2Cost(1.0f - net_output_[0]); +} + +// classifies a charsamp and returns an alternate list +// of chars sorted by char costs +CharAltList *ConvNetCharClassifier::Classify(CharSamp *char_samp) { + // run the needed nets + if (RunNets(char_samp) == false) { + return NULL; + } + + int class_cnt = char_set_->ClassCount(); + + // create an altlist + CharAltList *alt_list = new CharAltList(char_set_, class_cnt); + if (alt_list == NULL) { + fprintf(stderr, "Cube WARNING (ConvNetCharClassifier::Classify): " + "returning emtpy CharAltList\n"); + return NULL; + } + + for (int out = 1; out < class_cnt; out++) { + int cost = CubeUtils::Prob2Cost(net_output_[out]); + alt_list->Insert(out, cost); + } + + return alt_list; +} + +// Set an external net (for training purposes) +void ConvNetCharClassifier::SetNet(tesseract::NeuralNet *char_net) { + if (char_net_ != NULL) { + delete char_net_; + char_net_ = NULL; + } + char_net_ = char_net; +} + +// This function will return true if the file does not exist. 
+// But will fail if the it did not pass the sanity checks +bool ConvNetCharClassifier::LoadFoldingSets(const string &data_file_path, + const string &lang, + LangModel *lang_mod) { + fold_set_cnt_ = 0; + string fold_file_name; + fold_file_name = data_file_path + lang; + fold_file_name += ".cube.fold"; + + // folding sets are optional + FILE *fp = fopen(fold_file_name.c_str(), "r"); + if (fp == NULL) { + return true; + } + fclose(fp); + + string fold_sets_str; + if (!CubeUtils::ReadFileToString(fold_file_name.c_str(), + &fold_sets_str)) { + return false; + } + + // split into lines + vector str_vec; + CubeUtils::SplitStringUsing(fold_sets_str, "\r\n", &str_vec); + fold_set_cnt_ = str_vec.size(); + + fold_sets_ = new int *[fold_set_cnt_]; + if (fold_sets_ == NULL) { + return false; + } + fold_set_len_ = new int[fold_set_cnt_]; + if (fold_set_len_ == NULL) { + fold_set_cnt_ = 0; + return false; + } + + for (int fold_set = 0; fold_set < fold_set_cnt_; fold_set++) { + reinterpret_cast(lang_mod)->RemoveInvalidCharacters( + &str_vec[fold_set]); + + // if all or all but one character are invalid, invalidate this set + if (str_vec[fold_set].length() <= 1) { + fprintf(stderr, "Cube WARNING (ConvNetCharClassifier::LoadFoldingSets): " + "invalidating folding set %d\n", fold_set); + fold_set_len_[fold_set] = 0; + fold_sets_[fold_set] = NULL; + continue; + } + + string_32 str32; + CubeUtils::UTF8ToUTF32(str_vec[fold_set].c_str(), &str32); + fold_set_len_[fold_set] = str32.length(); + fold_sets_[fold_set] = new int[fold_set_len_[fold_set]]; + if (fold_sets_[fold_set] == NULL) { + fprintf(stderr, "Cube ERROR (ConvNetCharClassifier::LoadFoldingSets): " + "could not allocate folding set\n"); + fold_set_cnt_ = fold_set; + return false; + } + for (int ch = 0; ch < fold_set_len_[fold_set]; ch++) { + fold_sets_[fold_set][ch] = char_set_->ClassID(str32[ch]); + } + } + return true; +} + +// Init the classifier provided a data-path and a language string +bool 
ConvNetCharClassifier::Init(const string &data_file_path, + const string &lang, + LangModel *lang_mod) { + if (init_) { + return true; + } + + // load the nets if any. This function will return true if the net file + // does not exist. But will fail if the net did not pass the sanity checks + if (!LoadNets(data_file_path, lang)) { + return false; + } + + // load the folding sets if any. This function will return true if the + // file does not exist. But will fail if the it did not pass the sanity checks + if (!LoadFoldingSets(data_file_path, lang, lang_mod)) { + return false; + } + + init_ = true; + return true; +} + +// Load the classifier's Neural Nets +// This function will return true if the net file does not exist. +// But will fail if the net did not pass the sanity checks +bool ConvNetCharClassifier::LoadNets(const string &data_file_path, + const string &lang) { + string char_net_file; + + // add the lang identifier + char_net_file = data_file_path + lang; + char_net_file += ".cube.nn"; + + // neural network is optional + FILE *fp = fopen(char_net_file.c_str(), "r"); + if (fp == NULL) { + return true; + } + fclose(fp); + + // load main net + char_net_ = tesseract::NeuralNet::FromFile(char_net_file.c_str()); + if (char_net_ == NULL) { + fprintf(stderr, "Cube ERROR (ConvNetCharClassifier::LoadNets): " + "could not load %s\n", char_net_file.c_str()); + return false; + } + + // validate net + if (char_net_->in_cnt()!= feat_extract_->FeatureCnt()) { + fprintf(stderr, "Cube ERROR (ConvNetCharClassifier::LoadNets): " + "could not validate net %s\n", char_net_file.c_str()); + return false; + } + + // alloc net i/o buffers + int feat_cnt = char_net_->in_cnt(); + int class_cnt = char_set_->ClassCount(); + + if (char_net_->out_cnt() != class_cnt) { + fprintf(stderr, "Cube ERROR (ConvNetCharClassifier::LoadNets): " + "output count (%d) and class count (%d) are not equal\n", + char_net_->out_cnt(), class_cnt); + return false; + } + + // allocate i/p and o/p buffers if 
needed + if (net_input_ == NULL) { + net_input_ = new float[feat_cnt]; + if (net_input_ == NULL) { + return false; + } + + net_output_ = new float[class_cnt]; + if (net_output_ == NULL) { + return false; + } + } + + return true; +} +} // tesseract diff --git a/cube/conv_net_classifier.h b/cube/conv_net_classifier.h new file mode 100644 index 0000000000..e9bcd8c2cc --- /dev/null +++ b/cube/conv_net_classifier.h @@ -0,0 +1,94 @@ +/********************************************************************** + * File: conv_net_classifier.h + * Description: Declaration of Convolutional-NeuralNet Character Classifier + * Author: Ahmad Abdulkader + * Created: 2007 + * + * (C) Copyright 2008, Google Inc. + ** Licensed under the Apache License, Version 2.0 (the "License"); + ** you may not use this file except in compliance with the License. + ** You may obtain a copy of the License at + ** http://www.apache.org/licenses/LICENSE-2.0 + ** Unless required by applicable law or agreed to in writing, software + ** distributed under the License is distributed on an "AS IS" BASIS, + ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + ** See the License for the specific language governing permissions and + ** limitations under the License. + * + **********************************************************************/ + +// The ConvNetCharClassifier inherits from the base classifier class: +// "CharClassifierBase". It implements a Convolutional Neural Net classifier +// instance of the base classifier. It uses the Tesseract Neural Net library +// The Neural Net takes a scaled version of a bitmap and feeds it to a +// Convolutional Neural Net as input and performs a FeedForward. Each output +// of the net corresponds to class_id in the CharSet passed at construction +// time. 
+// Afterwards, the outputs of the Net are "folded" using the folding set +// (if any) +#ifndef CONV_NET_CLASSIFIER_H +#define CONV_NET_CLASSIFIER_H + +#include +#include "char_samp.h" +#include "char_altlist.h" +#include "char_set.h" +#include "feature_base.h" +#include "classifier_base.h" +#include "neural_net.h" +#include "lang_model.h" +#include "tuning_params.h" + +namespace tesseract { + +// Folding Ratio is the ratio of the max-activation of members of a folding +// set that is used to compute the min-activation of the rest of the set +static const float kFoldingRatio = 0.75; + +class ConvNetCharClassifier : public CharClassifier { + public: + ConvNetCharClassifier(CharSet *char_set, TuningParams *params, + FeatureBase *feat_extract); + virtual ~ConvNetCharClassifier(); + // The main training function. Given a sample and a class ID the classifier + // updates its parameters according to its learning algorithm. This function + // is currently not implemented. TODO(ahmadab): implement end-2-end training + virtual bool Train(CharSamp *char_samp, int ClassID); + // A secondary function needed for training. Allows the trainer to set the + // value of any train-time paramter. This function is currently not + // implemented. TODO(ahmadab): implement end-2-end training + virtual bool SetLearnParam(char *var_name, float val); + // Externally sets the Neural Net used by the classifier. 
Used for training + void SetNet(tesseract::NeuralNet *net); + + // Classifies an input charsamp and return a CharAltList object containing + // the possible candidates and corresponding scores + virtual CharAltList * Classify(CharSamp *char_samp); + // Computes the cost of a specific charsamp being a character (versus a + // non-character: part-of-a-character OR more-than-one-character) + virtual int CharCost(CharSamp *char_samp); + + + private: + // Neural Net object used for classification + tesseract::NeuralNet *char_net_; + // data buffers used to hold Neural Net inputs and outputs + float *net_input_; + float *net_output_; + + // Init the classifier provided a data-path and a language string + virtual bool Init(const string &data_file_path, const string &lang, + LangModel *lang_mod); + // Loads the NeuralNets needed for the classifier + bool LoadNets(const string &data_file_path, const string &lang); + // Loads the folding sets provided a data-path and a language string + virtual bool LoadFoldingSets(const string &data_file_path, + const string &lang, + LangModel *lang_mod); + // Folds the output of the NeuralNet using the loaded folding sets + virtual void Fold(); + // Scales the input char_samp and feeds it to the NeuralNet as input + bool RunNets(CharSamp *char_samp); +}; +} +#endif // CONV_NET_CLASSIFIER_H diff --git a/cube/cube_line_object.cpp b/cube/cube_line_object.cpp new file mode 100644 index 0000000000..64b90cadff --- /dev/null +++ b/cube/cube_line_object.cpp @@ -0,0 +1,255 @@ +/********************************************************************** + * File: cube_line_object.cpp + * Description: Implementation of the Cube Line Object Class + * Author: Ahmad Abdulkader + * Created: 2007 + * + * (C) Copyright 2008, Google Inc. + ** Licensed under the Apache License, Version 2.0 (the "License"); + ** you may not use this file except in compliance with the License. 
+ ** You may obtain a copy of the License at + ** http://www.apache.org/licenses/LICENSE-2.0 + ** Unless required by applicable law or agreed to in writing, software + ** distributed under the License is distributed on an "AS IS" BASIS, + ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + ** See the License for the specific language governing permissions and + ** limitations under the License. + * + **********************************************************************/ + +#include +#include "cube_line_object.h" + +namespace tesseract { +CubeLineObject::CubeLineObject(CubeRecoContext *cntxt, Pix *pix) { + line_pix_ = pix; + own_pix_ = false; + processed_ = false; + cntxt_ = cntxt; + phrase_cnt_ = 0; + phrases_ = NULL; +} + +CubeLineObject::~CubeLineObject() { + if (line_pix_ != NULL && own_pix_ == true) { + pixDestroy(&line_pix_); + line_pix_ = NULL; + } + + if (phrases_ != NULL) { + for (int phrase_idx = 0; phrase_idx < phrase_cnt_; phrase_idx++) { + if (phrases_[phrase_idx] != NULL) { + delete phrases_[phrase_idx]; + } + } + + delete []phrases_; + phrases_ = NULL; + } +} + +// Recognize the specified pix as one line returning the recognized +bool CubeLineObject::Process() { + // do nothing if pix had already been processed + if (processed_) { + return true; + } + + // validate data + if (line_pix_ == NULL || cntxt_ == NULL) { + return false; + } + + // create a CharSamp + CharSamp *char_samp = CubeUtils::CharSampleFromPix(line_pix_, 0, 0, + line_pix_->w, + line_pix_->h); + if (char_samp == NULL) { + return false; + } + + // compute connected components. 
+ int con_comp_cnt = 0; + ConComp **con_comps = char_samp->FindConComps(&con_comp_cnt, + cntxt_->Params()->MinConCompSize()); + // no longer need char_samp, delete it + delete char_samp; + // no connected components, bail out + if (con_comp_cnt <= 0 || con_comps == NULL) { + return false; + } + + // sort connected components based on reading order + bool rtl = (cntxt_->ReadingOrder() == tesseract::CubeRecoContext::R2L); + qsort(con_comps, con_comp_cnt, sizeof(*con_comps), rtl ? + ConComp::Right2LeftComparer : ConComp::Left2RightComparer); + + // compute work breaking threshold as a ratio of line height + bool ret_val = false; + int word_break_threshold = ComputeWordBreakThreshold(con_comp_cnt, con_comps, + rtl); + if (word_break_threshold > 0) { + // over-allocate phrases object buffer + phrases_ = new CubeObject *[con_comp_cnt]; + if (phrases_ != NULL) { + // create a phrase if the horizontal distance between two consecutive + // concomps is higher than threshold + int start_con_idx = 0; + int current_phrase_limit = rtl ? 
con_comps[0]->Left() : + con_comps[0]->Right(); + + for (int con_idx = 1; con_idx <= con_comp_cnt; con_idx++) { + bool create_new_phrase = true; + // if not at the end, compute the distance between two consecutive + // concomps + if (con_idx < con_comp_cnt) { + int dist = 0; + if (cntxt_->ReadingOrder() == tesseract::CubeRecoContext::R2L) { + dist = current_phrase_limit - con_comps[con_idx]->Right(); + } else { + dist = con_comps[con_idx]->Left() - current_phrase_limit; + } + create_new_phrase = (dist > word_break_threshold); + } + + // create a new phrase + if (create_new_phrase) { + // create a phrase corresponding to a range on components + bool left_most; + bool right_most; + CharSamp *phrase_char_samp = + CharSamp::FromConComps(con_comps, start_con_idx, + con_idx - start_con_idx, NULL, + &left_most, &right_most, + line_pix_->h); + if (phrase_char_samp == NULL) { + break; + } + phrases_[phrase_cnt_] = new CubeObject(cntxt_, phrase_char_samp); + if (phrases_[phrase_cnt_] == NULL) { + delete phrase_char_samp; + break; + } + // set the ownership of the charsamp to the cube object + phrases_[phrase_cnt_]->SetCharSampOwnership(true); + phrase_cnt_++; + // advance the starting index to the current index + start_con_idx = con_idx; + // set the limit of the newly starting phrase (if any) + if (con_idx < con_comp_cnt) { + current_phrase_limit = rtl ? 
con_comps[con_idx]->Left() : + con_comps[con_idx]->Right(); + } + } else { + // update the limit of the current phrase + if (cntxt_->ReadingOrder() == tesseract::CubeRecoContext::R2L) { + current_phrase_limit = MIN(current_phrase_limit, + con_comps[con_idx]->Left()); + } else { + current_phrase_limit = MAX(current_phrase_limit, + con_comps[con_idx]->Right()); + } + } + } + ret_val = true; + } + } + + // clean-up connected comps + for (int con_idx = 0; con_idx < con_comp_cnt; con_idx++) { + delete con_comps[con_idx]; + } + delete []con_comps; + + // success + processed_ = true; + return ret_val; +} + +// Compute the least word breaking threshold that is required to produce a +// valid set of phrases. Phrases are validated using the Aspect ratio +// constraints specified in the language specific Params object +int CubeLineObject::ComputeWordBreakThreshold(int con_comp_cnt, + ConComp **con_comps, bool rtl) { + // initial estimate of word breaking threshold + int word_break_threshold = + static_cast(line_pix_->h * cntxt_->Params()->MaxSpaceHeightRatio()); + bool valid = false; + + // compute the resulting words and validate each's aspect ratio + do { + // group connected components into words based on breaking threshold + int start_con_idx = 0; + int current_phrase_limit = (rtl ? 
con_comps[0]->Left() : + con_comps[0]->Right()); + int min_x = con_comps[0]->Left(); + int max_x = con_comps[0]->Right(); + int min_y = con_comps[0]->Top(); + int max_y = con_comps[0]->Bottom(); + valid = true; + for (int con_idx = 1; con_idx <= con_comp_cnt; con_idx++) { + bool create_new_phrase = true; + // if not at the end, compute the distance between two consecutive + // concomps + if (con_idx < con_comp_cnt) { + int dist = 0; + if (rtl) { + dist = current_phrase_limit - con_comps[con_idx]->Right(); + } else { + dist = con_comps[con_idx]->Left() - current_phrase_limit; + } + create_new_phrase = (dist > word_break_threshold); + } + + // create a new phrase + if (create_new_phrase) { + // check aspect ratio. Break if invalid + if ((max_x - min_x + 1) > + (cntxt_->Params()->MaxWordAspectRatio() * (max_y - min_y + 1))) { + valid = false; + break; + } + // advance the starting index to the current index + start_con_idx = con_idx; + // set the limit of the newly starting phrase (if any) + if (con_idx < con_comp_cnt) { + current_phrase_limit = rtl ? 
con_comps[con_idx]->Left() : + con_comps[con_idx]->Right(); + // re-init bounding box + min_x = con_comps[con_idx]->Left(); + max_x = con_comps[con_idx]->Right(); + min_y = con_comps[con_idx]->Top(); + max_y = con_comps[con_idx]->Bottom(); + } + } else { + // update the limit of the current phrase + if (rtl) { + current_phrase_limit = MIN(current_phrase_limit, + con_comps[con_idx]->Left()); + } else { + current_phrase_limit = MAX(current_phrase_limit, + con_comps[con_idx]->Right()); + } + // update bounding box + UpdateRange(con_comps[con_idx]->Left(), + con_comps[con_idx]->Right(), &min_x, &max_x); + UpdateRange(con_comps[con_idx]->Top(), + con_comps[con_idx]->Bottom(), &min_y, &max_y); + } + } + + // return the breaking threshold if all broken word dimensions are valid + if (valid) { + return word_break_threshold; + } + + // decrease the threshold and try again + word_break_threshold--; + } while (!valid && word_break_threshold > 0); + + // failed to find a threshold that acheives the target aspect ratio. + // Just use the default threshold + return static_cast(line_pix_->h * + cntxt_->Params()->MaxSpaceHeightRatio()); +} +} diff --git a/cube/cube_line_object.h b/cube/cube_line_object.h new file mode 100644 index 0000000000..037ae6b665 --- /dev/null +++ b/cube/cube_line_object.h @@ -0,0 +1,67 @@ +/********************************************************************** + * File: cube_line_object.h + * Description: Declaration of the Cube Line Object Class + * Author: Ahmad Abdulkader + * Created: 2007 + * + * (C) Copyright 2008, Google Inc. + ** Licensed under the Apache License, Version 2.0 (the "License"); + ** you may not use this file except in compliance with the License. 
+ ** You may obtain a copy of the License at + ** http://www.apache.org/licenses/LICENSE-2.0 + ** Unless required by applicable law or agreed to in writing, software + ** distributed under the License is distributed on an "AS IS" BASIS, + ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + ** See the License for the specific language governing permissions and + ** limitations under the License. + * + **********************************************************************/ + +// The CubeLineObject implements an objects that holds a line of text +// Each line is broken into phrases. Phrases are blocks within the line that +// are unambiguously separate collections of words + +#ifndef CUBE_LINE_OBJECT_H +#define CUBE_LINE_OBJECT_H + +#include "cube_reco_context.h" +#include "cube_object.h" +#include "allheaders.h" + +namespace tesseract { +class CubeLineObject { + public: + CubeLineObject(CubeRecoContext *cntxt, Pix *pix); + ~CubeLineObject(); + + // accessors + inline int PhraseCount() { + if (!processed_ && !Process()) { + return 0; + } + return phrase_cnt_; + } + inline CubeObject **Phrases() { + if (!processed_ && !Process()) { + return NULL; + } + return phrases_; + } + + private: + CubeRecoContext *cntxt_; + bool own_pix_; + bool processed_; + Pix *line_pix_; + CubeObject **phrases_; + int phrase_cnt_; + bool Process(); + // Compute the least word breaking threshold that is required to produce a + // valid set of phrases. 
Phrases are validated using the Aspect ratio + // constraints specified in the language specific Params object + int ComputeWordBreakThreshold(int con_comp_cnt, ConComp **con_comps, + bool rtl); +}; +} + +#endif // CUBE_LINE_OBJECT_H diff --git a/cube/cube_line_segmenter.cpp b/cube/cube_line_segmenter.cpp new file mode 100644 index 0000000000..deee573b8b --- /dev/null +++ b/cube/cube_line_segmenter.cpp @@ -0,0 +1,945 @@ +/********************************************************************** + * File: cube_page_segmenter.cpp + * Description: Implementation of the Cube Page Segmenter Class + * Author: Ahmad Abdulkader + * Created: 2007 + * + * (C) Copyright 2008, Google Inc. + ** Licensed under the Apache License, Version 2.0 (the "License"); + ** you may not use this file except in compliance with the License. + ** You may obtain a copy of the License at + ** http://www.apache.org/licenses/LICENSE-2.0 + ** Unless required by applicable law or agreed to in writing, software + ** distributed under the License is distributed on an "AS IS" BASIS, + ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + ** See the License for the specific language governing permissions and + ** limitations under the License. 
+ * + **********************************************************************/ + +#include "cube_line_segmenter.h" +#include "ndminx.h" + +namespace tesseract { +// constants that worked for Arabic page segmenter +const int CubeLineSegmenter::kLineSepMorphMinHgt = 20; +const int CubeLineSegmenter::kHgtBins = 20; +const double CubeLineSegmenter::kMaxValidLineRatio = 3.2; +const int CubeLineSegmenter::kMaxConnCompHgt = 150; +const int CubeLineSegmenter::kMaxConnCompWid = 500; +const int CubeLineSegmenter::kMaxHorzAspectRatio = 50; +const int CubeLineSegmenter::kMaxVertAspectRatio = 20; +const int CubeLineSegmenter::kMinWid = 2; +const int CubeLineSegmenter::kMinHgt = 2; +const float CubeLineSegmenter::kMinValidLineHgtRatio = 2.5; + +CubeLineSegmenter::CubeLineSegmenter(CubeRecoContext *cntxt, Pix *img) { + cntxt_ = cntxt; + orig_img_ = img; + img_ = NULL; + lines_pixa_ = NULL; + init_ = false; + line_cnt_ = 0; + columns_ = NULL; + con_comps_ = NULL; + est_alef_hgt_ = 0.0; + est_dot_hgt_ = 0.0; +} + +CubeLineSegmenter::~CubeLineSegmenter() { + if (img_ != NULL) { + pixDestroy(&img_); + img_ = NULL; + } + + if (lines_pixa_ != NULL) { + pixaDestroy(&lines_pixa_); + lines_pixa_ = NULL; + } + + if (con_comps_ != NULL) { + pixaDestroy(&con_comps_); + con_comps_ = NULL; + } + + if (columns_ != NULL) { + pixaaDestroy(&columns_); + columns_ = NULL; + } +} + +// compute validity ratio for a line +double CubeLineSegmenter::ValidityRatio(Pix *line_mask_pix, Box *line_box) { + return line_box->h / est_alef_hgt_; +} + +// validate line +bool CubeLineSegmenter::ValidLine(Pix *line_mask_pix, Box *line_box) { + double validity_ratio = ValidityRatio(line_mask_pix, line_box); + + return validity_ratio < kMaxValidLineRatio; +} + +// perform a vertical Closing with the specified threshold +// returning the resulting conn comps as a pixa +Pixa *CubeLineSegmenter::VerticalClosing(Pix *pix, + int threshold, Boxa **boxa) { + char sequence_str[16]; + + // do the morphology + 
sprintf(sequence_str, "c100.%d", threshold); + Pix *morphed_pix = pixMorphCompSequence(pix, sequence_str, 0); + if (morphed_pix == NULL) { + return NULL; + } + + // get the resulting lines by computing concomps + Pixa *pixac; + (*boxa) = pixConnComp(morphed_pix, &pixac, 8); + + pixDestroy(&morphed_pix); + + if ((*boxa) == NULL) { + return NULL; + } + + return pixac; +} + +// do a desperate attempt at cracking lines +Pixa *CubeLineSegmenter::CrackLine(Pix *cracked_line_pix, + Box *cracked_line_box, int line_cnt) { + // create lines pixa array + Pixa **lines_pixa = new Pixa*[line_cnt]; + if (lines_pixa == NULL) { + return NULL; + } + + memset(lines_pixa, 0, line_cnt * sizeof(*lines_pixa)); + + // compute line conn comps + Pixa *line_con_comps_pix; + Boxa *line_con_comps = ComputeLineConComps(cracked_line_pix, + cracked_line_box, &line_con_comps_pix); + + if (line_con_comps == NULL) { + delete []lines_pixa; + return false; + } + + // assign each conn comp to the a line based on its centroid + for (int con = 0; con < line_con_comps->n; con++) { + Box *con_box = line_con_comps->box[con]; + Pix *con_pix = line_con_comps_pix->pix[con]; + int mid_y = (con_box->y - cracked_line_box->y) + (con_box->h / 2), + line_idx = MIN(line_cnt - 1, + (mid_y * line_cnt / cracked_line_box->h)); + + // create the line if it has not been created? 
+ if (lines_pixa[line_idx] == NULL) { + lines_pixa[line_idx] = pixaCreate(line_con_comps->n); + if (lines_pixa[line_idx] == NULL) { + delete []lines_pixa; + boxaDestroy(&line_con_comps); + pixaDestroy(&line_con_comps_pix); + return false; + } + } + + // add the concomp to the line + if (pixaAddPix(lines_pixa[line_idx], con_pix, L_CLONE) != 0 || + pixaAddBox(lines_pixa[line_idx], con_box, L_CLONE)) { + delete []lines_pixa; + boxaDestroy(&line_con_comps); + pixaDestroy(&line_con_comps_pix); + } + } + + // create the lines pixa + Pixa *lines = pixaCreate(line_cnt); + bool success = true; + + // create and check the validity of the lines + for (int line = 0; line < line_cnt; line++) { + Pixa *line_pixa = lines_pixa[line]; + + // skip invalid lines + if (line_pixa == NULL) { + continue; + } + + // merge the pix, check the validity of the line + // and add it to the lines pixa + Box *line_box; + Pix *line_pix = Pixa2Pix(line_pixa, &line_box); + if (line_pix == NULL || + line_box == NULL || + ValidLine(line_pix, line_box) == false || + pixaAddPix(lines, line_pix, L_INSERT) != 0 || + pixaAddBox(lines, line_box, L_INSERT) != 0) { + if (line_pix != NULL) { + pixDestroy(&line_pix); + } + + if (line_box != NULL) { + boxDestroy(&line_box); + } + + success = false; + + break; + } + } + + // cleanup + for (int line = 0; line < line_cnt; line++) { + if (lines_pixa[line] != NULL) { + pixaDestroy(&lines_pixa[line]); + } + } + + delete []lines_pixa; + boxaDestroy(&line_con_comps); + pixaDestroy(&line_con_comps_pix); + + if (success == false) { + pixaDestroy(&lines); + lines = NULL; + } + + return lines; +} + +// do a desperate attempt at cracking lines +Pixa *CubeLineSegmenter::CrackLine(Pix *cracked_line_pix, + Box *cracked_line_box) { + // estimate max line count + int max_line_cnt = static_cast((cracked_line_box->h / + est_alef_hgt_) + 0.5); + if (max_line_cnt < 2) { + return NULL; + } + + for (int line_cnt = 2; line_cnt < max_line_cnt; line_cnt++) { + Pixa *lines = 
CrackLine(cracked_line_pix, cracked_line_box, line_cnt); + if (lines != NULL) { + return lines; + } + } + + return NULL; +} + +// split a line continously until valid or fail +Pixa *CubeLineSegmenter::SplitLine(Pix *line_mask_pix, Box *line_box) { + // clone the line mask + Pix *line_pix = pixClone(line_mask_pix); + + if (line_pix == NULL) { + return NULL; + } + + // AND with the image to get the actual line + pixRasterop(line_pix, 0, 0, line_pix->w, line_pix->h, + PIX_SRC & PIX_DST, img_, line_box->x, line_box->y); + + // continue to do rasterop morphology on the line until + // it splits to valid lines or we fail + int morph_hgt = kLineSepMorphMinHgt - 1, + best_threshold = kLineSepMorphMinHgt - 1, + max_valid_portion = 0; + + Boxa *boxa; + Pixa *pixac; + + do { + pixac = VerticalClosing(line_pix, morph_hgt, &boxa); + + // add the box offset to all the lines + // and check for the validity of each + int line, + valid_line_cnt = 0, + valid_portion = 0; + + for (line = 0; line < pixac->n; line++) { + boxa->box[line]->x += line_box->x; + boxa->box[line]->y += line_box->y; + + if (ValidLine(pixac->pix[line], boxa->box[line]) == true) { + // count valid lines + valid_line_cnt++; + + // and the valid portions + valid_portion += boxa->box[line]->h; + } + } + + // all the lines are valid + if (valid_line_cnt == pixac->n) { + boxaDestroy(&boxa); + pixDestroy(&line_pix); + return pixac; + } + + // a larger valid portion + if (valid_portion > max_valid_portion) { + max_valid_portion = valid_portion; + best_threshold = morph_hgt; + } + + boxaDestroy(&boxa); + pixaDestroy(&pixac); + + morph_hgt--; + } + while (morph_hgt > 0); + + // failed to break into valid lines + // attempt to crack the line + pixac = CrackLine(line_pix, line_box); + if (pixac != NULL) { + pixDestroy(&line_pix); + return pixac; + } + + // try to leverage any of the lines + // did the best threshold yield a non zero valid portion + if (max_valid_portion > 0) { + // use this threshold to break lines + pixac 
= VerticalClosing(line_pix, best_threshold, &boxa); + + // add the box offset to all the lines + // and check for the validity of each + for (int line = 0; line < pixac->n; line++) { + boxa->box[line]->x += line_box->x; + boxa->box[line]->y += line_box->y; + + // remove invalid lines from the pixa + if (ValidLine(pixac->pix[line], boxa->box[line]) == false) { + pixaRemovePix(pixac, line); + line--; + } + } + + boxaDestroy(&boxa); + pixDestroy(&line_pix); + return pixac; + } + + // last resort: attempt to crack the line + pixDestroy(&line_pix); + + return NULL; +} + +// Checks of a line is too small +bool CubeLineSegmenter::SmallLine(Box *line_box) { + return line_box->h <= (kMinValidLineHgtRatio * est_dot_hgt_); +} + +// Compute the connected components in a line +Boxa * CubeLineSegmenter::ComputeLineConComps(Pix *line_mask_pix, + Box *line_box, + Pixa **con_comps_pixa) { + // clone the line mask + Pix *line_pix = pixClone(line_mask_pix); + + if (line_pix == NULL) { + return NULL; + } + + // AND with the image to get the actual line + pixRasterop(line_pix, 0, 0, line_pix->w, line_pix->h, + PIX_SRC & PIX_DST, img_, line_box->x, line_box->y); + + // compute the connected components of the line to be merged + Boxa *line_con_comps = pixConnComp(line_pix, con_comps_pixa, 8); + + pixDestroy(&line_pix); + + // offset boxes by the bbox of the line + for (int con = 0; con < line_con_comps->n; con++) { + line_con_comps->box[con]->x += line_box->x; + line_con_comps->box[con]->y += line_box->y; + } + + return line_con_comps; +} + +// create a union of two arbitrary pix +Pix *CubeLineSegmenter::PixUnion(Pix *dest_pix, Box *dest_box, + Pix *src_pix, Box *src_box) { + // compute dimensions of union rect + BOX *union_box = boxBoundingRegion(src_box, dest_box); + + // create the union pix + Pix *union_pix = pixCreate(union_box->w, union_box->h, src_pix->d); + if (union_pix == NULL) { + return NULL; + } + + // blt the src and dest pix + pixRasterop(union_pix, + src_box->x - 
union_box->x, src_box->y - union_box->y, + src_box->w, src_box->h, PIX_SRC | PIX_DST, src_pix, 0, 0); + + pixRasterop(union_pix, + dest_box->x - union_box->x, dest_box->y - union_box->y, + dest_box->w, dest_box->h, PIX_SRC | PIX_DST, dest_pix, 0, 0); + + // replace the dest_box + *dest_box = *union_box; + + boxDestroy(&union_box); + + return union_pix; +} + +// create a union of a number of arbitrary pix +Pix *CubeLineSegmenter::Pixa2Pix(Pixa *pixa, Box **dest_box, + int start_pix, int pix_cnt) { + // compute union_box + int min_x = INT_MAX, + max_x = INT_MIN, + min_y = INT_MAX, + max_y = INT_MIN; + + for (int pix_idx = start_pix; pix_idx < (start_pix + pix_cnt); pix_idx++) { + Box *pix_box = pixa->boxa->box[pix_idx]; + + UpdateRange(pix_box->x, pix_box->x + pix_box->w, &min_x, &max_x); + UpdateRange(pix_box->y, pix_box->y + pix_box->h, &min_y, &max_y); + } + + (*dest_box) = boxCreate(min_x, min_y, max_x - min_x, max_y - min_y); + if ((*dest_box) == NULL) { + return false; + } + + // create the union pix + Pix *union_pix = pixCreate((*dest_box)->w, (*dest_box)->h, img_->d); + if (union_pix == NULL) { + boxDestroy(dest_box); + return false; + } + + // create a pix corresponding to the union of all pixs + // blt the src and dest pix + for (int pix_idx = start_pix; pix_idx < (start_pix + pix_cnt); pix_idx++) { + Box *pix_box = pixa->boxa->box[pix_idx]; + Pix *con_pix = pixa->pix[pix_idx]; + + pixRasterop(union_pix, + pix_box->x - (*dest_box)->x, pix_box->y - (*dest_box)->y, + pix_box->w, pix_box->h, PIX_SRC | PIX_DST, con_pix, 0, 0); + } + + return union_pix; +} + +// create a union of a number of arbitrary pix +Pix *CubeLineSegmenter::Pixa2Pix(Pixa *pixa, Box **dest_box) { + return Pixa2Pix(pixa, dest_box, 0, pixa->n); +} + +// merges a number of lines into one line given a bounding box and a mask +bool CubeLineSegmenter::MergeLine(Pix *line_mask_pix, Box *line_box, + Pixa *lines, Boxaa *lines_con_comps) { + // compute the connected components of the lines to be 
merged + Pixa *small_con_comps_pix; + Boxa *small_line_con_comps = ComputeLineConComps(line_mask_pix, + line_box, &small_con_comps_pix); + + if (small_line_con_comps == NULL) { + return false; + } + + // for each connected component + for (int con = 0; con < small_line_con_comps->n; con++) { + Box *small_con_comp_box = small_line_con_comps->box[con]; + int best_line = -1, + best_dist = INT_MAX, + small_box_right = small_con_comp_box->x + small_con_comp_box->w, + small_box_bottom = small_con_comp_box->y + small_con_comp_box->h; + + // for each valid line + for (int line = 0; line < lines->n; line++) { + if (SmallLine(lines->boxa->box[line]) == true) { + continue; + } + + // for all the connected components in the line + Boxa *line_con_comps = lines_con_comps->boxa[line]; + + for (int lcon = 0; lcon < line_con_comps->n; lcon++) { + Box *con_comp_box = line_con_comps->box[lcon]; + int xdist, + ydist, + box_right = con_comp_box->x + con_comp_box->w, + box_bottom = con_comp_box->y + con_comp_box->h; + + xdist = MAX(small_con_comp_box->x, con_comp_box->x) - + MIN(small_box_right, box_right); + + ydist = MAX(small_con_comp_box->y, con_comp_box->y) - + MIN(small_box_bottom, box_bottom); + + // if there is an overlap in x-direction + if (xdist <= 0) { + if (best_line == -1 || ydist < best_dist) { + best_dist = ydist; + best_line = line; + } + } + } + } + + // if the distance is too big, do not merged + if (best_line != -1 && best_dist < est_alef_hgt_) { + // add the pix to the best line + Pix *new_line = PixUnion(lines->pix[best_line], + lines->boxa->box[best_line], + small_con_comps_pix->pix[con], small_con_comp_box); + + if (new_line == NULL) { + return false; + } + + pixDestroy(&lines->pix[best_line]); + lines->pix[best_line] = new_line; + } + } + + pixaDestroy(&small_con_comps_pix); + boxaDestroy(&small_line_con_comps); + + return true; +} + +// Creates new set of lines from the computed columns +bool CubeLineSegmenter::AddLines(Pixa *lines) { + // create an array that 
will hold the bounding boxes + // of the concomps belonging to each line + Boxaa *lines_con_comps = boxaaCreate(lines->n); + if (lines_con_comps == NULL) { + return false; + } + + for (int line = 0; line < lines->n; line++) { + // if the line is not valid + if (ValidLine(lines->pix[line], lines->boxa->box[line]) == false) { + // split it + Pixa *split_lines = SplitLine(lines->pix[line], + lines->boxa->box[line]); + + // remove the old line + if (pixaRemovePix(lines, line) != 0) { + return false; + } + + line--; + + if (split_lines == NULL) { + continue; + } + + // add the split lines instead and move the pointer + for (int s_line = 0; s_line < split_lines->n; s_line++) { + Pix *sp_line = pixaGetPix(split_lines, s_line, L_CLONE); + Box *sp_box = boxaGetBox(split_lines->boxa, s_line, L_CLONE); + + if (sp_line == NULL || sp_box == NULL) { + return false; + } + + // insert the new line + if (pixaInsertPix(lines, ++line, sp_line, sp_box) != 0) { + return false; + } + } + + // remove the split lines + pixaDestroy(&split_lines); + } + } + + // compute the concomps bboxes of each line + for (int line = 0; line < lines->n; line++) { + Boxa *line_con_comps = ComputeLineConComps(lines->pix[line], + lines->boxa->box[line], NULL); + + if (line_con_comps == NULL) { + return false; + } + + // insert it into the boxaa array + if (boxaaAddBoxa(lines_con_comps, line_con_comps, L_INSERT) != 0) { + return false; + } + } + + // post process the lines: + // merge the contents of "small" lines info legitimate lines + for (int line = 0; line < lines->n; line++) { + // a small line detected + if (SmallLine(lines->boxa->box[line]) == true) { + // merge its components to one of the valid lines + if (MergeLine(lines->pix[line], lines->boxa->box[line], + lines, lines_con_comps) == true) { + // remove the small line + if (pixaRemovePix(lines, line) != 0) { + return false; + } + + if (boxaaRemoveBoxa(lines_con_comps, line) != 0) { + return false; + } + + line--; + } + } + } + + 
boxaaDestroy(&lines_con_comps); + + // add the pix masks + if (pixaaAddPixa(columns_, lines, L_INSERT) != 0) { + return false; + } + + return true; +} + +// Index the specific pixa using RTL reading order +int *CubeLineSegmenter::IndexRTL(Pixa *pixa) { + int *pix_index = new int[pixa->n]; + if (pix_index == NULL) { + return NULL; + } + + for (int pix = 0; pix < pixa->n; pix++) { + pix_index[pix] = pix; + } + + for (int ipix = 0; ipix < pixa->n; ipix++) { + for (int jpix = ipix + 1; jpix < pixa->n; jpix++) { + Box *ipix_box = pixa->boxa->box[pix_index[ipix]], + *jpix_box = pixa->boxa->box[pix_index[jpix]]; + + // swap? + if ((ipix_box->x + ipix_box->w) < (jpix_box->x + jpix_box->w)) { + int temp = pix_index[ipix]; + pix_index[ipix] = pix_index[jpix]; + pix_index[jpix] = temp; + } + } + } + + return pix_index; +} + +// Performs line segmentation +bool CubeLineSegmenter::LineSegment() { + // Use full image morphology to find columns + // This only works for simple layouts where each column + // of text extends the full height of the input image. + Pix *pix_temp1 = pixMorphCompSequence(img_, "c5.500", 0); + if (pix_temp1 == NULL) { + return false; + } + + // Mask with a single component over each column + Pixa *pixam; + Boxa *boxa = pixConnComp(pix_temp1, &pixam, 8); + + if (boxa == NULL) { + return false; + } + + int init_morph_min_hgt = kLineSepMorphMinHgt; + char sequence_str[16]; + sprintf(sequence_str, "c100.%d", init_morph_min_hgt); + + // Use selective region-based morphology to get the textline mask. 
+ Pixa *pixad = pixaMorphSequenceByRegion(img_, pixam, sequence_str, 0, 0); + if (pixad == NULL) { + return false; + } + + // for all columns + int col_cnt = boxaGetCount(boxa); + + // create columns + columns_ = pixaaCreate(col_cnt); + if (columns_ == NULL) { + return false; + } + + // index columns based on readind order (RTL) + int *col_order = IndexRTL(pixad); + if (col_order == NULL) { + return false; + } + + line_cnt_ = 0; + + for (int col_idx = 0; col_idx < col_cnt; col_idx++) { + int col = col_order[col_idx]; + + // get the pix and box corresponding to the column + Pix *pixt3 = pixaGetPix(pixad, col, L_CLONE); + if (pixt3 == NULL) { + return false; + } + + Box *col_box = pixad->boxa->box[col]; + + Pixa *pixac; + Boxa *boxa2 = pixConnComp(pixt3, &pixac, 8); + if (boxa2 == NULL) { + return false; + } + + // offset the boxes by the column box + for (int line = 0; line < pixac->n; line++) { + pixac->boxa->box[line]->x += col_box->x; + pixac->boxa->box[line]->y += col_box->y; + } + + // add the lines + if (AddLines(pixac) == true) { + if (pixaaAddBox(columns_, col_box, L_CLONE) != 0) { + return false; + } + } + + pixDestroy(&pixt3); + boxaDestroy(&boxa2); + + line_cnt_ += columns_->pixa[col_idx]->n; + } + + pixaDestroy(&pixam); + pixaDestroy(&pixad); + boxaDestroy(&boxa); + + delete []col_order; + pixDestroy(&pix_temp1); + + return true; +} + +// Estimate the paramters of the font(s) used in the page +bool CubeLineSegmenter::EstimateFontParams() { + int hgt_hist[kHgtBins]; + int max_hgt; + double mean_hgt; + + // init hgt histogram of concomps + memset(hgt_hist, 0, sizeof(hgt_hist)); + + // compute max hgt + max_hgt = 0; + + for (int con = 0; con < con_comps_->n; con++) { + // skip conn comps that are too long or too wide + if (con_comps_->boxa->box[con]->h > kMaxConnCompHgt || + con_comps_->boxa->box[con]->w > kMaxConnCompWid) { + continue; + } + + max_hgt = MAX(max_hgt, con_comps_->boxa->box[con]->h); + } + + if (max_hgt <= 0) { + return false; + } + + // init 
hgt histogram of concomps + memset(hgt_hist, 0, sizeof(hgt_hist)); + + // compute histogram + mean_hgt = 0.0; + for (int con = 0; con < con_comps_->n; con++) { + // skip conn comps that are too long or too wide + if (con_comps_->boxa->box[con]->h > kMaxConnCompHgt || + con_comps_->boxa->box[con]->w > kMaxConnCompWid) { + continue; + } + + int bin = static_cast(kHgtBins * con_comps_->boxa->box[con]->h / + max_hgt); + bin = MIN(bin, kHgtBins - 1); + hgt_hist[bin]++; + mean_hgt += con_comps_->boxa->box[con]->h; + } + + mean_hgt /= con_comps_->n; + + // find the top 2 bins + int idx[kHgtBins]; + + for (int bin = 0; bin < kHgtBins; bin++) { + idx[bin] = bin; + } + + for (int ibin = 0; ibin < 2; ibin++) { + for (int jbin = ibin + 1; jbin < kHgtBins; jbin++) { + if (hgt_hist[idx[ibin]] < hgt_hist[idx[jbin]]) { + int swap = idx[ibin]; + idx[ibin] = idx[jbin]; + idx[jbin] = swap; + } + } + } + + // emperically, we found out that the 2 highest freq bins correspond + // respectively to the dot and alef + est_dot_hgt_ = (1.0 * (idx[0] + 1) * max_hgt / kHgtBins); + est_alef_hgt_ = (1.0 * (idx[1] + 1) * max_hgt / kHgtBins); + + // as a sanity check the dot hgt must be significanly lower than alef + if (est_alef_hgt_ < (est_dot_hgt_ * 2)) { + // use max_hgt to estimate instead + est_alef_hgt_ = mean_hgt * 1.5; + est_dot_hgt_ = est_alef_hgt_ / 5.0; + } + + est_alef_hgt_ = MAX(est_alef_hgt_, est_dot_hgt_ * 4.0); + + return true; +} + +// clean up the image +Pix *CubeLineSegmenter::CleanUp(Pix *orig_img) { + // get rid of long horizontal lines + Pix *pix_temp0 = pixMorphCompSequence(orig_img, "o300.2", 0); + pixXor(pix_temp0, pix_temp0, orig_img); + + // get rid of long vertical lines + Pix *pix_temp1 = pixMorphCompSequence(pix_temp0, "o2.300", 0); + pixXor(pix_temp1, pix_temp1, pix_temp0); + + pixDestroy(&pix_temp0); + + // detect connected components + Pixa *con_comps; + Boxa *boxa = pixConnComp(pix_temp1, &con_comps, 8); + if (boxa == NULL) { + return NULL; + } + + // detect and 
remove suspicious conn comps + for (int con = 0; con < con_comps->n; con++) { + Box *box = boxa->box[con]; + + // remove if suspc. conn comp + if ((box->w > (box->h * kMaxHorzAspectRatio)) || + (box->h > (box->w * kMaxVertAspectRatio)) || + (box->w < kMinWid && box->h < kMinHgt)) { + pixRasterop(pix_temp1, box->x, box->y, box->w, box->h, + PIX_SRC ^ PIX_DST, con_comps->pix[con], 0, 0); + } + } + + pixaDestroy(&con_comps); + boxaDestroy(&boxa); + + return pix_temp1; +} + +// Init the page segmenter +bool CubeLineSegmenter::Init() { + if (init_ == true) { + return true; + } + + if (orig_img_ == NULL) { + return false; + } + + // call the internal line segmentation + return FindLines(); +} + +// return the pix mask and box of a specific line +Pix *CubeLineSegmenter::Line(int line, Box **line_box) { + if (init_ == false && Init() == false) { + return NULL; + } + + if (line < 0 || line >= line_cnt_) { + return NULL; + } + + (*line_box) = lines_pixa_->boxa->box[line]; + return lines_pixa_->pix[line]; +} + +// Implements a basic rudimentary layout analysis based on Leptonica +// works OK for Arabic. For other languages, the function TesseractPageAnalysis +// should be called instead. 
+bool CubeLineSegmenter::FindLines() { + // convert the image to gray scale if necessary + Pix *gray_scale_img = NULL; + if (orig_img_->d != 2 && orig_img_->d != 8) { + gray_scale_img = pixConvertTo8(orig_img_, false); + if (gray_scale_img == NULL) { + return false; + } + } else { + gray_scale_img = orig_img_; + } + + // threshold image + Pix *thresholded_img; + thresholded_img = pixThresholdToBinary(gray_scale_img, 128); + // free the gray scale image if necessary + if (gray_scale_img != orig_img_) { + pixDestroy(&gray_scale_img); + } + // bail-out if thresholding failed + if (thresholded_img == NULL) { + return false; + } + + // deskew + Pix *deskew_img = pixDeskew(thresholded_img, 2); + if (deskew_img == NULL) { + return false; + } + + pixDestroy(&thresholded_img); + + img_ = CleanUp(deskew_img); + pixDestroy(&deskew_img); + if (img_ == NULL) { + return false; + } + + pixDestroy(&deskew_img); + + // compute connected components + Boxa *boxa = pixConnComp(img_, &con_comps_, 8); + if (boxa == NULL) { + return false; + } + + boxaDestroy(&boxa); + + // estimate dot and alef hgts + if (EstimateFontParams() == false) { + return false; + } + + // perform line segmentation + if (LineSegment() == false) { + return false; + } + + // success + init_ = true; + return true; +} + +} diff --git a/cube/cube_line_segmenter.h b/cube/cube_line_segmenter.h new file mode 100644 index 0000000000..71facdf54b --- /dev/null +++ b/cube/cube_line_segmenter.h @@ -0,0 +1,156 @@ +/********************************************************************** + * File: cube_page_segmenter.h + * Description: Declaration of the Cube Page Segmenter Class + * Author: Ahmad Abdulkader + * Created: 2007 + * + * (C) Copyright 2008, Google Inc. + ** Licensed under the Apache License, Version 2.0 (the "License"); + ** you may not use this file except in compliance with the License. 
+ ** You may obtain a copy of the License at + ** http://www.apache.org/licenses/LICENSE-2.0 + ** Unless required by applicable law or agreed to in writing, software + ** distributed under the License is distributed on an "AS IS" BASIS, + ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + ** See the License for the specific language governing permissions and + ** limitations under the License. + * + **********************************************************************/ + +// TODO(ahmadab) +// This is really a makeshift line segmenter that works well for Arabic +// This should eventually be replaced by Ray Smith's Page segmenter +// There are lots of magic numbers below that were determined empirically +// but not thoroughly tested + +#ifndef CUBE_LINE_SEGMENTER_H +#define CUBE_LINE_SEGMENTER_H + +#include "cube_reco_context.h" +#include "allheaders.h" + +namespace tesseract { + +class CubeLineSegmenter { + public: + CubeLineSegmenter(CubeRecoContext *cntxt, Pix *img); + ~CubeLineSegmenter(); + + // Accessor functions + Pix *PostProcessedImage() { + if (init_ == false && Init() == false) { + return NULL; + } + return img_; + } + int ColumnCnt() { + if (init_ == false && Init() == false) { + return NULL; + } + return columns_->n; + } + Box *Column(int col) { + if (init_ == false && Init() == false) { + return NULL; + } + + return columns_->boxa->box[col]; + } + int LineCnt() { + if (init_ == false && Init() == false) { + return NULL; + } + + return line_cnt_; + } + Pixa *ConComps() { + if (init_ == false && Init() == false) { + return NULL; + } + + return con_comps_; + } + Pixaa *Columns() { + if (init_ == false && Init() == false) { + return NULL; + } + + return columns_; + } + inline double AlefHgtEst() { return est_alef_hgt_; } + inline double DotHgtEst() { return est_dot_hgt_; } + Pix *Line(int line, Box **line_box); + + private: + static const float kMinValidLineHgtRatio; + static const int kLineSepMorphMinHgt; + static const int 
kHgtBins; + static const int kMaxConnCompHgt; + static const int kMaxConnCompWid; + static const int kMaxHorzAspectRatio; + static const int kMaxVertAspectRatio; + static const int kMinWid; + static const int kMinHgt; + static const double kMaxValidLineRatio; + + // Cube Reco context + CubeRecoContext *cntxt_; + // Original image + Pix *orig_img_; + // Post processed image + Pix *img_; + // Init flag + bool init_; + // Output Line and column info + int line_cnt_; + Pixaa *columns_; + Pixa *con_comps_; + Pixa *lines_pixa_; + // Estimates for sizes of ALEF and DOT needed for Arabic analysis + double est_alef_hgt_; + double est_dot_hgt_; + + // Init the page analysis + bool Init(); + // Performs line segmentation + bool LineSegment(); + // Cleanup function + Pix *CleanUp(Pix *pix); + // compute validity ratio for a line + double ValidityRatio(Pix *line_mask_pix, Box *line_box); + // validate line + bool ValidLine(Pix *line_mask_pix, Box *line_box); + // split a line continuously until valid or fail + Pixa *SplitLine(Pix *line_mask_pix, Box *line_box); + // do a desperate attempt at cracking lines + Pixa *CrackLine(Pix *line_mask_pix, Box *line_box); + Pixa *CrackLine(Pix *line_mask_pix, Box *line_box, int line_cnt); + // Checks of a line is too small + bool SmallLine(Box *line_box); + // Compute the connected components in a line + Boxa * ComputeLineConComps(Pix *line_mask_pix, Box *line_box, + Pixa **con_comps_pixa); + // create a union of two arbitrary pix + Pix *PixUnion(Pix *dest_pix, Box *dest_box, Pix *src_pix, Box *src_box); + // create a union of a pixa subset + Pix *Pixa2Pix(Pixa *pixa, Box **dest_box, int start_pix, int pix_cnt); + // create a union of a pixa + Pix *Pixa2Pix(Pixa *pixa, Box **dest_box); + // merges a number of lines into one line given a bounding box and a mask + bool MergeLine(Pix *line_mask_pix, Box *line_box, + Pixa *lines, Boxaa *lines_con_comps); + // Creates new set of lines from the computed columns + bool AddLines(Pixa *lines); + // 
Estimate the parameters of the font(s) used in the page + bool EstimateFontParams(); + // perform a vertical Closing with the specified threshold + // returning the resulting conn comps as a pixa + Pixa *VerticalClosing(Pix *pix, int thresold, Boxa **boxa); + // Index the specific pixa using RTL reading order + int *IndexRTL(Pixa *pixa); + // Implements a rudimentary page & line segmenter + bool FindLines(); +}; +} + +#endif // CUBE_LINE_SEGMENTER_H diff --git a/cube/cube_object.cpp b/cube/cube_object.cpp new file mode 100644 index 0000000000..48bce64a2c --- /dev/null +++ b/cube/cube_object.cpp @@ -0,0 +1,287 @@ +/********************************************************************** + * File: cube_object.cpp + * Description: Implementation of the Cube Object Class + * Author: Ahmad Abdulkader + * Created: 2007 + * + * (C) Copyright 2008, Google Inc. + ** Licensed under the Apache License, Version 2.0 (the "License"); + ** you may not use this file except in compliance with the License. + ** You may obtain a copy of the License at + ** http://www.apache.org/licenses/LICENSE-2.0 + ** Unless required by applicable law or agreed to in writing, software + ** distributed under the License is distributed on an "AS IS" BASIS, + ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + ** See the License for the specific language governing permissions and + ** limitations under the License. 
#include &lt;math.h&gt; +#include "cube_object.h"
using the specified language mode. If none +// is specified, the default language model in the CubeRecoContext is used. +// Returns the sorted list of alternate answers +// The Word mode determines whether recognition is done as a word or a phrase +WordAltList *CubeObject::Recognize(LangModel *lang_mod, bool word_mode) { + if (char_samp_ == NULL) { + return NULL; + } + + // clear alt lists + Cleanup(); + + // no specified language model, use the one in the reco context + if (lang_mod == NULL) { + lang_mod = cntxt_->LangMod(); + } + + // normalize if necessary + if (cntxt_->SizeNormalization()) { + Normalize(); + } + + // assume not de-slanted by default + deslanted_ = false; + + // create a beam search object + if (beam_obj_ == NULL) { + beam_obj_ = new BeamSearch(cntxt_, word_mode); + if (beam_obj_ == NULL) { + fprintf(stderr, "Cube ERROR (CubeObject::Recognize): could not construct " + "BeamSearch\n"); + return NULL; + } + } + + // create a cube search object + if (srch_obj_ == NULL) { + srch_obj_ = new CubeSearchObject(cntxt_, char_samp_); + if (srch_obj_ == NULL) { + fprintf(stderr, "Cube ERROR (CubeObject::Recognize): could not construct " + "CubeSearchObject\n"); + return NULL; + } + } + + // run a beam search against the tesslang model + alt_list_ = beam_obj_->Search(srch_obj_, lang_mod); + + // deslant (if supported by language) and re-reco if probability is low enough + if (cntxt_->HasItalics() == true && + (alt_list_ == NULL || alt_list_->AltCount() < 1 || + alt_list_->AltCost(0) > CubeUtils::Prob2Cost(kMinProbSkipDeslanted))) { + + if (deslanted_beam_obj_ == NULL) { + deslanted_beam_obj_ = new BeamSearch(cntxt_); + if (deslanted_beam_obj_ == NULL) { + fprintf(stderr, "Cube ERROR (CubeObject::Recognize): could not " + "construct deslanted BeamSearch\n"); + return false; + } + } + + if (deslanted_srch_obj_ == NULL) { + deslanted_char_samp_ = char_samp_->Clone(); + if (deslanted_char_samp_ == NULL) { + fprintf(stderr, "Cube ERROR (CubeObject::Recognize): 
could not " + "construct deslanted CharSamp\n"); + return NULL; + } + + if (deslanted_char_samp_->Deslant() == false) { + return NULL; + } + + deslanted_srch_obj_ = new CubeSearchObject(cntxt_, deslanted_char_samp_); + if (deslanted_srch_obj_ == NULL) { + fprintf(stderr, "Cube ERROR (CubeObject::Recognize): could not " + "construct deslanted CubeSearchObject\n"); + return NULL; + } + } + + // run a beam search against the tesslang model + deslanted_alt_list_ = deslanted_beam_obj_->Search(deslanted_srch_obj_, + lang_mod); + // should we use de-slanted altlist? + if (deslanted_alt_list_ != NULL && deslanted_alt_list_->AltCount() > 0) { + if (alt_list_ == NULL || alt_list_->AltCount() < 1 || + deslanted_alt_list_->AltCost(0) < alt_list_->AltCost(0)) { + deslanted_ = true; + return deslanted_alt_list_; + } + } + } + + return alt_list_; +} + +// Recognize the member char sample as a word +WordAltList *CubeObject::RecognizeWord(LangModel *lang_mod) { + return Recognize(lang_mod, true); +} + +// Recognize the member char sample as a word +WordAltList *CubeObject::RecognizePhrase(LangModel *lang_mod) { + return Recognize(lang_mod, false); +} + +// Computes the cost of a specific string. 
This is done by performing +// recognition of a language model that allows only the specified word +int CubeObject::WordCost(const char *str) { + WordListLangModel *lang_mod = new WordListLangModel(cntxt_); + if (lang_mod == NULL) { + return WORST_COST; + } + + if (lang_mod->AddString(str) == false) { + delete lang_mod; + return WORST_COST; + } + + // run a beam search against the single string wordlist model + WordAltList *alt_list = RecognizeWord(lang_mod); + delete lang_mod; + + int cost = WORST_COST; + if (alt_list != NULL) { + if (alt_list->AltCount() > 0) { + cost = alt_list->AltCost(0); + } + } + + return cost; +} + +// Normalize the input word bitmap to have a minimum aspect ratio +bool CubeObject::Normalize() { + // create a cube search object + CubeSearchObject *srch_obj = new CubeSearchObject(cntxt_, char_samp_); + if (srch_obj == NULL) { + return false; + } + // Perform over-segmentation + int seg_cnt = srch_obj->SegPtCnt(); + // Only perform normalization if segment count is large enough + if (seg_cnt < kMinNormalizationSegmentCnt) { + delete srch_obj; + return true; + } + // compute the mean AR of the segments + double ar_mean = 0.0; + for (int seg_idx = 0; seg_idx <= seg_cnt; seg_idx++) { + CharSamp *seg_samp = srch_obj->CharSample(seg_idx - 1, seg_idx); + if (seg_samp != NULL && seg_samp->Width() > 0) { + ar_mean += (1.0 * seg_samp->Height() / seg_samp->Width()); + } + } + ar_mean /= (seg_cnt + 1); + // perform normalization if segment AR is too high + if (ar_mean > kMinNormalizationAspectRatio) { + // scale down the image in the y-direction to attain AR + CharSamp *new_samp = char_samp_->Scale(char_samp_->Width(), + 2.0 * char_samp_->Height() / ar_mean, + false); + if (new_samp != NULL) { + // free existing char samp if owned + if (own_char_samp_) { + delete char_samp_; + } + // update with new scaled charsamp and set ownership flag + char_samp_ = new_samp; + own_char_samp_ = true; + } + } + delete srch_obj; + return true; +} +} diff --git 
a/cube/cube_object.h b/cube/cube_object.h new file mode 100644 index 0000000000..057efc99a4 --- /dev/null +++ b/cube/cube_object.h @@ -0,0 +1,174 @@ +/********************************************************************** + * File: cube_object.h + * Description: Declaration of the Cube Object Class + * Author: Ahmad Abdulkader + * Created: 2007 + * + * (C) Copyright 2008, Google Inc. + ** Licensed under the Apache License, Version 2.0 (the "License"); + ** you may not use this file except in compliance with the License. + ** You may obtain a copy of the License at + ** http://www.apache.org/licenses/LICENSE-2.0 + ** Unless required by applicable law or agreed to in writing, software + ** distributed under the License is distributed on an "AS IS" BASIS, + ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + ** See the License for the specific language governing permissions and + ** limitations under the License. + * + **********************************************************************/ + +// The CubeObject class is the main class used to perform recognition of +// a specific char_samp as a single word. +// To recognize a word, a CubeObject is constructed for this word. +// A Call to RecognizeWord is then issued specifying the language model that +// will be used during recognition. If none is specified, the default language +// model in the CubeRecoContext is used. The CubeRecoContext is passed at +// construction time +// +// The typical usage pattern for Cube is shown below: +// +// // Create and initialize Tesseract object and get its +// // CubeRecoContext object (note that Tesseract object owns it, +// // so it will be freed when the Tesseract object is freed). +// tesseract::Tesseract *tess_obj = new tesseract::Tesseract(); +// tess_obj->init_tesseract(data_path, lang, tesseract::OEM_CUBE_ONLY); +// CubeRecoContext *cntxt = tess_obj->GetCubeRecoContext(); +// CHECK(cntxt != NULL) << "Unable to create a Cube reco context"; +// . 
+// . +// . +// // Do this to recognize a word in pix whose co-ordinates are +// // (left,top,width,height) +// tesseract::CubeObject *cube_obj; +// cube_obj = new tesseract::CubeObject(cntxt, pix, +// left, top, width, height); +// +// // Get back Cube's list of answers +// tesseract::WordAltList *alt_list = cube_obj->RecognizeWord(); +// CHECK(alt_list != NULL && alt_list->AltCount() > 0); +// +// // Get the string and cost of every alternate +// for (int alt = 0; alt < alt_list->AltCount(); alt++) { +// // Return the result as a UTF-32 string +// string_32 res_str32 = alt_list->Alt(alt); +// // Convert to UTF8 if need-be +// string res_str; +// CubeUtils::UTF32ToUTF8(res_str32.c_str(), &res_str); +// // Get the string cost. This should get bigger as you go deeper +// // in the list +// int cost = alt_list->AltCost(alt); +// } +// +// // Call this once you are done recognizing this word +// delete cube_obj; +// +// // Call this once you are done recognizing all words with +// // for the current language +// tess_obj->end_tesseract(); +// delete tess_obj; +// +// Note that if the language supports "Italics" (see the CubeRecoContext), the +// RecognizeWord function attempts to de-slant the word. + +#ifndef CUBE_OBJECT_H +#define CUBE_OBJECT_H + +#include "img.h" +#include "char_samp.h" +#include "word_altlist.h" +#include "beam_search.h" +#include "cube_search_object.h" +#include "tess_lang_model.h" +#include "cube_reco_context.h" + +namespace tesseract { + +// minimum aspect ratio needed to normalize a char_samp before recognition +static const float kMinNormalizationAspectRatio = 3.5; +// minimum probability a top alt choice must meet before having +// deslanted processing applied to it +static const float kMinProbSkipDeslanted = 0.25; + +class CubeObject { + public: + // Different flavors of constructor. 
They just differ in the way the + // word image is specified + CubeObject(CubeRecoContext *cntxt, CharSamp *char_samp); + CubeObject(CubeRecoContext *cntxt, IMAGE *img, + int left, int top, int wid, int hgt); + CubeObject(CubeRecoContext *cntxt, Pix *pix, + int left, int top, int wid, int hgt); + ~CubeObject(); + + // Perform the word recognition using the specified language mode. If none + // is specified, the default language model in the CubeRecoContext is used. + // Returns the sorted list of alternate word answers + WordAltList *RecognizeWord(LangModel *lang_mod = NULL); + // Same as RecognizeWord but recognizes as a phrase + WordAltList *RecognizePhrase(LangModel *lang_mod = NULL); + // Computes the cost of a specific string. This is done by performing + // recognition of a language model that allows only the specified word. + // The alternate list(s) will be permanently modified. + int WordCost(const char *str); + + // Returns the BeamSearch object that resulted from the last call to + // RecognizeWord + inline BeamSearch *BeamObj() const { + return (deslanted_ == true ? deslanted_beam_obj_ : beam_obj_); + } + // Returns the WordAltList object that resulted from the last call to + // RecognizeWord + inline WordAltList *AlternateList() const { + return (deslanted_ == true ? deslanted_alt_list_ : alt_list_); + } + // Returns the CubeSearchObject object that resulted from the last call to + // RecognizeWord + inline CubeSearchObject *SrchObj() const { + return (deslanted_ == true ? deslanted_srch_obj_ : srch_obj_); + } + // Returns the CharSamp object that resulted from the last call to + // RecognizeWord. Note that this object is not necessarily identical to the + // one passed at construction time as normalization might have occurred + inline CharSamp *CharSample() const { + return (deslanted_ == true ? 
deslanted_char_samp_ : char_samp_); + } + + // Set the ownership of the CharSamp + inline void SetCharSampOwnership(bool own_char_samp) { + own_char_samp_ = own_char_samp; + } + + protected: + // Normalize the CharSamp if its aspect ratio exceeds the below constant. + bool Normalize(); + + private: + // minimum segment count needed to normalize a char_samp before recognition + static const int kMinNormalizationSegmentCnt = 4; + + // Data member initialization function + void Init(); + // Free alternate lists. + void Cleanup(); + // Perform the actual recognition using the specified language mode. If none + // is specified, the default language model in the CubeRecoContext is used. + // Returns the sorted list of alternate answers. Called by both + // RecognizerWord (word_mode is true) or RecognizePhrase (word mode is false) + WordAltList *Recognize(LangModel *lang_mod, bool word_mode); + + CubeRecoContext *cntxt_; + BeamSearch *beam_obj_; + BeamSearch *deslanted_beam_obj_; + bool offline_mode_; + bool own_char_samp_; + bool deslanted_; + CharSamp *char_samp_; + CharSamp *deslanted_char_samp_; + CubeSearchObject *srch_obj_; + CubeSearchObject *deslanted_srch_obj_; + WordAltList *alt_list_; + WordAltList *deslanted_alt_list_; +}; +} + +#endif // CUBE_OBJECT_H diff --git a/cube/cube_search_object.cpp b/cube/cube_search_object.cpp new file mode 100644 index 0000000000..9d1c7b0dbf --- /dev/null +++ b/cube/cube_search_object.cpp @@ -0,0 +1,455 @@ +/********************************************************************** + * File: cube_search_object.cpp + * Description: Implementation of the Cube Search Object Class + * Author: Ahmad Abdulkader + * Created: 2007 + * + * (C) Copyright 2008, Google Inc. + ** Licensed under the Apache License, Version 2.0 (the "License"); + ** you may not use this file except in compliance with the License. 
+ ** You may obtain a copy of the License at + ** http://www.apache.org/licenses/LICENSE-2.0 + ** Unless required by applicable law or agreed to in writing, software + ** distributed under the License is distributed on an "AS IS" BASIS, + ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + ** See the License for the specific language governing permissions and + ** limitations under the License. + * + **********************************************************************/ + +#include "cube_search_object.h" +#include "cube_utils.h" +#include "ndminx.h" + +namespace tesseract { + +const bool CubeSearchObject::kUseCroppedChars = true; + +CubeSearchObject::CubeSearchObject(CubeRecoContext *cntxt, CharSamp *samp) + : SearchObject(cntxt) { + init_ = false; + reco_cache_ = NULL; + samp_cache_ = NULL; + segments_ = NULL; + segment_cnt_ = 0; + samp_ = samp; + left_ = 0; + itop_ = 0; + space_cost_ = NULL; + no_space_cost_ = NULL; + wid_ = samp_->Width(); + hgt_ = samp_->Height(); + max_seg_per_char_ = cntxt_->Params()->MaxSegPerChar(); + rtl_ = (cntxt_->ReadingOrder() == CubeRecoContext::R2L); + min_spc_gap_ = + static_cast(hgt_ * cntxt_->Params()->MinSpaceHeightRatio()); + max_spc_gap_ = + static_cast(hgt_ * cntxt_->Params()->MaxSpaceHeightRatio()); +} + +CubeSearchObject::~CubeSearchObject() { + Cleanup(); +} + +// Cleanup +void CubeSearchObject::Cleanup() { + // delete Recognition Cache + if (reco_cache_) { + for (int strt_seg = 0; strt_seg < segment_cnt_; strt_seg++) { + if (reco_cache_[strt_seg]) { + for (int end_seg = 0; end_seg < segment_cnt_; end_seg++) { + if (reco_cache_[strt_seg][end_seg]) { + delete reco_cache_[strt_seg][end_seg]; + } + } + delete []reco_cache_[strt_seg]; + } + } + delete []reco_cache_; + reco_cache_ = NULL; + } + + // delete CharSamp Cache + if (samp_cache_) { + for (int strt_seg = 0; strt_seg < segment_cnt_; strt_seg++) { + if (samp_cache_[strt_seg]) { + for (int end_seg = 0; end_seg < segment_cnt_; end_seg++) { + if 
(samp_cache_[strt_seg][end_seg]) { + delete samp_cache_[strt_seg][end_seg]; + } + } + delete []samp_cache_[strt_seg]; + } + } + delete []samp_cache_; + samp_cache_ = NULL; + } + + // delete segment list + if (segments_) { + for (int seg = 0; seg < segment_cnt_; seg++) { + if (segments_[seg]) { + delete segments_[seg]; + } + } + delete []segments_; + segments_ = NULL; + } + + if (space_cost_) { + delete []space_cost_; + space_cost_ = NULL; + } + + if (no_space_cost_) { + delete []no_space_cost_; + no_space_cost_ = NULL; + } + + segment_cnt_ = 0; + init_ = false; +} + +// # of segmentation points. One less than the count of segments +int CubeSearchObject::SegPtCnt() { + if (!init_ && !Init()) + return -1; + return segment_cnt_ - 1; +} + +// init and allocate variables, perform segmentation +bool CubeSearchObject::Init() { + if (init_) + return true; + if (!Segment()) { + return false; + } + + // init cache + reco_cache_ = new CharAltList **[segment_cnt_]; + if (reco_cache_ == NULL) { + fprintf(stderr, "Cube ERROR (CubeSearchObject::Init): could not " + "allocate CharAltList array\n"); + return false; + } + + samp_cache_ = new CharSamp **[segment_cnt_]; + if (samp_cache_ == NULL) { + fprintf(stderr, "Cube ERROR (CubeSearchObject::Init): could not " + "allocate CharSamp array\n"); + return false; + } + + for (int seg = 0; seg < segment_cnt_; seg++) { + reco_cache_[seg] = new CharAltList *[segment_cnt_]; + if (reco_cache_[seg] == NULL) { + fprintf(stderr, "Cube ERROR (CubeSearchObject::Init): could not " + "allocate a single segment's CharAltList array\n"); + return false; + } + + memset(reco_cache_[seg], 0, segment_cnt_ * sizeof(*reco_cache_[seg])); + + samp_cache_[seg] = new CharSamp *[segment_cnt_]; + if (samp_cache_[seg] == NULL) { + fprintf(stderr, "Cube ERROR (CubeSearchObject::Init): could not " + "allocate a single segment's CharSamp array\n"); + return false; + } + + memset(samp_cache_[seg], 0, segment_cnt_ * sizeof(*samp_cache_[seg])); + } + + init_ = true; + 
return true; +} + +// returns a char sample corresponding to the bitmap between 2 seg pts +CharSamp *CubeSearchObject::CharSample(int start_pt, int end_pt) { + // init if necessary + if (!init_ && !Init()) + return NULL; + // validate segment range + if (!IsValidSegmentRange(start_pt, end_pt)) + return NULL; + + // look for the samp in the cache + if (samp_cache_ && samp_cache_[start_pt + 1] && + samp_cache_[start_pt + 1][end_pt]) { + return samp_cache_[start_pt + 1][end_pt]; + } + // create a char samp object from the specified range of segments + bool left_most; + bool right_most; + CharSamp *samp = CharSamp::FromConComps(segments_, start_pt + 1, + end_pt - start_pt, NULL, + &left_most, &right_most, hgt_); + if (!samp) + return NULL; + + if (kUseCroppedChars) { + CharSamp *cropped_samp = samp->Crop(); + // we no longer need the orig sample + delete samp; + if (!cropped_samp) + return NULL; + samp = cropped_samp; + } + + // get the dimensions of the new cropped sample + int char_top = samp->Top(); + int char_wid = samp->Width(); + int char_hgt = samp->Height(); + + // for cursive languages, these features correspond to whether + // the charsamp is at the beginning or end of conncomp + if (cntxt_->Cursive() == true) { + // first and last char flags depend on reading order + bool first_char = rtl_ ? right_most : left_most; + bool last_char = rtl_ ? left_most : right_most; + + samp->SetFirstChar(first_char ? 255 : 0); + samp->SetLastChar(last_char ? 255 : 0); + } else { + // for non cursive languages, these features correspond + // to whether the charsamp is at the begining or end of the word + samp->SetFirstChar((start_pt == -1) ? 255 : 0); + samp->SetLastChar((end_pt == (segment_cnt_ - 1)) ? 
255 : 0); + } + samp->SetNormTop(255 * char_top / hgt_); + samp->SetNormBottom(255 * (char_top + char_hgt) / hgt_); + samp->SetNormAspectRatio(255 * char_wid / (char_wid + char_hgt)); + + // add to cache & return + samp_cache_[start_pt + 1][end_pt] = samp; + return samp; +} + +Box *CubeSearchObject::CharBox(int start_pt, int end_pt) { + if (!init_ && !Init()) + return NULL; + if (!IsValidSegmentRange(start_pt, end_pt)) { + fprintf(stderr, "Cube ERROR (CubeSearchObject::CharBox): invalid " + "segment range (%d, %d)\n", start_pt, end_pt); + return NULL; + } + + // create a char samp object from the specified range of segments, + // extract its dimensions into a leptonica box, and delete it + bool left_most; + bool right_most; + CharSamp *samp = CharSamp::FromConComps(segments_, start_pt + 1, + end_pt - start_pt, NULL, + &left_most, &right_most, hgt_); + if (!samp) + return NULL; + if (kUseCroppedChars) { + CharSamp *cropped_samp = samp->Crop(); + delete samp; + if (!cropped_samp) { + return NULL; + } + samp = cropped_samp; + } + Box *box = boxCreate(samp->Left(), samp->Top(), + samp->Width(), samp->Height()); + delete samp; + return box; +} + +// call from Beam Search to return the alt list corresponding to +// recognizing the bitmap between two segmentation pts +CharAltList * CubeSearchObject::RecognizeSegment(int start_pt, int end_pt) { + // init if necessary + if (!init_ && !Init()) { + fprintf(stderr, "Cube ERROR (CubeSearchObject::RecognizeSegment): could " + "not initialize CubeSearchObject\n"); + return NULL; + } + + // validate segment range + if (!IsValidSegmentRange(start_pt, end_pt)) { + fprintf(stderr, "Cube ERROR (CubeSearchObject::RecognizeSegment): invalid " + "segment range (%d, %d)\n", start_pt, end_pt); + return NULL; + } + + // look for the recognition results in cache in the cache + if (reco_cache_ && reco_cache_[start_pt + 1] && + reco_cache_[start_pt + 1][end_pt]) { + return reco_cache_[start_pt + 1][end_pt]; + } + + // create the char sample 
corresponding to the blob + CharSamp *samp = CharSample(start_pt, end_pt); + if (!samp) { + fprintf(stderr, "Cube ERROR (CubeSearchObject::RecognizeSegment): could " + "not construct CharSamp\n"); + return NULL; + } + + // recognize the char sample + CharClassifier *char_classifier = cntxt_->Classifier(); + if (char_classifier) { + reco_cache_[start_pt + 1][end_pt] = char_classifier->Classify(samp); + } else { + // no classifer: all characters are equally probable; add a penalty + // that favors 2-segment characters and aspect ratios (w/h) > 1 + fprintf(stderr, "Cube WARNING (CubeSearchObject::RecognizeSegment): cube " + "context has no character classifier!! Inventing a probability " + "distribution.\n"); + int class_cnt = cntxt_->CharacterSet()->ClassCount(); + CharAltList *alt_list = new CharAltList(cntxt_->CharacterSet(), class_cnt); + int seg_cnt = end_pt - start_pt; + double prob_val = (1.0 / class_cnt) * + exp(-abs(seg_cnt - 2.0)) * + exp(-samp->Width() / static_cast(samp->Height())); + + if (alt_list) { + for (int class_idx = 0; class_idx < class_cnt; class_idx++) { + alt_list->Insert(class_idx, CubeUtils::Prob2Cost(prob_val)); + } + reco_cache_[start_pt + 1][end_pt] = alt_list; + } + } + + return reco_cache_[start_pt + 1][end_pt]; +} + +// Perform segmentation of the bitmap by detecting connected components, +// segmenting each connected component using windowed vertical pixel density +// histogram and sorting the resulting segments in reading order +bool CubeSearchObject::Segment() { + if (!samp_) + return false; + segment_cnt_ = 0; + segments_ = samp_->Segment(&segment_cnt_, rtl_, + cntxt_->Params()->HistWindWid(), + cntxt_->Params()->MinConCompSize()); + if (!segments_ || segment_cnt_ <= 0) { + return false; + } + if (segment_cnt_ >= kMaxSegmentCnt) { + return false; + } + return true; +} + +// computes the space and no space costs at gaps between segments +bool CubeSearchObject::ComputeSpaceCosts() { + // init if necessary + if (!init_ && !Init()) + 
return false; + + // Already computed + if (space_cost_) + return true; + + // No segmentation points + if (segment_cnt_ < 2) + return false; + + // Compute the maximum x to the left of and minimum x to the right of each + // segmentation point + int *max_left_x = new int[segment_cnt_ - 1]; + int *min_right_x = new int[segment_cnt_ - 1]; + if (!max_left_x || !min_right_x) { + delete []min_right_x; + delete []max_left_x; + return false; + } + if (rtl_) { + min_right_x[0] = segments_[0]->Left(); + max_left_x[segment_cnt_ - 2] = segments_[segment_cnt_ - 1]->Right(); + for (int pt_idx = 1; pt_idx < (segment_cnt_ - 1); pt_idx++) { + min_right_x[pt_idx] = + MIN(min_right_x[pt_idx - 1], segments_[pt_idx]->Left()); + max_left_x[segment_cnt_ - pt_idx - 2] = + MAX(max_left_x[segment_cnt_ - pt_idx - 1], + segments_[segment_cnt_ - pt_idx - 1]->Right()); + } + } else { + min_right_x[segment_cnt_ - 2] = segments_[segment_cnt_ - 1]->Left(); + max_left_x[0] = segments_[0]->Right(); + for (int pt_idx = 1; pt_idx < (segment_cnt_ - 1); pt_idx++) { + min_right_x[segment_cnt_ - pt_idx - 2] = + MIN(min_right_x[segment_cnt_ - pt_idx - 1], + segments_[segment_cnt_ - pt_idx - 1]->Left()); + max_left_x[pt_idx] = + MAX(max_left_x[pt_idx - 1], segments_[pt_idx]->Right()); + } + } + + // Allocate memory for space and no space costs + // trivial cases + space_cost_ = new int[segment_cnt_ - 1]; + no_space_cost_ = new int[segment_cnt_ - 1]; + if (!space_cost_ || !no_space_cost_) { + delete []min_right_x; + delete []max_left_x; + return false; + } + + // go through all segmentation points determining the horizontal gap between + // the images on both sides of each break points. Use the gap to estimate + // the probability of a space. 
The probability is modeled a linear function + // of the gap width + for (int pt_idx = 0; pt_idx < (segment_cnt_ - 1); pt_idx++) { + // determine the gap at the segmentation point + int gap = min_right_x[pt_idx] - max_left_x[pt_idx]; + float prob = 0.0; + + // gap is too small => no space + if (gap < min_spc_gap_) { + prob = 0.0; + } else if (gap > max_spc_gap_) { + // gap is too big => definite space + prob = 1.0; + } else { + // gap is somewhere in between, compute probability + prob = (gap - min_spc_gap_) / + static_cast(max_spc_gap_ - min_spc_gap_); + } + + // compute cost of space and non-space + space_cost_[pt_idx] = CubeUtils::Prob2Cost(prob) + + CubeUtils::Prob2Cost(0.1); + no_space_cost_[pt_idx] = CubeUtils::Prob2Cost(1.0 - prob); + } + + delete []min_right_x; + delete []max_left_x; + + return true; +} + +// Returns the cost of having a space before the specified segmentation point +int CubeSearchObject::SpaceCost(int pt_idx) { + if (!space_cost_ && !ComputeSpaceCosts()) { + // Failed to compute costs return a zero prob + return CubeUtils::Prob2Cost(0.0); + } + return space_cost_[pt_idx]; +} + +// Returns the cost of not having a space before the specified +// segmentation point +int CubeSearchObject::NoSpaceCost(int pt_idx) { + // If failed to compute costs, return a 1.0 prob + if (!space_cost_ && !ComputeSpaceCosts()) + return CubeUtils::Prob2Cost(0.0); + return no_space_cost_[pt_idx]; +} + +// Returns the cost of not having any spaces within the specified range +// of segmentation points +int CubeSearchObject::NoSpaceCost(int st_pt, int end_pt) { + // If fail to compute costs, return a 1.0 prob + if (!space_cost_ && !ComputeSpaceCosts()) + return CubeUtils::Prob2Cost(1.0); + int no_spc_cost = 0; + for (int pt_idx = st_pt + 1; pt_idx < end_pt; pt_idx++) + no_spc_cost += NoSpaceCost(pt_idx); + return no_spc_cost; +} +} diff --git a/cube/cube_search_object.h b/cube/cube_search_object.h new file mode 100644 index 0000000000..8452417a69 --- /dev/null +++ 
b/cube/cube_search_object.h @@ -0,0 +1,122 @@ +/********************************************************************** + * File: cube_search_object.h + * Description: Declaration of the Cube Search Object Class + * Author: Ahmad Abdulkader + * Created: 2007 + * + * (C) Copyright 2008, Google Inc. + ** Licensed under the Apache License, Version 2.0 (the "License"); + ** you may not use this file except in compliance with the License. + ** You may obtain a copy of the License at + ** http://www.apache.org/licenses/LICENSE-2.0 + ** Unless required by applicable law or agreed to in writing, software + ** distributed under the License is distributed on an "AS IS" BASIS, + ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + ** See the License for the specific language governing permissions and + ** limitations under the License. + * + **********************************************************************/ + +// The CubeSearchObject class represents a char_samp (a word bitmap) that is +// being searched for characters (or recognizeable entities). +// The Class detects the connected components and peforms an oversegmentation +// on each ConComp. The result of which is a list of segments that are ordered +// in reading order. 
+// The class provided methods that inquire about the number of segments, the +// CharSamp corresponding to any segment range and the recognition results +// of any segment range +// An object of Class CubeSearchObject is used by the BeamSearch algorithm +// to recognize a CharSamp into a list of word alternates + +#ifndef CUBE_SEARCH_OBJECT_H +#define CUBE_SEARCH_OBJECT_H + +#include "search_object.h" +#include "char_samp.h" +#include "conv_net_classifier.h" +#include "cube_reco_context.h" +#include "allheaders.h" + +namespace tesseract { +class CubeSearchObject : public SearchObject { + public: + CubeSearchObject(CubeRecoContext *cntxt, CharSamp *samp); + ~CubeSearchObject(); + + // returns the Segmentation Point count of the CharSamp owned by the class + int SegPtCnt(); + // Recognize the set of segments given by the specified range and return + // a list of possible alternate answers + CharAltList * RecognizeSegment(int start_pt, int end_pt); + // Returns the CharSamp corresponding to the specified segment range + CharSamp *CharSample(int start_pt, int end_pt); + // Returns a leptonica box corresponding to the specified segment range + Box *CharBox(int start_pt, int end_pt); + // Returns the cost of having a space before the specified segmentation pt + int SpaceCost(int seg_pt); + // Returns the cost of not having a space before the specified + // segmentation pt + int NoSpaceCost(int seg_pt); + // Returns the cost of not having any spaces within the specified range + // of segmentation points + int NoSpaceCost(int seg_pt, int end_pt); + + private: + // Maximum reasonable segment count + static const int kMaxSegmentCnt = 128; + // Use cropped samples + static const bool kUseCroppedChars; + + // reading order flag + bool rtl_; + // cached dimensions of char samp + int left_; + int itop_; + int wid_; + int hgt_; + // minimum and maximum and possible inter-segment gaps for spaces + int min_spc_gap_; + int max_spc_gap_; + // initialization flag + bool init_; + // 
maximum segments per character: Cached from tuning parameters object + int max_seg_per_char_; + // char sample to be processed + CharSamp *samp_; + // segment count + int segment_cnt_; + // segments of the processed char samp + ConComp **segments_; + // Cache data members: + // There are two caches kept; a CharSamp cache and a CharAltList cache + // Each is a 2-D array of CharSamp and CharAltList pointers respectively + // hence the triple pointer. + CharAltList ***reco_cache_; + CharSamp ***samp_cache_; + // Cached costs of space and no-space after every segment. Computed only + // in phrase mode + int *space_cost_; + int *no_space_cost_; + + // init and allocate variables, perform segmentation + bool Init(); + // Cleanup + void Cleanup(); + // Perform segmentation of the bitmap by detecting connected components, + // segmenting each connected component using windowed vertical pixel density + // histogram and sorting the resulting segments in reading order + // Returns true on success + bool Segment(); + // validate the segment ranges. + inline bool IsValidSegmentRange(int start_pt, int end_pt) { + return (end_pt > start_pt && start_pt >= -1 && start_pt < segment_cnt_ && + end_pt >= 0 && end_pt <= segment_cnt_ && + end_pt <= (start_pt + max_seg_per_char_)); + } + // computes the space and no space costs at gaps between segments + // return true on sucess + bool ComputeSpaceCosts(); +}; +} + +#endif // CUBE_SEARCH_OBJECT_H diff --git a/cube/cube_tuning_params.cpp b/cube/cube_tuning_params.cpp new file mode 100644 index 0000000000..71b857d82e --- /dev/null +++ b/cube/cube_tuning_params.cpp @@ -0,0 +1,218 @@ +/********************************************************************** + * File: cube_tuning_params.cpp + * Description: Implementation of the CubeTuningParameters Class + * Author: Ahmad Abdulkader + * Created: 2007 + * + * (C) Copyright 2008, Google Inc. 
+ ** Licensed under the Apache License, Version 2.0 (the "License"); + ** you may not use this file except in compliance with the License. + ** You may obtain a copy of the License at + ** http://www.apache.org/licenses/LICENSE-2.0 + ** Unless required by applicable law or agreed to in writing, software + ** distributed under the License is distributed on an "AS IS" BASIS, + ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + ** See the License for the specific language governing permissions and + ** limitations under the License. + * + **********************************************************************/ + +#include +#include +#include "cube_tuning_params.h" +#include "tuning_params.h" +#include "cube_utils.h" + +namespace tesseract { +CubeTuningParams::CubeTuningParams() { + reco_wgt_ = 1.0; + size_wgt_ = 1.0; + char_bigrams_wgt_ = 1.0; + word_unigrams_wgt_ = 0.0; + max_seg_per_char_ = 8; + beam_width_ = 32; + tp_classifier_ = NN; + tp_feat_ = BMP; + conv_grid_size_ = 32; + hist_wind_wid_ = 0; + max_word_aspect_ratio_ = 10.0; + min_space_height_ratio_ = 0.2; + max_space_height_ratio_ = 0.3; + min_con_comp_size_ = 0; + combiner_run_thresh_ = 1.0; + combiner_classifier_thresh_ = 0.5; + ood_wgt_ = 1.0; + num_wgt_ = 1.0; + +} + +CubeTuningParams::~CubeTuningParams() { +} + +// Create an Object given the data file path and the language by loading +// the approporiate file +CubeTuningParams *CubeTuningParams::Create(const string &data_file_path, + const string &lang) { + CubeTuningParams *obj = new CubeTuningParams(); + if (!obj) { + fprintf(stderr, "Cube ERROR (CubeTuningParams::Create): unable to " + "allocate new tuning params object\n"); + return NULL; + } + + string tuning_params_file; + tuning_params_file = data_file_path + lang; + tuning_params_file += ".cube.params"; + + if (!obj->Load(tuning_params_file)) { + fprintf(stderr, "Cube ERROR (CubeTuningParams::Create): unable to " + "load tuning parameters from %s\n", 
tuning_params_file.c_str()); + delete obj; + obj = NULL; + } + + return obj; +} + +// Loads the params file +bool CubeTuningParams::Load(string tuning_params_file) { + // load the string into memory + string param_str; + + if (CubeUtils::ReadFileToString(tuning_params_file, ¶m_str) == false) { + fprintf(stderr, "Cube ERROR (CubeTuningParams::Load): unable to read " + "file %s\n", tuning_params_file.c_str()); + return false; + } + + // split into lines + vector str_vec; + CubeUtils::SplitStringUsing(param_str, "\r\n", &str_vec); + if (str_vec.size() < 8) { + fprintf(stderr, "Cube ERROR (CubeTuningParams::Load): number of rows " + "in parameter file is too low\n"); + return false; + } + + // for all entries + for (int entry = 0; entry < str_vec.size(); entry++) { + // tokenize + vector str_tok; + + // should be only two tokens + CubeUtils::SplitStringUsing(str_vec[entry], "=", &str_tok); + if (str_tok.size() != 2) { + fprintf(stderr, "Cube ERROR (CubeTuningParams::Load): invalid format in " + "line: %s.\n", str_vec[entry].c_str()); + return false; + } + + double val = 0; + char peekchar = (str_tok[1].c_str())[0]; + if ((peekchar >= '0' && peekchar <= '9') || + peekchar == '-' || peekchar == '+' || + peekchar == '.') { + // read the value + if (sscanf(str_tok[1].c_str(), "%lf", &val) != 1) { + fprintf(stderr, "Cube ERROR (CubeTuningParams::Load): invalid format " + "in line: %s.\n", str_vec[entry].c_str()); + return false; + } + } + + // token type + if (str_tok[0] == "RecoWgt") { + reco_wgt_ = val; + } else if (str_tok[0] == "SizeWgt") { + size_wgt_ = val; + } else if (str_tok[0] == "CharBigramsWgt") { + char_bigrams_wgt_ = val; + } else if (str_tok[0] == "WordUnigramsWgt") { + word_unigrams_wgt_ = val; + } else if (str_tok[0] == "MaxSegPerChar") { + max_seg_per_char_ = static_cast(val); + } else if (str_tok[0] == "BeamWidth") { + beam_width_ = static_cast(val); + } else if (str_tok[0] == "Classifier") { + if (str_tok[1] == "NN") { + tp_classifier_ = 
TuningParams::NN; + } else if (str_tok[1] == "HYBRID_NN") { + tp_classifier_ = TuningParams::HYBRID_NN; + } else { + fprintf(stderr, "Cube ERROR (CubeTuningParams::Load): invalid " + "classifier type in line: %s.\n", str_vec[entry].c_str()); + return false; + } + } else if (str_tok[0] == "FeatureType") { + if (str_tok[1] == "BMP") { + tp_feat_ = TuningParams::BMP; + } else if (str_tok[1] == "CHEBYSHEV") { + tp_feat_ = TuningParams::CHEBYSHEV; + } else if (str_tok[1] == "HYBRID") { + tp_feat_ = TuningParams::HYBRID; + } else { + fprintf(stderr, "Cube ERROR (CubeTuningParams::Load): invalid feature " + "type in line: %s.\n", str_vec[entry].c_str()); + return false; + } + } else if (str_tok[0] == "ConvGridSize") { + conv_grid_size_ = static_cast(val); + } else if (str_tok[0] == "HistWindWid") { + hist_wind_wid_ = val; + } else if (str_tok[0] == "MinConCompSize") { + min_con_comp_size_ = val; + } else if (str_tok[0] == "MaxWordAspectRatio") { + max_word_aspect_ratio_ = val; + } else if (str_tok[0] == "MinSpaceHeightRatio") { + min_space_height_ratio_ = val; + } else if (str_tok[0] == "MaxSpaceHeightRatio") { + max_space_height_ratio_ = val; + } else if (str_tok[0] == "CombinerRunThresh") { + combiner_run_thresh_ = val; + } else if (str_tok[0] == "CombinerClassifierThresh") { + combiner_classifier_thresh_ = val; + } else if (str_tok[0] == "OODWgt") { + ood_wgt_ = val; + } else if (str_tok[0] == "NumWgt") { + num_wgt_ = val; + } else { + fprintf(stderr, "Cube ERROR (CubeTuningParams::Load): unknown parameter " + "in line: %s.\n", str_vec[entry].c_str()); + return false; + } + } + + return true; +} + +// Save the parameters to a file +bool CubeTuningParams::Save(string file_name) { + FILE *params_file = fopen(file_name.c_str(), "w"); + if (params_file == NULL) { + fprintf(stderr, "Cube ERROR (CubeTuningParams::Save): error opening file " + "%s for write.\n", file_name.c_str()); + return false; + } + + fprintf(params_file, "RecoWgt=%.4f\n", reco_wgt_); + 
fprintf(params_file, "SizeWgt=%.4f\n", size_wgt_); + fprintf(params_file, "CharBigramsWgt=%.4f\n", char_bigrams_wgt_); + fprintf(params_file, "WordUnigramsWgt=%.4f\n", word_unigrams_wgt_); + fprintf(params_file, "MaxSegPerChar=%d\n", max_seg_per_char_); + fprintf(params_file, "BeamWidth=%d\n", beam_width_); + fprintf(params_file, "ConvGridSize=%d\n", conv_grid_size_); + fprintf(params_file, "HistWindWid=%d\n", hist_wind_wid_); + fprintf(params_file, "MinConCompSize=%d\n", min_con_comp_size_); + fprintf(params_file, "MaxWordAspectRatio=%.4f\n", max_word_aspect_ratio_); + fprintf(params_file, "MinSpaceHeightRatio=%.4f\n", min_space_height_ratio_); + fprintf(params_file, "MaxSpaceHeightRatio=%.4f\n", max_space_height_ratio_); + fprintf(params_file, "CombinerRunThresh=%.4f\n", combiner_run_thresh_); + fprintf(params_file, "CombinerClassifierThresh=%.4f\n", + combiner_classifier_thresh_); + fprintf(params_file, "OODWgt=%.4f\n", ood_wgt_); + fprintf(params_file, "NumWgt=%.4f\n", num_wgt_); + + fclose(params_file); + return true; +} +} diff --git a/cube/cube_tuning_params.h b/cube/cube_tuning_params.h new file mode 100644 index 0000000000..8b1258724e --- /dev/null +++ b/cube/cube_tuning_params.h @@ -0,0 +1,57 @@ +/********************************************************************** + * File: cube_tuning_params.h + * Description: Declaration of the CubeTuningParameters Class + * Author: Ahmad Abdulkader + * Created: 2007 + * + * (C) Copyright 2008, Google Inc. + ** Licensed under the Apache License, Version 2.0 (the "License"); + ** you may not use this file except in compliance with the License. + ** You may obtain a copy of the License at + ** http://www.apache.org/licenses/LICENSE-2.0 + ** Unless required by applicable law or agreed to in writing, software + ** distributed under the License is distributed on an "AS IS" BASIS, + ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ ** See the License for the specific language governing permissions and + ** limitations under the License. + * + **********************************************************************/ + +// The CubeTuningParams class abstracts all the parameters that are used +// in Cube and are tuned/learned during the training process. Inherits +// from the TuningParams class. + +#ifndef CUBE_TUNING_PARAMS_H +#define CUBE_TUNING_PARAMS_H + +#include +#include "tuning_params.h" + +namespace tesseract { +class CubeTuningParams : public TuningParams { + public: + CubeTuningParams(); + ~CubeTuningParams(); + + // Accessor functions + inline double OODWgt() { return ood_wgt_; } + inline double NumWgt() { return num_wgt_; } + + inline void SetOODWgt(double wgt) { ood_wgt_ = wgt; } + inline void SetNumWgt(double wgt) { num_wgt_ = wgt; } + + // Create an object given the data file path and the language by loading + // the approporiate file + static CubeTuningParams * Create(const string &data_file, + const string &lang); + // Save and load the tuning parameters to a specified file + bool Save(string file_name); + bool Load(string file_name); + + private: + double ood_wgt_; + double num_wgt_; +}; +} + +#endif // CUBE_TUNING_PARAMS_H diff --git a/cube/cube_utils.cpp b/cube/cube_utils.cpp new file mode 100644 index 0000000000..7be5986d85 --- /dev/null +++ b/cube/cube_utils.cpp @@ -0,0 +1,484 @@ +/********************************************************************** + * File: cube_utils.cpp + * Description: Implementation of the Cube Utilities Class + * Author: Ahmad Abdulkader + * Created: 2008 + * + * (C) Copyright 2008, Google Inc. + ** Licensed under the Apache License, Version 2.0 (the "License"); + ** you may not use this file except in compliance with the License. 
+ ** You may obtain a copy of the License at + ** http://www.apache.org/licenses/LICENSE-2.0 + ** Unless required by applicable law or agreed to in writing, software + ** distributed under the License is distributed on an "AS IS" BASIS, + ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + ** See the License for the specific language governing permissions and + ** limitations under the License. + * + **********************************************************************/ + +#include +#include +#include +#include "cube_utils.h" +#include "char_set.h" +#include "unichar.h" + +namespace tesseract { +CubeUtils::CubeUtils() { +} + +CubeUtils::~CubeUtils() { +} + +// convert a prob to a cost (-ve log prob) +int CubeUtils::Prob2Cost(double prob_val) { + if (prob_val < MIN_PROB) { + return MIN_PROB_COST; + } + return static_cast(-log(prob_val) * PROB2COST_SCALE); +} + +// converts a cost to probability +double CubeUtils::Cost2Prob(int cost) { + return exp(-cost / PROB2COST_SCALE); +} + +// computes the length of a NULL terminated char_32 string +int CubeUtils::StrLen(const char_32 *char_32_ptr) { + if (char_32_ptr == NULL) { + return 0; + } + int len = -1; + while (char_32_ptr[++len]); + return len; +} + +// compares two char_32 strings +int CubeUtils::StrCmp(const char_32 *str1, const char_32 *str2) { + const char_32 *pch1 = str1; + const char_32 *pch2 = str2; + + for (; (*pch1) != 0 && (*pch2) != 0; pch1++, pch2++) { + if ((*pch1) != (*pch2)) { + return (*pch1) - (*pch2); + } + } + + if ((*pch1) == 0) { + if ((*pch2) == 0) { + return 0; + } else { + return -1; + } + } else { + return 1; + } +} + +// Duplicates a 32-bit char buffer +char_32 *CubeUtils::StrDup(const char_32 *str32) { + int len = StrLen(str32); + char_32 *new_str = new char_32[len + 1]; + if (new_str == NULL) { + return NULL; + } + memcpy(new_str, str32, len * sizeof(*str32)); + new_str[len] = 0; + return new_str; +} + +// creates a raw buffer from the specified location of the 
image +unsigned char *CubeUtils::GetImageData(IMAGE *img, int left, + int top, int wid, int hgt) { + // skip invalid dimensions + if (left < 0 || top < 0 || wid < 0 || hgt < 0 || + (left + wid) > img->get_xsize() || + (top + hgt) > img->get_ysize()) { + return NULL; + } + + // copy the char img to a temp buffer + unsigned char *temp_buff = new unsigned char[wid * hgt]; + if (temp_buff == NULL) { + return NULL; + } + + IMAGELINE line; + line.init(wid); + + for (int y = 0, off = 0; y < hgt ; y++) { + img->get_line(left, img->get_ysize() - 1 - y - top, wid, &line, 0); + for (int x = 0; x < wid; x++, off++) { + temp_buff[off] = line.pixels[x] ? 255 : 0; + } + } + + return temp_buff; +} + +// creates a char samp from a specified portion of the image +CharSamp *CubeUtils::CharSampleFromImg(IMAGE *img, + int left, int top, + int wid, int hgt) { + // get the raw img data from the image + unsigned char *temp_buff = GetImageData(img, left, top, wid, hgt); + if (temp_buff == NULL) { + return NULL; + } + + // create a char samp from temp buffer + CharSamp *char_samp = CharSamp::FromRawData(left, top, wid, hgt, temp_buff); + // clean up temp buffer + delete []temp_buff; + return char_samp; +} + +// creates a char samp from a specified portion of the image +CharSamp *CubeUtils::CharSampleFromPix(Pix *pix, int left, int top, + int wid, int hgt) { + // get the raw img data from the image + unsigned char *temp_buff = GetImageData(pix, left, top, wid, hgt); + if (temp_buff == NULL) { + return NULL; + } + + // create a char samp from temp buffer + CharSamp *char_samp = CharSamp::FromRawData(left, top, wid, hgt, temp_buff); + + // clean up temp buffer + delete []temp_buff; + return char_samp; +} + +// create a B/W image from a char_sample +IMAGE *CubeUtils::ImageFromCharSample(CharSamp *char_samp) { + // parameter check + if (char_samp == NULL) { + return NULL; + } + + // get the raw data + int stride = char_samp->Stride(), + wid = char_samp->Width(), + hgt = char_samp->Height(); + + 
unsigned char *buff = char_samp->RawData(); + if (buff == NULL) { + return NULL; + } + + // create a new image object + IMAGE *img = new IMAGE(); + if (img == NULL) { + return NULL; + } + + // create a blank B/W image + if (img->create(wid, hgt, 1) == -1) { + delete img; + return NULL; + } + + // copy the contents + IMAGELINE line; + line.init(wid); + + for (int y = 0, off = 0; y < hgt ; y++, off += stride) { + for (int x = 0; x < wid; x++) { + line.pixels[x] = (buff[off + x] == 0) ? 0 : 1; + } + + img->fast_put_line(0, hgt - 1 - y, wid, &line); + } + + return img; +} + +// create a B/W image from a char_sample +Pix *CubeUtils::PixFromCharSample(CharSamp *char_samp) { + // parameter check + if (char_samp == NULL) { + return NULL; + } + + // get the raw data + int stride = char_samp->Stride(); + int wid = char_samp->Width(); + int hgt = char_samp->Height(); + + Pix *pix = pixCreate(wid, hgt, 1); + if (pix == NULL) { + return NULL; + } + + // copy the contents + unsigned char *line = char_samp->RawData(); + for (int y = 0; y < hgt ; y++, line += stride) { + for (int x = 0; x < wid; x++) { + if (line[x] != 0) { + pixSetPixel(pix, x, y, 0); + } else { + pixSetPixel(pix, x, y, 255); + } + } + } + + return pix; +} + +// creates a raw buffer from the specified location of the pix +unsigned char *CubeUtils::GetImageData(Pix *pix, int left, int top, + int wid, int hgt) { + // skip invalid dimensions + if (left < 0 || top < 0 || wid < 0 || hgt < 0 || + (left + wid) > pix->w || (top + hgt) > pix->h || + pix->d != 1) { + return NULL; + } + + // copy the char img to a temp buffer + unsigned char *temp_buff = new unsigned char[wid * hgt]; + if (temp_buff == NULL) { + return NULL; + } + + l_int32 w; + l_int32 h; + l_int32 d; + l_int32 wpl; + l_uint32 *line; + l_uint32 *data; + + pixGetDimensions(pix, &w, &h, &d); + wpl = pixGetWpl(pix); + data = pixGetData(pix); + line = data + (top * wpl); + + for (int y = 0, off = 0; y < hgt ; y++) { + for (int x = 0; x < wid; x++, off++) { + 
temp_buff[off] = GET_DATA_BIT(line, x + left) ? 0 : 255; + } + line += wpl; + } + return temp_buff; +} + +// read file contents to a string +bool CubeUtils::ReadFileToString(const string &file_name, string *str) { + str->clear(); + FILE *fp = fopen(file_name.c_str(), "r"); + if (fp == NULL) { + return false; + } + + // get the size of the size + fseek(fp, 0, SEEK_END); + int file_size = ftell(fp); + if (file_size < 1) { + fclose(fp); + return false; + } + // adjust string size + str->reserve(file_size); + // read the contents + rewind(fp); + char *buff = new char[file_size]; + if (buff == NULL) { + fclose(fp); + return false; + } + int read_bytes = fread(buff, 1, static_cast<size_t>(file_size), fp); + if (read_bytes == file_size) { + str->append(buff, file_size); + } + delete []buff; + fclose(fp); + return (read_bytes == file_size); +} + +// splits a string into vectors based on specified delimiters +void CubeUtils::SplitStringUsing(const string &str, + const string &delims, + vector<string> *str_vec) { + // Optimize the common case where delims is a single character. 
+ if (delims[0] != '\0' && delims[1] == '\0') { + char c = delims[0]; + const char* p = str.data(); + const char* end = p + str.size(); + while (p != end) { + if (*p == c) { + ++p; + } else { + const char* start = p; + while (++p != end && *p != c); + str_vec->push_back(string(start, p - start)); + } + } + return; + } + + string::size_type begin_index, end_index; + begin_index = str.find_first_not_of(delims); + while (begin_index != string::npos) { + end_index = str.find_first_of(delims, begin_index); + if (end_index == string::npos) { + str_vec->push_back(str.substr(begin_index)); + return; + } + str_vec->push_back(str.substr(begin_index, (end_index - begin_index))); + begin_index = str.find_first_not_of(delims, end_index); + } +} + +// UTF-8 to UTF-32 convesion functions +void CubeUtils::UTF8ToUTF32(const char *utf8_str, string_32 *str32) { + str32->clear(); + int len = strlen(utf8_str); + int step = 0; + for (int ch = 0; ch < len; ch += step) { + step = UNICHAR::utf8_step(utf8_str + ch); + if (step > 0) { + UNICHAR uni_ch(utf8_str + ch, step); + (*str32) += uni_ch.first_uni(); + } + } +} + +// UTF-8 to UTF-32 convesion functions +void CubeUtils::UTF32ToUTF8(const char_32 *utf32_str, string *str) { + str->clear(); + for (const char_32 *ch_32 = utf32_str; (*ch_32) != 0; ch_32++) { + UNICHAR uni_ch((*ch_32)); + char *utf8 = uni_ch.utf8_str(); + if (utf8 != NULL) { + (*str) += utf8; + delete []utf8; + } + } +} + +bool CubeUtils::IsCaseInvariant(const char_32 *str32, CharSet *char_set, + UNICHARSET *unicharset) { + bool all_one_case = true; + bool capitalized; + bool prev_upper; + bool prev_lower; + bool first_upper; + bool first_lower; + bool cur_upper; + bool cur_lower; + + string str8; + if (!char_set || !unicharset || !char_set->SharedUnicharset()) { + // If cube char_set or tesseract unicharset are missing, or + // unicharset is not shared, then use C-locale-dependent functions + // on UTF8 characters to determine case properties. 
+ UTF32ToUTF8(str32, &str8); + first_upper = isupper(str8[0]); + first_lower = islower(str8[0]); + if (first_upper) + capitalized = true; + prev_upper = first_upper; + prev_lower = islower(str8[0]); + for (int c = 1; c < str8.length(); ++c) { + cur_upper = isupper(str8[c]); + cur_lower = islower(str8[c]); + if ((prev_upper && cur_lower) || (prev_lower && cur_upper)) + all_one_case = false; + if (cur_upper) + capitalized = false; + prev_upper = cur_upper; + prev_lower = cur_lower; + } + } else { + // Use UNICHARSET functions to determine case properties + first_upper = unicharset->get_isupper(char_set->UnicharID(str32[0])); + first_lower = unicharset->get_islower(char_set->UnicharID(str32[0])); + if (first_upper) + capitalized = true; + prev_upper = first_upper; + prev_lower = unicharset->get_islower(char_set->UnicharID(str32[0])); + + for (int c = 1; c < StrLen(str32); ++c) { + cur_upper = unicharset->get_isupper(char_set->UnicharID(str32[c])); + cur_lower = unicharset->get_islower(char_set->UnicharID(str32[c])); + if ((prev_upper && cur_lower) || (prev_lower && cur_upper)) + all_one_case = false; + if (cur_upper) + capitalized = false; + prev_upper = cur_upper; + prev_lower = cur_lower; + } + } + return all_one_case || capitalized; +} + +char_32 *CubeUtils::ToLower(const char_32 *str32, CharSet *char_set, + UNICHARSET *unicharset) { + if (!char_set || !unicharset || !char_set->SharedUnicharset()) { + return NULL; + } + int len = StrLen(str32); + char_32 *lower = new char_32[len + 1]; + if (!lower) + return NULL; + for (int i = 0; i < len; ++i) { + char_32 ch = str32[i]; + if (ch == INVALID_UNICHAR_ID) { + delete [] lower; + return NULL; + } + // convert upper-case characters to lower-case + if (unicharset->get_isupper(char_set->UnicharID(ch))) { + UNICHAR_ID uid_lower = + unicharset->get_other_case(char_set->UnicharID(ch)); + const char_32 *str32_lower = char_set->ClassString(uid_lower); + // expect lower-case version of character to be a single character + if 
(!str32_lower || StrLen(str32_lower) != 1) { + delete [] lower; + return NULL; + } + lower[i] = str32_lower[0]; + } else { + lower[i] = ch; + } + } + lower[len] = 0; + return lower; +} + +char_32 *CubeUtils::ToUpper(const char_32 *str32, CharSet *char_set, + UNICHARSET *unicharset) { + if (!char_set || !unicharset || !char_set->SharedUnicharset()) { + return NULL; + } + int len = StrLen(str32); + char_32 *upper = new char_32[len + 1]; + if (!upper) + return NULL; + for (int i = 0; i < len; ++i) { + char_32 ch = str32[i]; + if (ch == INVALID_UNICHAR_ID) { + delete [] upper; + return NULL; + } + // convert lower-case characters to upper-case + if (unicharset->get_islower(char_set->UnicharID(ch))) { + UNICHAR_ID uid_upper = + unicharset->get_other_case(char_set->UnicharID(ch)); + const char_32 *str32_upper = char_set->ClassString(uid_upper); + // expect upper-case version of character to be a single character + if (!str32_upper || StrLen(str32_upper) != 1) { + delete [] upper; + return NULL; + } + upper[i] = str32_upper[0]; + } else { + upper[i] = ch; + } + } + upper[len] = 0; + return upper; +} +} // namespace tesseract diff --git a/cube/cube_utils.h b/cube/cube_utils.h new file mode 100644 index 0000000000..32f7d6bc6e --- /dev/null +++ b/cube/cube_utils.h @@ -0,0 +1,97 @@ +/********************************************************************** + * File: cube_utils.h + * Description: Declaration of the Cube Utilities Class + * Author: Ahmad Abdulkader + * Created: 2008 + * + *(C) Copyright 2008, Google Inc. + ** Licensed under the Apache License, Version 2.0(the "License"); + ** you may not use this file except in compliance with the License. + ** You may obtain a copy of the License at + ** http://www.apache.org/licenses/LICENSE-2.0 + ** Unless required by applicable law or agreed to in writing, software + ** distributed under the License is distributed on an "AS IS" BASIS, + ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
 + ** See the License for the specific language governing permissions and + ** limitations under the License. + * + **********************************************************************/ + +// The CubeUtils class provides miscellaneous utility and helper functions +// to the rest of the Cube Engine + +#ifndef CUBE_UTILS_H +#define CUBE_UTILS_H + +#include <string> +#include <vector> + +#include "allheaders.h" +#include "const.h" +#include "char_set.h" +#include "char_samp.h" +#include "img.h" + +namespace tesseract { +class CubeUtils { + public: + CubeUtils(); + ~CubeUtils(); + + // Converts a probability value to a cost by getting the -log() of the + // probability value to a known base + static int Prob2Cost(double prob_val); + // Converts a cost to probability by getting the exp(-normalized cost) + static double Cost2Prob(int cost); + // Computes the length of a 32-bit char buffer + static int StrLen(const char_32 *str); + // Compares two 32-bit char buffers + static int StrCmp(const char_32 *str1, const char_32 *str2); + // Duplicates a 32-bit char buffer + static char_32 *StrDup(const char_32 *str); + // Creates a CharSamp from an IMAGE and a bounding box + static CharSamp *CharSampleFromImg(IMAGE *img, + int left, int top, int wid, int hgt); + // Creates a CharSamp from an Pix and a bounding box + static CharSamp *CharSampleFromPix(Pix *pix, + int left, int top, int wid, int hgt); + // Creates an IMAGE from a CharSamp + static IMAGE *ImageFromCharSample(CharSamp *char_samp); + // Creates a Pix from a CharSamp + static Pix *PixFromCharSample(CharSamp *char_samp); + // read the contents of a file to a string + static bool ReadFileToString(const string &file_name, string *str); + // split a string into vectors using any of the specified delimiters + static void SplitStringUsing(const string &str, const string &delims, + vector<string> *str_vec); + // UTF-8 to UTF-32 convesion functions + static void UTF8ToUTF32(const char *utf8_str, string_32 *str32); + static void UTF32ToUTF8(const 
char_32 *utf32_str, string *str); + // Returns true if input word has either 1) all-one-case, or 2) + // first character upper-case, and remaining characters lower-case. + // If char_set and unicharset are not NULL, uses tesseract's unicharset + // functions to determine case properties. Otherwise, uses + // C-locale-dependent functions, which may be unreliable on + // non-ASCII characters. + static bool IsCaseInvariant(const char_32 *str32, CharSet *char_set, + UNICHARSET *unicharset); + // Returns char_32 pointer to the lower-case-transformed version of + // the input string or NULL on error. If char_set or unicharset are + // NULL, or tesseract and cube do not share unicharsets, returns + // NULL. Return array must be freed by caller. + static char_32 *ToLower(const char_32 *str32, CharSet *char_set, + UNICHARSET *unicharset); + // Returns char_32 pointer to the upper-case-transformed version of + // the input string or NULL on error. If char_set or unicharset are + // NULL, or tesseract and cube do not share unicharsets, returns + // NULL. Return array must be freed by caller. + static char_32 *ToUpper(const char_32 *str32, CharSet *char_set, + UNICHARSET *unicharset); + private: + static unsigned char *GetImageData(IMAGE *img, + int left, int top, int wid, int hgt); + static unsigned char *GetImageData(Pix *pix, + int left, int top, int wid, int hgt); +}; +} // namespace tesseract +#endif // CUBE_UTILS_H diff --git a/cube/feature_base.h b/cube/feature_base.h new file mode 100644 index 0000000000..032bc73cf4 --- /dev/null +++ b/cube/feature_base.h @@ -0,0 +1,55 @@ +/********************************************************************** + * File: feature_base.h + * Description: Declaration of the Feature Base Class + * Author: Ping Ping (xiupingping), Ahmad Abdulkader + * Created: 2007 + * + * (C) Copyright 2008, Google Inc. + ** Licensed under the Apache License, Version 2.0 (the "License"); + ** you may not use this file except in compliance with the License. 
+ ** You may obtain a copy of the License at + ** http://www.apache.org/licenses/LICENSE-2.0 + ** Unless required by applicable law or agreed to in writing, software + ** distributed under the License is distributed on an "AS IS" BASIS, + ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + ** See the License for the specific language governing permissions and + ** limitations under the License. + * + **********************************************************************/ + +// The FeatureBase class is the base class for any Feature Extraction class +// It provided 3 pure virtual functions (to inherit): +// 1- FeatureCnt: A method to returns the count of features +// 2- ComputeFeatures: A method to compute the features for a given CharSamp +// 3- ComputeFeatureBitmap: A method to render a visualization of the features +// to a CharSamp. This is mainly used by visual-debuggers + +#ifndef FEATURE_BASE_H +#define FEATURE_BASE_H + +#include "char_samp.h" +#include "tuning_params.h" + +namespace tesseract { +class FeatureBase { + public: + explicit FeatureBase(TuningParams *params) + : params_(params) { + } + virtual ~FeatureBase() {} + + // Compute the features for a given CharSamp + virtual bool ComputeFeatures(CharSamp *char_samp, float *features) = 0; + // Render a visualization of the features to a CharSamp. 
+ // This is mainly used by visual-debuggers + virtual CharSamp *ComputeFeatureBitmap(CharSamp *char_samp) = 0; + // Returns the count of features + virtual int FeatureCnt() = 0; + + protected: + TuningParams *params_; +}; +} + +#endif // FEATURE_BASE_H + diff --git a/cube/feature_bmp.cpp b/cube/feature_bmp.cpp new file mode 100644 index 0000000000..06e18798f2 --- /dev/null +++ b/cube/feature_bmp.cpp @@ -0,0 +1,50 @@ +/********************************************************************** + * File: feature_bmp.cpp + * Description: Implementation of the Bitmap Feature Class + * Author: Ahmad Abdulkader + * Created: 2007 + * + * (C) Copyright 2008, Google Inc. + ** Licensed under the Apache License, Version 2.0 (the "License"); + ** you may not use this file except in compliance with the License. + ** You may obtain a copy of the License at + ** http://www.apache.org/licenses/LICENSE-2.0 + ** Unless required by applicable law or agreed to in writing, software + ** distributed under the License is distributed on an "AS IS" BASIS, + ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + ** See the License for the specific language governing permissions and + ** limitations under the License. + * + **********************************************************************/ + +#include +#include +#include +#include "feature_base.h" +#include "feature_bmp.h" +#include "cube_utils.h" +#include "const.h" +#include "char_samp.h" + +namespace tesseract { + +FeatureBmp::FeatureBmp(TuningParams *params) + :FeatureBase(params) { + conv_grid_size_ = params->ConvGridSize(); +} + +FeatureBmp::~FeatureBmp() { +} + +// Render a visualization of the features to a CharSamp. 
+// This is mainly used by visual-debuggers +CharSamp *FeatureBmp::ComputeFeatureBitmap(CharSamp *char_samp) { + return char_samp->Scale(conv_grid_size_, conv_grid_size_); +} + +// Compute the features for a given CharSamp +bool FeatureBmp::ComputeFeatures(CharSamp *char_samp, float *features) { + return char_samp->ComputeFeatures(conv_grid_size_, features); +} +} + diff --git a/cube/feature_bmp.h b/cube/feature_bmp.h new file mode 100644 index 0000000000..2a84941b56 --- /dev/null +++ b/cube/feature_bmp.h @@ -0,0 +1,53 @@ +/********************************************************************** + * File: feature_bmp.h + * Description: Declaration of the Bitmap Feature Class + * Author: PingPing xiu (xiupingping) & Ahmad Abdulkader + * Created: 2007 + * + * (C) Copyright 2008, Google Inc. + ** Licensed under the Apache License, Version 2.0 (the "License"); + ** you may not use this file except in compliance with the License. + ** You may obtain a copy of the License at + ** http://www.apache.org/licenses/LICENSE-2.0 + ** Unless required by applicable law or agreed to in writing, software + ** distributed under the License is distributed on an "AS IS" BASIS, + ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + ** See the License for the specific language governing permissions and + ** limitations under the License. + * + **********************************************************************/ + +// The FeatureBmp class implements a Bitmap feature extractor class. 
It +// inherits from the FeatureBase class +// The Bitmap feature vectors is the the bitmap of the specified CharSamp +// scaled to a fixed grid size and then augmented by a 5 aux features that +// describe the size, aspect ration and placement within a word + +#ifndef FEATURE_BMP_H +#define FEATURE_BMP_H + +#include "char_samp.h" +#include "feature_base.h" + +namespace tesseract { +class FeatureBmp : public FeatureBase { + public: + explicit FeatureBmp(TuningParams *params); + virtual ~FeatureBmp(); + // Render a visualization of the features to a CharSamp. + // This is mainly used by visual-debuggers + virtual CharSamp *ComputeFeatureBitmap(CharSamp *samp); + // Compute the features for a given CharSamp + virtual bool ComputeFeatures(CharSamp *samp, float *features); + // Returns the count of features + virtual int FeatureCnt() { + return 5 + (conv_grid_size_ * conv_grid_size_); + } + + protected: + // grid size, cached from the TuningParams object + int conv_grid_size_; +}; +} + +#endif // FEATURE_BMP_H diff --git a/cube/feature_chebyshev.cpp b/cube/feature_chebyshev.cpp new file mode 100644 index 0000000000..70f77dc73c --- /dev/null +++ b/cube/feature_chebyshev.cpp @@ -0,0 +1,144 @@ +/********************************************************************** + * File: feature_chebyshev.cpp + * Description: Implementation of the Chebyshev coefficients Feature Class + * Author: Ahmad Abdulkader + * Created: 2008 + * + * (C) Copyright 2008, Google Inc. + ** Licensed under the Apache License, Version 2.0 (the "License"); + ** you may not use this file except in compliance with the License. + ** You may obtain a copy of the License at + ** http://www.apache.org/licenses/LICENSE-2.0 + ** Unless required by applicable law or agreed to in writing, software + ** distributed under the License is distributed on an "AS IS" BASIS, + ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
 + ** See the License for the specific language governing permissions and + ** limitations under the License. + * + **********************************************************************/ + +#include <math.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <string> +#include <vector> +#include "feature_base.h" +#include "feature_chebyshev.h" +#include "cube_utils.h" +#include "const.h" +#include "char_samp.h" + +#ifdef WIN32 +#ifndef M_PI +#define M_PI 3.14159265358979323846 +#endif +#endif + +namespace tesseract { + +FeatureChebyshev::FeatureChebyshev(TuningParams *params) + : FeatureBase(params) { +} + +FeatureChebyshev::~FeatureChebyshev() { +} + +// Render a visualization of the features to a CharSamp. +// This is mainly used by visual-debuggers +CharSamp *FeatureChebyshev::ComputeFeatureBitmap(CharSamp *char_samp) { + return char_samp; +} + +// Compute Chebyshev coefficients for the specified vector +void FeatureChebyshev::ChebyshevCoefficients(const vector<float> &input, + int coeff_cnt, float *coeff) { + // re-sample function + int input_range = (input.size() - 1); + vector<float> resamp(coeff_cnt); + for (int samp_idx = 0; samp_idx < coeff_cnt; samp_idx++) { + // compute sampling position + float samp_pos = input_range * + (1 + cos(M_PI * (samp_idx + 0.5) / coeff_cnt)) / 2; + // interpolate + int samp_start = static_cast<int>(samp_pos); + int samp_end = static_cast<int>(samp_pos + 0.5); + float func_delta = input[samp_end] - input[samp_start]; + resamp[samp_idx] = input[samp_start] + + ((samp_pos - samp_start) * func_delta); + } + // compute the coefficients + float normalizer = 2.0 / coeff_cnt; + for (int coeff_idx = 0; coeff_idx < coeff_cnt; coeff_idx++, coeff++) { + double sum = 0.0; + for (int samp_idx = 0; samp_idx < coeff_cnt; samp_idx++) { + sum += resamp[samp_idx] * cos(M_PI * coeff_idx * (samp_idx + 0.5) / + coeff_cnt); + } + (*coeff) = (normalizer * sum); + } +} + +// Compute the features of a given CharSamp +bool FeatureChebyshev::ComputeFeatures(CharSamp *char_samp, float *features) { + return 
ComputeChebyshevCoefficients(char_samp, features); +} + +// Compute the Chebyshev coefficients of a given CharSamp +bool FeatureChebyshev::ComputeChebyshevCoefficients(CharSamp *char_samp, + float *features) { + if (char_samp->NormBottom() <= 0) { + return false; + } + unsigned char *raw_data = char_samp->RawData(); + int stride = char_samp->Stride(); + // compute the height of the word + int word_hgt = (255 * (char_samp->Top() + char_samp->Height()) / + char_samp->NormBottom()); + // compute left & right profiles + vector<float> left_profile(word_hgt, 0.0); + vector<float> right_profile(word_hgt, 0.0); + unsigned char *line_data = raw_data; + for (int y = 0; y < char_samp->Height(); y++, line_data += stride) { + int min_x = char_samp->Width(); + int max_x = -1; + for (int x = 0; x < char_samp->Width(); x++) { + if (line_data[x] == 0) { + UpdateRange(x, &min_x, &max_x); + } + } + left_profile[char_samp->Top() + y] = + 1.0 * (min_x == char_samp->Width() ? 0 : (min_x + 1)) / + char_samp->Width(); + right_profile[char_samp->Top() + y] = + 1.0 * (max_x == -1 ? 0 : char_samp->Width() - max_x) / + char_samp->Width(); + } + + // compute top and bottom profiles + vector<float> top_profile(char_samp->Width(), 0); + vector<float> bottom_profile(char_samp->Width(), 0); + for (int x = 0; x < char_samp->Width(); x++) { + int min_y = word_hgt; + int max_y = -1; + line_data = raw_data; + for (int y = 0; y < char_samp->Height(); y++, line_data += stride) { + if (line_data[x] == 0) { + UpdateRange(y + char_samp->Top(), &min_y, &max_y); + } + } + top_profile[x] = 1.0 * (min_y == word_hgt ? 0 : (min_y + 1)) / word_hgt; + bottom_profile[x] = 1.0 * (max_y == -1 ? 
0 : (word_hgt - max_y)) / word_hgt; + } + + // compute the chebyshev coefficients of each profile + ChebyshevCoefficients(left_profile, kChebychevCoefficientCnt, features); + ChebyshevCoefficients(top_profile, kChebychevCoefficientCnt, + features + kChebychevCoefficientCnt); + ChebyshevCoefficients(right_profile, kChebychevCoefficientCnt, + features + (2 * kChebychevCoefficientCnt)); + ChebyshevCoefficients(bottom_profile, kChebychevCoefficientCnt, + features + (3 * kChebychevCoefficientCnt)); + return true; +} +} // namespace tesseract diff --git a/cube/feature_chebyshev.h b/cube/feature_chebyshev.h new file mode 100644 index 0000000000..13c2d96d93 --- /dev/null +++ b/cube/feature_chebyshev.h @@ -0,0 +1,57 @@ +/********************************************************************** + * File: feature_chebyshev.h + * Description: Declaration of the Chebyshev coefficients Feature Class + * Author: Ahmad Abdulkader + * Created: 2008 + * + * (C) Copyright 2008, Google Inc. + ** Licensed under the Apache License, Version 2.0 (the "License"); + ** you may not use this file except in compliance with the License. + ** You may obtain a copy of the License at + ** http://www.apache.org/licenses/LICENSE-2.0 + ** Unless required by applicable law or agreed to in writing, software + ** distributed under the License is distributed on an "AS IS" BASIS, + ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + ** See the License for the specific language governing permissions and + ** limitations under the License. + * + **********************************************************************/ + +// The FeatureChebyshev class implements a Bitmap feature extractor class. It +// inherits from the FeatureBase class +// The feature vector is the composed of the chebyshev coefficients of 4 time +// sequences. 
The time sequences are the left, top, right & bottom +// bitmap profiles of the input samples + +#ifndef FEATURE_CHEBYSHEV_H +#define FEATURE_CHEBYSHEV_H + +#include "char_samp.h" +#include "feature_base.h" + +namespace tesseract { +class FeatureChebyshev : public FeatureBase { + public: + explicit FeatureChebyshev(TuningParams *params); + virtual ~FeatureChebyshev(); + // Render a visualization of the features to a CharSamp. + // This is mainly used by visual-debuggers + virtual CharSamp *ComputeFeatureBitmap(CharSamp *samp); + // Compute the features for a given CharSamp + virtual bool ComputeFeatures(CharSamp *samp, float *features); + // Returns the count of features + virtual int FeatureCnt() { + return (4 * kChebychevCoefficientCnt); + } + + protected: + static const int kChebychevCoefficientCnt = 40; + // Compute Chebychev coefficients for the specified vector + void ChebyshevCoefficients(const vector<float> &input, + int coeff_cnt, float *coeff); + // Compute the features for a given CharSamp + bool ComputeChebyshevCoefficients(CharSamp *samp, float *features); +}; +} + +#endif // FEATURE_CHEBYSHEV_H diff --git a/cube/feature_hybrid.cpp b/cube/feature_hybrid.cpp new file mode 100644 index 0000000000..35aeda0a1f --- /dev/null +++ b/cube/feature_hybrid.cpp @@ -0,0 +1,64 @@ +/********************************************************************** + * File: feature_chebyshev.cpp + * Description: Implementation of the Chebyshev coefficients Feature Class + * Author: Ahmad Abdulkader + * Created: 2008 + * + * (C) Copyright 2008, Google Inc. + ** Licensed under the Apache License, Version 2.0 (the "License"); + ** you may not use this file except in compliance with the License. 
+ ** You may obtain a copy of the License at + ** http://www.apache.org/licenses/LICENSE-2.0 + ** Unless required by applicable law or agreed to in writing, software + ** distributed under the License is distributed on an "AS IS" BASIS, + ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + ** See the License for the specific language governing permissions and + ** limitations under the License. + * + **********************************************************************/ + +#include +#include +#include +#include +#include +#include +#include "feature_base.h" +#include "feature_hybrid.h" +#include "cube_utils.h" +#include "const.h" +#include "char_samp.h" + +namespace tesseract { + +FeatureHybrid::FeatureHybrid(TuningParams *params) + :FeatureBase(params) { + feature_bmp_ = new FeatureBmp(params); + feature_chebyshev_ = new FeatureChebyshev(params); +} + +FeatureHybrid::~FeatureHybrid() { + delete feature_bmp_; + delete feature_chebyshev_; +} + +// Render a visualization of the features to a CharSamp. 
+// This is mainly used by visual-debuggers +CharSamp *FeatureHybrid::ComputeFeatureBitmap(CharSamp *char_samp) { + return char_samp; +} + + +// Compute the features of a given CharSamp +bool FeatureHybrid::ComputeFeatures(CharSamp *char_samp, float *features) { + if (feature_bmp_ == NULL || feature_chebyshev_ == NULL) { + return false; + } + if (!feature_bmp_->ComputeFeatures(char_samp, features)) { + return false; + } + return feature_chebyshev_->ComputeFeatures(char_samp, + features + feature_bmp_->FeatureCnt()); +} + +} // namespace tesseract diff --git a/cube/feature_hybrid.h b/cube/feature_hybrid.h new file mode 100644 index 0000000000..dc94a52619 --- /dev/null +++ b/cube/feature_hybrid.h @@ -0,0 +1,56 @@ +/********************************************************************** + * File: feature_chebyshev.h + * Description: Declaration of the Chebyshev coefficients Feature Class + * Author: Ahmad Abdulkader + * Created: 2008 + * + * (C) Copyright 2008, Google Inc. + ** Licensed under the Apache License, Version 2.0 (the "License"); + ** you may not use this file except in compliance with the License. + ** You may obtain a copy of the License at + ** http://www.apache.org/licenses/LICENSE-2.0 + ** Unless required by applicable law or agreed to in writing, software + ** distributed under the License is distributed on an "AS IS" BASIS, + ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + ** See the License for the specific language governing permissions and + ** limitations under the License. + * + **********************************************************************/ + +// The FeatureHybrid class implements a Bitmap feature extractor class. 
It +// inherits from the FeatureBase class +// This class describes the a hybrid feature vector composed by combining +// the bitmap and the chebyshev feature vectors + +#ifndef FEATURE_HYBRID_H +#define FEATURE_HYBRID_H + +#include "char_samp.h" +#include "feature_bmp.h" +#include "feature_chebyshev.h" + +namespace tesseract { +class FeatureHybrid : public FeatureBase { + public: + explicit FeatureHybrid(TuningParams *params); + virtual ~FeatureHybrid(); + // Render a visualization of the features to a CharSamp. + // This is mainly used by visual-debuggers + virtual CharSamp *ComputeFeatureBitmap(CharSamp *samp); + // Compute the features for a given CharSamp + virtual bool ComputeFeatures(CharSamp *samp, float *features); + // Returns the count of features + virtual int FeatureCnt() { + if (feature_bmp_ == NULL || feature_chebyshev_ == NULL) { + return 0; + } + return feature_bmp_->FeatureCnt() + feature_chebyshev_->FeatureCnt(); + } + + protected: + FeatureBmp *feature_bmp_; + FeatureChebyshev *feature_chebyshev_; +}; +} + +#endif // FEATURE_HYBRID_H diff --git a/cube/hybrid_neural_net_classifier.cpp b/cube/hybrid_neural_net_classifier.cpp new file mode 100644 index 0000000000..e72547fb79 --- /dev/null +++ b/cube/hybrid_neural_net_classifier.cpp @@ -0,0 +1,369 @@ +/********************************************************************** + * File: charclassifier.cpp + * Description: Implementation of Convolutional-NeuralNet Character Classifier + * Author: Ahmad Abdulkader + * Created: 2007 + * + * (C) Copyright 2008, Google Inc. + ** Licensed under the Apache License, Version 2.0 (the "License"); + ** you may not use this file except in compliance with the License. 
+ ** You may obtain a copy of the License at + ** http://www.apache.org/licenses/LICENSE-2.0 + ** Unless required by applicable law or agreed to in writing, software + ** distributed under the License is distributed on an "AS IS" BASIS, + ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + ** See the License for the specific language governing permissions and + ** limitations under the License. + * + **********************************************************************/ + +#include +#include +#include +#include +#include +#include + +#include "classifier_base.h" +#include "char_set.h" +#include "const.h" +#include "conv_net_classifier.h" +#include "cube_utils.h" +#include "feature_base.h" +#include "feature_bmp.h" +#include "hybrid_neural_net_classifier.h" +#include "tess_lang_model.h" + +namespace tesseract { + +HybridNeuralNetCharClassifier::HybridNeuralNetCharClassifier( + CharSet *char_set, + TuningParams *params, + FeatureBase *feat_extract) + : CharClassifier(char_set, params, feat_extract) { + net_input_ = NULL; + net_output_ = NULL; +} + +HybridNeuralNetCharClassifier::~HybridNeuralNetCharClassifier() { + for (int net_idx = 0; net_idx < nets_.size(); net_idx++) { + if (nets_[net_idx] != NULL) { + delete nets_[net_idx]; + } + } + nets_.clear(); + + if (net_input_ != NULL) { + delete []net_input_; + net_input_ = NULL; + } + + if (net_output_ != NULL) { + delete []net_output_; + net_output_ = NULL; + } +} + +// The main training function. Given a sample and a class ID the classifier +// updates its parameters according to its learning algorithm. This function +// is currently not implemented. TODO(ahmadab): implement end-2-end training +bool HybridNeuralNetCharClassifier::Train(CharSamp *char_samp, int ClassID) { + return false; +} + +// A secondary function needed for training. Allows the trainer to set the +// value of any train-time paramter. This function is currently not +// implemented. 
TODO(ahmadab): implement end-2-end training +bool HybridNeuralNetCharClassifier::SetLearnParam(char *var_name, float val) { + // TODO(ahmadab): implementation of parameter initializing. + return false; +} + +// Folds the output of the NeuralNet using the loaded folding sets +void HybridNeuralNetCharClassifier::Fold() { + // in case insensitive mode + if (case_sensitive_ == false) { + int class_cnt = char_set_->ClassCount(); + // fold case + for (int class_id = 0; class_id < class_cnt; class_id++) { + // get class string + const char_32 *str32 = char_set_->ClassString(class_id); + // get the upper case form of the string + string_32 upper_form32 = str32; + for (int ch = 0; ch < upper_form32.length(); ch++) { + if (iswalpha(static_cast(upper_form32[ch])) != 0) { + upper_form32[ch] = towupper(upper_form32[ch]); + } + } + + // find out the upperform class-id if any + int upper_class_id = + char_set_->ClassID(reinterpret_cast( + upper_form32.c_str())); + if (upper_class_id != -1 && class_id != upper_class_id) { + float max_out = MAX(net_output_[class_id], net_output_[upper_class_id]); + net_output_[class_id] = max_out; + net_output_[upper_class_id] = max_out; + } + } + } + + // The folding sets specify how groups of classes should be folded + // Folding involved assigning a min-activation to all the members + // of the folding set. 
The min-activation is a fraction of the max-activation + // of the members of the folding set + for (int fold_set = 0; fold_set < fold_set_cnt_; fold_set++) { + float max_prob = net_output_[fold_sets_[fold_set][0]]; + + for (int ch = 1; ch < fold_set_len_[fold_set]; ch++) { + if (net_output_[fold_sets_[fold_set][ch]] > max_prob) { + max_prob = net_output_[fold_sets_[fold_set][ch]]; + } + } + for (int ch = 0; ch < fold_set_len_[fold_set]; ch++) { + net_output_[fold_sets_[fold_set][ch]] = MAX(max_prob * kFoldingRatio, + net_output_[fold_sets_[fold_set][ch]]); + } + } +} + +// compute the features of specified charsamp and +// feedforward the specified nets +bool HybridNeuralNetCharClassifier::RunNets(CharSamp *char_samp) { + int feat_cnt = feat_extract_->FeatureCnt(); + int class_cnt = char_set_->ClassCount(); + + // allocate i/p and o/p buffers if needed + if (net_input_ == NULL) { + net_input_ = new float[feat_cnt]; + if (net_input_ == NULL) { + return false; + } + + net_output_ = new float[class_cnt]; + if (net_output_ == NULL) { + return false; + } + } + + // compute input features + if (feat_extract_->ComputeFeatures(char_samp, net_input_) == false) { + return false; + } + + // go thru all the nets + memset(net_output_, 0, class_cnt * sizeof(*net_output_)); + float *inputs = net_input_; + for (int net_idx = 0; net_idx < nets_.size(); net_idx++) { + // run each net + vector net_out(class_cnt, 0.0); + if (!nets_[net_idx]->FeedForward(inputs, &net_out[0])) { + return false; + } + // add the output values + for (int class_idx = 0; class_idx < class_cnt; class_idx++) { + net_output_[class_idx] += (net_out[class_idx] * net_wgts_[net_idx]); + } + // increment inputs pointer + inputs += nets_[net_idx]->in_cnt(); + } + + Fold(); + + return true; +} + +// return the cost of being a char +int HybridNeuralNetCharClassifier::CharCost(CharSamp *char_samp) { + // it is by design that a character cost is equal to zero + // when no nets are present. 
This is the case during training. + if (RunNets(char_samp) == false) { + return 0; + } + + return CubeUtils::Prob2Cost(1.0f - net_output_[0]); +} + +// classifies a charsamp and returns an alternate list +// of chars sorted by char costs +CharAltList *HybridNeuralNetCharClassifier::Classify(CharSamp *char_samp) { + // run the needed nets + if (RunNets(char_samp) == false) { + return NULL; + } + + int class_cnt = char_set_->ClassCount(); + + // create an altlist + CharAltList *alt_list = new CharAltList(char_set_, class_cnt); + if (alt_list == NULL) { + return NULL; + } + + for (int out = 1; out < class_cnt; out++) { + int cost = CubeUtils::Prob2Cost(net_output_[out]); + alt_list->Insert(out, cost); + } + + return alt_list; +} + +// set an external net (for training purposes) +void HybridNeuralNetCharClassifier::SetNet(tesseract::NeuralNet *char_net) { +} + +// Load folding sets +// This function returns true on success or if the file can't be read, +// returns false if an error is encountered. 
+bool HybridNeuralNetCharClassifier::LoadFoldingSets( + const string &data_file_path, const string &lang, LangModel *lang_mod) { + fold_set_cnt_ = 0; + string fold_file_name; + fold_file_name = data_file_path + lang; + fold_file_name += ".cube.fold"; + + // folding sets are optional + FILE *fp = fopen(fold_file_name.c_str(), "r"); + if (fp == NULL) { + return true; + } + fclose(fp); + + string fold_sets_str; + if (!CubeUtils::ReadFileToString(fold_file_name.c_str(), + &fold_sets_str)) { + return false; + } + + // split into lines + vector str_vec; + CubeUtils::SplitStringUsing(fold_sets_str, "\r\n", &str_vec); + fold_set_cnt_ = str_vec.size(); + fold_sets_ = new int *[fold_set_cnt_]; + if (fold_sets_ == NULL) { + return false; + } + fold_set_len_ = new int[fold_set_cnt_]; + if (fold_set_len_ == NULL) { + fold_set_cnt_ = 0; + return false; + } + + for (int fold_set = 0; fold_set < fold_set_cnt_; fold_set++) { + reinterpret_cast(lang_mod)->RemoveInvalidCharacters( + &str_vec[fold_set]); + + // if all or all but one character are invalid, invalidate this set + if (str_vec[fold_set].length() <= 1) { + fprintf(stderr, "Cube WARNING (ConvNetCharClassifier::LoadFoldingSets): " + "invalidating folding set %d\n", fold_set); + fold_set_len_[fold_set] = 0; + fold_sets_[fold_set] = NULL; + continue; + } + + string_32 str32; + CubeUtils::UTF8ToUTF32(str_vec[fold_set].c_str(), &str32); + fold_set_len_[fold_set] = str32.length(); + fold_sets_[fold_set] = new int[fold_set_len_[fold_set]]; + if (fold_sets_[fold_set] == NULL) { + fprintf(stderr, "Cube ERROR (ConvNetCharClassifier::LoadFoldingSets): " + "could not allocate folding set\n"); + fold_set_cnt_ = fold_set; + return false; + } + for (int ch = 0; ch < fold_set_len_[fold_set]; ch++) { + fold_sets_[fold_set][ch] = char_set_->ClassID(str32[ch]); + } + } + return true; +} + +// Init the classifier provided a data-path and a language string +bool HybridNeuralNetCharClassifier::Init(const string &data_file_path, + const string 
&lang, + LangModel *lang_mod) { + if (init_ == true) { + return true; + } + + // load the nets if any. This function will return true if the net file + // does not exist. But will fail if the net did not pass the sanity checks + if (!LoadNets(data_file_path, lang)) { + return false; + } + + // load the folding sets if any. This function will return true if the + // file does not exist. But will fail if the it did not pass the sanity checks + if (!LoadFoldingSets(data_file_path, lang, lang_mod)) { + return false; + } + + init_ = true; + return true; +} + +// Load the classifier's Neural Nets +// This function will return true if the net file does not exist. +// But will fail if the net did not pass the sanity checks +bool HybridNeuralNetCharClassifier::LoadNets(const string &data_file_path, + const string &lang) { + string hybrid_net_file; + string junk_net_file; + + // add the lang identifier + hybrid_net_file = data_file_path + lang; + hybrid_net_file += ".cube.hybrid"; + + // neural network is optional + FILE *fp = fopen(hybrid_net_file.c_str(), "r"); + if (fp == NULL) { + return true; + } + fclose(fp); + + string str; + if (!CubeUtils::ReadFileToString(hybrid_net_file.c_str(), &str)) { + return false; + } + + // split into lines + vector str_vec; + CubeUtils::SplitStringUsing(str, "\r\n", &str_vec); + if (str_vec.size() <= 0) { + return false; + } + + // create and add the nets + nets_.resize(str_vec.size(), NULL); + net_wgts_.resize(str_vec.size(), 0); + int total_input_size = 0; + for (int net_idx = 0; net_idx < str_vec.size(); net_idx++) { + // parse the string + vector tokens_vec; + CubeUtils::SplitStringUsing(str_vec[net_idx], " \t", &tokens_vec); + // has to be 2 tokens, net name and input size + if (tokens_vec.size() != 2) { + return false; + } + // load the net + string net_file_name = data_file_path + tokens_vec[0]; + nets_[net_idx] = tesseract::NeuralNet::FromFile(net_file_name.c_str()); + if (nets_[net_idx] == NULL) { + return false; + } + // parse 
the input size and validate it + net_wgts_[net_idx] = atof(tokens_vec[1].c_str()); + if (net_wgts_[net_idx] < 0.0) { + return false; + } + total_input_size += nets_[net_idx]->in_cnt(); + } + // validate total input count + if (total_input_size != feat_extract_->FeatureCnt()) { + return false; + } + // success + return true; +} +} // tesseract diff --git a/cube/hybrid_neural_net_classifier.h b/cube/hybrid_neural_net_classifier.h new file mode 100644 index 0000000000..0ab9ba1235 --- /dev/null +++ b/cube/hybrid_neural_net_classifier.h @@ -0,0 +1,90 @@ +/********************************************************************** + * File: conv_net_classifier.h + * Description: Declaration of Convolutional-NeuralNet Character Classifier + * Author: Ahmad Abdulkader + * Created: 2007 + * + * (C) Copyright 2008, Google Inc. + ** Licensed under the Apache License, Version 2.0 (the "License"); + ** you may not use this file except in compliance with the License. + ** You may obtain a copy of the License at + ** http://www.apache.org/licenses/LICENSE-2.0 + ** Unless required by applicable law or agreed to in writing, software + ** distributed under the License is distributed on an "AS IS" BASIS, + ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + ** See the License for the specific language governing permissions and + ** limitations under the License. 
+ * + **********************************************************************/ + +#ifndef HYBRID_NEURAL_NET_CLASSIFIER_H +#define HYBRID_NEURAL_NET_CLASSIFIER_H + +#include +#include + +#include "char_samp.h" +#include "char_altlist.h" +#include "char_set.h" +#include "classifier_base.h" +#include "feature_base.h" +#include "lang_model.h" +#include "neural_net.h" +#include "tuning_params.h" + +namespace tesseract { + +// Folding Ratio is the ratio of the max-activation of members of a folding +// set that is used to compute the min-activation of the rest of the set +// static const float kFoldingRatio = 0.75; // see conv_net_classifier.h + +class HybridNeuralNetCharClassifier : public CharClassifier { + public: + HybridNeuralNetCharClassifier(CharSet *char_set, TuningParams *params, + FeatureBase *feat_extract); + virtual ~HybridNeuralNetCharClassifier(); + // The main training function. Given a sample and a class ID the classifier + // updates its parameters according to its learning algorithm. This function + // is currently not implemented. TODO(ahmadab): implement end-2-end training + virtual bool Train(CharSamp *char_samp, int ClassID); + // A secondary function needed for training. Allows the trainer to set the + // value of any train-time paramter. This function is currently not + // implemented. TODO(ahmadab): implement end-2-end training + virtual bool SetLearnParam(char *var_name, float val); + // Externally sets the Neural Net used by the classifier. 
Used for training + void SetNet(tesseract::NeuralNet *net); + + // Classifies an input charsamp and return a CharAltList object containing + // the possible candidates and corresponding scores + virtual CharAltList *Classify(CharSamp *char_samp); + // Computes the cost of a specific charsamp being a character (versus a + // non-character: part-of-a-character OR more-than-one-character) + virtual int CharCost(CharSamp *char_samp); + + private: + // Neural Net object used for classification + vector nets_; + vector net_wgts_; + + // data buffers used to hold Neural Net inputs and outputs + float *net_input_; + float *net_output_; + + // Init the classifier provided a data-path and a language string + virtual bool Init(const string &data_file_path, const string &lang, + LangModel *lang_mod); + // Loads the NeuralNets needed for the classifier + bool LoadNets(const string &data_file_path, const string &lang); + // Load folding sets + // This function returns true on success or if the file can't be read, + // returns false if an error is encountered. + virtual bool LoadFoldingSets(const string &data_file_path, + const string &lang, + LangModel *lang_mod); + // Folds the output of the NeuralNet using the loaded folding sets + virtual void Fold(); + // Scales the input char_samp and feeds it to the NeuralNet as input + bool RunNets(CharSamp *char_samp); +}; +} +#endif // HYBRID_NEURAL_NET_CLASSIFIER_H diff --git a/cube/lang_mod_edge.h b/cube/lang_mod_edge.h new file mode 100644 index 0000000000..19897942d6 --- /dev/null +++ b/cube/lang_mod_edge.h @@ -0,0 +1,73 @@ +/********************************************************************** + * File: lang_mod_edge.h + * Description: Declaration of the Language Model Edge Base Class + * Author: Ahmad Abdulkader + * Created: 2007 + * + * (C) Copyright 2008, Google Inc. + ** Licensed under the Apache License, Version 2.0 (the "License"); + ** you may not use this file except in compliance with the License. 
+ ** You may obtain a copy of the License at + ** http://www.apache.org/licenses/LICENSE-2.0 + ** Unless required by applicable law or agreed to in writing, software + ** distributed under the License is distributed on an "AS IS" BASIS, + ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + ** See the License for the specific language governing permissions and + ** limitations under the License. + * + **********************************************************************/ + +// The LangModEdge abstracts an Edge in the language model trie +// This is an abstract class that any Language Model Edge should inherit from +// It provides methods for: +// 1- Returns the class ID corresponding to the edge +// 2- If the edge is a valid EndOfWord (EOW) +// 3- If the edge is coming from a OutOfDictionary (OOF) state machine +// 4- If the edge is a Terminal (has no children) +// 5- A Hash of the edge that will be used to retrieve the edge +// quickly from the BeamSearch lattice +// 6- If two edges are identical +// 7- Returns a verbal description of the edge (used by debuggers) +// 8- the language model cost of the edge (if any) +// 9- The string corresponding to this edge +// 10- Getting and setting the "Root" status of the edge + +#ifndef LANG_MOD_EDGE_H +#define LANG_MOD_EDGE_H + +#include "cube_tuning_params.h" +#include "char_set.h" + +namespace tesseract { + +class LangModEdge { + public: + LangModEdge() {} + virtual ~LangModEdge() {} + + // The string corresponding to this edge + virtual const char_32 * EdgeString() const = 0; + // Returns the class ID corresponding to the edge + virtual int ClassID() const = 0; + // If the edge is the root edge + virtual bool IsRoot() const = 0; + // Set the Root flag + virtual void SetRoot(bool flag) = 0; + // If the edge is a valid EndOfWord (EOW) + virtual bool IsEOW() const = 0; + // is the edge is coming from a OutOfDictionary (OOF) state machine + virtual bool IsOOD() const = 0; + // Is the edge is a 
Terminal (has no children) + virtual bool IsTerminal() const = 0; + // Returns a hash of the edge that will be used to retrieve the edge + virtual unsigned int Hash() const = 0; + // Are the two edges identical? + virtual bool IsIdentical(LangModEdge *edge) const = 0; + // a verbal description of the edge (used by debuggers) + virtual char *Description() const = 0; + // the language model cost of the edge (if any) + virtual int PathCost() const = 0; +}; +} + +#endif // LANG_MOD_EDGE_H diff --git a/cube/lang_model.h b/cube/lang_model.h new file mode 100644 index 0000000000..a29bc1e37d --- /dev/null +++ b/cube/lang_model.h @@ -0,0 +1,78 @@ +/********************************************************************** + * File: lang_model.h + * Description: Declaration of the Language Model Edge Base Class + * Author: Ahmad Abdulkader + * Created: 2007 + * + * (C) Copyright 2008, Google Inc. + ** Licensed under the Apache License, Version 2.0 (the "License"); + ** you may not use this file except in compliance with the License. + ** You may obtain a copy of the License at + ** http://www.apache.org/licenses/LICENSE-2.0 + ** Unless required by applicable law or agreed to in writing, software + ** distributed under the License is distributed on an "AS IS" BASIS, + ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + ** See the License for the specific language governing permissions and + ** limitations under the License. + * + **********************************************************************/ + +// The LanguageModel class abstracts a State machine that is modeled as a Trie +// structure. 
The state machine models the language being recognized by the OCR +// Engine +// This is an abstract class that is to be inherited by any language model + +#ifndef LANG_MODEL_H +#define LANG_MODEL_H + +#include "lang_mod_edge.h" +#include "char_altlist.h" +#include "char_set.h" +#include "tuning_params.h" + +namespace tesseract { +class LangModel { + public: + LangModel() { + ood_enabled_ = true; + numeric_enabled_ = true; + word_list_enabled_ = true; + punc_enabled_ = true; + } + virtual ~LangModel() {} + + // Returns an edge pointer to the Root + virtual LangModEdge *Root() = 0; + // Returns the edges that fan-out of the specified edge and their count + virtual LangModEdge **GetEdges(CharAltList *alt_list, + LangModEdge *parent_edge, + int *edge_cnt) = 0; + // Returns is a sequence of 32-bit characters are valid within this language + // model or net. And EndOfWord flag is specified. If true, the sequence has + // to end on a valid word. The function also optionally returns the list + // of language model edges traversed to parse the string + virtual bool IsValidSequence(const char_32 *str, bool eow_flag, + LangModEdge **edge_array = NULL) = 0; + virtual bool IsLeadingPunc(char_32 ch) = 0; + virtual bool IsTrailingPunc(char_32 ch) = 0; + virtual bool IsDigit(char_32 ch) = 0; + + // accessor functions + inline bool OOD() { return ood_enabled_; } + inline bool Numeric() { return numeric_enabled_; } + inline bool WordList() { return word_list_enabled_; } + inline bool Punc() { return punc_enabled_; } + inline void SetOOD(bool ood) { ood_enabled_ = ood; } + inline void SetNumeric(bool numeric) { numeric_enabled_ = numeric; } + inline void SetWordList(bool word_list) { word_list_enabled_ = word_list; } + inline void SetPunc(bool punc_enabled) { punc_enabled_ = punc_enabled; } + + protected: + bool ood_enabled_; + bool numeric_enabled_; + bool word_list_enabled_; + bool punc_enabled_; +}; +} + +#endif // LANG_MODEL_H diff --git a/cube/search_column.cpp 
b/cube/search_column.cpp new file mode 100644 index 0000000000..7d5b1b2062 --- /dev/null +++ b/cube/search_column.cpp @@ -0,0 +1,229 @@ +/********************************************************************** + * File: search_column.cpp + * Description: Implementation of the Beam Search Column Class + * Author: Ahmad Abdulkader + * Created: 2008 + * + * (C) Copyright 2008, Google Inc. + ** Licensed under the Apache License, Version 2.0 (the "License"); + ** you may not use this file except in compliance with the License. + ** You may obtain a copy of the License at + ** http://www.apache.org/licenses/LICENSE-2.0 + ** Unless required by applicable law or agreed to in writing, software + ** distributed under the License is distributed on an "AS IS" BASIS, + ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + ** See the License for the specific language governing permissions and + ** limitations under the License. + * + **********************************************************************/ + +#include "search_column.h" +#include + +namespace tesseract { + +SearchColumn::SearchColumn(int col_idx, int max_node) { + col_idx_ = col_idx; + node_cnt_ = 0; + node_array_ = NULL; + max_node_cnt_ = max_node; + node_hash_table_ = NULL; + init_ = false; + min_cost_ = INT_MAX; + max_cost_ = 0; +} + +// Cleanup data +void SearchColumn::Cleanup() { + if (node_array_ != NULL) { + for (int node_idx = 0; node_idx < node_cnt_; node_idx++) { + if (node_array_[node_idx] != NULL) { + delete node_array_[node_idx]; + } + } + + delete []node_array_; + node_array_ = NULL; + } + FreeHashTable(); + init_ = false; +} + +SearchColumn::~SearchColumn() { + Cleanup(); +} + +// Initializations +bool SearchColumn::Init() { + if (init_ == true) { + return true; + } + + // create hash table + if (node_hash_table_ == NULL) { + node_hash_table_ = new SearchNodeHashTable(); + if (node_hash_table_ == NULL) { + return false; + } + } + + init_ = true; + + return true; +} + +// Prune 
the nodes if necessary. Pruning is done such that a max +// number of nodes is kept, i.e., the beam width +void SearchColumn::Prune() { + // no need to prune + if (node_cnt_ <= max_node_cnt_) { + return; + } + + // compute the cost histogram + memset(score_bins_, 0, sizeof(score_bins_)); + int cost_range = max_cost_ - min_cost_ + 1; + for (int node_idx = 0; node_idx < node_cnt_; node_idx++) { + int cost_bin = static_cast( + ((node_array_[node_idx]->BestCost() - min_cost_) * + kScoreBins) / static_cast(cost_range)); + if (cost_bin >= kScoreBins) { + cost_bin = kScoreBins - 1; + } + score_bins_[cost_bin]++; + } + + // determine the pruning cost by scanning the cost histogram from + // least to greatest cost bins and finding the cost at which the + // max number of nodes is exceeded + int pruning_cost = 0; + int new_node_cnt = 0; + for (int cost_bin = 0; cost_bin < kScoreBins; cost_bin++) { + if (new_node_cnt > 0 && + (new_node_cnt + score_bins_[cost_bin]) > max_node_cnt_) { + pruning_cost = min_cost_ + ((cost_bin * cost_range) / kScoreBins); + break; + } + new_node_cnt += score_bins_[cost_bin]; + } + + // prune out all the nodes above this cost + for (int node_idx = new_node_cnt = 0; node_idx < node_cnt_; node_idx++) { + // prune this node out + if (node_array_[node_idx]->BestCost() > pruning_cost || + new_node_cnt > max_node_cnt_) { + delete node_array_[node_idx]; + } else { + // keep it + node_array_[new_node_cnt++] = node_array_[node_idx]; + } + } + node_cnt_ = new_node_cnt; +} + +// sort all nodes +void SearchColumn::Sort() { + if (node_cnt_ > 0 && node_array_ != NULL) { + qsort(node_array_, node_cnt_, sizeof(*node_array_), + SearchNode::SearchNodeComparer); + } +} + +// add a new node +SearchNode *SearchColumn::AddNode(LangModEdge *edge, int reco_cost, + SearchNode *parent_node, + CubeRecoContext *cntxt) { + // init if necessary + if (init_ == false && Init() == false) { + return NULL; + } + + // find out if we have an node with the same edge + // look in the 
hash table + SearchNode *new_node = node_hash_table_->Lookup(edge, parent_node); + // node does not exist + if (new_node == NULL) { + new_node = new SearchNode(cntxt, parent_node, reco_cost, edge, col_idx_); + if (new_node == NULL) { + return NULL; + } + + // if the max node count has already been reached, check if the cost of + // the new node exceeds the max cost. This indicates that it will be pruned + // and so there is no point adding it + if (node_cnt_ >= max_node_cnt_ && new_node->BestCost() > max_cost_) { + delete new_node; + return NULL; + } + + // expand the node buffer if necc + if ((node_cnt_ % kNodeAllocChunk) == 0) { + // alloc a new buff + SearchNode **new_node_buff = + new SearchNode *[node_cnt_ + kNodeAllocChunk]; + if (new_node_buff == NULL) { + delete new_node; + return NULL; + } + + // free existing after copying contents + if (node_array_ != NULL) { + memcpy(new_node_buff, node_array_, node_cnt_ * sizeof(*new_node_buff)); + delete []node_array_; + } + + node_array_ = new_node_buff; + } + + // add the node to the hash table only if it is non-OOD edge + // because the langmod state is not unique + if (edge->IsOOD() == false) { + if (!node_hash_table_->Insert(edge, new_node)) { + printf("Hash table full!!!"); + delete new_node; + return NULL; + } + } + + node_array_[node_cnt_++] = new_node; + + } else { + // node exists before + // if no update occurred, return NULL + if (new_node->UpdateParent(parent_node, reco_cost, edge) == false) { + new_node = NULL; + } + + // free the edge + if (edge != NULL) { + delete edge; + } + } + + // update Min and Max Costs + if (new_node != NULL) { + if (min_cost_ > new_node->BestCost()) { + min_cost_ = new_node->BestCost(); + } + + if (max_cost_ < new_node->BestCost()) { + max_cost_ = new_node->BestCost(); + } + } + + return new_node; +} + +SearchNode *SearchColumn::BestNode() { + SearchNode *best_node = NULL; + + for (int node_idx = 0; node_idx < node_cnt_; node_idx++) { + if (best_node == NULL || + 
best_node->BestCost() > node_array_[node_idx]->BestCost()) { + best_node = node_array_[node_idx]; + } + } + + return best_node; +} +} // namespace tesseract diff --git a/cube/search_column.h b/cube/search_column.h new file mode 100644 index 0000000000..da077d6840 --- /dev/null +++ b/cube/search_column.h @@ -0,0 +1,84 @@ +/********************************************************************** + * File: search_column.h + * Description: Declaration of the Beam Search Column Class + * Author: Ahmad Abdulkader + * Created: 2008 + * + * (C) Copyright 2008, Google Inc. + ** Licensed under the Apache License, Version 2.0 (the "License"); + ** you may not use this file except in compliance with the License. + ** You may obtain a copy of the License at + ** http://www.apache.org/licenses/LICENSE-2.0 + ** Unless required by applicable law or agreed to in writing, software + ** distributed under the License is distributed on an "AS IS" BASIS, + ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + ** See the License for the specific language governing permissions and + ** limitations under the License. + * + **********************************************************************/ + +// The SearchColumn class abstracts a column in the lattice that is created +// by the BeamSearch during the recognition process +// The class holds the lattice nodes. New nodes are added by calls to AddNode +// made from the BeamSearch +// The class maintains a hash table of the nodes to be able to lookup nodes +// quickly using their lang_mod_edge. 
This is needed to merge similar paths +// in the lattice + +#ifndef SEARCH_COLUMN_H +#define SEARCH_COLUMN_H + +#include "search_node.h" +#include "lang_mod_edge.h" +#include "cube_reco_context.h" + +namespace tesseract { + +class SearchColumn { + public: + SearchColumn(int col_idx, int max_node_cnt); + ~SearchColumn(); + // Accessor functions + inline int ColIdx() const { return col_idx_; } + inline int NodeCount() const { return node_cnt_; } + inline SearchNode **Nodes() const { return node_array_; } + + // Prune the nodes if necessary. Pruning is done such that a max + // number of nodes is kept, i.e., the beam width + void Prune(); + SearchNode *AddNode(LangModEdge *edge, int score, + SearchNode *parent, CubeRecoContext *cntxt); + // Returns the node with the least cost + SearchNode *BestNode(); + // Sort the lattice nodes. Needed for visualization + void Sort(); + // Free up the Hash Table. Added to be called by the Beam Search after + // a column is pruned to reduce memory foot print + void FreeHashTable() { + if (node_hash_table_ != NULL) { + delete node_hash_table_; + node_hash_table_ = NULL; + } + } + + private: + static const int kNodeAllocChunk = 1024; + static const int kScoreBins = 1024; + bool init_; + int min_cost_; + int max_cost_; + int max_node_cnt_; + int node_cnt_; + int col_idx_; + int score_bins_[kScoreBins]; + SearchNode **node_array_; + SearchNodeHashTable *node_hash_table_; + + // Free node array and hash table + void Cleanup(); + // Create hash table + bool Init(); +}; +} + +#endif // SEARCH_COLUMN_H diff --git a/cube/search_node.cpp b/cube/search_node.cpp new file mode 100644 index 0000000000..ff5bfbd844 --- /dev/null +++ b/cube/search_node.cpp @@ -0,0 +1,232 @@ +/********************************************************************** + * File: search_node.cpp + * Description: Implementation of the Beam Search Node Class + * Author: Ahmad Abdulkader + * Created: 2008 + * + * (C) Copyright 2008, Google Inc. 
+ ** Licensed under the Apache License, Version 2.0 (the "License"); + ** you may not use this file except in compliance with the License. + ** You may obtain a copy of the License at + ** http://www.apache.org/licenses/LICENSE-2.0 + ** Unless required by applicable law or agreed to in writing, software + ** distributed under the License is distributed on an "AS IS" BASIS, + ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + ** See the License for the specific language governing permissions and + ** limitations under the License. + * + **********************************************************************/ + +#include "search_node.h" + +namespace tesseract { + +// The constructor updates the best paths and costs: +// mean_char_reco_cost_ (returned by BestRecoCost()) is the mean +// char_reco cost of the best_path, including this node. +// best_path_reco_cost is the total char_reco_cost of the best_path, +// but excludes the char_reco_cost of this node. +// best_cost is the mean mixed cost, i.e., mean_char_reco_cost_ + +// current language model cost, all weighted by the cube context's +// RecoWgt parameter +SearchNode::SearchNode(CubeRecoContext *cntxt, SearchNode *parent_node, + int char_reco_cost, LangModEdge *edge, int col_idx) { + // copy data members + cntxt_ = cntxt; + lang_mod_edge_ = edge; + col_idx_ = col_idx; + parent_node_ = parent_node; + char_reco_cost_ = char_reco_cost; + + // the string of this node is the same as that of the language model edge + str_ = (edge == NULL ? NULL : edge->EdgeString()); + + // compute best path total reco cost + best_path_reco_cost_ = (parent_node_ == NULL) ? 0 : + parent_node_->CharRecoCost() + parent_node_->BestPathRecoCost(); + + // update best path length + best_path_len_ = (parent_node_ == NULL) ? 
+ 1 : parent_node_->BestPathLength() + 1; + if (edge != NULL && edge->IsRoot() && parent_node_ != NULL) { + best_path_len_++; + } + + // compute best reco cost mean cost + mean_char_reco_cost_ = static_cast( + (best_path_reco_cost_ + char_reco_cost_) / + static_cast(best_path_len_)); + + // get language model cost + int lm_cost = LangModCost(lang_mod_edge_, parent_node_); + + // compute aggregate best cost + best_cost_ = static_cast(cntxt_->Params()->RecoWgt() * + (best_path_reco_cost_ + char_reco_cost_) / + static_cast(best_path_len_) + ) + lm_cost; +} + +SearchNode::~SearchNode() { + if (lang_mod_edge_ != NULL) { + delete lang_mod_edge_; + } +} + +// update the parent_node node if provides a better (less) cost +bool SearchNode::UpdateParent(SearchNode *new_parent, int new_reco_cost, + LangModEdge *new_edge) { + if (lang_mod_edge_ == NULL) { + if (new_edge != NULL) { + return false; + } + } else { + // to update the parent_node, we have to have the same target + // state and char + if (new_edge == NULL || !lang_mod_edge_->IsIdentical(new_edge) || + !SearchNode::IdenticalPath(parent_node_, new_parent)) { + return false; + } + } + + // compute the path cost and combined cost of the new path + int new_best_path_reco_cost; + int new_cost; + int new_best_path_len; + + new_best_path_reco_cost = (new_parent == NULL) ? + 0 : new_parent->BestPathRecoCost() + new_parent->CharRecoCost(); + + new_best_path_len = + (new_parent == NULL) ? 
1 : new_parent->BestPathLength() + 1; + + // compute the new language model cost + int new_lm_cost = LangModCost(new_edge, new_parent); + + new_cost = static_cast(cntxt_->Params()->RecoWgt() * + (new_best_path_reco_cost + new_reco_cost) / + static_cast(new_best_path_len) + ) + new_lm_cost; + + // update if it is better (less) than the current one + if (best_cost_ > new_cost) { + parent_node_ = new_parent; + char_reco_cost_ = new_reco_cost; + best_path_reco_cost_ = new_best_path_reco_cost; + best_path_len_ = new_best_path_len; + mean_char_reco_cost_ = static_cast( + (best_path_reco_cost_ + char_reco_cost_) / + static_cast(best_path_len_)); + best_cost_ = static_cast(cntxt_->Params()->RecoWgt() * + (best_path_reco_cost_ + char_reco_cost_) / + static_cast(best_path_len_) + ) + new_lm_cost; + return true; + } + return false; +} + +char_32 *SearchNode::PathString() { + SearchNode *node = this; + + // compute string length + int len = 0; + + while (node != NULL) { + if (node->str_ != NULL) { + len += CubeUtils::StrLen(node->str_); + } + + // if the edge is a root and does not have a NULL parent, account for space + LangModEdge *lm_edge = node->LangModelEdge(); + if (lm_edge != NULL && lm_edge->IsRoot() && node->ParentNode() != NULL) { + len++; + } + + node = node->parent_node_; + } + + char_32 *char_ptr = new char_32[len + 1]; + if (char_ptr == NULL) { + return NULL; + } + + int ch_idx = len; + + node = this; + char_ptr[ch_idx--] = 0; + + while (node != NULL) { + int str_len = ((node->str_ == NULL) ? 
0 : CubeUtils::StrLen(node->str_)); + while (str_len > 0) { + char_ptr[ch_idx--] = node->str_[--str_len]; + } + + // if the edge is a root and does not have a NULL parent, insert a space + LangModEdge *lm_edge = node->LangModelEdge(); + if (lm_edge != NULL && lm_edge->IsRoot() && node->ParentNode() != NULL) { + char_ptr[ch_idx--] = (char_32)' '; + } + + node = node->parent_node_; + } + + return char_ptr; +} + +// compares the path of two nodes and checks if its identical +bool SearchNode::IdenticalPath(SearchNode *node1, SearchNode *node2) { + if (node1 != NULL && node2 != NULL && + node1->best_path_len_ != node2->best_path_len_) { + return false; + } + + // backtrack until either a root or a NULL edge is reached + while (node1 != NULL && node2 != NULL) { + if (node1->str_ != node2->str_) { + return false; + } + + // stop if either nodes is a root + if (node1->LangModelEdge()->IsRoot() || node2->LangModelEdge()->IsRoot()) { + break; + } + + node1 = node1->parent_node_; + node2 = node2->parent_node_; + } + + return ((node1 == NULL && node2 == NULL) || + (node1 != NULL && node1->LangModelEdge()->IsRoot() && + node2 != NULL && node2->LangModelEdge()->IsRoot())); +} + +// Computes the language model cost of a path +int SearchNode::LangModCost(LangModEdge *current_lm_edge, + SearchNode *parent_node) { + int lm_cost = 0; + int node_cnt = 0; + + do { + // check if root + bool is_root = ((current_lm_edge != NULL && current_lm_edge->IsRoot()) || + parent_node == NULL); + if (is_root) { + node_cnt++; + lm_cost += (current_lm_edge == NULL ? 
0 : current_lm_edge->PathCost()); + } + + // continue until we hit a null parent + if (parent_node == NULL) { + break; + } + + // get the previous language model edge + current_lm_edge = parent_node->LangModelEdge(); + // back track + parent_node = parent_node->ParentNode(); + } while (true); + + return static_cast(lm_cost / static_cast(node_cnt)); +} +} // namespace tesseract diff --git a/cube/search_node.h b/cube/search_node.h new file mode 100644 index 0000000000..b4b69b8cc7 --- /dev/null +++ b/cube/search_node.h @@ -0,0 +1,168 @@ +/********************************************************************** + * File: search_node.h + * Description: Declaration of the Beam Search Node Class + * Author: Ahmad Abdulkader + * Created: 2008 + * + * (C) Copyright 2008, Google Inc. + ** Licensed under the Apache License, Version 2.0 (the "License"); + ** you may not use this file except in compliance with the License. + ** You may obtain a copy of the License at + ** http://www.apache.org/licenses/LICENSE-2.0 + ** Unless required by applicable law or agreed to in writing, software + ** distributed under the License is distributed on an "AS IS" BASIS, + ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + ** See the License for the specific language governing permissions and + ** limitations under the License. + * + **********************************************************************/ + +// The SearchNode class abstracts the search lattice node in the lattice +// generated by the BeamSearch class +// The SearchNode class holds the lang_mod_edge associated with the lattice +// node. 
It also holds a pointer to the parent SearchNode in the search path +// In addition it holds the recognition and the language model costs of the +// node and the path leading to this node + +#ifndef SEARCH_NODE_H +#define SEARCH_NODE_H + +#include "lang_mod_edge.h" +#include "cube_reco_context.h" + +namespace tesseract { + +class SearchNode { + public: + SearchNode(CubeRecoContext *cntxt, SearchNode *parent_node, + int char_reco_cost, LangModEdge *edge, int col_idx); + + ~SearchNode(); + + // Updates the parent of the current node if the specified path yields + // a better path cost + bool UpdateParent(SearchNode *new_parent, int new_reco_cost, + LangModEdge *new_edge); + // returns the 32-bit string corresponding to the path leading to this node + char_32 *PathString(); + // True if the two input nodes correspond to the same path + static bool IdenticalPath(SearchNode *node1, SearchNode *node2); + + inline const char_32 *NodeString() { return str_; } + inline void SetString(char_32 *str) { str_ = str; } + + // This node's character recognition cost. + inline int CharRecoCost() { return char_reco_cost_; } + // Total character recognition cost of the nodes in the best path, + // excluding this node. + inline int BestPathRecoCost() { return best_path_reco_cost_; } + // Number of nodes in best path. + inline int BestPathLength() { return best_path_len_; } + // Mean mixed cost, i.e., mean character recognition cost + + // current language model cost, all weighted by the RecoWgt parameter + inline int BestCost() { return best_cost_; } + // Mean character recognition cost of the nodes on the best path, + // including this node. 
+ inline int BestRecoCost() { return mean_char_reco_cost_ ; } + + inline int ColIdx() { return col_idx_; } + inline SearchNode *ParentNode() { return parent_node_; } + inline LangModEdge *LangModelEdge() { return lang_mod_edge_;} + inline int LangModCost() { return LangModCost(lang_mod_edge_, parent_node_); } + + // A comparer function that allows the SearchColumn class to sort the + // nodes based on the path cost + inline static int SearchNodeComparer(const void *node1, const void *node2) { + return (*(reinterpret_cast(node1)))->best_cost_ - + (*(reinterpret_cast(node2)))->best_cost_; + } + + private: + CubeRecoContext *cntxt_; + // Character code + const char_32 *str_; + // Recognition cost of most recent character + int char_reco_cost_; + // Mean mixed cost, i.e., mean character recognition cost + + // current language model cost, all weighted by the RecoWgt parameter + int best_cost_; + // Mean character recognition cost of the nodes on the best path, + // including this node. + int mean_char_reco_cost_ ; + // Total character recognition cost of the nodes in the best path, + // excluding this node. + int best_path_reco_cost_; + // Number of nodes in best path. + int best_path_len_; + // Column index + int col_idx_; + // Parent Node + SearchNode *parent_node_; + // Language model edge + LangModEdge *lang_mod_edge_; + static int LangModCost(LangModEdge *lang_mod_edge, SearchNode *parent_node); +}; + +// Implments a SearchNode hash table used to detect if a Search Node exists +// or not. 
This is needed to make sure that identical paths in the BeamSearch +// converge +class SearchNodeHashTable { + public: + SearchNodeHashTable() { + memset(bin_size_array_, 0, sizeof(bin_size_array_)); + } + + ~SearchNodeHashTable() { + } + + // inserts an entry in the hash table + inline bool Insert(LangModEdge *lang_mod_edge, SearchNode *srch_node) { + // compute hash based on the edge and its parent node edge + unsigned int edge_hash = lang_mod_edge->Hash(); + unsigned int parent_hash = (srch_node->ParentNode() == NULL ? + 0 : srch_node->ParentNode()->LangModelEdge()->Hash()); + unsigned int hash_bin = (edge_hash + parent_hash) % kSearchNodeHashBins; + + // already maxed out, just fail + if (bin_size_array_[hash_bin] >= kMaxSearchNodePerBin) { + return false; + } + + bin_array_[hash_bin][bin_size_array_[hash_bin]++] = srch_node; + + return true; + } + + // Looks up an entry in the hash table + inline SearchNode *Lookup(LangModEdge *lang_mod_edge, + SearchNode *parent_node) { + // compute hash based on the edge and its parent node edge + unsigned int edge_hash = lang_mod_edge->Hash(); + unsigned int parent_hash = (parent_node == NULL ? + 0 : parent_node->LangModelEdge()->Hash()); + unsigned int hash_bin = (edge_hash + parent_hash) % kSearchNodeHashBins; + + // lookup the entries in the hash bin + for (int node_idx = 0; node_idx < bin_size_array_[hash_bin]; node_idx++) { + if (lang_mod_edge->IsIdentical( + bin_array_[hash_bin][node_idx]->LangModelEdge()) == true && + SearchNode::IdenticalPath( + bin_array_[hash_bin][node_idx]->ParentNode(), parent_node) == true) { + return bin_array_[hash_bin][node_idx]; + } + } + + return NULL; + } + + private: + // Hash bin size parameters. These were determined emperically. 
These affect + // the speed of the beam search but have no impact on accuracy + static const int kSearchNodeHashBins = 4096; + static const int kMaxSearchNodePerBin = 512; + int bin_size_array_[kSearchNodeHashBins]; + SearchNode *bin_array_[kSearchNodeHashBins][kMaxSearchNodePerBin]; +}; +} + +#endif // SEARCH_NODE_H diff --git a/cube/search_object.h b/cube/search_object.h new file mode 100644 index 0000000000..84b866e6fb --- /dev/null +++ b/cube/search_object.h @@ -0,0 +1,55 @@ +/********************************************************************** + * File: search_object.h + * Description: Declaration of the Beam Search Object Class + * Author: Ahmad Abdulkader + * Created: 2008 + * + * (C) Copyright 2008, Google Inc. + ** Licensed under the Apache License, Version 2.0 (the "License"); + ** you may not use this file except in compliance with the License. + ** You may obtain a copy of the License at + ** http://www.apache.org/licenses/LICENSE-2.0 + ** Unless required by applicable law or agreed to in writing, software + ** distributed under the License is distributed on an "AS IS" BASIS, + ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + ** See the License for the specific language governing permissions and + ** limitations under the License. + * + **********************************************************************/ + +// The SearchObject class represents a char_samp (a word bitmap) that is +// being searched for characters (or recognizeable entities). 
+// This is an abstract class that all SearchObjects should inherit from +// A SearchObject class provides methods to: +// 1- Returns the count of segments +// 2- Recognize a segment range +// 3- Creates a CharSamp for a segment range + +#ifndef SEARCH_OBJECT_H +#define SEARCH_OBJECT_H + +#include "char_altlist.h" +#include "char_samp.h" +#include "cube_reco_context.h" + +namespace tesseract { +class SearchObject { + public: + explicit SearchObject(CubeRecoContext *cntxt) { cntxt_ = cntxt; } + virtual ~SearchObject() {} + + virtual int SegPtCnt() = 0; + virtual CharAltList *RecognizeSegment(int start_pt, int end_pt) = 0; + virtual CharSamp *CharSample(int start_pt, int end_pt) = 0; + virtual Box* CharBox(int start_pt, int end_pt) = 0; + + virtual int SpaceCost(int seg_pt) = 0; + virtual int NoSpaceCost(int seg_pt) = 0; + virtual int NoSpaceCost(int start_pt, int end_pt) = 0; + + protected: + CubeRecoContext *cntxt_; +}; +} + +#endif // SEARCH_OBJECT_H diff --git a/ccmain/paircmp.h b/cube/string_32.h similarity index 54% rename from ccmain/paircmp.h rename to cube/string_32.h index f2b3d0e028..0ae0ceec0e 100644 --- a/ccmain/paircmp.h +++ b/cube/string_32.h @@ -1,10 +1,10 @@ /********************************************************************** - * File: paircmp.h (Formerly paircmp.h) - * Description: Code to compare two blobs using the adaptive matcher - * Author: Ray Smith - * Created: Wed Apr 21 09:31:02 BST 1993 + * File: string_32.h + * Description: Declaration of a 32 Bit string class + * Author: Ahmad Abdulkader + * Created: 2007 * - * (C) Copyright 1993, Hewlett-Packard Ltd. + * (C) Copyright 2008, Google Inc. ** Licensed under the Apache License, Version 2.0 (the "License"); ** you may not use this file except in compliance with the License. 
** You may obtain a copy of the License at @@ -17,11 +17,28 @@ * **********************************************************************/ -#ifndef PAIRCMP_H -#define PAIRCMP_H +// the string_32 class provides the functionality needed +// for a 32-bit string class -#include "ocrblock.h" -#include "varable.h" -#include "notdll.h" +#ifndef STRING_32_H +#define STRING_32_H +#include +#include +#include +#include + +#ifdef USE_STD_NAMESPACE +using std::basic_string; +using std::string; +using std::vector; #endif + +namespace tesseract { + +// basic definitions +typedef signed int char_32; +typedef basic_string string_32; +} + +#endif // STRING_32_H diff --git a/cube/tess_lang_mod_edge.cpp b/cube/tess_lang_mod_edge.cpp new file mode 100644 index 0000000000..16d64a3800 --- /dev/null +++ b/cube/tess_lang_mod_edge.cpp @@ -0,0 +1,120 @@ +/********************************************************************** + * File: tess_lang_mod_edge.cpp + * Description: Implementation of the Tesseract Language Model Edge Class + * Author: Ahmad Abdulkader + * Created: 2008 + * + * (C) Copyright 2008, Google Inc. + ** Licensed under the Apache License, Version 2.0 (the "License"); + ** you may not use this file except in compliance with the License. + ** You may obtain a copy of the License at + ** http://www.apache.org/licenses/LICENSE-2.0 + ** Unless required by applicable law or agreed to in writing, software + ** distributed under the License is distributed on an "AS IS" BASIS, + ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + ** See the License for the specific language governing permissions and + ** limitations under the License. 
+ * + **********************************************************************/ + +#include "tess_lang_mod_edge.h" +#include "const.h" +#include "unichar.h" + + + +namespace tesseract { +// OOD constructor +TessLangModEdge::TessLangModEdge(CubeRecoContext *cntxt, int class_id) { + root_ = false; + cntxt_ = cntxt; + dawg_ = NULL; + start_edge_ = 0; + end_edge_ = 0; + edge_mask_ = 0; + class_id_ = class_id; + str_ = cntxt_->CharacterSet()->ClassString(class_id); + path_cost_ = Cost(); +} + +// leading, trailing punc constructor and single byte UTF char +TessLangModEdge::TessLangModEdge(CubeRecoContext *cntxt, + const Dawg *dawg, EDGE_REF edge_idx, int class_id) { + root_ = false; + cntxt_ = cntxt; + dawg_ = dawg; + start_edge_ = edge_idx; + end_edge_ = edge_idx; + edge_mask_ = 0; + class_id_ = class_id; + str_ = cntxt_->CharacterSet()->ClassString(class_id); + path_cost_ = Cost(); +} + +// dict constructor: multi byte UTF char +TessLangModEdge::TessLangModEdge(CubeRecoContext *cntxt, const Dawg *dawg, + EDGE_REF start_edge_idx, EDGE_REF end_edge_idx, + int class_id) { + root_ = false; + cntxt_ = cntxt; + dawg_ = dawg; + start_edge_ = start_edge_idx; + end_edge_ = end_edge_idx; + edge_mask_ = 0; + class_id_ = class_id; + str_ = cntxt_->CharacterSet()->ClassString(class_id); + path_cost_ = Cost(); +} + +char *TessLangModEdge::Description() const { + char *char_ptr = new char[256]; + if (!char_ptr) { + return NULL; + } + + char dawg_str[256]; + char edge_str[32]; + if (dawg_ == (Dawg *)DAWG_OOD) { + strcpy(dawg_str, "OOD"); + } else if (dawg_ == (Dawg *)DAWG_NUMBER) { + strcpy(dawg_str, "NUM"); + } else if (dawg_->permuter() == SYSTEM_DAWG_PERM) { + strcpy(dawg_str, "Main"); + } else if (dawg_->permuter() == USER_DAWG_PERM) { + strcpy(dawg_str, "User"); + } else if (dawg_->permuter() == DOC_DAWG_PERM) { + strcpy(dawg_str, "Doc"); + } else { + strcpy(dawg_str, "N/A"); + } + + sprintf(edge_str, "%d", static_cast(start_edge_)); + if (IsLeadingPuncEdge(edge_mask_)) { + 
strcat(edge_str, "-LP"); + } + if (IsTrailingPuncEdge(edge_mask_)) { + strcat(edge_str, "-TP"); + } + sprintf(char_ptr, "%s(%s)%s, Wtd Dawg Cost=%d", + dawg_str, edge_str, IsEOW() ? "-EOW-" : "", path_cost_); + + return char_ptr; +} + +int TessLangModEdge::CreateChildren(CubeRecoContext *cntxt, + const Dawg *dawg, + NODE_REF parent_node, + LangModEdge **edge_array) { + int edge_cnt = 0; + NodeChildVector vec; + dawg->unichar_ids_of(parent_node, &vec); // find all children of the parent + for (int i = 0; i < vec.size(); ++i) { + const NodeChild &child = vec[i]; + if (child.unichar_id == INVALID_UNICHAR_ID) continue; + edge_array[edge_cnt] = + new TessLangModEdge(cntxt, dawg, child.edge_ref, child.unichar_id); + if (edge_array[edge_cnt] != NULL) edge_cnt++; + } + return edge_cnt; +} +} diff --git a/cube/tess_lang_mod_edge.h b/cube/tess_lang_mod_edge.h new file mode 100644 index 0000000000..206a160ded --- /dev/null +++ b/cube/tess_lang_mod_edge.h @@ -0,0 +1,234 @@ +/********************************************************************** + * File: tess_lang_mod_edge.h + * Description: Declaration of the Tesseract Language Model Edge Class + * Author: Ahmad Abdulkader + * Created: 2008 + * + * (C) Copyright 2008, Google Inc. + ** Licensed under the Apache License, Version 2.0 (the "License"); + ** you may not use this file except in compliance with the License. + ** You may obtain a copy of the License at + ** http://www.apache.org/licenses/LICENSE-2.0 + ** Unless required by applicable law or agreed to in writing, software + ** distributed under the License is distributed on an "AS IS" BASIS, + ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + ** See the License for the specific language governing permissions and + ** limitations under the License. 
+ * + **********************************************************************/ + +// The TessLangModEdge models an edge in the Tesseract language models +// It inherits from the LangModEdge class + +#ifndef TESS_LANG_MOD_EDGE_H +#define TESS_LANG_MOD_EDGE_H + +#include "dawg.h" +#include "char_set.h" + +#include "lang_mod_edge.h" +#include "cube_reco_context.h" +#include "cube_utils.h" + +// Macros needed to identify punctuation in the langmodel state +#ifdef _HMSW32_H +#define LEAD_PUNC_EDGE_REF_MASK (inT64) 0x0000000100000000i64 +#define TRAIL_PUNC_EDGE_REF_MASK (inT64) 0x0000000200000000i64 +#define TRAIL_PUNC_REPEAT_MASK (inT64) 0xffff000000000000i64 +#else +#define LEAD_PUNC_EDGE_REF_MASK (inT64) 0x0000000100000000ll +#define TRAIL_PUNC_EDGE_REF_MASK (inT64) 0x0000000200000000ll +#define TRAIL_PUNC_REPEAT_MASK (inT64) 0xffff000000000000ll +#endif + +// Number state machine macros +#define NUMBER_STATE_SHIFT 0 +#define NUMBER_STATE_MASK 0x0000000fl +#define NUMBER_LITERAL_SHIFT 4 +#define NUMBER_LITERAL_MASK 0x000000f0l +#define NUMBER_REPEAT_SHIFT 8 +#define NUMBER_REPEAT_MASK 0x00000f00l +#define NUM_TRM -99 +#define TRAIL_PUNC_REPEAT_SHIFT 48 + +#define IsLeadingPuncEdge(edge_mask) \ + ((edge_mask & LEAD_PUNC_EDGE_REF_MASK) != 0) +#define IsTrailingPuncEdge(edge_mask) \ + ((edge_mask & TRAIL_PUNC_EDGE_REF_MASK) != 0) +#define TrailingPuncCount(edge_mask) \ + ((edge_mask & TRAIL_PUNC_REPEAT_MASK) >> TRAIL_PUNC_REPEAT_SHIFT) +#define TrailingPuncEdgeMask(Cnt) \ + (TRAIL_PUNC_EDGE_REF_MASK | ((Cnt) << TRAIL_PUNC_REPEAT_SHIFT)) + +// State machine IDs +#define DAWG_OOD 0 +#define DAWG_NUMBER 1 + +namespace tesseract { +class TessLangModEdge : public LangModEdge { + public: + // Different ways of constructing a TessLangModEdge + TessLangModEdge(CubeRecoContext *cntxt, const Dawg *edge_array, + EDGE_REF edge, int class_id); + TessLangModEdge(CubeRecoContext *cntxt, const Dawg *edge_array, + EDGE_REF start_edge_idx, EDGE_REF end_edge_idx, + int class_id); + 
TessLangModEdge(CubeRecoContext *cntxt, int class_id); + ~TessLangModEdge() {} + + // Accessors + inline bool IsRoot() const { + return root_; + } + inline void SetRoot(bool flag) { root_ = flag; } + + inline bool IsOOD() const { + return (dawg_ == (Dawg *)DAWG_OOD); + } + + inline bool IsNumber() const { + return (dawg_ == (Dawg *)DAWG_NUMBER); + } + + inline bool IsEOW() const { + return (IsTerminal() || (dawg_->end_of_word(end_edge_) != 0)); + } + + inline const Dawg *GetDawg() const { return dawg_; } + inline EDGE_REF StartEdge() const { return start_edge_; } + inline EDGE_REF EndEdge() const { return end_edge_; } + inline EDGE_REF EdgeMask() const { return edge_mask_; } + inline const char_32 * EdgeString() const { return str_; } + inline int ClassID () const { return class_id_; } + inline int PathCost() const { return path_cost_; } + inline void SetEdgeMask(EDGE_REF edge_mask) { edge_mask_ = edge_mask; } + inline void SetDawg(Dawg *dawg) { dawg_ = dawg; } + inline void SetStartEdge(EDGE_REF edge_idx) { start_edge_ = edge_idx; } + inline void SetEndEdge(EDGE_REF edge_idx) { end_edge_ = edge_idx; } + + // is this a terminal node: + // we can terminate at any OOD char, trailing punc or + // when the dawg terminates + inline bool IsTerminal() const { + return (IsOOD() || IsNumber() || IsTrailingPuncEdge(start_edge_) || + dawg_->next_node(end_edge_) == 0); + } + + // How many signals does the LM provide for tuning. These are flags like: + // OOD or not, Number of not that are used by the training to compute + // extra costs for each word. 
+ inline int SignalCnt() const { + return 2; + } + + // returns the weight assigned to a specified signal + inline double SignalWgt(int signal) const { + CubeTuningParams *params = + reinterpret_cast(cntxt_->Params()); + if (params != NULL) { + switch (signal) { + case 0: + return params->OODWgt(); + break; + + case 1: + return params->NumWgt(); + break; + } + } + + return 0.0; + } + + // sets the weight assigned to a specified signal: Used in training + void SetSignalWgt(int signal, double wgt) { + CubeTuningParams *params = + reinterpret_cast(cntxt_->Params()); + if (params != NULL) { + switch (signal) { + case 0: + params->SetOODWgt(wgt); + break; + + case 1: + params->SetNumWgt(wgt); + break; + } + } + } + + // returns the actual value of a specified signal + int Signal(int signal) { + switch (signal) { + case 0: + return IsOOD() ? MIN_PROB_COST : 0; + break; + + case 1: + return IsNumber() ? MIN_PROB_COST : 0; + break; + + default: + return 0; + } + } + + // returns the Hash value of the edge. 
Used by the SearchNode hash table + // to quickly lookup exisiting edges to converge during search + inline unsigned int Hash() const { + return static_cast(((start_edge_ | end_edge_) ^ + ((reinterpret_cast(dawg_)))) ^ + ((unsigned int)edge_mask_) ^ + class_id_); + } + + // A verbal description of the edge: Used by visualizers + char *Description() const; + + // Is this edge identical to the specified edge + inline bool IsIdentical(LangModEdge *lang_mod_edge) const { + return (class_id_ == + reinterpret_cast(lang_mod_edge)->class_id_ && + str_ == reinterpret_cast(lang_mod_edge)->str_ && + dawg_ == reinterpret_cast(lang_mod_edge)->dawg_ && + start_edge_ == + reinterpret_cast(lang_mod_edge)->start_edge_ && + end_edge_ == + reinterpret_cast(lang_mod_edge)->end_edge_ && + edge_mask_ == + reinterpret_cast(lang_mod_edge)->edge_mask_); + } + + // Creates a set of fan-out edges for the specified edge + static int CreateChildren(CubeRecoContext *cntxt, + const Dawg *edges, + NODE_REF edge_reg, + LangModEdge **lm_edges); + + private: + bool root_; + CubeRecoContext *cntxt_; + const Dawg *dawg_; + EDGE_REF start_edge_; + EDGE_REF end_edge_; + EDGE_REF edge_mask_; + int path_cost_; + int class_id_; + const char_32 * str_; + // returns the cost of the lang_mod_edge + inline int Cost() const { + if (cntxt_ != NULL) { + CubeTuningParams *params = + reinterpret_cast(cntxt_->Params()); + if (dawg_ == (Dawg *)DAWG_OOD) { + return static_cast(params->OODWgt() * MIN_PROB_COST); + } else if (dawg_ == (Dawg *)DAWG_NUMBER) { + return static_cast(params->NumWgt() * MIN_PROB_COST); + } + } + return 0; + } +}; +} // namespace tesseract + +#endif // TESS_LANG_MOD_EDGE_H diff --git a/cube/tess_lang_model.cpp b/cube/tess_lang_model.cpp new file mode 100644 index 0000000000..1cf16fd11d --- /dev/null +++ b/cube/tess_lang_model.cpp @@ -0,0 +1,523 @@ +/********************************************************************** + * File: tess_lang_model.cpp + * Description: Implementation of the 
Tesseract Language Model Class + * Author: Ahmad Abdulkader + * Created: 2008 + * + * (C) Copyright 2008, Google Inc. + ** Licensed under the Apache License, Version 2.0 (the "License"); + ** you may not use this file except in compliance with the License. + ** You may obtain a copy of the License at + ** http://www.apache.org/licenses/LICENSE-2.0 + ** Unless required by applicable law or agreed to in writing, software + ** distributed under the License is distributed on an "AS IS" BASIS, + ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + ** See the License for the specific language governing permissions and + ** limitations under the License. + * + **********************************************************************/ + +// The TessLangModel class abstracts the Tesseract language model. It inherits +// from the LangModel class. The Tesseract language model encompasses several +// Dawgs (words from training data, punctuation, numbers, document words). +// On top of this Cube adds an OOD state machine +// The class provides methods to traverse the language model in a generative +// fashion. Given any node in the DAWG, the language model can generate a list +// of children (or fan-out) edges + +#include +#include + +#include "char_samp.h" +#include "cube_utils.h" +#include "dict.h" +#include "tesseractclass.h" +#include "tess_lang_model.h" +#include "tessdatamanager.h" +#include "unicharset.h" + +namespace tesseract { +// max fan-out (used for preallocation). 
Initialized here, but modified by +// constructor +int TessLangModel::max_edge_ = 4096; + +// Language model extra State machines +const Dawg *TessLangModel::ood_dawg_ = reinterpret_cast(DAWG_OOD); +const Dawg *TessLangModel::number_dawg_ = reinterpret_cast(DAWG_NUMBER); + +// number state machine +const int TessLangModel::num_state_machine_[kStateCnt][kNumLiteralCnt] = { + {0, 1, 1, NUM_TRM, NUM_TRM}, + {NUM_TRM, 1, 1, 3, 2}, + {NUM_TRM, NUM_TRM, 1, NUM_TRM, 2}, + {NUM_TRM, NUM_TRM, 3, NUM_TRM, 2}, +}; +const int TessLangModel::num_max_repeat_[kStateCnt] = {3, 32, 8, 3}; + +// thresholds and penalties +int TessLangModel::max_ood_shape_cost_ = CubeUtils::Prob2Cost(1e-4); + +TessLangModel::TessLangModel(const string &lm_params, + const string &data_file_path, + bool load_system_dawg, + TessdataManager *tessdata_manager, + CubeRecoContext *cntxt) { + cntxt_ = cntxt; + has_case_ = cntxt_->HasCase(); + // Load the rest of the language model elements from file + LoadLangModelElements(lm_params); + // Load word_dawgs_ if needed. + if (tessdata_manager->SeekToStart(TESSDATA_CUBE_UNICHARSET)) { + word_dawgs_ = new DawgVector(); + if (load_system_dawg && + tessdata_manager->SeekToStart(TESSDATA_CUBE_SYSTEM_DAWG)) { + // The last parameter to the Dawg constructor (the debug level) is set to + // false, until Cube has a way to express its preferred debug level. + *word_dawgs_ += new SquishedDawg(tessdata_manager->GetDataFilePtr(), + DAWG_TYPE_WORD, + cntxt_->Lang().c_str(), + SYSTEM_DAWG_PERM, false); + } + } else { + word_dawgs_ = NULL; + } +} + +// Cleanup an edge array +void TessLangModel::FreeEdges(int edge_cnt, LangModEdge **edge_array) { + if (edge_array != NULL) { + for (int edge_idx = 0; edge_idx < edge_cnt; edge_idx++) { + if (edge_array[edge_idx] != NULL) { + delete edge_array[edge_idx]; + } + } + delete []edge_array; + } +} + +// Determines if a sequence of 32-bit chars is valid in this language model +// starting from the specified edge. 
If the eow_flag is ON, also checks for +// a valid EndOfWord. If final_edge is not NULL, returns a pointer to the last +// edge +bool TessLangModel::IsValidSequence(LangModEdge *edge, + const char_32 *sequence, + bool eow_flag, + LangModEdge **final_edge) { + // get the edges emerging from this edge + int edge_cnt = 0; + LangModEdge **edge_array = GetEdges(NULL, edge, &edge_cnt); + + // find the 1st char in the sequence in the children + for (int edge_idx = 0; edge_idx < edge_cnt; edge_idx++) { + // found a match + if (sequence[0] == edge_array[edge_idx]->EdgeString()[0]) { + // if this is the last char + if (sequence[1] == 0) { + // succeed if we are in prefix mode or this is a terminal edge + if (eow_flag == false || edge_array[edge_idx]->IsEOW()) { + if (final_edge != NULL) { + (*final_edge) = edge_array[edge_idx]; + edge_array[edge_idx] = NULL; + } + + FreeEdges(edge_cnt, edge_array); + return true; + } + } else { + // not the last char continue checking + if (IsValidSequence(edge_array[edge_idx], sequence + 1, eow_flag, + final_edge) == true) { + FreeEdges(edge_cnt, edge_array); + return true; + } + } + } + } + + FreeEdges(edge_cnt, edge_array); + return false; +} + +// Determines if a sequence of 32-bit chars is valid in this language model +// starting from the root. If the eow_flag is ON, also checks for +// a valid EndOfWord. 
If final_edge is not NULL, returns a pointer to the last +// edge +bool TessLangModel::IsValidSequence(const char_32 *sequence, bool eow_flag, + LangModEdge **final_edge) { + if (final_edge != NULL) { + (*final_edge) = NULL; + } + + return IsValidSequence(NULL, sequence, eow_flag, final_edge); +} + +bool TessLangModel::IsLeadingPunc(const char_32 ch) { + return lead_punc_.find(ch) != string::npos; +} + +bool TessLangModel::IsTrailingPunc(const char_32 ch) { + return trail_punc_.find(ch) != string::npos; +} + +bool TessLangModel::IsDigit(const char_32 ch) { + return digits_.find(ch) != string::npos; +} + +// The general fan-out generation function. Returns the list of edges +// fanning-out of the specified edge and their count. If an AltList is +// specified, only the class-ids with a minimum cost are considered +LangModEdge ** TessLangModel::GetEdges(CharAltList *alt_list, + LangModEdge *lang_mod_edge, + int *edge_cnt) { + TessLangModEdge *tess_lm_edge = + reinterpret_cast(lang_mod_edge); + LangModEdge **edge_array = NULL; + (*edge_cnt) = 0; + + // if we are starting from the root, we'll instantiate every DAWG + // and get the all the edges that emerge from the root + if (tess_lm_edge == NULL) { + // get DAWG count from Tesseract + int dawg_cnt = NumDawgs(); + // preallocate the edge buffer + (*edge_cnt) = dawg_cnt * max_edge_; + edge_array = new LangModEdge *[(*edge_cnt)]; + if (edge_array == NULL) { + return NULL; + } + + for (int dawg_idx = (*edge_cnt) = 0; dawg_idx < dawg_cnt; dawg_idx++) { + const Dawg *curr_dawg = GetDawg(dawg_idx); + // Only look through word Dawgs (since there is a special way of + // handling numbers and punctuation). 
+ if (curr_dawg->type() == DAWG_TYPE_WORD) { + (*edge_cnt) += FanOut(alt_list, curr_dawg, 0, 0, NULL, true, + edge_array + (*edge_cnt)); + } + } // dawg + + (*edge_cnt) += FanOut(alt_list, number_dawg_, 0, 0, NULL, true, + edge_array + (*edge_cnt)); + + // OOD: it is intentionally not added to the list to make sure it comes + // at the end + (*edge_cnt) += FanOut(alt_list, ood_dawg_, 0, 0, NULL, true, + edge_array + (*edge_cnt)); + + // set the root flag for all root edges + for (int edge_idx = 0; edge_idx < (*edge_cnt); edge_idx++) { + edge_array[edge_idx]->SetRoot(true); + } + } else { // not starting at the root + // preallocate the edge buffer + (*edge_cnt) = max_edge_; + // allocate memory for edges + edge_array = new LangModEdge *[(*edge_cnt)]; + if (edge_array == NULL) { + return NULL; + } + + // get the FanOut edges from the root of each dawg + (*edge_cnt) = FanOut(alt_list, + tess_lm_edge->GetDawg(), + tess_lm_edge->EndEdge(), tess_lm_edge->EdgeMask(), + tess_lm_edge->EdgeString(), false, edge_array); + } + return edge_array; +} + +// generate edges from an NULL terminated string +// (used for punctuation, operators and digits) +int TessLangModel::Edges(const char *strng, const Dawg *dawg, + EDGE_REF edge_ref, EDGE_REF edge_mask, + LangModEdge **edge_array) { + int edge_idx, + edge_cnt = 0; + + for (edge_idx = 0; strng[edge_idx] != 0; edge_idx++) { + int class_id = cntxt_->CharacterSet()->ClassID((char_32)strng[edge_idx]); + if (class_id != INVALID_UNICHAR_ID) { + // create an edge object + edge_array[edge_cnt] = new TessLangModEdge(cntxt_, dawg, edge_ref, + class_id); + if (edge_array[edge_cnt] == NULL) { + return 0; + } + + reinterpret_cast(edge_array[edge_cnt])-> + SetEdgeMask(edge_mask); + edge_cnt++; + } + } + + return edge_cnt; +} + +// generate OOD edges +int TessLangModel::OODEdges(CharAltList *alt_list, EDGE_REF edge_ref, + EDGE_REF edge_ref_mask, LangModEdge **edge_array) { + int class_cnt = cntxt_->CharacterSet()->ClassCount(); + int edge_cnt = 
0; + for (int class_id = 0; class_id < class_cnt; class_id++) { + // produce an OOD edge only if the cost of the char is low enough + if ((alt_list == NULL || + alt_list->ClassCost(class_id) <= max_ood_shape_cost_)) { + // create an edge object + edge_array[edge_cnt] = new TessLangModEdge(cntxt_, class_id); + if (edge_array[edge_cnt] == NULL) { + return 0; + } + + edge_cnt++; + } + } + + return edge_cnt; +} + +// computes and returns the edges that fan out of an edge ref +int TessLangModel::FanOut(CharAltList *alt_list, const Dawg *dawg, + EDGE_REF edge_ref, EDGE_REF edge_mask, + const char_32 *str, bool root_flag, + LangModEdge **edge_array) { + int edge_cnt = 0; + NODE_REF next_node = NO_EDGE; + + // OOD + if (dawg == reinterpret_cast(DAWG_OOD)) { + if (ood_enabled_ == true) { + return OODEdges(alt_list, edge_ref, edge_mask, edge_array); + } else { + return 0; + } + } else if (dawg == reinterpret_cast(DAWG_NUMBER)) { + // Number + if (numeric_enabled_ == true) { + return NumberEdges(edge_ref, edge_array); + } else { + return 0; + } + } else if (IsTrailingPuncEdge(edge_mask)) { + // a TRAILING PUNC MASK, generate more trailing punctuation and return + if (punc_enabled_ == true) { + EDGE_REF trail_cnt = TrailingPuncCount(edge_mask); + return Edges(trail_punc_.c_str(), dawg, edge_ref, + TrailingPuncEdgeMask(trail_cnt + 1), edge_array); + } else { + return 0; + } + } else if (root_flag == true || edge_ref == 0) { + // Root, generate leading punctuation and continue + if (root_flag) { + if (punc_enabled_ == true) { + edge_cnt += Edges(lead_punc_.c_str(), dawg, 0, LEAD_PUNC_EDGE_REF_MASK, + edge_array); + } + } + next_node = 0; + } else { + // a node in the main trie + bool eow_flag = (dawg->end_of_word(edge_ref) != 0); + + // for EOW + if (eow_flag == true) { + // generate trailing punctuation + if (punc_enabled_ == true) { + edge_cnt += Edges(trail_punc_.c_str(), dawg, edge_ref, + TrailingPuncEdgeMask((EDGE_REF)1), edge_array); + // generate a hyphen and go back to 
the root + edge_cnt += Edges("-/", dawg, 0, 0, edge_array + edge_cnt); + } + } + + // advance node + next_node = dawg->next_node(edge_ref); + if (next_node == 0 || next_node == NO_EDGE) { + return edge_cnt; + } + } + + // now get all the emerging edges if word list is enabled + if (word_list_enabled_ == true && next_node != NO_EDGE) { + // create child edges + int child_edge_cnt = + TessLangModEdge::CreateChildren(cntxt_, dawg, next_node, + edge_array + edge_cnt); + int strt_cnt = edge_cnt; + + // set the edge mask + for (int child = 0; child < child_edge_cnt; child++) { + reinterpret_cast(edge_array[edge_cnt++])-> + SetEdgeMask(edge_mask); + } + + // if we are at the root, create upper case forms of these edges if possible + if (root_flag == true) { + for (int child = 0; child < child_edge_cnt; child++) { + TessLangModEdge *child_edge = + reinterpret_cast(edge_array[strt_cnt + child]); + + if (has_case_ == true) { + const char_32 *edge_str = child_edge->EdgeString(); + if (edge_str != NULL && islower(edge_str[0]) != 0 && + edge_str[1] == 0) { + int class_id = + cntxt_->CharacterSet()->ClassID(toupper(edge_str[0])); + if (class_id != INVALID_UNICHAR_ID) { + // generate an upper case edge for lower case chars + edge_array[edge_cnt] = new TessLangModEdge(cntxt_, dawg, + child_edge->StartEdge(), child_edge->EndEdge(), class_id); + + if (edge_array[edge_cnt] != NULL) { + reinterpret_cast(edge_array[edge_cnt])-> + SetEdgeMask(edge_mask); + edge_cnt++; + } + } + } + } + } + } + } + return edge_cnt; +} + +// Generate the edges fanning-out from an edge in the number state machine +int TessLangModel::NumberEdges(EDGE_REF edge_ref, LangModEdge **edge_array) { + EDGE_REF new_state, + state; + + int repeat_cnt, + new_repeat_cnt; + + state = ((edge_ref & NUMBER_STATE_MASK) >> NUMBER_STATE_SHIFT); + repeat_cnt = ((edge_ref & NUMBER_REPEAT_MASK) >> NUMBER_REPEAT_SHIFT); + + if (state < 0 || state >= kStateCnt) { + return 0; + } + + // go thru all valid transitions from the state 
+ int edge_cnt = 0; + + EDGE_REF new_edge_ref; + + for (int lit = 0; lit < kNumLiteralCnt; lit++) { + // move to the new state + new_state = num_state_machine_[state][lit]; + if (new_state == NUM_TRM) { + continue; + } + + if (new_state == state) { + new_repeat_cnt = repeat_cnt + 1; + } else { + new_repeat_cnt = 1; + } + + // not allowed to repeat beyond this + if (new_repeat_cnt > num_max_repeat_[state]) { + continue; + } + + new_edge_ref = (new_state << NUMBER_STATE_SHIFT) | + (lit << NUMBER_LITERAL_SHIFT) | + (new_repeat_cnt << NUMBER_REPEAT_SHIFT); + + edge_cnt += Edges(literal_str_[lit]->c_str(), number_dawg_, + new_edge_ref, 0, edge_array + edge_cnt); + } + + return edge_cnt; +} + +// Loads Language model elements from contents of the .cube.lm file +bool TessLangModel::LoadLangModelElements(const string &lm_params) { + bool success = true; + // split into lines, each corresponding to a token type below + vector str_vec; + CubeUtils::SplitStringUsing(lm_params, "\r\n", &str_vec); + for (int entry = 0; entry < str_vec.size(); entry++) { + vector tokens; + // should be only two tokens: type and value + CubeUtils::SplitStringUsing(str_vec[entry], "=", &tokens); + if (tokens.size() != 2) + success = false; + if (tokens[0] == "LeadPunc") { + lead_punc_ = tokens[1]; + } else if (tokens[0] == "TrailPunc") { + trail_punc_ = tokens[1]; + } else if (tokens[0] == "NumLeadPunc") { + num_lead_punc_ = tokens[1]; + } else if (tokens[0] == "NumTrailPunc") { + num_trail_punc_ = tokens[1]; + } else if (tokens[0] == "Operators") { + operators_ = tokens[1]; + } else if (tokens[0] == "Digits") { + digits_ = tokens[1]; + } else if (tokens[0] == "Alphas") { + alphas_ = tokens[1]; + } else { + success = false; + } + } + + RemoveInvalidCharacters(&num_lead_punc_); + RemoveInvalidCharacters(&num_trail_punc_); + RemoveInvalidCharacters(&digits_); + RemoveInvalidCharacters(&operators_); + RemoveInvalidCharacters(&alphas_); + + // form the array of literal strings needed for number state 
machine + // It is essential that the literal strings go in the order below + literal_str_[0] = &num_lead_punc_; + literal_str_[1] = &num_trail_punc_; + literal_str_[2] = &digits_; + literal_str_[3] = &operators_; + literal_str_[4] = &alphas_; + + return success; +} + +void TessLangModel::RemoveInvalidCharacters(string *lm_str) { + CharSet *char_set = cntxt_->CharacterSet(); + tesseract::string_32 lm_str32; + CubeUtils::UTF8ToUTF32(lm_str->c_str(), &lm_str32); + + int len = CubeUtils::StrLen(lm_str32.c_str()); + char_32 *clean_str32 = new char_32[len + 1]; + if (!clean_str32) + return; + int clean_len = 0; + for (int i = 0; i < len; ++i) { + int class_id = char_set->ClassID((char_32)lm_str32[i]); + if (class_id != INVALID_UNICHAR_ID) { + clean_str32[clean_len] = lm_str32[i]; + ++clean_len; + } + } + clean_str32[clean_len] = 0; + if (clean_len < len) { + lm_str->clear(); + CubeUtils::UTF32ToUTF8(clean_str32, lm_str); + } + delete [] clean_str32; +} + +int TessLangModel::NumDawgs() const { + return (word_dawgs_ != NULL) ? + word_dawgs_->size() : cntxt_->TesseractObject()->getDict().NumDawgs(); +} + +// Returns the dawgs with the given index from either the dawgs +// stored by the Tesseract object, or the word_dawgs_. +const Dawg *TessLangModel::GetDawg(int index) const { + if (word_dawgs_ != NULL) { + ASSERT_HOST(index < word_dawgs_->size()); + return (*word_dawgs_)[index]; + } else { + ASSERT_HOST(index < cntxt_->TesseractObject()->getDict().NumDawgs()); + return cntxt_->TesseractObject()->getDict().GetDawg(index); + } +} +} diff --git a/cube/tess_lang_model.h b/cube/tess_lang_model.h new file mode 100644 index 0000000000..f44dc39dfd --- /dev/null +++ b/cube/tess_lang_model.h @@ -0,0 +1,144 @@ +/********************************************************************** + * File: tess_lang_model.h + * Description: Declaration of the Tesseract Language Model Class + * Author: Ahmad Abdulkader + * Created: 2008 + * + * (C) Copyright 2008, Google Inc. 
+ ** Licensed under the Apache License, Version 2.0 (the "License"); + ** you may not use this file except in compliance with the License. + ** You may obtain a copy of the License at + ** http://www.apache.org/licenses/LICENSE-2.0 + ** Unless required by applicable law or agreed to in writing, software + ** distributed under the License is distributed on an "AS IS" BASIS, + ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + ** See the License for the specific language governing permissions and + ** limitations under the License. + * + **********************************************************************/ + +#ifndef TESS_LANG_MODEL_H +#define TESS_LANG_MODEL_H + +#undef EXIT + +#include + +#include "char_altlist.h" +#include "cube_reco_context.h" +#include "cube_tuning_params.h" +#include "dict.h" +#include "lang_model.h" +#include "tessdatamanager.h" +#include "tess_lang_mod_edge.h" + +namespace tesseract { + +const int kStateCnt = 4; +const int kNumLiteralCnt = 5; + +class TessLangModel : public LangModel { + public: + TessLangModel(const string &lm_params, + const string &data_file_path, + bool load_system_dawg, + TessdataManager *tessdata_manager, + CubeRecoContext *cntxt); + ~TessLangModel() { + if (word_dawgs_ != NULL) { + word_dawgs_->delete_data_pointers(); + delete word_dawgs_; + } + } + + // returns a pointer to the root of the language model + inline TessLangModEdge *Root() { + return NULL; + } + + // The general fan-out generation function. Returns the list of edges + // fanning-out of the specified edge and their count. If an AltList is + // specified, only the class-ids with a minimum cost are considered + LangModEdge **GetEdges(CharAltList *alt_list, + LangModEdge *edge, + int *edge_cnt); + // Determines if a sequence of 32-bit chars is valid in this language model + // starting from the root. If the eow_flag is ON, also checks for + // a valid EndOfWord. 
If final_edge is not NULL, returns a pointer to the last + // edge + bool IsValidSequence(const char_32 *sequence, bool eow_flag, + LangModEdge **final_edge = NULL); + bool IsLeadingPunc(char_32 ch); + bool IsTrailingPunc(char_32 ch); + bool IsDigit(char_32 ch); + + void RemoveInvalidCharacters(string *lm_str); + private: + // static LM state machines + static const Dawg *ood_dawg_; + static const Dawg *number_dawg_; + static const int num_state_machine_[kStateCnt][kNumLiteralCnt]; + static const int num_max_repeat_[kStateCnt]; + // word_dawgs_ should only be loaded if cube has its own version of the + // unicharset (different from the one used by tesseract) and therefore + // can not use the dawgs loaded for tesseract (since the unichar ids + // encoded in the dawgs differ). + DawgVector *word_dawgs_; + + static int max_edge_; + static int max_ood_shape_cost_; + + // remaining language model elements needed by cube. These get loaded from + // the .lm file + string lead_punc_; + string trail_punc_; + string num_lead_punc_; + string num_trail_punc_; + string operators_; + string digits_; + string alphas_; + // String of characters in RHS of each line of .cube.lm + // Each element is hard-coded to correspond to a specific token type + // (see LoadLangModelElements) + string *literal_str_[kNumLiteralCnt]; + // Recognition context needed to access language properties + // (case, cursive,..) 
+ CubeRecoContext *cntxt_; + bool has_case_; + + // computes and returns the edges that fan out of an edge ref + int FanOut(CharAltList *alt_list, + const Dawg *dawg, EDGE_REF edge_ref, EDGE_REF edge_ref_mask, + const char_32 *str, bool root_flag, LangModEdge **edge_array); + // generate edges from an NULL terminated string + // (used for punctuation, operators and digits) + int Edges(const char *strng, const Dawg *dawg, + EDGE_REF edge_ref, EDGE_REF edge_ref_mask, + LangModEdge **edge_array); + // Generate the edges fanning-out from an edge in the number state machine + int NumberEdges(EDGE_REF edge_ref, LangModEdge **edge_array); + // Generate OOD edges + int OODEdges(CharAltList *alt_list, EDGE_REF edge_ref, + EDGE_REF edge_ref_mask, LangModEdge **edge_array); + // Cleanup an edge array + void FreeEdges(int edge_cnt, LangModEdge **edge_array); + // Determines if a sequence of 32-bit chars is valid in this language model + // starting from the specified edge. If the eow_flag is ON, also checks for + // a valid EndOfWord. If final_edge is not NULL, returns a pointer to the last + // edge + bool IsValidSequence(LangModEdge *edge, const char_32 *sequence, + bool eow_flag, LangModEdge **final_edge); + // Parse language model elements from the given string, which should + // have been loaded from .cube.lm file, e.g. in CubeRecoContext + bool LoadLangModelElements(const string &lm_params); + + // Returns the number of word Dawgs in the language model. + int NumDawgs() const; + + // Returns the dawgs with the given index from either the dawgs + // stored by the Tesseract object, or the word_dawgs_. 
+ const Dawg *GetDawg(int index) const; +}; +} // tesseract + +#endif // TESS_LANG_MODEL_H diff --git a/cube/tuning_params.h b/cube/tuning_params.h new file mode 100644 index 0000000000..4d4943a229 --- /dev/null +++ b/cube/tuning_params.h @@ -0,0 +1,129 @@ +/********************************************************************** + * File: tuning_params.h + * Description: Declaration of the Tuning Parameters Base Class + * Author: Ahmad Abdulkader + * Created: 2008 + * + * (C) Copyright 2008, Google Inc. + ** Licensed under the Apache License, Version 2.0 (the "License"); + ** you may not use this file except in compliance with the License. + ** You may obtain a copy of the License at + ** http://www.apache.org/licenses/LICENSE-2.0 + ** Unless required by applicable law or agreed to in writing, software + ** distributed under the License is distributed on an "AS IS" BASIS, + ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + ** See the License for the specific language governing permissions and + ** limitations under the License. + * + **********************************************************************/ + +// The TuningParams class abstracts all the parameters that can be learned or +// tuned during the training process. It is a base class that all TuningParams +// classes should inherit from. 
+ +#ifndef TUNING_PARAMS_H +#define TUNING_PARAMS_H + +#include +#ifdef USE_STD_NAMESPACE +using std::string; +#endif + +namespace tesseract { +class TuningParams { + public: + enum type_classifer { + NN, + HYBRID_NN + }; + enum type_feature { + BMP, + CHEBYSHEV, + HYBRID + }; + + TuningParams() {} + virtual ~TuningParams() {} + // Accessor functions + inline double RecoWgt() const { return reco_wgt_; } + inline double SizeWgt() const { return size_wgt_; } + inline double CharBigramWgt() const { return char_bigrams_wgt_; } + inline double WordUnigramWgt() const { return word_unigrams_wgt_; } + inline int MaxSegPerChar() const { return max_seg_per_char_; } + inline int BeamWidth() const { return beam_width_; } + inline int TypeClassifier() const { return tp_classifier_; } + inline int TypeFeature() const { return tp_feat_; } + inline int ConvGridSize() const { return conv_grid_size_; } + inline int HistWindWid() const { return hist_wind_wid_; } + inline int MinConCompSize() const { return min_con_comp_size_; } + inline double MaxWordAspectRatio() const { return max_word_aspect_ratio_; } + inline double MinSpaceHeightRatio() const { return min_space_height_ratio_; } + inline double MaxSpaceHeightRatio() const { return max_space_height_ratio_; } + inline double CombinerRunThresh() const { return combiner_run_thresh_; } + inline double CombinerClassifierThresh() const { + return combiner_classifier_thresh_; } + + inline void SetRecoWgt(double wgt) { reco_wgt_ = wgt; } + inline void SetSizeWgt(double wgt) { size_wgt_ = wgt; } + inline void SetCharBigramWgt(double wgt) { char_bigrams_wgt_ = wgt; } + inline void SetWordUnigramWgt(double wgt) { word_unigrams_wgt_ = wgt; } + inline void SetMaxSegPerChar(int max_seg_per_char) { + max_seg_per_char_ = max_seg_per_char; + } + inline void SetBeamWidth(int beam_width) { beam_width_ = beam_width; } + inline void SetTypeClassifier(type_classifer tp_classifier) { + tp_classifier_ = tp_classifier; + } + inline void 
SetTypeFeature(type_feature tp_feat) {tp_feat_ = tp_feat;} + inline void SetHistWindWid(int hist_wind_wid) { + hist_wind_wid_ = hist_wind_wid; + } + + virtual bool Save(string file_name) = 0; + virtual bool Load(string file_name) = 0; + + protected: + // weight of recognition cost. This includes the language model cost + double reco_wgt_; + // weight of size cost + double size_wgt_; + // weight of character bigrams cost + double char_bigrams_wgt_; + // weight of word unigrams cost + double word_unigrams_wgt_; + // Maximum number of segments per character + int max_seg_per_char_; + // Beam width equal to the maximum number of nodes kept in the beam search + // trellis column after pruning + int beam_width_; + // Classifier type: See enum type_classifer for classifier types + type_classifer tp_classifier_; + // Feature types: See enum type_feature for feature types + type_feature tp_feat_; + // Grid size to scale a grapheme bitmap used by the BMP feature type + int conv_grid_size_; + // Histogram window size as a ratio of the word height used in computing + // the vertical pixel density histogram in the segmentation algorithm + int hist_wind_wid_; + // Minimum possible size of a connected component + int min_con_comp_size_; + // Maximum aspect ratio of a word (width / height) + double max_word_aspect_ratio_; + // Minimum ratio relative to the line height of a gap to be considered as + // a word break + double min_space_height_ratio_; + // Maximum ratio relative to the line height of a gap to be considered as + // a definite word break + double max_space_height_ratio_; + // When Cube and Tesseract are run in combined mode, only run + // combiner classifier when tesseract confidence is below this + // threshold. When Cube is run without Tesseract, this is ignored. + double combiner_run_thresh_; + // When Cube and tesseract are run in combined mode, threshold on + // output of combiner binary classifier (chosen from ROC during + // combiner training). 
When Cube is run without Tesseract, this is ignored. + double combiner_classifier_thresh_; +}; +} + +#endif // TUNING_PARAMS_H diff --git a/cube/word_altlist.cpp b/cube/word_altlist.cpp new file mode 100644 index 0000000000..07878e49c6 --- /dev/null +++ b/cube/word_altlist.cpp @@ -0,0 +1,121 @@ +/********************************************************************** + * File: word_altlist.cpp + * Description: Implementation of the Word Alternate List Class + * Author: Ahmad Abdulkader + * Created: 2008 + * + * (C) Copyright 2008, Google Inc. + ** Licensed under the Apache License, Version 2.0 (the "License"); + ** you may not use this file except in compliance with the License. + ** You may obtain a copy of the License at + ** http://www.apache.org/licenses/LICENSE-2.0 + ** Unless required by applicable law or agreed to in writing, software + ** distributed under the License is distributed on an "AS IS" BASIS, + ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + ** See the License for the specific language governing permissions and + ** limitations under the License. 
+ * + **********************************************************************/ + +#include "word_altlist.h" + +namespace tesseract { +WordAltList::WordAltList(int max_alt) + : AltList(max_alt) { + word_alt_ = NULL; +} + +WordAltList::~WordAltList() { + if (word_alt_ != NULL) { + for (int alt_idx = 0; alt_idx < alt_cnt_; alt_idx++) { + if (word_alt_[alt_idx] != NULL) { + delete []word_alt_[alt_idx]; + } + } + delete []word_alt_; + word_alt_ = NULL; + } +} + +// insert an alternate word with the specified cost and tag +bool WordAltList::Insert(char_32 *word_str, int cost, void *tag) { + if (word_alt_ == NULL || alt_cost_ == NULL) { + word_alt_ = new char_32*[max_alt_]; + alt_cost_ = new int[max_alt_]; + alt_tag_ = new void *[max_alt_]; + + if (word_alt_ == NULL || alt_cost_ == NULL || alt_tag_ == NULL) { + return false; + } + + memset(alt_tag_, 0, max_alt_ * sizeof(*alt_tag_)); + } else { + // check if alt already exists + for (int alt_idx = 0; alt_idx < alt_cnt_; alt_idx++) { + if (CubeUtils::StrCmp(word_str, word_alt_[alt_idx]) == 0) { + // update the cost if we have a lower one + if (cost < alt_cost_[alt_idx]) { + alt_cost_[alt_idx] = cost; + alt_tag_[alt_idx] = tag; + } + return true; + } + } + } + + // determine length of alternate + int len = CubeUtils::StrLen(word_str); + + word_alt_[alt_cnt_] = new char_32[len + 1]; + if (word_alt_[alt_cnt_] == NULL) { + return false; + } + + if (len > 0) { + memcpy(word_alt_[alt_cnt_], word_str, len * sizeof(*word_str)); + } + + word_alt_[alt_cnt_][len] = 0; + alt_cost_[alt_cnt_] = cost; + alt_tag_[alt_cnt_] = tag; + + alt_cnt_++; + + return true; +} + +// sort the alternate in descending order based on the cost +void WordAltList::Sort() { + for (int alt_idx = 0; alt_idx < alt_cnt_; alt_idx++) { + for (int alt = alt_idx + 1; alt < alt_cnt_; alt++) { + if (alt_cost_[alt_idx] > alt_cost_[alt]) { + char_32 *pchTemp = word_alt_[alt_idx]; + word_alt_[alt_idx] = word_alt_[alt]; + word_alt_[alt] = pchTemp; + + int temp = 
alt_cost_[alt_idx]; + alt_cost_[alt_idx] = alt_cost_[alt]; + alt_cost_[alt] = temp; + + void *tag = alt_tag_[alt_idx]; + alt_tag_[alt_idx] = alt_tag_[alt]; + alt_tag_[alt] = tag; + } + } + } +} + +void WordAltList::PrintDebug() { + for (int alt_idx = 0; alt_idx < alt_cnt_; alt_idx++) { + char_32 *word_32 = word_alt_[alt_idx]; + string word_str; + CubeUtils::UTF32ToUTF8(word_32, &word_str); + int num_unichars = CubeUtils::StrLen(word_32); + fprintf(stderr, "Alt[%d]=%s (cost=%d, num_unichars=%d); unichars=", alt_idx, + word_str.c_str(), alt_cost_[alt_idx], num_unichars); + for (int i = 0; i < num_unichars; ++i) + fprintf(stderr, "%d ", word_32[i]); + fprintf(stderr, "\n"); + } +} +} // namespace tesseract diff --git a/cube/word_altlist.h b/cube/word_altlist.h new file mode 100644 index 0000000000..7b1620fe78 --- /dev/null +++ b/cube/word_altlist.h @@ -0,0 +1,50 @@ +/********************************************************************** + * File: word_altlist.h + * Description: Declaration of the Word Alternate List Class + * Author: Ahmad Abdulkader + * Created: 2008 + * + * (C) Copyright 2008, Google Inc. + ** Licensed under the Apache License, Version 2.0 (the "License"); + ** you may not use this file except in compliance with the License. + ** You may obtain a copy of the License at + ** http://www.apache.org/licenses/LICENSE-2.0 + ** Unless required by applicable law or agreed to in writing, software + ** distributed under the License is distributed on an "AS IS" BASIS, + ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + ** See the License for the specific language governing permissions and + ** limitations under the License. + * + **********************************************************************/ + +// The WordAltList abstracts a alternate list of words and their corresponding +// costs that result from the word recognition process. 
The class inherits +// from the AltList class +// It provides methods to add a new word alternate, its corresponding score and +// a tag. + +#ifndef WORD_ALT_LIST_H +#define WORD_ALT_LIST_H + +#include "altlist.h" + +namespace tesseract { +class WordAltList : public AltList { + public: + explicit WordAltList(int max_alt); + ~WordAltList(); + // Sort the list of alternates based on cost + void Sort(); + // insert an alternate word with the specified cost and tag + bool Insert(char_32 *char_ptr, int cost, void *tag = NULL); + // returns the alternate string at the specified position + inline char_32 * Alt(int alt_idx) { return word_alt_[alt_idx]; } + // print each entry of the altlist, both UTF8 and unichar ids, and + // their costs, to stderr + void PrintDebug(); + private: + char_32 **word_alt_; +}; +} // namespace tesseract + +#endif // WORD_ALT_LIST_H diff --git a/cube/word_list_lang_model.cpp b/cube/word_list_lang_model.cpp new file mode 100644 index 0000000000..18f85c1cee --- /dev/null +++ b/cube/word_list_lang_model.cpp @@ -0,0 +1,200 @@ +/********************************************************************** + * File: word_list_lang_model.cpp + * Description: Implementation of the Word List Language Model Class + * Author: Ahmad Abdulkader + * Created: 2008 + * + * (C) Copyright 2008, Google Inc. + ** Licensed under the Apache License, Version 2.0 (the "License"); + ** you may not use this file except in compliance with the License. + ** You may obtain a copy of the License at + ** http://www.apache.org/licenses/LICENSE-2.0 + ** Unless required by applicable law or agreed to in writing, software + ** distributed under the License is distributed on an "AS IS" BASIS, + ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + ** See the License for the specific language governing permissions and + ** limitations under the License. 
+ * + **********************************************************************/ + +#include +#include +#include "word_list_lang_model.h" +#include "cube_utils.h" + +#include "ratngs.h" +#include "trie.h" + +namespace tesseract { +WordListLangModel::WordListLangModel(CubeRecoContext *cntxt) { + cntxt_ = cntxt; + dawg_ = NULL; + init_ = false; +} + +WordListLangModel::~WordListLangModel() { + Cleanup(); +} + +// Cleanup +void WordListLangModel::Cleanup() { + if (dawg_ != NULL) { + delete dawg_; + dawg_ = NULL; + } + init_ = false; +} + +// Initialize the language model +bool WordListLangModel::Init() { + if (init_ == true) { + return true; + } + // The last parameter to the Trie constructor (the debug level) is set to + // false for now, until Cube has a way to express its preferred debug level. + dawg_ = new Trie(DAWG_TYPE_WORD, "", NO_PERM, + WordListLangModel::kMaxDawgEdges, + cntxt_->CharacterSet()->ClassCount(), false); + if (dawg_ == NULL) { + return false; + } + init_ = true; + return true; +} + +// return a pointer to the root +LangModEdge * WordListLangModel::Root() { + return NULL; +} + +// return the edges emerging from the current state +LangModEdge **WordListLangModel::GetEdges(CharAltList *alt_list, + LangModEdge *edge, + int *edge_cnt) { + // initialize if necessary + if (init_ == false) { + if (Init() == false) { + return false; + } + } + + (*edge_cnt) = 0; + + EDGE_REF edge_ref; + + TessLangModEdge *tess_lm_edge = reinterpret_cast(edge); + + if (tess_lm_edge == NULL) { + edge_ref = 0; + } else { + edge_ref = tess_lm_edge->EndEdge(); + + // advance node + edge_ref = dawg_->next_node(edge_ref); + if (edge_ref == 0) { + return 0; + } + } + + // allocate memory for edges + LangModEdge **edge_array = new LangModEdge *[kMaxEdge]; + if (edge_array == NULL) { + return NULL; + } + + // now get all the emerging edges + (*edge_cnt) += TessLangModEdge::CreateChildren(cntxt_, dawg_, edge_ref, + edge_array + (*edge_cnt)); + + return edge_array; +} + +// returns true 
if the char_32 is supported by the language model +// TODO(ahmadab) currently not implemented +bool WordListLangModel::IsValidSequence(const char_32 *sequence, + bool terminal, LangModEdge **edges) { + return false; +} + +// Recursive helper function for WordVariants(). +void WordListLangModel::WordVariants(const CharSet &char_set, + string_32 prefix_str32, + WERD_CHOICE *word_so_far, + string_32 str32, + vector *word_variants) { + int str_len = str32.length(); + if (str_len == 0) { + if (word_so_far->length() > 0) { + word_variants->push_back(*word_so_far); + } + } else { + // Try out all the possible prefixes of the str32. + for (int len = 1; len <= str_len; len++) { + // Check if prefix is supported in character set. + string_32 str_pref32 = str32.substr(0, len); + int class_id = char_set.ClassID(reinterpret_cast( + str_pref32.c_str())); + if (class_id <= 0) { + continue; + } else { + string_32 new_prefix_str32 = prefix_str32 + str_pref32; + string_32 new_str32 = str32.substr(len); + word_so_far->append_unichar_id(class_id, 1, 0.0, 0.0); + WordVariants(char_set, new_prefix_str32, word_so_far, new_str32, + word_variants); + word_so_far->remove_last_unichar_id(); + } + } + } +} + +// Compute all the variants of a 32-bit string in terms of the class-ids +// This is needed for languages that have ligatures. 
A word can then have more +// than one spelling in terms of the class-ids +void WordListLangModel::WordVariants(const CharSet &char_set, string_32 str32, + vector *word_variants) { + word_variants->clear(); + string_32 prefix_str32; + WERD_CHOICE word_so_far; + WordVariants(char_set, prefix_str32, &word_so_far, str32, word_variants); +} + +// add a new UTF-8 string to the lang model +bool WordListLangModel::AddString(const char *char_ptr) { + if (!init_ && !Init()) { // initialize if necessary + return false; + } + + string_32 str32; + CubeUtils::UTF8ToUTF32(char_ptr, &str32); + if (str32.length() < 1) { + return false; + } + return AddString32(str32.c_str()); +} + +// add a new UTF-32 string to the lang model +bool WordListLangModel::AddString32(const char_32 *char_32_ptr) { + if (char_32_ptr == NULL) { + return false; + } + // get all the word variants + vector word_variants; + WordVariants(*(cntxt_->CharacterSet()), char_32_ptr, &word_variants); + + if (word_variants.size() > 0) { + // find the shortest variant + int shortest_word = 0; + for (int word = 1; word < word_variants.size(); word++) { + if (word_variants[shortest_word].length() > + word_variants[word].length()) { + shortest_word = word; + } + } + // only add the shortest grapheme interpretation of string to the word list + dawg_->add_word_to_dawg(word_variants[shortest_word]); + } + return true; +} + +} diff --git a/cube/word_list_lang_model.h b/cube/word_list_lang_model.h new file mode 100644 index 0000000000..cc449dff06 --- /dev/null +++ b/cube/word_list_lang_model.h @@ -0,0 +1,87 @@ +/********************************************************************** + * File: word_list_lang_model.h + * Description: Declaration of the Word List Language Model Class + * Author: Ahmad Abdulkader + * Created: 2008 + * + * (C) Copyright 2008, Google Inc. + ** Licensed under the Apache License, Version 2.0 (the "License"); + ** you may not use this file except in compliance with the License. 
+ ** You may obtain a copy of the License at + ** http://www.apache.org/licenses/LICENSE-2.0 + ** Unless required by applicable law or agreed to in writing, software + ** distributed under the License is distributed on an "AS IS" BASIS, + ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + ** See the License for the specific language governing permissions and + ** limitations under the License. + * + **********************************************************************/ + +// The WordListLangModel class abstracts a language model that is based on +// a list of words. It inherits from the LangModel abstract class +// Besides providing the methods inherited from the LangModel abstract class, +// the class provided methods to add new strings to the Language Model: +// AddString & AddString32 + +#ifndef WORD_LIST_LANG_MODEL_H +#define WORD_LIST_LANG_MODEL_H + +#include + +#include "cube_reco_context.h" +#include "lang_model.h" +#include "tess_lang_mod_edge.h" + +namespace tesseract { + +class Trie; + +class WordListLangModel : public LangModel { + public: + explicit WordListLangModel(CubeRecoContext *cntxt); + ~WordListLangModel(); + // Returns an edge pointer to the Root + LangModEdge *Root(); + // Returns the edges that fan-out of the specified edge and their count + LangModEdge **GetEdges(CharAltList *alt_list, + LangModEdge *edge, + int *edge_cnt); + // Returns is a sequence of 32-bit characters are valid within this language + // model or net. And EndOfWord flag is specified. If true, the sequence has + // to end on a valid word. 
The function also optionally returns the list + // of language model edges traversed to parse the string + bool IsValidSequence(const char_32 *sequence, + bool eow_flag, + LangModEdge **edges); + bool IsLeadingPunc(char_32 ch) { return false; } // not yet implemented + bool IsTrailingPunc(char_32 ch) { return false; } // not yet implemented + bool IsDigit(char_32 ch) { return false; } // not yet implemented + // Adds a new UTF-8 string to the language model + bool AddString(const char *char_ptr); + // Adds a new UTF-32 string to the language model + bool AddString32(const char_32 *char_32_ptr); + // Compute all the variants of a 32-bit string in terms of the class-ids. + // This is needed for languages that have ligatures. A word can then have + // more than one spelling in terms of the class-ids. + static void WordVariants(const CharSet &char_set, string_32 str32, + vector *word_variants); + private: + // constants needed to configure the language model + static const int kMaxEdge = 512; + static const int kMaxDawgEdges = 20000; + + CubeRecoContext *cntxt_; + Trie *dawg_; + bool init_; + // Initialize the language model + bool Init(); + // Cleanup + void Cleanup(); + // Recursive helper function for WordVariants(). + static void WordVariants(const CharSet &char_set, + string_32 prefix_str32, WERD_CHOICE *word_so_far, + string_32 str32, vector *word_variants); +}; +} // tesseract + +#endif // WORD_LIST_LANG_MODEL_H diff --git a/cube/word_size_model.cpp b/cube/word_size_model.cpp new file mode 100644 index 0000000000..6b9a4530fc --- /dev/null +++ b/cube/word_size_model.cpp @@ -0,0 +1,301 @@ +/********************************************************************** + * File: word_size_model.cpp + * Description: Implementation of the Word Size Model Class + * Author: Ahmad Abdulkader + * Created: 2008 + * + * (C) Copyright 2008, Google Inc. 
+ ** Licensed under the Apache License, Version 2.0 (the "License"); + ** you may not use this file except in compliance with the License. + ** You may obtain a copy of the License at + ** http://www.apache.org/licenses/LICENSE-2.0 + ** Unless required by applicable law or agreed to in writing, software + ** distributed under the License is distributed on an "AS IS" BASIS, + ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + ** See the License for the specific language governing permissions and + ** limitations under the License. + * + **********************************************************************/ + +#include +#include +#include +#include "word_size_model.h" +#include "cube_utils.h" + +namespace tesseract { + +WordSizeModel::WordSizeModel(CharSet * char_set, bool contextual) { + char_set_ = char_set; + contextual_ = contextual; +} + +WordSizeModel::~WordSizeModel() { + for (int fnt = 0; fnt < font_pair_size_models_.size(); fnt++) { + FontPairSizeInfo fnt_info = font_pair_size_models_[fnt]; + delete []fnt_info.pair_size_info[0]; + delete []fnt_info.pair_size_info; + } +} + +WordSizeModel *WordSizeModel::Create(const string &data_file_path, + const string &lang, + CharSet *char_set, + bool contextual) { + WordSizeModel *obj = new WordSizeModel(char_set, contextual); + if (!obj) { + fprintf(stderr, "Cube ERROR (WordSizeModel::Create): unable to allocate " + "new word size model object\n"); + return NULL; + } + + if (!obj->Init(data_file_path, lang)) { + delete obj; + return NULL; + } + return obj; +} + +bool WordSizeModel::Init(const string &data_file_path, const string &lang) { + string stats_file_name; + stats_file_name = data_file_path + lang; + stats_file_name += ".cube.size"; + + // read file to memory + string str_data; + + if (!CubeUtils::ReadFileToString(stats_file_name, &str_data)) { + return false; + } + + // split to words + vector tokens; + CubeUtils::SplitStringUsing(str_data, "\t\r\n", &tokens); + if (tokens.size() < 
1) { + fprintf(stderr, "Cube ERROR (WordSizeModel::Init): invalid " + "file contents: %s\n", stats_file_name.c_str()); + return false; + } + + font_pair_size_models_.clear(); + + // token count per line depends on whether the language is contextual or not + int token_cnt = contextual_ ? + (kExpectedTokenCount + 4) : kExpectedTokenCount; + // the count of size classes depends on whether the language is contextual + // or not. For non contextual languages (Ex: Eng), it is equal to the class + // count. For contextual languages (Ex: Ara), it is equal to the class count + // multiplied by the position count (4: start, middle, final, isolated) + int size_class_cnt = contextual_ ? + (char_set_->ClassCount() * 4) : char_set_->ClassCount(); + string fnt_name = ""; + + for (int tok = 0; tok < tokens.size(); tok += token_cnt) { + // a new font, write the old font data and re-init + if (tok == 0 || fnt_name != tokens[tok]) { + FontPairSizeInfo fnt_info; + + fnt_info.pair_size_info = new PairSizeInfo *[size_class_cnt]; + if (!fnt_info.pair_size_info) { + fprintf(stderr, "Cube ERROR (WordSizeModel::Init): error allcoating " + "memory for font pair size info\n"); + return false; + } + + fnt_info.pair_size_info[0] = + new PairSizeInfo[size_class_cnt * size_class_cnt]; + if (!fnt_info.pair_size_info[0]) { + fprintf(stderr, "Cube ERROR (WordSizeModel::Init): error allocating " + "memory for font pair size info\n"); + return false; + } + + memset(fnt_info.pair_size_info[0], 0, size_class_cnt * size_class_cnt * + sizeof(PairSizeInfo)); + + for (int cls = 1; cls < size_class_cnt; cls++) { + fnt_info.pair_size_info[cls] = + fnt_info.pair_size_info[cls - 1] + size_class_cnt; + } + + // strip out path and extension + string stripped_font_name = tokens[tok].substr(0, tokens[tok].find('.')); + string::size_type strt_pos = stripped_font_name.find_last_of("/\\"); + if (strt_pos != string::npos) { + fnt_info.font_name = stripped_font_name.substr(strt_pos); + } else { + fnt_info.font_name = 
stripped_font_name; + } + font_pair_size_models_.push_back(fnt_info); + } + + // parse the data + int cls_0; + int cls_1; + double delta_top; + double wid_0; + double hgt_0; + double wid_1; + double hgt_1; + int size_code_0; + int size_code_1; + + // read and parse the tokens + if (contextual_) { + int start_0; + int end_0; + int start_1; + int end_1; + // The expected format for a character size bigram is as follows: + // ClassId0Start-flag0End-flag0String0(ignored) + // Width0Height0 + // ClassId1Start-flag1End-flag1String1(ignored) + // HeightDeltaWidth1Height0 + // In case of non-contextual languages, the Start and End flags are + // omitted + if (sscanf(tokens[tok + 1].c_str(), "%d", &cls_0) != 1 || + sscanf(tokens[tok + 2].c_str(), "%d", &start_0) != 1 || + sscanf(tokens[tok + 3].c_str(), "%d", &end_0) != 1 || + sscanf(tokens[tok + 5].c_str(), "%lf", &wid_0) != 1 || + sscanf(tokens[tok + 6].c_str(), "%lf", &hgt_0) != 1 || + sscanf(tokens[tok + 7].c_str(), "%d", &cls_1) != 1 || + sscanf(tokens[tok + 8].c_str(), "%d", &start_1) != 1 || + sscanf(tokens[tok + 9].c_str(), "%d", &end_1) != 1 || + sscanf(tokens[tok + 11].c_str(), "%lf", &delta_top) != 1 || + sscanf(tokens[tok + 12].c_str(), "%lf", &wid_1) != 1 || + sscanf(tokens[tok + 13].c_str(), "%lf", &hgt_1) != 1 || + (start_0 != 0 && start_0 != 1) || (end_0 != 0 && end_0 != 1) || + (start_1 != 0 && start_1 != 1) || (end_1 != 0 && end_1 != 1)) { + fprintf(stderr, "Cube ERROR (WordSizeModel::Init): bad format at " + "line %d\n", 1 + (tok / token_cnt)); + return false; + } + size_code_0 = SizeCode(cls_0, start_0, end_0); + size_code_1 = SizeCode(cls_1, start_1, end_1); + } else { + if (sscanf(tokens[tok + 1].c_str(), "%d", &cls_0) != 1 || + sscanf(tokens[tok + 3].c_str(), "%lf", &wid_0) != 1 || + sscanf(tokens[tok + 4].c_str(), "%lf", &hgt_0) != 1 || + sscanf(tokens[tok + 5].c_str(), "%d", &cls_1) != 1 || + sscanf(tokens[tok + 7].c_str(), "%lf", &delta_top) != 1 || + sscanf(tokens[tok + 8].c_str(), "%lf", &wid_1) 
!= 1 || + sscanf(tokens[tok + 9].c_str(), "%lf", &hgt_1) != 1) { + fprintf(stderr, "Cube ERROR (WordSizeModel::Init): bad format at " + "line %d\n", 1 + (tok / token_cnt)); + return false; + } + size_code_0 = cls_0; + size_code_1 = cls_1; + } + + // copy the data to the size tables + FontPairSizeInfo fnt_info = font_pair_size_models_.back(); + fnt_info.pair_size_info[size_code_0][size_code_1].delta_top = + static_cast(delta_top * kShapeModelScale); + fnt_info.pair_size_info[size_code_0][size_code_1].wid_0 = + static_cast(wid_0 * kShapeModelScale); + fnt_info.pair_size_info[size_code_0][size_code_1].hgt_0 = + static_cast(hgt_0 * kShapeModelScale); + fnt_info.pair_size_info[size_code_0][size_code_1].wid_1 = + static_cast(wid_1 * kShapeModelScale); + fnt_info.pair_size_info[size_code_0][size_code_1].hgt_1 = + static_cast(hgt_1 * kShapeModelScale); + + fnt_name = tokens[tok]; + } + + return true; +} + +int WordSizeModel::Cost(CharSamp **samp_array, int samp_cnt) const { + if (samp_cnt < 2) { + return 0; + } + double best_dist = static_cast(WORST_COST); + int best_fnt = -1; + for (int fnt = 0; fnt < font_pair_size_models_.size(); fnt++) { + const FontPairSizeInfo *fnt_info = &font_pair_size_models_[fnt]; + double mean_dist = 0; + int pair_cnt = 0; + + for (int smp_0 = 0; smp_0 < samp_cnt; smp_0++) { + int cls_0 = char_set_->ClassID(samp_array[smp_0]->StrLabel()); + if (cls_0 < 1) { + continue; + } + // compute size code for samp 0 based on class id and position + int size_code_0; + if (contextual_) { + size_code_0 = SizeCode(cls_0, + samp_array[smp_0]->FirstChar() == 0 ? 0 : 1, + samp_array[smp_0]->LastChar() == 0 ? 
0 : 1); + } else { + size_code_0 = cls_0; + } + + int char0_height = samp_array[smp_0]->Height(); + int char0_width = samp_array[smp_0]->Width(); + int char0_top = samp_array[smp_0]->Top(); + + for (int smp_1 = smp_0 + 1; smp_1 < samp_cnt; smp_1++) { + int cls_1 = char_set_->ClassID(samp_array[smp_1]->StrLabel()); + if (cls_1 < 1) { + continue; + } + // compute size code for samp 0 based on class id and position + int size_code_1; + if (contextual_) { + size_code_1 = SizeCode(cls_1, + samp_array[smp_1]->FirstChar() == 0 ? 0 : 1, + samp_array[smp_1]->LastChar() == 0 ? 0 : 1); + } else { + size_code_1 = cls_1; + } + double dist = PairCost( + char0_width, char0_height, char0_top, samp_array[smp_1]->Width(), + samp_array[smp_1]->Height(), samp_array[smp_1]->Top(), + fnt_info->pair_size_info[size_code_0][size_code_1]); + if (dist > 0) { + mean_dist += dist; + pair_cnt++; + } + } // smp_1 + } // smp_0 + if (pair_cnt == 0) { + continue; + } + mean_dist /= pair_cnt; + if (best_fnt == -1 || mean_dist < best_dist) { + best_dist = mean_dist; + best_fnt = fnt; + } + } + if (best_fnt == -1) { + return static_cast(WORST_COST); + } else { + return static_cast(best_dist); + } +} + +double WordSizeModel::PairCost(int width_0, int height_0, int top_0, + int width_1, int height_1, int top_1, + const PairSizeInfo& pair_info) { + double scale_factor = static_cast(pair_info.hgt_0) / + static_cast(height_0); + double dist = 0.0; + if (scale_factor > 0) { + double norm_width_0 = width_0 * scale_factor; + double norm_width_1 = width_1 * scale_factor; + double norm_height_1 = height_1 * scale_factor; + double norm_delta_top = (top_1 - top_0) * scale_factor; + + // accumulate the distance between the model character and the + // predicted one on all dimensions of the pair + dist += fabs(pair_info.wid_0 - norm_width_0); + dist += fabs(pair_info.wid_1 - norm_width_1); + dist += fabs(pair_info.hgt_1 - norm_height_1); + dist += fabs(pair_info.delta_top - norm_delta_top); + } + return dist; +} +} 
// namespace tesseract diff --git a/cube/word_size_model.h b/cube/word_size_model.h new file mode 100644 index 0000000000..fade595bf1 --- /dev/null +++ b/cube/word_size_model.h @@ -0,0 +1,100 @@ +/********************************************************************** + * File: word_size_model.h + * Description: Declaration of the Word Size Model Class + * Author: Ahmad Abdulkader + * Created: 2008 + * + * (C) Copyright 2008, Google Inc. + ** Licensed under the Apache License, Version 2.0 (the "License"); + ** you may not use this file except in compliance with the License. + ** You may obtain a copy of the License at + ** http://www.apache.org/licenses/LICENSE-2.0 + ** Unless required by applicable law or agreed to in writing, software + ** distributed under the License is distributed on an "AS IS" BASIS, + ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + ** See the License for the specific language governing permissions and + ** limitations under the License. + * + **********************************************************************/ + +// The WordSizeModel class abstracts the geometrical relationships +// between characters/shapes in the same word (presumeably of the same font) +// A non-parametric bigram model describes the three geometrical properties of a +// character pair: +// 1- Normalized Width +// 2- Normalized Top +// 3- Normalized Height +// These dimensions are computed for each character pair in a word. These are +// then compared to the same information for each of the fonts that the size +// model knows about. The WordSizeCost is the cost of the font that matches +// best. 
+ +#ifndef WORD_SIZE_MODEL_H +#define WORD_SIZE_MODEL_H + +#include +#include "char_samp.h" +#include "char_set.h" + +namespace tesseract { +struct PairSizeInfo { + int delta_top; + int wid_0; + int hgt_0; + int wid_1; + int hgt_1; +}; + +struct FontPairSizeInfo { + string font_name; + PairSizeInfo **pair_size_info; +}; + +class WordSizeModel { + public: + WordSizeModel(CharSet *, bool contextual); + virtual ~WordSizeModel(); + static WordSizeModel *Create(const string &data_file_path, + const string &lang, + CharSet *char_set, + bool contextual); + // Given a word and number of unichars, return the size cost, + // minimized over all fonts in the size model. + int Cost(CharSamp **samp_array, int samp_cnt) const; + // Given dimensions of a pair of character samples and a font size + // model for that character pair, return the pair's size cost for + // the font. + static double PairCost(int width_0, int height_0, int top_0, + int width_1, int height_1, int top_1, + const PairSizeInfo& pair_info); + bool Save(string file_name); + // Number of fonts in size model. 
+ inline int FontCount() const { + return font_pair_size_models_.size(); + } + inline const FontPairSizeInfo *FontInfo() const { + return &font_pair_size_models_[0]; + } + // Helper functions to convert between size codes, class id and position + // codes + static inline int SizeCode(int cls_id, int start, int end) { + return (cls_id << 2) + (end << 1) + start; + } + + private: + // Scaling constant used to convert floating point ratios in size table + // to fixed point + static const int kShapeModelScale = 1000; + static const int kExpectedTokenCount = 10; + + // Language properties + bool contextual_; + CharSet *char_set_; + // Size ratios table + vector font_pair_size_models_; + + // Initialize the word size model object + bool Init(const string &data_file_path, const string &lang); +}; +} +#endif // WORD_SIZE_MODEL_H diff --git a/cube/word_unigrams.cpp b/cube/word_unigrams.cpp new file mode 100644 index 0000000000..d2d9b94311 --- /dev/null +++ b/cube/word_unigrams.cpp @@ -0,0 +1,267 @@ +/********************************************************************** + * File: word_unigrams.cpp + * Description: Implementation of the Word Unigrams Class + * Author: Ahmad Abdulkader + * Created: 2008 + * + * (C) Copyright 2008, Google Inc. + ** Licensed under the Apache License, Version 2.0 (the "License"); + ** you may not use this file except in compliance with the License. + ** You may obtain a copy of the License at + ** http://www.apache.org/licenses/LICENSE-2.0 + ** Unless required by applicable law or agreed to in writing, software + ** distributed under the License is distributed on an "AS IS" BASIS, + ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + ** See the License for the specific language governing permissions and + ** limitations under the License. 
+ * + **********************************************************************/ + +#include +#include +#include +#include + +#include "const.h" +#include "cube_utils.h" +#include "ndminx.h" +#include "unicharset.h" +#include "word_unigrams.h" + +namespace tesseract { + +WordUnigrams::WordUnigrams() { + costs_ = NULL; + words_ = NULL; + word_cnt_ = 0; +} + +WordUnigrams::~WordUnigrams() { + if (words_ != NULL) { + if (words_[0] != NULL) { + delete []words_[0]; + } + + delete []words_; + words_ = NULL; + } + + if (costs_ != NULL) { + delete []costs_; + } +} + +// Load the word-list and unigrams from file and create an object +// The word list is assumed to be sorted in lexicographic order. +WordUnigrams *WordUnigrams::Create(const string &data_file_path, + const string &lang) { + string file_name; + string str; + + file_name = data_file_path + lang; + file_name += ".cube.word-freq"; + + // load the string into memory + if (CubeUtils::ReadFileToString(file_name, &str) == false) { + return NULL; + } + + // split into lines + vector str_vec; + CubeUtils::SplitStringUsing(str, "\r\n \t", &str_vec); + if (str_vec.size() < 2) { + return NULL; + } + + // allocate memory + WordUnigrams *word_unigrams_obj = new WordUnigrams(); + if (word_unigrams_obj == NULL) { + fprintf(stderr, "Cube ERROR (WordUnigrams::Create): could not create " + "word unigrams object.\n"); + return NULL; + } + + int full_len = str.length(); + int word_cnt = str_vec.size() / 2; + word_unigrams_obj->words_ = new char*[word_cnt]; + word_unigrams_obj->costs_ = new int[word_cnt]; + + if (word_unigrams_obj->words_ == NULL || + word_unigrams_obj->costs_ == NULL) { + fprintf(stderr, "Cube ERROR (WordUnigrams::Create): error allocating " + "word unigram fields.\n"); + delete word_unigrams_obj; + return NULL; + } + + word_unigrams_obj->words_[0] = new char[full_len]; + if (word_unigrams_obj->words_[0] == NULL) { + fprintf(stderr, "Cube ERROR (WordUnigrams::Create): error allocating " + "word unigram fields.\n"); + 
delete word_unigrams_obj; + return NULL; + } + + // construct sorted list of words and costs + word_unigrams_obj->word_cnt_ = 0; + char *char_buff = word_unigrams_obj->words_[0]; + word_cnt = 0; + int max_cost = 0; + + for (int wrd = 0; wrd < str_vec.size(); wrd += 2) { + word_unigrams_obj->words_[word_cnt] = char_buff; + + strcpy(char_buff, str_vec[wrd].c_str()); + char_buff += (str_vec[wrd].length() + 1); + + if (sscanf(str_vec[wrd + 1].c_str(), "%d", + word_unigrams_obj->costs_ + word_cnt) != 1) { + fprintf(stderr, "Cube ERROR (WordUnigrams::Create): error reading " + "word unigram data.\n"); + delete word_unigrams_obj; + return NULL; + } + // update max cost + max_cost = MAX(max_cost, word_unigrams_obj->costs_[word_cnt]); + word_cnt++; + } + word_unigrams_obj->word_cnt_ = word_cnt; + + // compute the not-in-list-cost by assuming that a word not in the list + // [ahmadab]: This can be computed as follows: + // - Given that the distribution of words follow Zipf's law: + // (F = K / (rank ^ S)), where s is slightly > 1.0 + // - Number of words in the list is N + // - The mean frequency of a word that did not appear in the list is the + // area under the rest of the Zipf's curve divided by 2 (the mean) + // - The area would be the bound integral from N to infinity = + // (K * S) / (N ^ (S + 1)) ~= K / (N ^ 2) + // - Given that cost = -LOG(prob), the cost of an unlisted word would be + // = max_cost + 2*LOG(N) + word_unigrams_obj->not_in_list_cost_ = max_cost + + (2 * CubeUtils::Prob2Cost(1.0 / word_cnt)); + // success + return word_unigrams_obj; +} + +// Split input into space-separated tokens, strip trailing punctuation +// from each, determine case properties, call UTF-8 flavor of cost +// function on each word, and aggregate all into single mean word +// cost. 
+int WordUnigrams::Cost(const char_32 *key_str32, + LangModel *lang_mod, + CharSet *char_set, + UNICHARSET *unicharset) const { + if (!key_str32) + return 0; + // convert string to UTF8 to split into space-separated words + string key_str; + CubeUtils::UTF32ToUTF8(key_str32, &key_str); + vector words; + CubeUtils::SplitStringUsing(key_str, " \t", &words); + + // no words => no cost + if (words.size() <= 0) { + return 0; + } + + // aggregate the costs of all the words + int cost = 0; + for (int word_idx = 0; word_idx < words.size(); word_idx++) { + // convert each word back to UTF32 for analyzing case and punctuation + string_32 str32; + CubeUtils::UTF8ToUTF32(words[word_idx].c_str(), &str32); + int len = CubeUtils::StrLen(str32.c_str()); + + // strip all trailing punctuation + string clean_str; + int clean_len = len; + bool trunc = false; + while (clean_len > 0 && + lang_mod->IsTrailingPunc(str32.c_str()[clean_len - 1])) { + --clean_len; + trunc = true; + } + + // If either the original string was not truncated (no trailing + // punctuation) or the entire string was removed (all characters + // are trailing punctuation), evaluate original word as is; + // otherwise, copy all but the trailing punctuation characters + char_32 *clean_str32 = NULL; + if (clean_len == 0 || !trunc) { + clean_str32 = CubeUtils::StrDup(str32.c_str()); + } else { + clean_str32 = new char_32[clean_len + 1]; + for (int i = 0; i < clean_len; ++i) { + clean_str32[i] = str32[i]; + } + clean_str32[clean_len] = '\0'; + } + ASSERT_HOST(clean_str32 != NULL); + + string str8; + CubeUtils::UTF32ToUTF8(clean_str32, &str8); + int word_cost = CostInternal(str8.c_str()); + + // if case invariant, get costs of all-upper-case and all-lower-case + // versions and return the min cost + if (clean_len >= kMinLengthNumOrCaseInvariant && + CubeUtils::IsCaseInvariant(clean_str32, char_set, unicharset)) { + char_32 *lower_32 = CubeUtils::ToLower(clean_str32, char_set, unicharset); + if (lower_32) { + string 
lower_8; + CubeUtils::UTF32ToUTF8(lower_32, &lower_8); + word_cost = MIN(word_cost, CostInternal(lower_8.c_str())); + delete [] lower_32; + } + char_32 *upper_32 = CubeUtils::ToUpper(clean_str32, char_set, unicharset); + if (upper_32) { + string upper_8; + CubeUtils::UTF32ToUTF8(upper_32, &upper_8); + word_cost = MIN(word_cost, CostInternal(upper_8.c_str())); + delete [] upper_32; + } + } + + if (clean_len >= kMinLengthNumOrCaseInvariant) { + // if characters are all numeric, incur 0 word cost + bool is_numeric = true; + for (int i = 0; i < clean_len; ++i) { + if (!lang_mod->IsDigit(clean_str32[i])) + is_numeric = false; + } + if (is_numeric) + word_cost = 0; + } + delete [] clean_str32; + cost += word_cost; + } // word_idx + + // return the mean cost + return static_cast(cost / static_cast(words.size())); +} + +// Search for UTF-8 string using binary search of sorted words_ array. +int WordUnigrams::CostInternal(const char *key_str) const { + if (strlen(key_str) == 0) + return not_in_list_cost_; + int hi = word_cnt_ - 1; + int lo = 0; + while (lo <= hi) { + int current = (hi + lo) / 2; + int comp = strcmp(key_str, words_[current]); + // a match + if (comp == 0) { + return costs_[current]; + } + if (comp < 0) { + // go lower + hi = current - 1; + } else { + // go higher + lo = current + 1; + } + } + return not_in_list_cost_; +} +} // namespace tesseract diff --git a/cube/word_unigrams.h b/cube/word_unigrams.h new file mode 100644 index 0000000000..0d304ef170 --- /dev/null +++ b/cube/word_unigrams.h @@ -0,0 +1,69 @@ + /********************************************************************** + * File: word_unigrams.h + * Description: Declaration of the Word Unigrams Class + * Author: Ahmad Abdulkader + * Created: 2008 + * + * (C) Copyright 2008, Google Inc. + ** Licensed under the Apache License, Version 2.0 (the "License"); + ** you may not use this file except in compliance with the License. 
+ ** You may obtain a copy of the License at + ** http://www.apache.org/licenses/LICENSE-2.0 + ** Unless required by applicable law or agreed to in writing, software + ** distributed under the License is distributed on an "AS IS" BASIS, + ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + ** See the License for the specific language governing permissions and + ** limitations under the License. + * + **********************************************************************/ + +// The WordUnigram class holds the unigrams of the most frequent set of words +// in a language. It is an optional component of the Cube OCR engine. If +// present, the unigram cost of a word is aggregated with the other costs +// (Recognition, Language Model, Size) to compute a cost for a word. +// The word list is assumed to be sorted in lexicographic order. + +#ifndef WORD_UNIGRAMS_H +#define WORD_UNIGRAMS_H + +#include +#include "char_set.h" +#include "lang_model.h" + +namespace tesseract { +class WordUnigrams { + public: + WordUnigrams(); + ~WordUnigrams(); + // Load the word-list and unigrams from file and create an object + // The word list is assumed to be sorted + static WordUnigrams *Create(const string &data_file_path, + const string &lang); + // Compute the unigram cost of a UTF-32 string. Splits into + // space-separated tokens, strips trailing punctuation from each + // token, evaluates case properties, and calls internal Cost() + // function on UTF-8 version. To avoid unnecessarily penalizing + // all-one-case words or capitalized words (first-letter + // upper-case and remaining letters lower-case) when not all + // versions of the word appear in the .cube.word-freq file, a + // case-invariant cost is computed in those cases, assuming the word + // meets a minimum length. 
+ int Cost(const char_32 *str32, LangModel *lang_mod, + CharSet *char_set, UNICHARSET *unicharset) const; + protected: + // Compute the word unigram cost of a UTF-8 string with binary + // search of sorted words_ array. + int CostInternal(const char *str) const; + private: + // Only words this length or greater qualify for all-numeric or + // case-invariant word unigram cost. + static const int kMinLengthNumOrCaseInvariant = 4; + + int word_cnt_; + char **words_; + int *costs_; + int not_in_list_cost_; +}; +} + +#endif // WORD_UNIGRAMS_H diff --git a/cutil/Makefile.am b/cutil/Makefile.am index a0d938e185..1da5b1edd4 100644 --- a/cutil/Makefile.am +++ b/cutil/Makefile.am @@ -1,17 +1,15 @@ SUBDIRS = AM_CPPFLAGS = -I$(top_srcdir)/ccutil -EXTRA_DIST = cutil.vcproj - include_HEADERS = \ bitvec.h callcpp.h const.h cutil.h cutil_class.h danerror.h efio.h \ - emalloc.h freelist.h funcdefs.h general.h globals.h listio.h \ + emalloc.h freelist.h globals.h listio.h \ oldheap.h oldlist.h structures.h tessarray.h lib_LTLIBRARIES = libtesseract_cutil.la libtesseract_cutil_la_SOURCES = \ bitvec.cpp cutil.cpp cutil_class.cpp danerror.cpp efio.cpp \ - emalloc.cpp freelist.cpp globals.cpp listio.cpp oldheap.cpp \ + emalloc.cpp freelist.cpp listio.cpp oldheap.cpp \ oldlist.cpp structures.cpp tessarray.cpp libtesseract_cutil_la_LDFLAGS = -version-info $(GENERIC_LIBRARY_VERSION) diff --git a/cutil/Makefile.in b/cutil/Makefile.in index fe3e22f486..6aa7a392f0 100644 --- a/cutil/Makefile.in +++ b/cutil/Makefile.in @@ -72,8 +72,8 @@ am__installdirs = "$(DESTDIR)$(libdir)" "$(DESTDIR)$(includedir)" LTLIBRARIES = $(lib_LTLIBRARIES) libtesseract_cutil_la_LIBADD = am_libtesseract_cutil_la_OBJECTS = bitvec.lo cutil.lo cutil_class.lo \ - danerror.lo efio.lo emalloc.lo freelist.lo globals.lo \ - listio.lo oldheap.lo oldlist.lo structures.lo tessarray.lo + danerror.lo efio.lo emalloc.lo freelist.lo listio.lo \ + oldheap.lo oldlist.lo structures.lo tessarray.lo libtesseract_cutil_la_OBJECTS = 
$(am_libtesseract_cutil_la_OBJECTS) libtesseract_cutil_la_LINK = $(LIBTOOL) --tag=CXX $(AM_LIBTOOLFLAGS) \ $(LIBTOOLFLAGS) --mode=link $(CXXLD) $(AM_CXXFLAGS) \ @@ -246,7 +246,6 @@ libdir = @libdir@ libexecdir = @libexecdir@ localedir = @localedir@ localstatedir = @localstatedir@ -lt_ECHO = @lt_ECHO@ mandir = @mandir@ mkdir_p = @mkdir_p@ oldincludedir = @oldincludedir@ @@ -264,16 +263,15 @@ top_builddir = @top_builddir@ top_srcdir = @top_srcdir@ SUBDIRS = AM_CPPFLAGS = -I$(top_srcdir)/ccutil -EXTRA_DIST = cutil.vcproj include_HEADERS = \ bitvec.h callcpp.h const.h cutil.h cutil_class.h danerror.h efio.h \ - emalloc.h freelist.h funcdefs.h general.h globals.h listio.h \ + emalloc.h freelist.h globals.h listio.h \ oldheap.h oldlist.h structures.h tessarray.h lib_LTLIBRARIES = libtesseract_cutil.la libtesseract_cutil_la_SOURCES = \ bitvec.cpp cutil.cpp cutil_class.cpp danerror.cpp efio.cpp \ - emalloc.cpp freelist.cpp globals.cpp listio.cpp oldheap.cpp \ + emalloc.cpp freelist.cpp listio.cpp oldheap.cpp \ oldlist.cpp structures.cpp tessarray.cpp libtesseract_cutil_la_LDFLAGS = -version-info $(GENERIC_LIBRARY_VERSION) @@ -358,7 +356,6 @@ distclean-compile: @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/efio.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/emalloc.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/freelist.Plo@am__quote@ -@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/globals.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/listio.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/oldheap.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/oldlist.Plo@am__quote@ diff --git a/cutil/bitvec.cpp b/cutil/bitvec.cpp index 10d2bc5ed8..75b10a787b 100644 --- a/cutil/bitvec.cpp +++ b/cutil/bitvec.cpp @@ -27,11 +27,6 @@ #include "freelist.h" #include "tprintf.h" -/*----------------------------------------------------------------------------- - Global Data Definitions and Declarations 
------------------------------------------------------------------------------*/ -static int BitVectorCount = 0; - /*----------------------------------------------------------------------------- Public Code -----------------------------------------------------------------------------*/ @@ -74,15 +69,14 @@ void FreeBitVector(BIT_VECTOR BitVector) { */ if (BitVector) { Efree(BitVector); - BitVectorCount--; - } else { - tprintf("%6d BITVECTOR elements in use\n", BitVectorCount); } } /* FreeBitVector */ - /*hamming_distance(array1,array2,length) computes the hamming distance - between two bit strings */ +/** + * hamming_distance(array1,array2,length) computes the hamming distance + * between two bit strings. + */ /*--------------------------------------------------------------------------*/ int hamming_distance(uinT32* array1, uinT32* array2, int length) { register uinT32 diff; /*bit difference */ @@ -115,7 +109,6 @@ int hamming_distance(uinT32* array1, uinT32* array2, int length) { * @note History: Tue Oct 23 16:51:27 1990, DSJ, Created. */ BIT_VECTOR NewBitVector(int NumBits) { - BitVectorCount++; return ((BIT_VECTOR) Emalloc(sizeof(uinT32) * WordsInVectorOfSize(NumBits))); } /* NewBitVector */ diff --git a/cutil/bitvec.h b/cutil/bitvec.h index 25c806769c..2b058930d9 100644 --- a/cutil/bitvec.h +++ b/cutil/bitvec.h @@ -1,10 +1,10 @@ /****************************************************************************** - ** Filename: bitvec.h - ** Purpose: Routines for manipulating bit vectors - ** Author: Dan Johnson - ** History: Wed Mar 7 17:52:45 1990, DSJ, Created. + ** Filename: bitvec.h + ** Purpose: Routines for manipulating bit vectors + ** Author: Dan Johnson + ** History: Wed Mar 7 17:52:45 1990, DSJ, Created. ** - ** (c) Copyright Hewlett-Packard Company, 1988. + ** (c) Copyright Hewlett-Packard Company, 1988. ** Licensed under the Apache License, Version 2.0 (the "License"); ** you may not use this file except in compliance with the License. 
** You may obtain a copy of the License at @@ -23,6 +23,7 @@ /*----------------------------------------------------------------------------- Include Files and Type Defines -----------------------------------------------------------------------------*/ +// TODO(rays) Rename BITSINLONG to BITSINuinT32, and use sizeof. #define BITSINLONG 32 /**< no of bits in a long */ typedef uinT32 *BIT_VECTOR; @@ -31,26 +32,26 @@ typedef uinT32 *BIT_VECTOR; -----------------------------------------------------------------------------*/ #define zero_all_bits(array,length) \ {\ - register int index; /*temporary index*/\ + register int index; /*temporary index*/\ \ for (index=0;index #endif #include "host.h" -#include "varable.h" +#include "params.h" #include "unichar.h" class ScrollView; @@ -80,27 +80,6 @@ typedef enum { Green_YELLOW } C_COL; /*starbase colours */ -#define INTEL 0x4949 -#define MOTO 0x4d4d - -extern INT_VAR_H (tess_cp_mapping0, 0, "Mappings for class pruner distance"); -extern INT_VAR_H (tess_cp_mapping1, 1, "Mappings for class pruner distance"); -extern INT_VAR_H (tess_cp_mapping2, 2, "Mappings for class pruner distance"); -extern INT_VAR_H (tess_cp_mapping3, 3, "Mappings for class pruner distance"); -extern INT_VAR_H (record_matcher_output, 0, "Record detailed matcher info"); -extern INT_VAR_H (il1_adaption_test, 0, -"Dont adapt to i/I at beginning of word"); -extern double_VAR_H (permuter_pending_threshold, 0.0, -"Worst conf for using pending dictionary"); - -//Global matcher info from the class pruner. -extern inT32 cp_maps[4]; -//Global info to control writes of matcher info -extern char blob_answer[UNICHAR_LEN + 1]; //correct char -extern char *word_answer; //correct word -extern inT32 bits_in_states; //no of bits in states - -void setup_cp_maps(); void cprintf ( //Trace printf const char *format, ... 
//special message ); diff --git a/cutil/const.h b/cutil/const.h index af9f56fae0..33f8fa443c 100644 --- a/cutil/const.h +++ b/cutil/const.h @@ -14,95 +14,10 @@ #define CONST_H /*This file contains constants which are global to the entire system*/ +#define SPLINESIZE 23 // max spline parts to a line -#define PATHSIZE 8192 /*max elements in edge path */ -#define OUTLINESIZE 256 /*max elements in aprroximated path */ -#define BLOBSIZE 10000 /*max no of blobs on a page */ +#define PI 3.14159265359 // pi -#if 0 -#define FIRSTCHAR '!' /*first char in set */ -#define LASTCHAR '~' /*last char in set */ -#define CHARSETSIZE (LASTCHAR-FIRSTCHAR+1) - no of chars in set -#define MERGESIZE 10 /*max chars in a class */ -#define MAXCHARSIZE 159 /*max size of any char */ -#define CLASSIZE 256 /*max no of classes */ -#define SPLITSIZE 4 /*no of to_classes per class */ -#define BADCLASS 255 /*null class */ -#define BADMATCH 255 /*no match */ -#define CLASSLENGTH 16 /*max chars in a class string */ -#endif - -#define VECSCALE 3 /*vector scaling factor in fx */ -#define REALSCALE ((double)(1< #include -#include "general.h" +#include "host.h" #include "tprintf.h" /*---------------------------------------------------------------------- diff --git a/cutil/cutil_class.h b/cutil/cutil_class.h index efea5e2d42..06d3254366 100644 --- a/cutil/cutil_class.h +++ b/cutil/cutil_class.h @@ -20,7 +20,6 @@ #define TESSERACT_CUTIL_CUTIL_CLASS_H__ #include "ccutil.h" -#include "tessclas.h" #include "const.h" #include "strngs.h" @@ -31,29 +30,6 @@ class CUtil : public CCUtil { CUtil(); ~CUtil(); void read_variables(const char *filename, bool global_only); - public: - TBLOB *pageblobs; /*first blob on page */ - TEXTBLOCK *pageblocks; /*first block on page */ - - int resolution; /*scanner res in dpi */ - int acts[MAXPROC]; /*action flags */ - int debugs[MAXPROC]; /*debug flags */ - int plots[MAXPROC]; /*plot flags */ - - int corners[4]; /*corners of scan window */ - - char *debugfile; /* debug file name 
*/ - - int plots_fx; - int plots_ocr; - - int debugs_fx; - int debugs_ocr; - - int acts_fx; - int acts_ocr; - - char *demodir; /*demo home directory */ }; } // namespace tesseract diff --git a/cutil/danerror.cpp b/cutil/danerror.cpp index 0730b37289..58f8882fee 100644 --- a/cutil/danerror.cpp +++ b/cutil/danerror.cpp @@ -18,55 +18,15 @@ /**---------------------------------------------------------------------------- Include Files and Type Defines ----------------------------------------------------------------------------**/ -#include "general.h" +#include "host.h" #include "danerror.h" -#include "callcpp.h" +#include "tprintf.h" #include "globaloc.h" #ifdef __UNIX__ #include "assert.h" #endif #include -#include - -#define MAXTRAPDEPTH 100 - -#define ERRORTRAPDEPTH 1000 - -/**---------------------------------------------------------------------------- - Global Data Definitions and Declarations -----------------------------------------------------------------------------**/ -static jmp_buf ErrorTrapStack[MAXTRAPDEPTH]; -static VOID_PROC ProcTrapStack[MAXTRAPDEPTH]; -static inT32 CurrentTrapDepth = 0; - -/**---------------------------------------------------------------------------- - Public Code -----------------------------------------------------------------------------**/ -/*---------------------------------------------------------------------------*/ -void ReleaseErrorTrap() { -/* - ** Parameters: - ** None - ** Globals: - ** CurrentTrapDepth number of traps on the stack - ** Operation: - ** This routine removes the current error trap from the - ** error trap stack, thus returning control to the previous - ** error trap. If the error trap stack is empty, nothing is - ** done. - ** Return: - ** None - ** Exceptions: - ** None - ** History: - ** 4/3/89, DSJ, Created. 
- */ - if (CurrentTrapDepth > 0) { - CurrentTrapDepth--; - } -} /* ReleaseErrorTrap */ - /*---------------------------------------------------------------------------*/ void DoError(int Error, const char *Message) { @@ -90,55 +50,8 @@ void DoError(int Error, const char *Message) { ** 4/3/89, DSJ, Created. */ if (Message != NULL) { - cprintf ("\nError: %s!\n", Message); + tprintf("\nError: %s!\n", Message); } - if (CurrentTrapDepth <= 0) { - cprintf ("\nFatal error: No error trap defined!\n"); - - /* SPC 20/4/94 - There used to be a call to abort() here. I've changed it to call into the - C++ error code to generate a meaningful status code - */ - signal_termination_handler(Error); - } - - if (ProcTrapStack[CurrentTrapDepth - 1] != DO_NOTHING) - (*ProcTrapStack[CurrentTrapDepth - 1]) (); - - longjmp (ErrorTrapStack[CurrentTrapDepth - 1], 1); + signal_termination_handler(Error); } /* DoError */ - - -/**---------------------------------------------------------------------------- - Private Code -----------------------------------------------------------------------------**/ -/*---------------------------------------------------------------------------*/ -jmp_buf &PushErrorTrap(VOID_PROC Procedure) { -/* - ** Parameters: - ** Procedure trap procedure to execute - ** Globals: - ** ErrorTrapStack stack of error traps - ** CurrentTrapDepth number of traps on the stack - ** Operation: - ** This routine pushes a new error trap onto the top of - ** the error trap stack. This new error trap can then be - ** used in a call to setjmp. This trap is then in effect - ** until ReleaseErrorTrap is called. WARNING: a procedure - ** that calls PushErrorTrap should never exit before calling - ** ReleaseErrorTrap. - ** Return: - ** Pointer to a new error trap buffer - ** Exceptions: - ** Traps an error if the error trap stack is already full - ** History: - ** 3/17/89, DSJ, Created. - ** 9/12/90, DSJ, Added trap procedure parameter. 
- */ - if (CurrentTrapDepth >= MAXTRAPDEPTH) - DoError (ERRORTRAPDEPTH, "Error trap depth exceeded"); - ProcTrapStack[CurrentTrapDepth] = Procedure; - return ErrorTrapStack[CurrentTrapDepth++]; - -} /* PushErrorTrap */ diff --git a/cutil/danerror.h b/cutil/danerror.h index 23cd014d65..1e4a547b89 100644 --- a/cutil/danerror.h +++ b/cutil/danerror.h @@ -21,9 +21,6 @@ /**---------------------------------------------------------------------------- Include Files and Type Defines ----------------------------------------------------------------------------**/ -#include - -#define SetErrorTrap(Proc) setjmp(PushErrorTrap(Proc)) #define NOERROR 0 #define DO_NOTHING 0 @@ -33,9 +30,6 @@ typedef void (*VOID_PROC) (); /**---------------------------------------------------------------------------- Public Function Prototypes ----------------------------------------------------------------------------**/ -void ReleaseErrorTrap(); - void DoError(int Error, const char *Message); -jmp_buf &PushErrorTrap(VOID_PROC Procedure); #endif diff --git a/cutil/freelist.cpp b/cutil/freelist.cpp index ff783abdd9..55eb5ee3b2 100644 --- a/cutil/freelist.cpp +++ b/cutil/freelist.cpp @@ -17,8 +17,6 @@ #include "memry.h" #include "tprintf.h" -static int mem_alloc_counter = 0; - /********************************************************************** * memalloc @@ -26,7 +24,6 @@ static int mem_alloc_counter = 0; * Memory allocator with protection. 
**********************************************************************/ int *memalloc(int size) { - mem_alloc_counter++; return ((int *) alloc_mem (size)); } @@ -56,11 +53,6 @@ int *memrealloc(void *ptr, int size, int oldsize) { void memfree(void *element) { if (element) { free_mem(element); - mem_alloc_counter--; - } - else { - tprintf ("%d MEM_ALLOC's used\n", mem_alloc_counter); - DoError (0, "Memfree of NULL pointer"); } } diff --git a/cutil/funcdefs.h b/cutil/funcdefs.h deleted file mode 100644 index 7964f75bc0..0000000000 --- a/cutil/funcdefs.h +++ /dev/null @@ -1,35 +0,0 @@ -/****************************************************************************** - ** Filename: funcdefs.h - ** Purpose: Definition of function types for passing as params. - ** Author: Dan Johnson - ** History: Fri Sep 14 10:04:47 1990, DSJ, Created. - ** - ** (c) Copyright Hewlett-Packard Company, 1988. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. 
- ******************************************************************************/ -#ifndef FUNCDEFS_H -#define FUNCDEFS_H - -/**---------------------------------------------------------------------------- - Include Files and Type Defines -----------------------------------------------------------------------------**/ -#include "general.h" - -typedef void (*VOID_FUNC) (); - -/**---------------------------------------------------------------------------- - Public Function Prototypes -----------------------------------------------------------------------------**/ - -/**---------------------------------------------------------------------------- - Global Data Definitions and Declarations -----------------------------------------------------------------------------**/ -#endif diff --git a/cutil/general.h b/cutil/general.h deleted file mode 100644 index 2c51d36a45..0000000000 --- a/cutil/general.h +++ /dev/null @@ -1,33 +0,0 @@ -/****************************************************************************** - ** Filename: General.h - ** Purpose: this is the system independent typedefs and defines - ** Author: Mike Niquette / Dan Johnson - ** History: Creation Date: 09/13/1988, MLN - ** Added UNIX: 11/10/88, DSJ - ** Changed name to General.h 11/24/88, DSJ - ** Added BOOL, CHAR, TRUE, FALSE, 11/24/88, DSJ - ** STATUS - ** - ** (c) Copyright Hewlett-Packard Company, 1988. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. 
- ******************************************************************************/ -#ifndef GENERAL_H -#define GENERAL_H - -#include "host.h" - -typedef char CHAR; -typedef int STATUS; - -#ifndef NULL -#define NULL 0 -#endif -#endif diff --git a/cutil/globals.cpp b/cutil/globals.cpp deleted file mode 100644 index 6212fdfc8f..0000000000 --- a/cutil/globals.cpp +++ /dev/null @@ -1,69 +0,0 @@ -/* -################################################################################ -# -# File: globals.c -# Description: Global flag definitions -# Author: Mark Seaman, OCR Technology -# Created: Thu Oct 19 16:51:26 1989 -# Modified: Fri Jan 26 13:16:37 1990 (Mark Seaman) marks@hpgrlt -# Language: Text -# Package: N/A -# Status: Experimental (Do Not Distribute) -# -# (c) Copyright 1989, Hewlett-Packard Company. -** Licensed under the Apache License, Version 2.0 (the "License"); -** you may not use this file except in compliance with the License. -** You may obtain a copy of the License at -** http://www.apache.org/licenses/LICENSE-2.0 -** Unless required by applicable law or agreed to in writing, software -** distributed under the License is distributed on an "AS IS" BASIS, -** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -** See the License for the specific language governing permissions and -** limitations under the License. 
-# -################################################################################ -**************************************************************************/ -#include -#include "globals.h" - -/* This file contains the global declarations used by all demonstrator files*/ - -//IMAGE info; /*image info record*/ -//TBLOB *pageblobs; /*first blob on page */ -//TEXTBLOCK *pageblocks; /*first block on page */ -// char classes[CLASSIZE][CLASSLENGTH]; - /*class definitions */ - /*indices to to_classes */ - -//int resolution; /*scanner res in dpi */ -//int acts[MAXPROC]; /*action flags */ -//int debugs[MAXPROC]; /*debug flags */ -//int plots[MAXPROC]; /*plot flags */ - -/*---------------------------------------------------------------------- - V a r i a b l e s -----------------------------------------------------------------------*/ -//int corners[4]; /*corners of scan window */ - -//char imagefile[FILENAMESIZE]; /*image file name */ -//char directory[FILENAMESIZE]; /* main directory */ -//char *debugfile; /* debug file name */ - -//int plots_fx; -//int plots_ocr; - -//int debugs_fx; -//int debugs_ocr; - -//int acts_fx; -//int acts_ocr; - -//char *demodir; /*demo home directory */ - -//int edgefd; /*edges window */ -//int debugfd; /*debug window fd */ -//FILE *debugfp; /*debug log file */ - -//UNICHARSET unicharset; - -//STRING language_data_path_prefix; diff --git a/cutil/globals.h b/cutil/globals.h index f30171b374..3907697e9e 100644 --- a/cutil/globals.h +++ b/cutil/globals.h @@ -25,46 +25,11 @@ #ifndef GLOBALS_H #define GLOBALS_H -#include "tessclas.h" #include "const.h" #include "unicharset.h" #include "strngs.h" #include -/*---------------------------------------------------------------------- - V a r i a b l e s -----------------------------------------------------------------------*/ -//extern IMAGE info; /*image info record*/ -//extern TBLOB *pageblobs; /*first blob on page */ -//extern TEXTBLOCK *pageblocks; /*first block on page */ -// /*class definitions */ 
-///* extern char classes[CLASSIZE][CLASSLENGTH]; */ -//extern int resolution; /*scanner res in dpi */ -//extern int acts[MAXPROC]; /*action flags */ -//extern int debugs[MAXPROC]; /*debug flags */ -//extern int plots[MAXPROC]; /*plot flags */ -//extern int corners[4]; /*corners of scan window */ -// /*image file name */ -//extern char imagefile[FILENAMESIZE]; -// /* main directory */ -//extern char directory[FILENAMESIZE]; -//extern char *debugfile; /* debug file name */ - -//extern int plots_fx; -//extern int plots_ocr; - -//extern int debugs_fx; -//extern int debugs_ocr; - -//extern int acts_fx; -//extern int acts_ocr; - -//extern char *demodir; -//extern FILE *debugfp; /*debug log file */ - -//extern UNICHARSET unicharset; /* The UNICHARSET variable that Tesseract uses internally */ - -//extern STRING language_data_path_prefix; #endif diff --git a/cutil/listio.cpp b/cutil/listio.cpp index 5c9894dffe..eb8c1ee823 100644 --- a/cutil/listio.cpp +++ b/cutil/listio.cpp @@ -49,9 +49,9 @@ LIST read_list(const char *filename) { char *chopAt250(); if ((infile = open_file (filename, "r")) == NULL) - return (NIL); + return (NIL_LIST); - list = NIL; + list = NIL_LIST; while (fgets (s, CHARS_PER_LINE, infile) != NULL) { s[CHARS_PER_LINE - 1] = '\0'; if (strlen (s) > 0) { diff --git a/cutil/oldheap.cpp b/cutil/oldheap.cpp index 708d425240..3117987785 100644 --- a/cutil/oldheap.cpp +++ b/cutil/oldheap.cpp @@ -108,7 +108,7 @@ int HeapPop(HEAP *Heap, FLOAT32 *Key, void *out_ptr) { } Heap->Entry[Hole].Key = HoleKey; Heap->Entry[Hole].Data = Heap->Entry[Heap->FirstFree].Data; - return (OK); + return (TESS_HEAP_OK); } /* HeapPop */ @@ -161,10 +161,18 @@ int HeapPopWorst(HEAP *Heap, FLOAT32 *Key, void *out_ptr) { Hole = Father; Father = FATHER (Hole); } - return (OK); + return (TESS_HEAP_OK); } /* HeapPop */ +// Pushes data onto the heap only if there is free space left. +// Returns true if data was added to the heap, false if the heap was full. 
+bool HeapPushCheckSize(HEAP *Heap, FLOAT32 Key, void *Data) { + if (Heap->FirstFree > Heap->Size) return false; + HeapPush(Heap, Key, Data); + return true; +} + /*---------------------------------------------------------------------------*/ /** * This routine stores Data into Heap and associates it @@ -296,7 +304,7 @@ int GetTopOfHeap(HEAP *Heap, HEAPENTRY *Entry) { } Heap->Entry[Hole].Key = HoleKey; Heap->Entry[Hole].Data = Heap->Entry[Heap->FirstFree].Data; - return (OK); + return (TESS_HEAP_OK); } /* GetTopOfHeap */ diff --git a/cutil/oldheap.h b/cutil/oldheap.h index 5ada445646..da4974f473 100644 --- a/cutil/oldheap.h +++ b/cutil/oldheap.h @@ -21,32 +21,24 @@ /*----------------------------------------------------------------------------- Include Files and Type Defines -----------------------------------------------------------------------------*/ -#include "general.h" +#include "host.h" #include "cutil.h" #define HEAPFULL 3000 -#define OK 0 #define EMPTY -1 +#define TESS_HEAP_OK 0 -typedef struct -{ +struct HEAPENTRY { FLOAT32 Key; void *Data; -} +}; - -HEAPENTRY; - -typedef struct -{ +struct HEAP { inT32 Size; inT32 FirstFree; HEAPENTRY Entry[1]; -} - - -HEAP; +}; /*----------------------------------------------------------------------------- Macros @@ -83,44 +75,6 @@ int GetTopOfHeap(HEAP *Heap, HEAPENTRY *Entry); void FreeHeapData(HEAP *Heap, void_dest destructor); -/* -#if defined(__STDC__) || defined(__cplusplus) -# define _ARGS(s) s -#else -# define _ARGS(s) () -#endif*/ - -/* heap.c -HEAP *MakeHeap - _ARGS((int Size)); - -int HeapPop - _ARGS((HEAP *Heap, - FLOAT32 *Key, - char **Data)); - -int HeapPopWorst - _ARGS((HEAP *Heap, - FLOAT32 *Key, - char **Data)); - -void HeapPush - _ARGS((HEAP *Heap, - FLOAT32 Key, - char *Data)); - -void HeapStore - _ARGS((HEAP *Heap, - HEAPENTRY *Entry)); - -int GetTopOfHeap - _ARGS((HEAP *Heap, - HEAPENTRY *Entry)); - -void FreeHeapData - _ARGS((HEAP *Heap, - void (*Deallocator )())); - -#undef _ARGS -*/ +bool 
HeapPushCheckSize(HEAP *Heap, FLOAT32 Key, void *Data); + #endif diff --git a/cutil/oldlist.cpp b/cutil/oldlist.cpp index a7356b520e..cf93ffb518 100644 --- a/cutil/oldlist.cpp +++ b/cutil/oldlist.cpp @@ -33,8 +33,8 @@ This file contains a set of general purpose list manipulation routines. These routines can be used in a wide variety of ways to provide several different popular data structures. A new list can be created by declaring - a variable of type 'LIST', and can be initialized with the value 'NIL'. - All of these routines check for the NIL condition before dereferencing + a variable of type 'LIST', and can be initialized with the value 'NIL_LIST'. + All of these routines check for the NIL_LIST condition before dereferencing pointers. NOTE: There is a users' manual available in printed form from Mark Seaman at (303) 350-4492 at Greeley Hard Copy. @@ -53,7 +53,7 @@ To implement LISP like functions use: first_node CAR x = (int) first_node (l); - rest CDR l = rest (l); + rest CDR l = list_rest (l); push CONS l = push (l, (LIST) this); last LAST x = last (l); concat APPEND l = concat (r, s); @@ -77,8 +77,8 @@ The following rules of closure exist for the functions provided. a = first_node (push (a, b)) - b = rest (push (a, b)) - a = push (pop (a), a)) For all a <> NIL + b = list_rest (push (a, b)) + a = push (pop (a), a)) For all a <> NIL_LIST a = reverse (reverse (a)) ******************************************************************************/ @@ -95,7 +95,7 @@ M a c r o s ----------------------------------------------------------------------*/ #define add_on(l,x) l = push (l,first_node (x)) -#define next_one(l) l = rest (l) +#define next_one(l) l = list_rest (l) /*---------------------------------------------------------------------- F u n c t i o n s @@ -123,25 +123,25 @@ int count(LIST var_list) { * NULL is supplied for is_equal, the is_key routine will be used. 
**********************************************************************/ LIST delete_d(LIST list, void *key, int_compare is_equal) { - LIST result = NIL; - LIST last_one = NIL; + LIST result = NIL_LIST; + LIST last_one = NIL_LIST; if (is_equal == NULL) is_equal = is_same; - while (list != NIL) { + while (list != NIL_LIST) { if (!(*is_equal) (first_node (list), key)) { - if (last_one == NIL) { + if (last_one == NIL_LIST) { last_one = list; - list = rest (list); + list = list_rest (list); result = last_one; - set_rest(last_one, NIL); + set_rest(last_one, NIL_LIST); } else { set_rest(last_one, list); last_one = list; - list = rest (list); - set_rest(last_one, NIL); + list = list_rest (list); + set_rest(last_one, NIL_LIST); } } else { @@ -152,23 +152,23 @@ LIST delete_d(LIST list, void *key, int_compare is_equal) { } LIST delete_d(LIST list, void *key, - ResultCallback2* is_equal) { - LIST result = NIL; - LIST last_one = NIL; + TessResultCallback2* is_equal) { + LIST result = NIL_LIST; + LIST last_one = NIL_LIST; - while (list != NIL) { + while (list != NIL_LIST) { if (!(*is_equal).Run (first_node (list), key)) { - if (last_one == NIL) { + if (last_one == NIL_LIST) { last_one = list; - list = rest (list); + list = list_rest (list); result = last_one; - set_rest(last_one, NIL); + set_rest(last_one, NIL_LIST); } else { set_rest(last_one, list); last_one = list; - list = rest (list); - set_rest(last_one, NIL); + list = list_rest (list); + set_rest(last_one, NIL_LIST); } } else { @@ -187,12 +187,12 @@ LIST delete_d(LIST list, void *key, LIST destroy(LIST list) { LIST next; - while (list != NIL) { - next = rest (list); + while (list != NIL_LIST) { + next = list_rest (list); free_cell(list); list = next; } - return (NIL); + return (NIL_LIST); } @@ -205,7 +205,7 @@ void destroy_nodes(LIST list, void_dest destructor) { if (destructor == NULL) destructor = memfree; - while (list != NIL) { + while (list != NIL_LIST) { (*destructor) (first_node (list)); list = pop (list); } @@ 
-221,12 +221,12 @@ void destroy_nodes(LIST list, void_dest destructor) { void insert(LIST list, void *node) { LIST element; - if (list != NIL) { - element = push (NIL, node); - set_rest (element, rest (list)); + if (list != NIL_LIST) { + element = push (NIL_LIST, node); + set_rest (element, list_rest (list)); set_rest(list, element); node = first_node (list); - list->node = first_node (rest (list)); + list->node = first_node (list_rest (list)); list->next->node = (LIST) node; } } @@ -262,7 +262,7 @@ int is_same(void *item1, void *item2) { * first list updated. **********************************************************************/ LIST join(LIST list1, LIST list2) { - if (list1 == NIL) + if (list1 == NIL_LIST) return (list2); set_rest (last (list1), list2); return (list1); @@ -275,8 +275,8 @@ LIST join(LIST list1, LIST list2) { * Return the last list item (this is list type). **********************************************************************/ LIST last(LIST var_list) { - while (rest (var_list) != NIL) - var_list = rest (var_list); + while (list_rest (var_list) != NIL_LIST) + var_list = list_rest (var_list); return (var_list); } @@ -305,9 +305,9 @@ void *nth_cell(LIST var_list, int item_num) { LIST pop(LIST list) { LIST temp; - temp = rest (list); + temp = list_rest (list); - if (list != NIL) { + if (list != NIL_LIST) { free_cell(list); } return (temp); @@ -338,13 +338,13 @@ LIST push(LIST list, void *element) { LIST push_last(LIST list, void *item) { LIST t; - if (list != NIL) { + if (list != NIL_LIST) { t = last (list); - t->next = push (NIL, item); + t->next = push (NIL_LIST, item); return (list); } else - return (push (NIL, item)); + return (push (NIL_LIST, item)); } @@ -355,7 +355,7 @@ LIST push_last(LIST list, void *item) { * destroyed. 
**********************************************************************/ LIST reverse(LIST list) { - LIST newlist = NIL; + LIST newlist = NIL_LIST; iterate (list) copy_first (list, newlist); return (newlist); @@ -405,7 +405,7 @@ LIST s_adjoin(LIST var_list, void *variable, int_compare compare) { /********************************************************************** * s e a r c h * - * Search list, return NIL if not found. Return the list starting from + * Search list, return NIL_LIST if not found. Return the list starting from * the item if found. The compare routine "is_equal" is passed in as * the third paramter to this routine. If the value NULL is supplied * for is_equal, the is_key routine will be used. @@ -416,11 +416,11 @@ LIST search(LIST list, void *key, int_compare is_equal) { iterate (list) if ((*is_equal) (first_node (list), key)) return (list); - return (NIL); + return (NIL_LIST); } -LIST search(LIST list, void *key, ResultCallback2* is_equal) { +LIST search(LIST list, void *key, TessResultCallback2* is_equal) { iterate (list) if ((*is_equal).Run(first_node (list), key)) return (list); - return (NIL); + return (NIL_LIST); } diff --git a/cutil/oldlist.h b/cutil/oldlist.h index 5df97537a0..103dd72592 100644 --- a/cutil/oldlist.h +++ b/cutil/oldlist.h @@ -35,7 +35,7 @@ * BASICS: * ------- * first_node - Macro to return the first list node (not the cell). - * rest - Macro the return the second list cell + * list_rest - Macro the return the second list cell * pop - Destroy one list cell * push - Create one list cell and set the node and next fields * @@ -70,7 +70,7 @@ * join - Concatenates list 1 and list 2. * delete_d - Removes the requested elements from the list. * transform_d - Modifies the list by applying a function to each node. - * insert - Add a new element into this spot in a list. (not NIL) + * insert - Add a new element into this spot in a list. (not NIL_LIST) * push_last - Add a new element onto the end of a list. 
* reverse_d - Reverse a list and destroy the old one. * @@ -118,12 +118,12 @@ #define LIST_H #include "cutil.h" -#include "callback.h" +#include "tesscallback.h" /*---------------------------------------------------------------------- T y p e s ----------------------------------------------------------------------*/ -#define NIL (LIST) 0 +#define NIL_LIST (LIST) 0 struct list_rec { struct list_rec *node; @@ -135,8 +135,8 @@ typedef list_rec *LIST; M a c r o s ----------------------------------------------------------------------*/ /* Predefinitions */ -#define rest(l) ((l) ? (l)->next : NIL) -#define first_node(l) ((l) ? (l)->node : NIL) +#define list_rest(l) ((l) ? (l)->next : NIL_LIST) +#define first_node(l) ((l) ? (l)->node : NIL_LIST) /********************************************************************** * c o p y f i r s t @@ -153,11 +153,11 @@ typedef list_rec *LIST; * i t e r a t e * * Visit each node in the list. Replace the old list with the list - * minus the head. Continue until the list is NIL. + * minus the head. Continue until the list is NIL_LIST. **********************************************************************/ #define iterate(l) \ -for (; (l) != NIL; (l) = rest (l)) +for (; (l) != NIL_LIST; (l) = list_rest (l)) /********************************************************************** * i t e r a t e l i s t @@ -168,7 +168,7 @@ for (; (l) != NIL; (l) = rest (l)) **********************************************************************/ #define iterate_list(x,l) \ -for ((x)=(l); (x)!=0; (x)=rest(x)) +for ((x)=(l); (x)!=0; (x)=list_rest(x)) /********************************************************************** * j o i n o n @@ -205,11 +205,11 @@ for ((x)=(l); (x)!=0; (x)=rest(x)) * * Return the contents of the second list element. 
* - * #define second_node(l) first_node (rest (l)) + * #define second_node(l) first_node (list_rest (l)) **********************************************************************/ #define second_node(l) \ -first_node (rest (l)) +first_node (list_rest (l)) /********************************************************************** * s e t r e s t @@ -227,11 +227,11 @@ first_node (rest (l)) * * Return the contents of the third list element. * - * #define third(l) first_node (rest (rest (l))) + * #define third(l) first_node (list_rest (list_rest (l))) **********************************************************************/ #define third(l) \ -first_node (rest (rest (l))) +first_node (list_rest (list_rest (l))) /*---------------------------------------------------------------------- Public Funtion Prototypes @@ -241,7 +241,7 @@ int count(LIST var_list); LIST delete_d(LIST list, void *key, int_compare is_equal); LIST delete_d(LIST list, void *key, - ResultCallback2* is_equal); + TessResultCallback2* is_equal); LIST destroy(LIST list); @@ -273,7 +273,7 @@ LIST s_adjoin(LIST var_list, void *variable, int_compare compare); LIST search(LIST list, void *key, int_compare is_equal); -LIST search(LIST list, void *key, ResultCallback2*); +LIST search(LIST list, void *key, TessResultCallback2*); /* #if defined(__STDC__) || defined(__cplusplus) diff --git a/cutil/structures.cpp b/cutil/structures.cpp index e4363bc69a..d11aed7e46 100644 --- a/cutil/structures.cpp +++ b/cutil/structures.cpp @@ -26,41 +26,11 @@ I n c l u d e s ----------------------------------------------------------------------*/ #include "structures.h" -#include "callcpp.h" #include -/*---------------------------------------------------------------------- - M a c r o s -----------------------------------------------------------------------*/ -#define BLOBBLOCK 64 /*no allocated together */ -#define OUTLINEBLOCK 300 /*of each type */ -#define NODEBLOCK 36 /*blocks all about 1K bytes */ -#define EDGEPTBLOCK 50 -#define 
WERDBLOCK 42 -#define LISTBLOCK 300 - -/*---------------------------------------------------------------------- - V a r i a b l e s -----------------------------------------------------------------------*/ -int structblockcount = 0; -void_void memory_print_functions[NUM_DATA_TYPES]; -int max_data_types = 0; /*---------------------------------------------------------------------- F u n c t i o n s ----------------------------------------------------------------------*/ -makestructure (newword, oldword, printword, TWERD, -freeword, WERDBLOCK, "TWERD", wordcount) -makestructure (newoutline, oldoutline, printol, TESSLINE, -freeoutline, OUTLINEBLOCK, "TESSLINE", outlinecount); - -makestructure (new_cell, free_cell, printcell, list_rec, -freelist, LISTBLOCK, "LIST", listcount); - -newstructure (newblob, TBLOB, freeblob, BLOBBLOCK, "newblob", blobcount); -oldstructure (oldblob, TBLOB, freeblob, "BLOB", blobcount); - -newstructure (newedgept, EDGEPT, freeedgept, EDGEPTBLOCK, "newedgept", -edgeptcount); -oldstructure (oldedgept, EDGEPT, freeedgept, "EDGEPT", edgeptcount); +makestructure(new_cell, free_cell, list_rec); diff --git a/cutil/structures.h b/cutil/structures.h index ef666fc46f..c7e0cde919 100644 --- a/cutil/structures.h +++ b/cutil/structures.h @@ -28,16 +28,10 @@ /*---------------------------------------------------------------------- I n c l u d e s ----------------------------------------------------------------------*/ -#include "tessclas.h" #include "oldlist.h" #include "freelist.h" #include "danerror.h" -#define NUM_DATA_TYPES 20 - -extern int max_data_types; -extern void_void memory_print_functions[NUM_DATA_TYPES]; - /*---------------------------------------------------------------------- M a c r o s ----------------------------------------------------------------------*/ @@ -49,7 +43,7 @@ extern void_void memory_print_functions[NUM_DATA_TYPES]; * for each new data type. 
**********************************************************************/ -#define makestructure(newfunc,old,print,type,nextfree,blocksize,typestring,usecount) \ +#define makestructure(newfunc, old, type) \ type *newfunc() \ { \ return new type; \ @@ -62,51 +56,9 @@ void old(type* deadelement) delete deadelement; \ } \ - -/********************************************************************** - * newstructure - * - * Allocate a chunk of memory for a particular data type. - **********************************************************************/ - -#define newstructure(name,type,nextfree,blocksize,errorstring,usecount)\ -type *name() /*returns a new type*/\ -{\ - return new type;\ -} - -/********************************************************************** - * oldstructure - * - * Returns a structure to the freelist - **********************************************************************/ - -#define oldstructure(name,type,nextfree,stringtype,usecount)\ -\ -type *name(type* deadelement)\ -{\ - type *returnelement; /*return next ptr*/\ -\ - returnelement=deadelement->next; /*return link*/\ - delete deadelement; \ - return returnelement;\ -} - /*---------------------------------------------------------------------- F u n c t i o n s ----------------------------------------------------------------------*/ -extern TBLOB *newblob(); -extern TBLOB *oldblob(TBLOB *); - -extern TESSLINE *newoutline(); -extern void oldoutline(TESSLINE *); - -extern EDGEPT *newedgept(); -extern EDGEPT *oldedgept(EDGEPT *); - -extern TWERD *newword(); -extern void oldword(TWERD *); - extern LIST new_cell(); extern void free_cell(LIST); #endif diff --git a/dict/Makefile.am b/dict/Makefile.am index 0ecb5c70ff..984ce60040 100644 --- a/dict/Makefile.am +++ b/dict/Makefile.am @@ -2,16 +2,13 @@ SUBDIRS = AM_CPPFLAGS = -I$(top_srcdir)/cutil -I$(top_srcdir)/ccutil \ -I$(top_srcdir)/ccstruct -I$(top_srcdir)/viewer -I$(top_srcdir)/image -EXTRA_DIST = dict.vcproj - include_HEADERS = \ - choicearr.h 
choices.h context.h conversion.h \ dawg.h dict.h matchdefs.h \ - permdawg.h permngram.h permute.h states.h stopper.h trie.h + permngram.h permute.h states.h stopper.h trie.h lib_LTLIBRARIES = libtesseract_dict.la libtesseract_dict_la_SOURCES = \ - choices.cpp context.cpp conversion.cpp \ + context.cpp \ dawg.cpp dict.cpp hyphen.cpp \ permdawg.cpp permngram.cpp permute.cpp states.cpp stopper.cpp trie.cpp diff --git a/dict/Makefile.in b/dict/Makefile.in index 3b091e489c..0270af4f5a 100644 --- a/dict/Makefile.in +++ b/dict/Makefile.in @@ -71,9 +71,9 @@ am__base_list = \ am__installdirs = "$(DESTDIR)$(libdir)" "$(DESTDIR)$(includedir)" LTLIBRARIES = $(lib_LTLIBRARIES) libtesseract_dict_la_LIBADD = -am_libtesseract_dict_la_OBJECTS = choices.lo context.lo conversion.lo \ - dawg.lo dict.lo hyphen.lo permdawg.lo permngram.lo permute.lo \ - states.lo stopper.lo trie.lo +am_libtesseract_dict_la_OBJECTS = context.lo dawg.lo dict.lo hyphen.lo \ + permdawg.lo permngram.lo permute.lo states.lo stopper.lo \ + trie.lo libtesseract_dict_la_OBJECTS = $(am_libtesseract_dict_la_OBJECTS) libtesseract_dict_la_LINK = $(LIBTOOL) --tag=CXX $(AM_LIBTOOLFLAGS) \ $(LIBTOOLFLAGS) --mode=link $(CXXLD) $(AM_CXXFLAGS) \ @@ -246,7 +246,6 @@ libdir = @libdir@ libexecdir = @libexecdir@ localedir = @localedir@ localstatedir = @localstatedir@ -lt_ECHO = @lt_ECHO@ mandir = @mandir@ mkdir_p = @mkdir_p@ oldincludedir = @oldincludedir@ @@ -266,15 +265,13 @@ SUBDIRS = AM_CPPFLAGS = -I$(top_srcdir)/cutil -I$(top_srcdir)/ccutil \ -I$(top_srcdir)/ccstruct -I$(top_srcdir)/viewer -I$(top_srcdir)/image -EXTRA_DIST = dict.vcproj include_HEADERS = \ - choicearr.h choices.h context.h conversion.h \ dawg.h dict.h matchdefs.h \ - permdawg.h permngram.h permute.h states.h stopper.h trie.h + permngram.h permute.h states.h stopper.h trie.h lib_LTLIBRARIES = libtesseract_dict.la libtesseract_dict_la_SOURCES = \ - choices.cpp context.cpp conversion.cpp \ + context.cpp \ dawg.cpp dict.cpp hyphen.cpp \ permdawg.cpp 
permngram.cpp permute.cpp states.cpp stopper.cpp trie.cpp @@ -353,9 +350,7 @@ mostlyclean-compile: distclean-compile: -rm -f *.tab.c -@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/choices.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/context.Plo@am__quote@ -@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/conversion.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/dawg.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/dict.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/hyphen.Plo@am__quote@ diff --git a/dict/choicearr.h b/dict/choicearr.h deleted file mode 100644 index c573b6345e..0000000000 --- a/dict/choicearr.h +++ /dev/null @@ -1,96 +0,0 @@ -/* -*-C-*- - ******************************************************************************** - * - * File: choicearr.h (Formerly choicearr.h) - * Description: - * Author: Mark Seaman, SW Productivity - * Created: Fri Oct 16 14:37:00 1987 - * Modified: Tue Mar 19 15:27:49 1991 (Mark Seaman) marks@hpgrlt - * Language: C - * Package: N/A - * Status: Reusable Software Component - * - * (c) Copyright 1987, Hewlett-Packard Company. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. 
- * - ******************************************************************************** - */ - -#ifndef CHOICEARR_H -#define CHOICEARR_H - -/* ----------------------------------------------------------------------- - I n c l u d e s ----------------------------------------------------------------------- -*/ - -#include "tessarray.h" -#include "choices.h" - -/* ----------------------------------------------------------------------- - T y p e s ----------------------------------------------------------------------- -*/ - -typedef ARRAY CHOICES_LIST; - -#define CHOICES_PER_LIST 40 - -/* ----------------------------------------------------------------------- - M a c r o s ----------------------------------------------------------------------- -*/ - -/********************************************************************** - * free_choice_list - * - * Free a list of choices. Free the array structure but not each of the - * sublists of choices. - **********************************************************************/ - -#define free_choice_list(choice_list) \ -array_free (choice_list) - -/********************************************************************** - * for_each_choice - * - * Iterate through each of the possible choices. - **********************************************************************/ - -#define for_each_choice(array,index) \ -array_loop (array, index) - -/********************************************************************** - * free_all_choices - * - * Free an array of choices (deep free). - **********************************************************************/ - -#define free_all_choices(choices,index) \ -for_each_choice (choices, index) { \ - free_choices ((CHOICES) array_value (choices, index)); \ -} \ -array_free (choices) \ - - -/********************************************************************** - * new_choice_list - * - * Return a new array structure that is a list of choices. Each set of - * choices will be of type CHOICES. 
- **********************************************************************/ - -#define new_choice_list() \ -array_new (CHOICES_PER_LIST) -#endif diff --git a/dict/choices.cpp b/dict/choices.cpp deleted file mode 100644 index 9b206e13ea..0000000000 --- a/dict/choices.cpp +++ /dev/null @@ -1,269 +0,0 @@ -/* -*-C-*- - ******************************************************************************** - * - * File: choices.c (Formerly choices.c) - * Description: Handle the new ratings choices for Wise Owl - * Author: Mark Seaman, OCR Technology - * Created: Fri Sep 22 14:05:51 1989 - * Modified: Wed May 22 14:12:34 1991 (Mark Seaman) marks@hpgrlt - * Language: C - * Package: N/A - * Status: Experimental (Do Not Distribute) - * - * (c) Copyright 1989, Hewlett-Packard Company. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. 
- * - *********************************************************************************/ -#include "choices.h" -#include "emalloc.h" -#include "globals.h" -#include "host.h" -#include "danerror.h" -#include "structures.h" -#include "tordvars.h" -#include "tprintf.h" -#include "unicharset.h" -#include "dict.h" -#include "image.h" - -/*---------------------------------------------------------------------- - Variables -------------------------------------------------------------------------*/ -#define CHOICEBLOCK 100 /* Cells per block */ - -makestructure (newchoice, oldchoice, printchoice, A_CHOICE, -freechoice, CHOICEBLOCK, "A_CHOICE", choicecount) -/*---------------------------------------------------------------------- - F u n c t i o n s -----------------------------------------------------------------------*/ -/** - * append_char_choice - * - * Create a new choice record. Store the string value in a safe place. - * Add the new choice record to the list. - * - * NB - This is only used by matchers, so permuter is always NO_PERM - * SPC 16/9/92 - */ -CHOICES append_char_choice(CHOICES ratings, - const char *string, - const char *lengths, - float rating, - float certainty, - inT8 config, - int script_id) { - A_CHOICE *this_choice; - - this_choice = new_choice (string, lengths, rating, certainty, - config, script_id, NO_PERM, false, NULL); - ratings = push_last (ratings, (LIST) this_choice); - return (ratings); -} - -/** - * copy_choices - * - * Copy a list of choices. This means that there will be two copies - * in memory. 
- */ -CHOICES copy_choices(CHOICES choices) { - CHOICES l; - CHOICES result = NIL; - - iterate_list(l, choices) { - A_CHOICE *choice = (A_CHOICE *)(first_node(l)); - result = push (result, - (LIST) new_choice (class_string(choice), - class_lengths(choice), - class_rating(choice), - class_certainty(choice), - class_config(choice), - class_script_id(choice), - class_permuter(choice), - class_fragment_mark(choice), - class_fragment_lengths(choice))); - } - return (reverse_d (result)); -} - -/** - * clone_choice - * - * Copy the contents of the given values to the corresponding values in - * a given choice replacing any previous values it might have had. - */ -void clone_choice(A_CHOICE *choice, const char *string, - const char *lengths, float rating, float certainty, - inT8 permuter, bool fragment_mark, - const char *fragment_lengths) { - if (choice->string) strfree (class_string (choice)); - if (choice->lengths) strfree (class_lengths (choice)); - if (choice->fragment_lengths) strfree(choice->fragment_lengths); - - choice->string = strsave (string); - choice->lengths = strsave (lengths); - choice->rating = rating; - choice->certainty = certainty; - choice->permuter = permuter; - choice->fragment_mark = fragment_mark; - choice->fragment_lengths = - fragment_lengths ? strsave(fragment_lengths) : NULL; -} - -/** - * clear_choice - * - * Set the fields in this choice to be defaulted bad initial values. - */ -void clear_choice(A_CHOICE *choice) { - choice->string = NULL; - choice->lengths = NULL; - choice->rating = MAX_FLOAT32; - choice->certainty = -MAX_FLOAT32; - choice->fragment_mark = false; - choice->fragment_lengths = NULL; -} - - -/** - * free_choice - * - * Free up the memory taken by one choice rating. 
- */ -void free_choice(void *arg) { //LIST choice) - A_CHOICE *this_choice; - LIST choice = (LIST) arg; - - this_choice = (A_CHOICE *) choice; - if (this_choice) { - if (this_choice->string) - strfree (this_choice->string); - if (this_choice->lengths) - strfree (this_choice->lengths); - if (this_choice->fragment_lengths) - strfree (this_choice->fragment_lengths); - oldchoice(this_choice); - } -} - -/** - * get_best_free_other - * - * Returns the best of two choices and frees the other (worse) choice. - * A choice is better if it has a non-NULL string and has a lower rating - * than the other choice. - */ -A_CHOICE *get_best_free_other(A_CHOICE *choice_1, A_CHOICE *choice_2) { - if (!choice_1) return choice_2; - if (!choice_2) return choice_1; - if (class_rating (choice_1) < class_rating (choice_2) || - class_string (choice_2) == NULL) { - free_choice(choice_2); - return choice_1; - } else { - free_choice(choice_1); - return choice_2; - } -} - -/** - * new_choice - * - * Create a new choice record. Store the string value in a safe place. - */ -A_CHOICE *new_choice(const char *string, - const char *lengths, - float rating, - float certainty, - inT8 config, - int script_id, - char permuter, - bool fragment_mark, - const char *fragment_lengths) { - A_CHOICE *this_choice; - - this_choice = newchoice(); - this_choice->string = strsave(string); - this_choice->lengths = strsave(lengths); - this_choice->rating = rating; - this_choice->certainty = certainty; - this_choice->config = config; - this_choice->permuter = permuter; - this_choice->script_id = script_id; - this_choice->fragment_mark = fragment_mark; - this_choice->fragment_lengths = - fragment_lengths ? 
strsave(fragment_lengths) : NULL; - - return (this_choice); -} - -A_CHOICE *new_choice(const char *string, - const char *lengths, - float rating, - float certainty, - inT8 config, - char permuter) { - return new_choice(string, lengths, rating, certainty, - config, -1, permuter, false, NULL); -} - - -/** - * print_choices - * - * Print the rating for a particular blob or word. - */ -namespace tesseract { -void Dict::print_choices(const char *label, - CHOICES choices) { // List of (A_CHOICE*). - tprintf("%s\n", label); - if (choices == NIL) - tprintf(" No rating "); - - iterate(choices) { - tprintf("%.2f %.2f", best_rating(choices), best_certainty(choices)); - print_word_string(best_string(choices)); - } - tprintf("\n"); -} - -/** - * print_word_choice - * - * Print the string in a human-readable format and ratings for a word. - */ -void Dict::print_word_choice(const char *label, A_CHOICE* choice) { - tprintf("%s : ", label); - if (choice == NULL) { - tprintf("No rating\n"); - } else { - tprintf("%.2f %.2f", class_rating(choice), class_certainty(choice)); - print_word_string(class_string(choice)); - tprintf("\n"); - } -} - -/** - * print_word_string - * - * Print the string in a human-readable format. - * The output is not newline terminated. - */ -void Dict::print_word_string(const char* str) { - int step = 1; - for (int i = 0; str[i] != '\0'; i += step) { - step = (getUnicharset().get_fragment(str) ? 
- strlen(str) : getUnicharset().step(str + i)); - int unichar_id = getUnicharset().unichar_to_id(str + i, step); - tprintf(" : %s ", getUnicharset().debug_str(unichar_id).string()); - } -} -} // namespace tesseract diff --git a/dict/choices.h b/dict/choices.h deleted file mode 100644 index b733fb379d..0000000000 --- a/dict/choices.h +++ /dev/null @@ -1,246 +0,0 @@ -/* -*-C-*- - ******************************************************************************** - * - * File: choices.h (Formerly choices.h) - * Description: Handle the new ratings choices for Wise Owl - * Author: Mark Seaman, OCR Technology - * Created: Fri Sep 22 14:05:51 1989 - * Modified: Fri Jan 4 12:04:01 1991 (Mark Seaman) marks@hpgrlt - * Language: C - * Package: N/A - * Status: Experimental (Do Not Distribute) - * - * (c) Copyright 1989, Hewlett-Packard Company. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. - * - ******************************************************************************** - * - * FUNCTIONS TO CALL - * ----------------- - * append_char_choice - Create a new choice for a character and add it to the list. - * class_rating - Return the rating of a given character class. - * class_string - Return the string corresponding to a character choice. - * free_choice - Free up the memory taken by one choice rating. - * new_choice - Create one choice record one set up the fields. 
- * - *********************************************************************************/ - -#ifndef CHOICES_H -#define CHOICES_H - -#include -#include - -#include "oldlist.h" -#include "unicharset.h" - -/*---------------------------------------------------------------------- - T y p e s -----------------------------------------------------------------------*/ -typedef LIST CHOICES; /* CHOICES */ -//typedef float PROBABILITY; /* PROBABILITY */ -//typedef char PERM_TYPE; /* PERMUTER CODE */ - -typedef struct choicestruct -{ /* A_CHOICE */ - float rating; - float certainty; - char permuter; - inT8 config; - char *string; - char *lengths; //< length of each unichar in the string - int script_id; - char *fragment_lengths; //< length of fragments for each unichar in string - /** - * if true, indicates that this choice - * was chosen over a better one that - * contained a fragment - */ - bool fragment_mark; - -} A_CHOICE; - -/*---------------------------------------------------------------------- - M a c r o s -----------------------------------------------------------------------*/ -/** - * best_string - * - * Return the string corresponding to the best choice. - */ -#define best_string(choices) \ -(first_node (choices) ? ((A_CHOICE*) (first_node (choices)))->string : NULL) - -/** - * best_lengths - * - * Return the lengths corresponding to the best choice. - */ -#define best_lengths(choices) \ -(first_node (choices) ? ((A_CHOICE*) (first_node (choices)))->lengths : NULL) - -/** - * best_rating - * - * Return the rating of the best choice. - */ -#define best_rating(choices) \ -(((A_CHOICE*) (first_node (choices)))->rating) - -/** - * best_certainty - * - * Return the certainty of the best choice. - */ -#define best_certainty(choices) \ -(((A_CHOICE*) (first_node (choices)))->certainty) - -/** - * class_rating - * - * Return the rating of a given character class. 
- */ -#define class_rating(choice) \ -(((A_CHOICE*) (choice))->rating) - -/** - * class_certainty - * - * Return the certainty of a given character class. - */ -#define class_certainty(choice) \ -(((A_CHOICE*) (choice))->certainty) - -/** - * class_string - * - * Return the string of a given character class. - */ -#define class_string(choice) \ -(((A_CHOICE*) (choice))->string) - -/** - * class_lengths - * - * Return the lengths of a given character class. - */ -#define class_lengths(choice) \ -(((A_CHOICE*) (choice))->lengths) - -/** - * class_permuter - * - * Return the permuter of a given character class. - */ -#define class_permuter(choice) \ -(((A_CHOICE*) (choice))->permuter) - -/** - * class_config - * - * Return the config of a given character class. - */ -#define class_config(choice) \ -(((A_CHOICE*) (choice))->config) - -/** - * class_script - * - * Return the script of a given character class. - */ -#define class_script_id(choice) \ -(((A_CHOICE*) (choice))->script_id) - -/** - * free_choices - * - * Free a list of choices. - */ -#define free_choices(c) \ -destroy_nodes ((c), free_choice) - -/** - * print_bold - * - * Print a string in bold type by using escape sequences. This only - * works for certain output devices. - */ -#define print_bold(string) \ -cprintf ("\033&dB%s\033&d@", string) - - -/*---------------------------------------------------------------------- - F u n c t i o n s -----------------------------------------------------------------------*/ - -/// Returns true if fragment_mark is set for the given choice. -inline bool class_fragment_mark(A_CHOICE *choice) { - return choice->fragment_mark; -} - -/// Sets fragment_mark of choice to the given value. -inline void set_class_fragment_mark(A_CHOICE *choice, bool mark) { - choice->fragment_mark = mark; -} - -/// Returns fragment_lengths of the given class. 
-inline const char *class_fragment_lengths(A_CHOICE *choice) { - return choice->fragment_lengths; -} - -CHOICES append_char_choice(CHOICES ratings, - const char *string, - const char *lengths, - float rating, - float certainty, - inT8 config, - int script_id); - -CHOICES copy_choices(CHOICES choices); - -/// Copy the given values into corresponding fields of choice. -void clone_choice(A_CHOICE *choice, const char *string, - const char *lengths, float rating, float certainty, - inT8 permuter, bool fragment_mark, - const char *fragment_lengths); - -/// Copy the contents of choice_1 into choice_2. -inline void clone_choice(A_CHOICE *choice_2, A_CHOICE *choice_1) { - clone_choice(choice_2, class_string(choice_1), class_lengths(choice_1), - class_rating(choice_1), class_certainty(choice_1), - class_permuter(choice_1), class_fragment_mark(choice_1), - class_fragment_lengths(choice_1)); -} - -void clear_choice(A_CHOICE *choice); - -void free_choice(void *arg); - -A_CHOICE *get_best_free_other(A_CHOICE *choice_1, A_CHOICE *choice_2); - -A_CHOICE *new_choice(const char *string, - const char *lengths, - float rating, - float certainty, - inT8 config, - int script_id, - char permuter, - bool fragment_mark, - const char *fragment_lengths); - -A_CHOICE *new_choice(const char *string, - const char *lengths, - float rating, - float certainty, - inT8 config, - char permuter); - -#endif diff --git a/dict/context.cpp b/dict/context.cpp index bef4c8c8c1..dd36cb88e1 100644 --- a/dict/context.cpp +++ b/dict/context.cpp @@ -22,68 +22,20 @@ ** limitations under the License. 
* *********************************************************************************/ -#include "context.h" -#include "callcpp.h" -#include "ccutil.h" #include "dict.h" -#include "globals.h" -#include "image.h" -#include "ratngs.h" -#include "tordvars.h" +#include "tprintf.h" #include "unicharset.h" -#include -#include -#include -#include - -// Initialize probability_in_context to point to a default implementation (a -// main program can override this). -PROBABILITY_IN_CONTEXT_FUNCTION probability_in_context = &def_probability_in_context; - -double def_probability_in_context(const char* context, - int context_bytes, - const char* character, - int character_bytes) { - (void) context; - (void) context_bytes; - (void) character; - (void) character_bytes; - return 0.0; -} - -/*---------------------------------------------------------------------- - V a r i a b l e s -----------------------------------------------------------------------*/ -static FILE *choice_file = NULL; /* File to save choices */ - -/*---------------------------------------------------------------------- - F u n c t i o n s -----------------------------------------------------------------------*/ -/********************************************************************** - * close_choices - * - * Close the choices file. - **********************************************************************/ -void close_choices() { - if (choice_file) - fclose(choice_file); -} - namespace tesseract { -/********************************************************************** - * case_ok - * - * Check a string to see if it matches a set of lexical rules. - **********************************************************************/ -int Context::case_ok(const WERD_CHOICE &word, - const UNICHARSET &unicharset) { - static int case_state_table[6][4] = { { - /* 0. Begining of word */ - /* P U L D */ - /* -1. 
Error on case */ +static const int kMinAbsoluteGarbageWordLength = 10; +static const float kMinAbsoluteGarbageAlphanumFrac = 0.5f; + +const int case_state_table[6][4] = { { + /* 0. Begining of word */ + /* P U L D */ + /* -1. Error on case */ 0, 1, 5, 4 }, { /* 1. After initial capital */ @@ -103,10 +55,10 @@ int Context::case_ok(const WERD_CHOICE &word, }, }; - register int last_state = 0; - register int state = 0; - register int x; - +int Dict::case_ok(const WERD_CHOICE &word, const UNICHARSET &unicharset) { + int last_state = 0; + int state = 0; + int x; for (x = 0; x < word.length(); ++x) { UNICHAR_ID ch_id = word.unichar_id(x); if (unicharset.get_isupper(ch_id)) @@ -117,37 +69,22 @@ int Context::case_ok(const WERD_CHOICE &word, state = case_state_table[state][3]; else state = case_state_table[state][0]; - - if (tord_debug_3) - tprintf("Case state = %d, char = %s\n", state, - unicharset.id_to_unichar(ch_id)); - if (state == -1) { - /* Handle ACCRONYMs */ -#if 0 - if (word[x] == 's' && - !isalpha (word[x + 1]) && !isdigit (word[x + 1])) - state = last_state; - else -#endif - return (FALSE); - } - + if (state == -1) return false; last_state = state; } - return state != 5; /*single lower is bad */ + return state != 5; // single lower is bad } -} // namespace tesseract - -/********************************************************************** - * write_choice_line - * - * Write a blank line to the choices file. This will indicate that - * there is a new word that is following. 
- **********************************************************************/ -void write_choice_line() { - if (choice_file) { - fprintf (choice_file, "\n"); - fflush(choice_file); +bool Dict::absolute_garbage(const WERD_CHOICE &word, + const UNICHARSET &unicharset) { + if (word.length() < kMinAbsoluteGarbageWordLength) return false; + int num_alphanum = 0; + for (int x = 0; x < word.length(); ++x) { + num_alphanum += (unicharset.get_isalpha(word.unichar_id(x)) || + unicharset.get_isdigit(word.unichar_id(x))); } + return (static_cast(num_alphanum) / + static_cast(word.length()) < kMinAbsoluteGarbageAlphanumFrac); } + +} // namespace tesseract diff --git a/dict/context.h b/dict/context.h deleted file mode 100644 index 0bd1df34eb..0000000000 --- a/dict/context.h +++ /dev/null @@ -1,85 +0,0 @@ -/* -*-C-*- - ******************************************************************************** - * - * File: context.h (Formerly context.h) - * Description: Context checking functions - * Author: Mark Seaman, OCR Technology - * Created: Thu Feb 15 11:18:24 1990 - * Modified: Tue Jul 9 17:00:38 1991 (Mark Seaman) marks@hpgrlt - * Language: C - * Package: N/A - * Status: Experimental (Do Not Distribute) - * - * (c) Copyright 1990, Hewlett-Packard Company. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. 
- * - ******************************************************************************** - */ - -#ifndef CONTEXT_H -#define CONTEXT_H - -#include "choices.h" -#include "ratngs.h" - -/*---------------------------------------------------------------------- - F u n c t i o n s -----------------------------------------------------------------------*/ -namespace tesseract { - -class Context { - public: - static int case_ok(const WERD_CHOICE &word, const UNICHARSET &unicharset); -}; -} // namespace tesseract - -void close_choices(); - -void write_choice_line(); - -typedef double (*PROBABILITY_IN_CONTEXT_FUNCTION)(const char* context, - int context_bytes, - const char* character, - int character_bytes); - -extern PROBABILITY_IN_CONTEXT_FUNCTION probability_in_context; - -extern double def_probability_in_context(const char* context, - int context_bytes, - const char* character, - int character_bytes); - -/* -#if defined(__STDC__) || defined(__cplusplus) -# define _ARGS(s) s -#else -# define _ARGS(s) () -#endif*/ - -/* context.c -void close_choices - _ARGS((void)); - -void fix_quotes - _ARGS((char *str)); - -int punctuation_ok - _ARGS((char *word)); - -int case_ok - _ARGS((char *word)); - -void write_choice_line - _ARGS((void)); - -#undef _ARGS -*/ -#endif diff --git a/dict/conversion.cpp b/dict/conversion.cpp deleted file mode 100644 index d3dca4178d..0000000000 --- a/dict/conversion.cpp +++ /dev/null @@ -1,125 +0,0 @@ -/////////////////////////////////////////////////////////////////////// -// File: conversion.cpp -// Description: Collection of utility functions for A_CHOICE conversions. -// TODO(daria): delete this file when conversion to unichar_ids -// is finished and all permuters are completely updated/replaced. -// Author: Daria Antonova -// Created: Mon Jun 23 11:26:43 PDT 2008 -// -// (C) Copyright 2007, Google Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -/////////////////////////////////////////////////////////////////////// - -#ifdef _MSC_VER -#pragma warning(disable:4244) // Conversion warnings -#pragma warning(disable:4018) // signed/unsigned warnings -#endif - -#include "conversion.h" - -#include "callcpp.h" -#include "choicearr.h" -#include "choices.h" -#include "dict.h" -#include "ratngs.h" -#include "stopper.h" -#include "unicharset.h" - -namespace tesseract { -int Dict::valid_word(const char *string) { - WERD_CHOICE word(string, getUnicharset()); - return valid_word(word); -} - -void Dict::LogNewWordChoice(A_CHOICE *a_choice, - FLOAT32 adjust_factor, - const float certainties[], - const UNICHARSET &unicharset) { - WERD_CHOICE word_choice(strlen(a_choice->lengths)); - convert_to_word_choice(a_choice, unicharset, &word_choice); - LogNewChoice(word_choice, adjust_factor, certainties, false); -} -} // namespace tesseract - -// Fills in the given WERD_CHOICE with information from the given A_CHOICE. -// Assumes that word_choice pointer is not NULL. 
-void convert_to_word_choice(const A_CHOICE *a_choice, - const UNICHARSET ¤t_unicharset, - WERD_CHOICE *word_choice) { - if (a_choice == NULL) return; - const char *string = a_choice->string; - const char *lengths = a_choice->lengths; - const char *fragment_lengths = a_choice->fragment_lengths; - int offset = 0; - for (int x = 0; x < strlen(a_choice->lengths); ++x) { - UNICHAR_ID unichar_id = - current_unicharset.unichar_to_id(string + offset, lengths[x]); - word_choice->append_unichar_id(unichar_id, fragment_lengths[x], 0.0, 0.0); - offset += lengths[x]; - } - word_choice->set_rating(a_choice->rating); - word_choice->set_certainty(a_choice->certainty); - word_choice->set_permuter(a_choice->permuter); - word_choice->set_fragment_mark(a_choice->fragment_mark); -} - -// Returns the best of two choices and deletes the other (worse) choice. -// A choice is better if it has a non-empty string and has a lower -// rating than the other choice. If the ratings are the same, -// a_choice is preferred over choice. -// If the best choice is in the A_CHOICE form, copies it to a new -// WERD_CHOICE and deletes A_CHOICE. -WERD_CHOICE *get_best_delete_other(const UNICHARSET ¤t_unicharset, - WERD_CHOICE *choice, - A_CHOICE *a_choice) { - if (!a_choice) return choice; - if (choice != NULL && - (choice->rating() < a_choice->rating || a_choice->string == NULL)) { - free_choice(a_choice); - return choice; - } else { - delete choice; - WERD_CHOICE *word_choice = new WERD_CHOICE(); - convert_to_word_choice(a_choice, current_unicharset, word_choice); - free_choice(a_choice); - return word_choice; - } -} - -// Convert BLOB_CHOICE_LIST_VECTOR to CHOICES_LIST. -// The caller is responsible for deleting the returned CHOICES_LIST. 
-CHOICES_LIST convert_to_choices_list( - const BLOB_CHOICE_LIST_VECTOR &char_choices, - const UNICHARSET ¤t_unicharset) { - CHOICES_LIST old_char_choices = new_choice_list(); - int x; - BLOB_CHOICE_IT it; - BLOB_CHOICE *blob_choice; - char choice_lengths[2] = {0, 0}; - char unichar[UNICHAR_LEN + 1]; - for (x = 0; x < char_choices.length(); ++x) { - it.set_to_list(char_choices.get(x)); - LIST result = NIL; - for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { - blob_choice = it.data(); - strcpy(unichar, - current_unicharset.id_to_unichar(blob_choice->unichar_id())); - choice_lengths[0] = strlen(unichar); - result = append_char_choice(result, unichar, choice_lengths, - blob_choice->rating(), - blob_choice->certainty(), - blob_choice->config(), NULL); - } - old_char_choices = array_push(old_char_choices, result); - } - return old_char_choices; -} diff --git a/dict/conversion.h b/dict/conversion.h deleted file mode 100644 index ca14af3b3d..0000000000 --- a/dict/conversion.h +++ /dev/null @@ -1,43 +0,0 @@ -/////////////////////////////////////////////////////////////////////// -// File: conversion.h -// Description: Collection of utility functions for A_CHOICE conversions. -// TODO(daria): delete this file when conversion to unichar_ids -// is finished and all permuters are completely updated/replaced. -// Author: Daria Antonova -// Created: Mon Jun 23 11:26:43 PDT 2008 -// -// (C) Copyright 2007, Google Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
-// -/////////////////////////////////////////////////////////////////////// - -#ifndef TESSERACT_DICT_CONVERSION_H_ -#define TESSERACT_DICT_CONVERSION_H_ - -#include "choicearr.h" -#include "choices.h" -#include "ratngs.h" -#include "unicharset.h" - -void convert_to_word_choice(const A_CHOICE *a_choice, - const UNICHARSET &current_unicharset, - WERD_CHOICE *word_choice); - -WERD_CHOICE *get_best_delete_other( - const UNICHARSET &current_unicharset, - WERD_CHOICE *choice, - A_CHOICE *a_choice); - -CHOICES_LIST convert_to_choices_list( - const BLOB_CHOICE_LIST_VECTOR &char_choices, - const UNICHARSET &current_unicharset); - -#endif  // TESSERACT_DICT_CONVERSION_H_ diff --git a/dict/dawg.cpp b/dict/dawg.cpp index efa352e94c..a4f3dc5849 100644 --- a/dict/dawg.cpp +++ b/dict/dawg.cpp @@ -32,7 +32,6 @@ #endif #include "dawg.h" -#include "context.h" #include "cutil.h" #include "dict.h" #include "emalloc.h" @@ -41,12 +40,6 @@ #include "strngs.h" #include "tprintf.h" -/*---------------------------------------------------------------------- - V a r i a b l e s -----------------------------------------------------------------------*/ -INT_VAR(dawg_debug_level, 0, "Set to 1 for general debug info" ", to 2 for more details, to 3 to see all the debug messages"); - /*---------------------------------------------------------------------- F u n c t i o n s f o r D a w g ----------------------------------------------------------------------*/ @@ -57,7 +50,7 @@ bool Dawg::word_in_dawg(const WERD_CHOICE &word) const { NODE_REF node = 0; int end_index = word.length() - 1; for (int i = 0; i <= end_index; i++) { - if (dawg_debug_level > 1) { + if (debug_level_ > 1) { tprintf("word_in_dawg: exploring node " REFFORMAT ":\n", node); print_node(node, MAX_NODE_EDGES_DISPLAY); tprintf("\n"); @@ -101,7 +94,7 @@ int Dawg::check_for_words(const char *filename, } fclose (word_file); // Make sure the user sees this with fprintf instead of tprintf. 
- if (dawg_debug_level) tprintf("Number of lost words=%d\n", misses); + if (debug_level_) tprintf("Number of lost words=%d\n", misses); return misses; } @@ -127,7 +120,7 @@ bool Dawg::match_words(WERD_CHOICE *word, inT32 index, if (edge != NO_EDGE) { // normal edge in DAWG node = next_node(edge); if (word_end) { - if (dawg_debug_level > 1) word->print("match_words() found: "); + if (debug_level_ > 1) word->print("match_words() found: "); return true; } else if (node != 0) { return match_words(word, index+1, node, wildcard); @@ -138,7 +131,7 @@ bool Dawg::match_words(WERD_CHOICE *word, inT32 index, } void Dawg::init(DawgType type, const STRING &lang, - PermuterType perm, int unicharset_size) { + PermuterType perm, int unicharset_size, int debug_level) { type_ = type; lang_ = lang; perm_ = perm; @@ -150,6 +143,8 @@ void Dawg::init(DawgType type, const STRING &lang, letter_mask_ = ~(~0 << flag_start_bit_); next_node_mask_ = ~0 << (flag_start_bit_ + NUM_FLAG_BITS); flags_mask_ = ~(letter_mask_ | next_node_mask_); + + debug_level_ = debug_level; } @@ -275,9 +270,12 @@ void SquishedDawg::print_edge(EDGE_REF edge) const { } } -void SquishedDawg::read_squished_dawg(FILE *file, DawgType type, - const STRING &lang, PermuterType perm) { - if (dawg_debug_level) tprintf("Reading squished dawg\n"); +void SquishedDawg::read_squished_dawg(FILE *file, + DawgType type, + const STRING &lang, + PermuterType perm, + int debug_level) { + if (debug_level) tprintf("Reading squished dawg\n"); // Read the magic number and if it does not match kDawgMagicNumber // set swap to true to indicate that we need to switch endianness. 
@@ -293,7 +291,7 @@ void SquishedDawg::read_squished_dawg(FILE *file, DawgType type, unicharset_size = reverse32(unicharset_size); num_edges_ = reverse32(num_edges_); } - Dawg::init(type, lang, perm, unicharset_size); + Dawg::init(type, lang, perm, unicharset_size, debug_level); edges_ = (EDGE_ARRAY) memalloc(sizeof(EDGE_RECORD) * num_edges_); fread(&edges_[0], sizeof(EDGE_RECORD), num_edges_, file); @@ -303,7 +301,7 @@ void SquishedDawg::read_squished_dawg(FILE *file, DawgType type, edges_[edge] = reverse64(edges_[edge]); } } - if (dawg_debug_level > 2) { + if (debug_level > 2) { tprintf("type: %d lang: %s perm: %d unicharset_size: %d num_edges: %d\n", type_, lang_.string(), perm_, unicharset_size_, num_edges_); for (edge = 0; edge < num_edges_; ++edge) @@ -340,8 +338,7 @@ NODE_MAP SquishedDawg::build_node_map(inT32 *num_nodes) const { return (node_map); } -void SquishedDawg::write_squished_dawg(const char *filename) { - FILE *file; +void SquishedDawg::write_squished_dawg(FILE *file) { EDGE_REF edge; inT32 num_edges; inT32 node_count = 0; @@ -349,16 +346,10 @@ void SquishedDawg::write_squished_dawg(const char *filename) { EDGE_REF old_index; EDGE_RECORD temp_record; - if (dawg_debug_level) tprintf("write_squished_dawg\n"); + if (debug_level_) tprintf("write_squished_dawg\n"); node_map = build_node_map(&node_count); -#ifdef WIN32 - file = open_file(filename, "wb"); -#else - file = open_file(filename, "w"); -#endif - // Write the magic number to help detecting a change in endianness. 
inT16 magic = kDawgMagicNumber; fwrite(&magic, sizeof(inT16), 1, file); @@ -372,7 +363,7 @@ void SquishedDawg::write_squished_dawg(const char *filename) { fwrite(&num_edges, sizeof(inT32), 1, file); // write edge count to file - if (dawg_debug_level) { + if (debug_level_) { tprintf("%d nodes in DAWG\n", node_count); tprintf("%d edges in DAWG\n", num_edges); } @@ -394,7 +385,6 @@ void SquishedDawg::write_squished_dawg(const char *filename) { } } free(node_map); - fclose(file); } } // namespace tesseract diff --git a/dict/dawg.h b/dict/dawg.h index f711021f52..e70f0fdb19 100644 --- a/dict/dawg.h +++ b/dict/dawg.h @@ -32,16 +32,8 @@ ----------------------------------------------------------------------*/ #include "elst.h" -#include "general.h" #include "ratngs.h" -#include "varable.h" - -/*---------------------------------------------------------------------- - V a r i a b l e s -----------------------------------------------------------------------*/ - -extern INT_VAR_H(dawg_debug_level, 0, "Set to 1 for general debug info, to" - " 2 for more details, to 3 to see all the debug messages"); +#include "params.h" #ifdef __MSW32__ #define NO_EDGE (inT64) 0xffffffffffffffffi64 @@ -75,11 +67,9 @@ typedef GenericVector SuccessorListsVector; enum DawgType { DAWG_TYPE_PUNCTUATION, - DAWG_TYPE_PREFIX, - DAWG_TYPE_ROOT, DAWG_TYPE_WORD, - DAWG_TYPE_SUFFIX, DAWG_TYPE_NUMBER, + DAWG_TYPE_PATTERN, DAWG_TYPE_COUNT // number of enum entries }; @@ -87,10 +77,11 @@ enum DawgType { /*---------------------------------------------------------------------- C o n s t a n t s ----------------------------------------------------------------------*/ + #define FORWARD_EDGE (inT32) 0 #define BACKWARD_EDGE (inT32) 1 #define MAX_NODE_EDGES_DISPLAY (inT64) 100 -#define LAST_FLAG (inT64) 1 +#define MARKER_FLAG (inT64) 1 #define DIRECTION_FLAG (inT64) 2 #define WERD_END_FLAG (inT64) 4 #define LETTER_START_BIT 0 @@ -99,15 +90,13 @@ enum DawgType { // Set kBeginningDawgsType[i] to true if a Dawg of // 
DawgType i can contain the beginning of a word. -static const bool kBeginningDawgsType[] = {1, 1, 0, 1, 0, 1 }; +static const bool kBeginningDawgsType[] = { 1, 1, 1, 1 }; static const bool kDawgSuccessors[DAWG_TYPE_COUNT][DAWG_TYPE_COUNT] = { - { 0, 1, 0, 1, 0, 0 }, // for DAWG_TYPE_PUNCTUATION - { 0, 0, 1, 1, 0, 0 }, // for DAWG_TYPE_PREFIX - { 0, 0, 0, 0, 1, 0 }, // for DAWG_TYPE_ROOT - { 1, 0, 0, 0, 0, 0 }, // for DAWG_TYPE_WORD - { 1, 0, 0, 0, 0, 0 }, // for DAWG_TYPE_SUFFIX - { 0, 0, 0, 0, 0, 0 } // for DAWG_TYPE_NUMBER + { 0, 1, 1, 0 }, // for DAWG_TYPE_PUNCTUATION + { 1, 0, 0, 0 }, // for DAWG_TYPE_WORD + { 1, 0, 0, 0 }, // for DAWG_TYPE_NUMBER + { 0, 0, 0, 0 }, // for DAWG_TYPE_PATTERN }; static const char kWildcard[] = "*"; @@ -176,6 +165,20 @@ class Dawg { /// At most max_num_edges will be printed. virtual void print_node(NODE_REF node, int max_num_edges) const = 0; + /// Fills vec with unichar ids that represent the character classes + /// of the given unichar_id. + virtual void unichar_id_to_patterns(UNICHAR_ID unichar_id, + const UNICHARSET &unicharset, + GenericVector *vec) const {}; + + /// Returns the given EDGE_REF if the EDGE_RECORD that it points to has + /// a self loop and the given unichar_id matches the unichar_id stored in the + /// EDGE_RECORD, returns NO_EDGE otherwise. + virtual EDGE_REF pattern_loop_edge( + EDGE_REF edge_ref, UNICHAR_ID unichar_id, bool word_end) const { + return false; + } + protected: Dawg() {} @@ -183,6 +186,10 @@ class Dawg { inline NODE_REF next_node_from_edge_rec(const EDGE_RECORD &edge_rec) const { return ((edge_rec & next_node_mask_) >> next_node_start_bit_); } + /// Returns the marker flag of this edge. + inline bool marker_flag_from_edge_rec(const EDGE_RECORD &edge_rec) const { + return (edge_rec & (MARKER_FLAG << flag_start_bit_)) != 0; + } /// Returns the direction flag of this edge. 
inline int direction_from_edge_rec(const EDGE_RECORD &edge_rec) const { return ((edge_rec & (DIRECTION_FLAG << flag_start_bit_))) ? @@ -204,8 +211,8 @@ class Dawg { *edge_rec |= ((value << next_node_start_bit_) & next_node_mask_); } /// Sets this edge record to be the last one in a sequence of edges. - inline void set_last_flag_in_edge_rec(EDGE_RECORD *edge_rec) { - *edge_rec |= (LAST_FLAG << flag_start_bit_); + inline void set_marker_flag_in_edge_rec(EDGE_RECORD *edge_rec) { + *edge_rec |= (MARKER_FLAG << flag_start_bit_); } /// Sequentially compares the given values of unichar ID, next node /// and word end marker with the values in the given EDGE_RECORD. @@ -249,7 +256,7 @@ class Dawg { /// Sets type_, lang_, perm_, unicharset_size_. /// Initializes the values of various masks from unicharset_size_. void init(DawgType type, const STRING &lang, - PermuterType perm, int unicharset_size); + PermuterType perm, int unicharset_size, int debug_level); /// Matches all of the words that are represented by this string. /// If wilcard is set to something other than INVALID_UNICHAR_ID, @@ -274,6 +281,8 @@ class Dawg { uinT64 next_node_mask_; uinT64 flags_mask_; uinT64 letter_mask_; + // Level of debug statements to print to stdout. + int debug_level_; }; // @@ -284,8 +293,7 @@ struct DawgInfo { DawgInfo() : dawg_index(-1), ref(NO_EDGE) {} DawgInfo(int i, EDGE_REF r) : dawg_index(i), ref(r) {} bool operator==(const DawgInfo &other) { - return (this->dawg_index == other.dawg_index && - this->ref == other.ref); + return (this->dawg_index == other.dawg_index && this->ref == other.ref); } int dawg_index; EDGE_REF ref; @@ -306,12 +314,13 @@ class DawgInfoVector : public GenericVector { /// Adds an entry for the given dawg_index with the given node to the vec. /// Returns false if the same entry already exists in the vector, /// true otherwise. 
- inline bool add_unique(const DawgInfo &new_info, const char *debug_msg) { + inline bool add_unique(const DawgInfo &new_info, bool debug, + const char *debug_msg) { for (int i = 0; i < size_used_; ++i) { if (data_[i] == new_info) return false; } push_back(new_info); - if (dawg_debug_level) { + if (debug) { tprintf("%s[%d, " REFFORMAT "]\n", debug_msg, new_info.dawg_index, new_info.ref); } @@ -344,28 +353,29 @@ class DawgInfoVector : public GenericVector { // class SquishedDawg : public Dawg { public: - SquishedDawg(FILE *file, DawgType type, - const STRING &lang, PermuterType perm) { - read_squished_dawg(file, type, lang, perm); + SquishedDawg(FILE *file, DawgType type, const STRING &lang, + PermuterType perm, int debug_level) { + read_squished_dawg(file, type, lang, perm, debug_level); num_forward_edges_in_node0 = num_forward_edges(0); } SquishedDawg(const char* filename, DawgType type, - const STRING &lang, PermuterType perm) { + const STRING &lang, PermuterType perm, int debug_level) { FILE *file = fopen(filename, "rb"); if (file == NULL) { tprintf("Failed to open dawg file %s\n", filename); exit(1); } - read_squished_dawg(file, type, lang, perm); + read_squished_dawg(file, type, lang, perm, debug_level); num_forward_edges_in_node0 = num_forward_edges(0); fclose(file); } SquishedDawg(EDGE_ARRAY edges, int num_edges, DawgType type, - const STRING &lang, PermuterType perm, int unicharset_size) : + const STRING &lang, PermuterType perm, + int unicharset_size, int debug_level) : edges_(edges), num_edges_(num_edges) { - init(type, lang, perm, unicharset_size); + init(type, lang, perm, unicharset_size, debug_level); num_forward_edges_in_node0 = num_forward_edges(0); - if (dawg_debug_level > 3) print_all("SquishedDawg:"); + if (debug_level > 3) print_all("SquishedDawg:"); } ~SquishedDawg(); @@ -406,7 +416,19 @@ class SquishedDawg : public Dawg { void print_node(NODE_REF node, int max_num_edges) const; /// Writes the squished/reduced Dawg to a file. 
- void write_squished_dawg(const char *filename); + void write_squished_dawg(FILE *file); + + /// Opens the file with the given filename and writes the + /// squished/reduced Dawg to the file. + void write_squished_dawg(const char *filename) { + FILE *file = fopen(filename, "wb"); + if (file == NULL) { + tprintf("Error opening %s\n", filename); + exit(1); + } + this->write_squished_dawg(file); + fclose(file); + } private: /// Sets the next node link for this edge. @@ -422,8 +444,8 @@ class SquishedDawg : public Dawg { for (int edge = 0; edge < num_edges_; edge++) set_empty_edge(edge); } /// Clears the last flag of this edge. - inline void clear_last_flag(EDGE_REF edge_ref) { - (edges_[edge_ref] &= ~(LAST_FLAG << flag_start_bit_)); + inline void clear_marker_flag(EDGE_REF edge_ref) { + (edges_[edge_ref] &= ~(MARKER_FLAG << flag_start_bit_)); } /// Returns true if this edge is in the forward direction. inline bool forward_edge(EDGE_REF edge_ref) const { @@ -441,15 +463,15 @@ class SquishedDawg : public Dawg { } /// Returns true if this edge is the last edge in a sequence. inline bool last_edge(EDGE_REF edge_ref) const { - return (edges_[edge_ref] & (LAST_FLAG << flag_start_bit_)) != 0; + return (edges_[edge_ref] & (MARKER_FLAG << flag_start_bit_)) != 0; } /// Counts and returns the number of forward edges in this node. inT32 num_forward_edges(NODE_REF node) const; /// Reads SquishedDawg from a file. - void read_squished_dawg(FILE *file, DawgType type, - const STRING &lang, PermuterType perm); + void read_squished_dawg(FILE *file, DawgType type, const STRING &lang, + PermuterType perm, int debug_level); /// Prints the contents of an edge indicated by the given EDGE_REF. 
void print_edge(EDGE_REF edge) const; @@ -469,6 +491,7 @@ class SquishedDawg : public Dawg { int num_edges_; int num_forward_edges_in_node0; }; + } // namespace tesseract #endif diff --git a/dict/dict.cpp b/dict/dict.cpp index 2d878f3861..2f930565ea 100644 --- a/dict/dict.cpp +++ b/dict/dict.cpp @@ -21,6 +21,7 @@ #ifdef _MSC_VER #pragma warning(disable:4244) // Conversion warnings #endif +#include "tprintf.h" namespace tesseract { @@ -28,14 +29,162 @@ class Image; Dict::Dict(Image* image_ptr) : letter_is_okay_(&tesseract::Dict::def_letter_is_okay), - image_ptr_(image_ptr) { + probability_in_context_(&tesseract::Dict::def_probability_in_context), + image_ptr_(image_ptr), + STRING_INIT_MEMBER(user_words_suffix, "", + "A list of user-provided words.", + getImage()->getCCUtil()->params()), + STRING_INIT_MEMBER(user_patterns_suffix, "", + "A list of user-provided patterns.", + getImage()->getCCUtil()->params()), + BOOL_INIT_MEMBER(load_system_dawg, true, "Load system word dawg.", + getImage()->getCCUtil()->params()), + BOOL_INIT_MEMBER(load_freq_dawg, true, "Load frequent word dawg.", + getImage()->getCCUtil()->params()), + BOOL_INIT_MEMBER(load_punc_dawg, true, "Load dawg with punctuation" + " patterns.", getImage()->getCCUtil()->params()), + BOOL_INIT_MEMBER(load_number_dawg, true, "Load dawg with number" + " patterns.", getImage()->getCCUtil()->params()), + BOOL_INIT_MEMBER(load_fixed_length_dawgs, true, "Load fixed length dawgs" + " (e.g. 
for non-space delimited languages)", + getImage()->getCCUtil()->params()), + double_MEMBER(segment_penalty_dict_frequent_word, 1.0, + "Score multiplier for word matches which have good case and" + "are frequent in the given language (lower is better).", + getImage()->getCCUtil()->params()), + double_MEMBER(segment_penalty_dict_case_ok, 1.1, + "Score multiplier for word matches that have good case " + "(lower is better).", getImage()->getCCUtil()->params()), + double_MEMBER(segment_penalty_dict_case_bad, 1.3125, + "Default score multiplier for word matches, which may have " + "case issues (lower is better).", + getImage()->getCCUtil()->params()), + double_MEMBER(segment_penalty_ngram_best_choice, 1.24, + "Multipler to for the best choice from the ngram model.", + getImage()->getCCUtil()->params()), + double_MEMBER(segment_penalty_dict_nonword, 1.25, + "Score multiplier for glyph fragment segmentations which " + "do not match a dictionary word (lower is better).", + getImage()->getCCUtil()->params()), + double_MEMBER(segment_penalty_garbage, 1.50, + "Score multiplier for poorly cased strings that are not in" + " the dictionary and generally look like garbage (lower is" + " better).", getImage()->getCCUtil()->params()), + INT_MEMBER(dawg_debug_level, 0, "Set to 1 for general debug info" + ", to 2 for more details, to 3 to see all the debug messages", + getImage()->getCCUtil()->params()), + INT_MEMBER(hyphen_debug_level, 0, "Debug level for hyphenated words.", + getImage()->getCCUtil()->params()), + INT_MEMBER(ngram_permuter_debug_level, 0, + "Debug level for the ngram permuter.", + getImage()->getCCUtil()->params()), + double_MEMBER(ngram_permuter_nonmatch_score, -40.0, + "Average classifier score of a non-matching unichar.", + getImage()->getCCUtil()->params()), + INT_MEMBER(max_viterbi_list_size, 10, "Maximum size of viterbi list.", + getImage()->getCCUtil()->params()), + BOOL_MEMBER(use_only_first_uft8_step, false, + "Use only the first UTF8 step of the given 
string" + " when computing log probabilities.", + getImage()->getCCUtil()->params()), + double_MEMBER(ngram_model_scale_factor, 1.0, "Relative strength of the" + " ngram model relative to the character classifier ", + getImage()->getCCUtil()->params()), + double_MEMBER(certainty_scale, 20.0, "Certainty scaling factor", + getImage()->getCCUtil()->params()), + double_MEMBER(stopper_nondict_certainty_base, -2.50, + "Certainty threshold for non-dict words", + getImage()->getCCUtil()->params()), + double_MEMBER(stopper_phase2_certainty_rejection_offset, 1.0, + "Reject certainty offset", + getImage()->getCCUtil()->params()), + INT_MEMBER(stopper_smallword_size, 2, + "Size of dict word to be treated as non-dict word", + getImage()->getCCUtil()->params()), + double_MEMBER(stopper_certainty_per_char, -0.50, "Certainty to add" + " for each dict char above small word size.", + getImage()->getCCUtil()->params()), + double_MEMBER(stopper_allowable_character_badness, 3.0, + "Max certaintly variation allowed in a word (in sigma)", + getImage()->getCCUtil()->params()), + INT_MEMBER(stopper_debug_level, 0, "Stopper debug level", + getImage()->getCCUtil()->params()), + BOOL_MEMBER(stopper_no_acceptable_choices, false, + "Make AcceptableChoice() always return false. 
Useful" + " when there is a need to explore all segmentations", + getImage()->getCCUtil()->params()), + BOOL_MEMBER(save_raw_choices, false, "Save all explored raw choices", + getImage()->getCCUtil()->params()), + INT_MEMBER(tessedit_truncate_wordchoice_log, 10, + "Max words to keep in list", + getImage()->getCCUtil()->params()), + STRING_MEMBER(word_to_debug, "", "Word for which stopper debug" + " information should be printed to stdout", + getImage()->getCCUtil()->params()), + STRING_MEMBER(word_to_debug_lengths, "", + "Lengths of unichars in word_to_debug", + getImage()->getCCUtil()->params()), + INT_MEMBER(fragments_debug, 0, "Debug character fragments", + getImage()->getCCUtil()->params()), + INT_MEMBER(segment_debug, 0, "Debug the whole segmentation process", + getImage()->getCCUtil()->params()), + BOOL_MEMBER(permute_debug, 0, "Debug char permutation process", + getImage()->getCCUtil()->params()), + double_MEMBER(bestrate_pruning_factor, 2.0, "Multiplying factor of" + " current best rate to prune other hypotheses", + getImage()->getCCUtil()->params()), + BOOL_MEMBER(permute_script_word, 0, + "Turn on word script consistency permuter", + getImage()->getCCUtil()->params()), + BOOL_MEMBER(segment_segcost_rating, 0, + "incorporate segmentation cost in word rating?", + getImage()->getCCUtil()->params()), + double_MEMBER(segment_reward_script, 0.95, + "Score multipler for script consistency within a word. " + "Being a 'reward' factor, it should be <= 1. " + "Smaller value implies bigger reward.", + getImage()->getCCUtil()->params()), + BOOL_MEMBER(permute_fixed_length_dawg, 0, + "Turn on fixed-length phrasebook search permuter", + getImage()->getCCUtil()->params()), + BOOL_MEMBER(permute_chartype_word, 0, + "Turn on character type (property) consistency permuter", + getImage()->getCCUtil()->params()), + double_MEMBER(segment_reward_chartype, 0.97, + "Score multipler for char type consistency within a word. 
", + getImage()->getCCUtil()->params()), + double_MEMBER(segment_reward_ngram_best_choice, 0.99, + "Score multipler for ngram permuter's best choice" + " (only used in the Han script path).", + getImage()->getCCUtil()->params()), + BOOL_MEMBER(save_doc_words, 0, "Save Document Words", + getImage()->getCCUtil()->params()), + BOOL_MEMBER(doc_dict_enable, 1, "Enable Document Dictionary ", + getImage()->getCCUtil()->params()), + double_MEMBER(doc_dict_pending_threshold, 0.0, + "Worst certainty for using pending dictionary", + getImage()->getCCUtil()->params()), + double_MEMBER(doc_dict_certainty_threshold, -2.25, + "Worst certainty for words that can be inserted into the" + "document dictionary", getImage()->getCCUtil()->params()), + BOOL_MEMBER(ngram_permuter_activated, false, + "Activate character-level n-gram-based permuter", + getImage()->getCCUtil()->params()), + INT_MEMBER(max_permuter_attempts, 10000, "Maximum number of different" + " character choices to consider during permutation." 
+ " This limit is especially useful when user patterns" + " are specified, since overly generic patterns can result in" + " dawg search exploring an overly large number of options.", + getImage()->getCCUtil()->params()), + BOOL_MEMBER(permute_only_top, false, "Run only the top choice permuter", + getImage()->getCCUtil()->params()) { dang_ambigs_table_ = NULL; replace_ambigs_table_ = NULL; keep_word_choices_ = false; reject_offset_ = 0.0; best_raw_choice_ = NULL; - best_choices_ = NIL; - raw_choices_ = NIL; + best_choices_ = NIL_LIST; + raw_choices_ = NIL_LIST; go_deeper_fxn_ = NULL; hyphen_word_ = NULL; last_word_on_line_ = false; @@ -43,28 +192,139 @@ Dict::Dict(Image* image_ptr) document_words_ = NULL; pending_words_ = NULL; freq_dawg_ = NULL; + punc_dawg_ = NULL; + max_fixed_length_dawgs_wdlen_ = -1; + wordseg_rating_adjust_factor_ = -1.0f; } Dict::~Dict() { if (hyphen_word_ != NULL) delete hyphen_word_; } -// Returns true if in light of the current state the letter at word_index -// in the given word is allowed according to at least one of the dawgs in -// dawgs_. -// +void Dict::Load() { + STRING name; + STRING &lang = getImage()->getCCUtil()->lang; + + if (dawgs_.length() != 0) this->End(); + + hyphen_unichar_id_ = getUnicharset().unichar_to_id(kHyphenSymbol); + TessdataManager &tessdata_manager = + getImage()->getCCUtil()->tessdata_manager; + + // Load dawgs_. 
+ if (load_punc_dawg && tessdata_manager.SeekToStart(TESSDATA_PUNC_DAWG)) { + punc_dawg_ = new SquishedDawg(tessdata_manager.GetDataFilePtr(), + DAWG_TYPE_PUNCTUATION, lang, PUNC_PERM, + dawg_debug_level); + dawgs_ += punc_dawg_; + } + if (load_system_dawg && tessdata_manager.SeekToStart(TESSDATA_SYSTEM_DAWG)) { + dawgs_ += new SquishedDawg(tessdata_manager.GetDataFilePtr(), + DAWG_TYPE_WORD, lang, SYSTEM_DAWG_PERM, + dawg_debug_level); + } + if (load_number_dawg && tessdata_manager.SeekToStart(TESSDATA_NUMBER_DAWG)) { + dawgs_ += + new SquishedDawg(tessdata_manager.GetDataFilePtr(), + DAWG_TYPE_NUMBER, lang, NUMBER_PERM, dawg_debug_level); + } + if (tessdata_manager.SeekToStart(TESSDATA_FREQ_DAWG)) { + freq_dawg_ = new SquishedDawg(tessdata_manager.GetDataFilePtr(), + DAWG_TYPE_WORD, lang, FREQ_DAWG_PERM, + dawg_debug_level); + dawgs_ += freq_dawg_; + } + + if (((STRING &)user_words_suffix).length() > 0) { + Trie *trie_ptr = new Trie(DAWG_TYPE_WORD, lang, USER_DAWG_PERM, + kMaxUserDawgEdges, getUnicharset().size(), + dawg_debug_level); + name = getImage()->getCCUtil()->language_data_path_prefix; + name += user_words_suffix; + if (!trie_ptr->read_word_list(name.string(), getUnicharset())) { + tprintf("Error: failed to load %s\n", name.string()); + exit(1); + } + dawgs_ += trie_ptr; + } + + if (((STRING &)user_patterns_suffix).length() > 0) { + Trie *trie_ptr = new Trie(DAWG_TYPE_PATTERN, lang, USER_PATTERN_PERM, + kMaxUserDawgEdges, getUnicharset().size(), + dawg_debug_level); + trie_ptr->initialize_patterns(&(getUnicharset())); + name = getImage()->getCCUtil()->language_data_path_prefix; + name += user_patterns_suffix; + if (!trie_ptr->read_pattern_list(name.string(), getUnicharset())) { + tprintf("Error: failed to load %s\n", name.string()); + exit(1); + } + dawgs_ += trie_ptr; + } + + document_words_ = new Trie(DAWG_TYPE_WORD, lang, DOC_DAWG_PERM, + kMaxDocDawgEdges, getUnicharset().size(), + dawg_debug_level); + dawgs_ += document_words_; + + // This dawg is 
temporary and should not be searched by letter_is_ok. + pending_words_ = new Trie(DAWG_TYPE_WORD, lang, NO_PERM, + kMaxDocDawgEdges, getUnicharset().size(), + dawg_debug_level); + + // Load fixed length dawgs if necessary (used for phrase search + // for non-space delimited languages). + if (load_fixed_length_dawgs && + tessdata_manager.SeekToStart(TESSDATA_FIXED_LENGTH_DAWGS)) { + ReadFixedLengthDawgs(DAWG_TYPE_WORD, lang, SYSTEM_DAWG_PERM, + dawg_debug_level, tessdata_manager.GetDataFilePtr(), + &dawgs_, &max_fixed_length_dawgs_wdlen_); + } + + // Construct a list of corresponding successors for each dawg. Each entry i + // in the successors_ vector is a vector of integers that represent the + // indices into the dawgs_ vector of the successors for dawg i. + successors_.reserve(dawgs_.length()); + for (int i = 0; i < dawgs_.length(); ++i) { + const Dawg *dawg = dawgs_[i]; + SuccessorList *lst = new SuccessorList(); + for (int j = 0; j < dawgs_.length(); ++j) { + const Dawg *other = dawgs_[j]; + if (dawg != NULL && other != NULL && + (dawg->lang() == other->lang()) && + kDawgSuccessors[dawg->type()][other->type()]) *lst += j; + } + successors_ += lst; + } +} + +void Dict::End() { + if (dawgs_.length() == 0) + return; // Not safe to call twice. + dawgs_.delete_data_pointers(); + successors_.delete_data_pointers(); + dawgs_.clear(); + successors_.clear(); + document_words_ = NULL; + max_fixed_length_dawgs_wdlen_ = -1; + if (pending_words_ != NULL) { + delete pending_words_; + pending_words_ = NULL; + } +} + +// Returns true if in light of the current state unichar_id is allowed +// according to at least one of the dawgs in the dawgs_ vector. // See more extensive comments in dict.h where this function is declared. 
-// -int Dict::def_letter_is_okay(void* void_dawg_args, int word_index, - const void *void_word, bool word_end) { +int Dict::def_letter_is_okay(void* void_dawg_args, + UNICHAR_ID unichar_id, + bool word_end) { DawgArgs *dawg_args = reinterpret_cast(void_dawg_args); - const WERD_CHOICE *word = reinterpret_cast(void_word); if (dawg_debug_level >= 3) { - tprintf("def_letter_is_okay: word_index=%d word_end=%d" - " word=%s num active dawgs=%d num constraints=%d\n", - word_index, word_end, - word->debug_string(getUnicharset()).string(), + tprintf("def_letter_is_okay: current unichar=%s word_end=%d" + " num active dawgs=%d num constraints=%d\n", + getUnicharset().debug_str(unichar_id).string(), word_end, dawg_args->active_dawgs->length(), dawg_args->constraints->length()); } @@ -72,7 +332,6 @@ int Dict::def_letter_is_okay(void* void_dawg_args, int word_index, // Do not accept words that contain kPatternUnicharID. // (otherwise pattern dawgs would not function correctly). // Do not accept words containing INVALID_UNICHAR_IDs. - UNICHAR_ID unichar_id = word->unichar_id(word_index); if (unichar_id == Dawg::kPatternUnicharID || unichar_id == INVALID_UNICHAR_ID) { dawg_args->permuter = NO_PERM; @@ -80,7 +339,7 @@ int Dict::def_letter_is_okay(void* void_dawg_args, int word_index, } // Initialization. - PermuterType current_permuter = NO_PERM; + PermuterType curr_perm = NO_PERM; dawg_args->updated_active_dawgs->clear(); const DawgInfoVector &constraints = *(dawg_args->constraints); *dawg_args->updated_constraints = constraints; @@ -91,26 +350,34 @@ int Dict::def_letter_is_okay(void* void_dawg_args, int word_index, for (int a = 0; a < dawg_args->active_dawgs->length(); ++a) { const DawgInfo &info = (*dawg_args->active_dawgs)[a]; const Dawg *dawg = dawgs_[info.dawg_index]; - // Obtain unichar_id at this position (could be changed later, so this - // needs to be inside the loop over all active dawgs). 
- unichar_id = word->unichar_id(word_index); + // dawg_unichar_id will contain the literal unichar_id to be found in the + // dawgs (e.g. didgit pattern if unichar_id is a digit and dawg contains + // number patterns, word pattern if dawg is a puncutation dawg and we + // reached an end of beginning puntuation pattern, etc). + UNICHAR_ID dawg_unichar_id = unichar_id; + + // If we are dealing with the pattern dawg, look up all the + // possible edges, not only for the exact unichar_id, but also + // for all its character classes (alpha, digit, etc). + if (dawg->type() == DAWG_TYPE_PATTERN) { + ProcessPatternEdges(dawg, info, dawg_unichar_id, word_end, + dawg_args, &curr_perm); + // There can't be any successors to dawg that is of type + // DAWG_TYPE_PATTERN, so we are done examining this DawgInfo. + continue; + } + // The number dawg generalizes all digits to be kPatternUnicharID, // so try to match kPatternUnicharID if the current unichar is a digit. if (dawg->type() == DAWG_TYPE_NUMBER && - getUnicharset().get_isdigit(unichar_id)) { - unichar_id = Dawg::kPatternUnicharID; - } - // Get the starting node for this letter. - NODE_REF node; - if (info.ref == NO_EDGE) { - node = 0; // beginning to explore this dawg - } else { - node = dawg->next_node(info.ref); - if (node == 0) node = NO_EDGE; // end of word + getUnicharset().get_isdigit(dawg_unichar_id)) { + dawg_unichar_id = Dawg::kPatternUnicharID; } - // Find the edge out of the node for the curent unichar_id. + + // Find the edge out of the node for the dawg_unichar_id. + NODE_REF node = GetStartingNode(dawg, info.ref); EDGE_REF edge = (node != NO_EDGE) ? 
- dawg->edge_char_of(node, unichar_id, word_end) : NO_EDGE; + dawg->edge_char_of(node, dawg_unichar_id, word_end) : NO_EDGE; if (dawg_debug_level >= 3) { tprintf("Active dawg: [%d, " REFFORMAT "] edge=" REFFORMAT "\n", @@ -120,12 +387,19 @@ int Dict::def_letter_is_okay(void* void_dawg_args, int word_index, if (edge != NO_EDGE) { // the unichar was found in the current dawg if (ConstraintsOk(*(dawg_args->updated_constraints), word_end, dawg->type())) { - UpdatePermuter(dawg->permuter(), &current_permuter); + if (dawg_debug_level >=3) { + tprintf("Letter found in dawg %d\n", info.dawg_index); + } + if (dawg->permuter() > curr_perm) curr_perm = dawg->permuter(); dawg_args->updated_active_dawgs->add_unique( - DawgInfo(info.dawg_index, edge), + DawgInfo(info.dawg_index, edge), dawg_debug_level > 0, "Append current dawg to updated active dawgs: "); } - } else { // the unichar was not found in the current dawg + } else if (dawg_args->sought_word_length == kAnyWordLength) { + // The unichar was not found in the current dawg. + // Explore the successor dawgs (but only if we are not + // just searching one dawg with a fixed word length). + // Handle leading/trailing punctuation dawgs that denote a word pattern // as an edge with kPatternUnicharID. If such an edge is found we add a // constraint denoting the state of the dawg before the word pattern. @@ -135,7 +409,8 @@ int Dict::def_letter_is_okay(void* void_dawg_args, int word_index, edge = dawg->edge_char_of(node, Dawg::kPatternUnicharID, word_end); if (edge != NO_EDGE) { dawg_args->updated_constraints->add_unique( - DawgInfo(info.dawg_index, edge), "Recording constraint: "); + DawgInfo(info.dawg_index, edge), dawg_debug_level > 0, + "Recording constraint: "); } else { // Do not explore successors of this dawg, since this // must be invalid leading or trailing punctuation. @@ -186,31 +461,388 @@ int Dict::def_letter_is_okay(void* void_dawg_args, int word_index, } } // Look for the letter in this successor dawg. 
- EDGE_REF sedge = sdawg->edge_char_of( - snode, word->unichar_id(word_index), word_end); + EDGE_REF sedge = sdawg->edge_char_of(snode, unichar_id, word_end); // If we found the letter append sdawg to the active_dawgs list. if (sedge != NO_EDGE && ConstraintsOk(*(dawg_args->updated_constraints), word_end, dawgs_[sdawg_index]->type())) { - UpdatePermuter(sdawg->permuter(), ¤t_permuter); + if (dawg_debug_level >= 3) { + tprintf("Letter found in the successor dawg %d\n", sdawg_index); + } + if (sdawg->permuter() > curr_perm) curr_perm = sdawg->permuter(); if (sdawg->next_node(sedge) != 0) { // if not word end dawg_args->updated_active_dawgs->add_unique( - DawgInfo(sdawg_index, sedge), + DawgInfo(sdawg_index, sedge), dawg_debug_level > 0, "Append successor to updated active dawgs: "); } } } // end successors loop } // end if/else } // end for - // Update dawg_args->permuter if it used to be NO_PERM or if we found - // the current letter in a non-punctuation dawg. This allows preserving - // information on which dawg the "core" word came from. - if ((current_permuter == PUNC_PERM && - current_permuter > dawg_args->permuter) || - current_permuter != PUNC_PERM) { - dawg_args->permuter = current_permuter; + // Update dawg_args->permuter if it used to be NO_PERM or became NO_PERM + // or if we found the current letter in a non-punctuation dawg. This + // allows preserving information on which dawg the "core" word came from. + // Keep the old value of dawg_args->permuter if it is COMPOUND_PERM. 
+ if (dawg_args->permuter == NO_PERM || curr_perm == NO_PERM || + (curr_perm != PUNC_PERM && dawg_args->permuter != COMPOUND_PERM)) { + dawg_args->permuter = curr_perm; } return dawg_args->permuter; } +void Dict::ProcessPatternEdges(const Dawg *dawg, const DawgInfo &info, + UNICHAR_ID unichar_id, bool word_end, + DawgArgs *dawg_args, + PermuterType *curr_perm) { + NODE_REF node = GetStartingNode(dawg, info.ref); + // Try to find the edge corresponding to the exact unichar_id and to all the + // edges corresponding to the character class of unichar_id. + GenericVector unichar_id_patterns; + unichar_id_patterns.push_back(unichar_id); + dawg->unichar_id_to_patterns(unichar_id, getUnicharset(), + &unichar_id_patterns); + for (int i = 0; i < unichar_id_patterns.size(); ++i) { + // On the first iteration check all the outgoing edges. + // On the second iteration check all self-loops. + for (int k = 0; k < 2; ++k) { + EDGE_REF edge = (k == 0) ? + dawg->edge_char_of(node, unichar_id_patterns[i], word_end) + : dawg->pattern_loop_edge(info.ref, unichar_id_patterns[i], word_end); + if (edge != NO_EDGE) { + if (dawg_debug_level >= 3) { + tprintf("Pattern dawg: [%d, " REFFORMAT "] edge=" REFFORMAT "\n", + info.dawg_index, node, edge); + } + if (ConstraintsOk(*(dawg_args->updated_constraints), + word_end, dawg->type())) { + if (dawg_debug_level >=3) { + tprintf("Letter found in pattern dawg %d\n", info.dawg_index); + } + if (dawg->permuter() > *curr_perm) *curr_perm = dawg->permuter(); + dawg_args->updated_active_dawgs->add_unique( + DawgInfo(info.dawg_index, edge), dawg_debug_level > 0, + "Append current dawg to updated active dawgs: "); + } + } + } + } +} + +void Dict::ReadFixedLengthDawgs(DawgType type, const STRING &lang, + PermuterType perm, int debug_level, + FILE *file, DawgVector *dawg_vec, + int *max_wdlen) { + int i; + DawgVector dawg_vec_copy; + dawg_vec_copy.move(dawg_vec); // save the input dawg_vec. 
+ inT32 num_dawgs; + fread(&num_dawgs, sizeof(inT32), 1, file); + bool swap = (num_dawgs > MAX_WERD_LENGTH); + if (swap) num_dawgs = reverse32(num_dawgs); + inT32 word_length; + int max_word_length = 0; + // Read and record pointers to fixed-length dawgs such that: + // dawg_vec[word_length] = pointer to dawg with word length of word_length, + // NULL if such fixed-length dawg does not exist. + for (i = 0; i < num_dawgs; ++i) { + fread(&word_length, sizeof(inT32), 1, file); + if (swap) word_length = reverse32(word_length); + ASSERT_HOST(word_length > 0 && word_length <= MAX_WERD_LENGTH); + while (word_length >= dawg_vec->size()) dawg_vec->push_back(NULL); + (*dawg_vec)[word_length] = + new SquishedDawg(file, type, lang, perm, debug_level); + if (word_length > max_word_length) max_word_length = word_length; + } + *max_wdlen = max_word_length; + // Entries dawg_vec[0] to dawg_vec[max_word_length] now hold pointers + // to fixed-length dawgs. The rest of the vector will contain the dawg + // pointers from the original input dawg_vec. + for (i = 0; i < dawg_vec_copy.size(); ++i) { + dawg_vec->push_back(dawg_vec_copy[i]); + } +} + +void Dict::WriteFixedLengthDawgs( + const GenericVector &dawg_vec, + int num_dawgs, int debug_level, FILE *output_file) { + fwrite(&num_dawgs, sizeof(inT32), 1, output_file); + if (debug_level) tprintf("Writing %d split length dawgs\n", num_dawgs); + for (int i = 1; i < dawg_vec.size(); ++i) { + if ((dawg_vec)[i] != NULL) { + fwrite(&i, sizeof(inT32), 1, output_file); + dawg_vec[i]->write_squished_dawg(output_file); + if (debug_level) tprintf("Wrote Dawg with word length %d\n", i); + } + } +} + +// Fill the given active_dawgs vector with dawgs that could contain the +// beginning of the word. If hyphenated() returns true, copy the entries +// from hyphen_active_dawgs_ instead. 
+void Dict::init_active_dawgs(int sought_word_length, + DawgInfoVector *active_dawgs, + bool ambigs_mode) { + int i; + if (sought_word_length != kAnyWordLength) { + // Only search one fixed word length dawg. + if (sought_word_length <= max_fixed_length_dawgs_wdlen_ && + dawgs_[sought_word_length] != NULL) { + *active_dawgs += DawgInfo(sought_word_length, NO_EDGE); + } + } else if (hyphenated()) { + *active_dawgs = hyphen_active_dawgs_; + if (dawg_debug_level >= 3) { + for (i = 0; i < hyphen_active_dawgs_.size(); ++i) { + tprintf("Adding hyphen beginning dawg [%d, " REFFORMAT "]\n", + hyphen_active_dawgs_[i].dawg_index, + hyphen_active_dawgs_[i].ref); + } + } + } else { + for (i = 0; i < dawgs_.length(); ++i) { + if (dawgs_[i] != NULL && kBeginningDawgsType[(dawgs_[i])->type()] && + !(ambigs_mode && (dawgs_[i])->type() == DAWG_TYPE_PATTERN)) { + *active_dawgs += DawgInfo(i, NO_EDGE); + if (dawg_debug_level >= 3) { + tprintf("Adding beginning dawg [%d, " REFFORMAT "]\n", i, NO_EDGE); + } + } + } + } +} + +// If hyphenated() returns true, copy the entries from hyphen_constraints_ +// into the given constraints vector. +void Dict::init_constraints(DawgInfoVector *constraints) { + if (hyphenated()) { + *constraints = hyphen_constraints_; + if (dawg_debug_level >= 3) { + for (int i = 0; i < hyphen_constraints_.size(); ++i) { + tprintf("Adding hyphen constraint [%d, " REFFORMAT "]\n", + hyphen_constraints_[i].dawg_index, + hyphen_constraints_[i].ref); + } + } + } +} + +void Dict::add_document_word(const WERD_CHOICE &best_choice) { + // Do not add hyphenated word parts to the document dawg. + // hyphen_word_ will be non-NULL after the set_hyphen_word() is + // called when the first part of the hyphenated word is + // discovered and while the second part of the word is recognized. + // hyphen_word_ is cleared in cc_recg() before the next word on + // the line is recognized. 
+ if (hyphen_word_) return; + + char filename[CHARS_PER_LINE]; + FILE *doc_word_file; + int stringlen = best_choice.length(); + + if (!doc_dict_enable || valid_word(best_choice) || + CurrentWordAmbig() || stringlen < 2) + return; + + // Discard words that contain >= kDocDictMaxRepChars repeating unichars. + if (best_choice.length() >= kDocDictMaxRepChars) { + int num_rep_chars = 1; + UNICHAR_ID uch_id = best_choice.unichar_id(0); + for (int i = 1; i < best_choice.length(); ++i) { + if (best_choice.unichar_id(i) != uch_id) { + num_rep_chars = 1; + uch_id = best_choice.unichar_id(i); + } else { + ++num_rep_chars; + if (num_rep_chars == kDocDictMaxRepChars) return; + } + } + } + + if (best_choice.certainty() < doc_dict_certainty_threshold || + stringlen == 2) { + if (best_choice.certainty() < doc_dict_pending_threshold) + return; + + if (!pending_words_->word_in_dawg(best_choice)) { + if (stringlen > 2 || + (stringlen == 2 && + getUnicharset().get_isupper(best_choice.unichar_id(0)) && + getUnicharset().get_isupper(best_choice.unichar_id(1)))) { + pending_words_->add_word_to_dawg(best_choice); + } + return; + } + } + + if (save_doc_words) { + strcpy(filename, getImage()->getCCUtil()->imagefile.string()); + strcat(filename, ".doc"); + doc_word_file = open_file (filename, "a"); + fprintf(doc_word_file, "%s\n", + best_choice.debug_string(getUnicharset()).string()); + fclose(doc_word_file); + } + document_words_->add_word_to_dawg(best_choice); +} + +void Dict::adjust_word(WERD_CHOICE *word, + float *certainty_array, + const BLOB_CHOICE_LIST_VECTOR *char_choices, + bool nonword, + float additional_adjust, + bool debug) { + bool is_han = (char_choices != NULL && + getUnicharset().han_sid() != getUnicharset().null_sid() && + get_top_word_script(*char_choices, getUnicharset()) == + getUnicharset().han_sid()); + bool case_is_ok = (is_han || case_ok(*word, getUnicharset())); + bool punc_is_ok = (is_han || !nonword || valid_punctuation(*word)); + + float adjust_factor = 
additional_adjust; + float new_rating = word->rating(); + if (debug) { + tprintf("%sWord: %s %4.2f ", nonword ? "Non-" : "", + word->debug_string(getUnicharset()).string(), word->rating()); + } + new_rating += kRatingPad; + if (nonword) { // non-dictionary word + if (case_is_ok && punc_is_ok) { + adjust_factor += segment_penalty_dict_nonword; + new_rating *= adjust_factor; + if (debug) tprintf(", W"); + } else { + adjust_factor += segment_penalty_garbage; + new_rating *= adjust_factor; + if (debug) { + if (!case_is_ok) tprintf(", C"); + if (!punc_is_ok) tprintf(", P"); + } + } + } else { // dictionary word + if (case_is_ok) { + if (!is_han && freq_dawg_ != NULL && freq_dawg_->word_in_dawg(*word)) { + word->set_permuter(FREQ_DAWG_PERM); + adjust_factor += segment_penalty_dict_frequent_word; + new_rating *= adjust_factor; + if (debug) tprintf(", F"); + } else { + adjust_factor += segment_penalty_dict_case_ok; + new_rating *= adjust_factor; + if (debug) tprintf(", "); + } + } else { + adjust_factor += segment_penalty_dict_case_bad; + new_rating *= adjust_factor; + if (debug) tprintf(", C"); + } + } + new_rating -= kRatingPad; + word->set_rating(new_rating); + if (debug) tprintf(" %4.2f --> %4.2f\n", adjust_factor, new_rating); + LogNewChoice(adjust_factor, certainty_array, false, word); +} + +int Dict::valid_word(const WERD_CHOICE &word, bool numbers_ok) { + const WERD_CHOICE *word_ptr = &word; + WERD_CHOICE temp_word; + if (hyphenated()) { + copy_hyphen_info(&temp_word); + temp_word += word; + word_ptr = &temp_word; + } + if (word_ptr->length() == 0) return NO_PERM; + // Allocate vectors for holding current and updated + // active_dawgs and constraints and initialize them. 
+ DawgInfoVector *active_dawgs = new DawgInfoVector[2]; + DawgInfoVector *constraints = new DawgInfoVector[2]; + init_active_dawgs(kAnyWordLength, &(active_dawgs[0]), false); + init_constraints(&(constraints[0])); + DawgArgs dawg_args(&(active_dawgs[0]), &(constraints[0]), + &(active_dawgs[1]), &(constraints[1]), + 0.0, NO_PERM, kAnyWordLength, 0); + int last_index = word_ptr->length() - 1; + // Call leter_is_okay for each letter in the word. + for (int i = hyphen_base_size(); i <= last_index; ++i) { + if (!((this->*letter_is_okay_)(&dawg_args, word_ptr->unichar_id(i), + i == last_index))) break; + // Swap active_dawgs, constraints with the corresponding updated vector. + if (dawg_args.updated_active_dawgs == &(active_dawgs[1])) { + dawg_args.updated_active_dawgs = &(active_dawgs[0]); + dawg_args.updated_constraints = &(constraints[0]); + ++(dawg_args.active_dawgs); + ++(dawg_args.constraints); + } else { + ++(dawg_args.updated_active_dawgs); + ++(dawg_args.updated_constraints); + dawg_args.active_dawgs = &(active_dawgs[0]); + dawg_args.constraints = &(constraints[0]); + } + } + delete[] active_dawgs; + delete[] constraints; + return valid_word_permuter(dawg_args.permuter, numbers_ok) ? 
+ dawg_args.permuter : NO_PERM; +} + +bool Dict::valid_punctuation(const WERD_CHOICE &word) { + if (word.length() == 0) return NO_PERM; + int i; + WERD_CHOICE new_word; + int last_index = word.length() - 1; + int new_len = 0; + for (i = 0; i <= last_index; ++i) { + UNICHAR_ID unichar_id = (word.unichar_id(i)); + if (getUnicharset().get_ispunctuation(unichar_id)) { + new_word.append_unichar_id(unichar_id, 1, 0.0, 0.0); + } else if (!getUnicharset().get_isalpha(unichar_id) && + !getUnicharset().get_isdigit(unichar_id)) { + return false; // neither punc, nor alpha, nor digit + } else if ((new_len = new_word.length()) == 0 || + new_word.unichar_id(new_len-1) != Dawg::kPatternUnicharID) { + new_word.append_unichar_id(Dawg::kPatternUnicharID, 1, 0.0, 0.0); + } + } + for (i = 0; i < dawgs_.size(); ++i) { + if (dawgs_[i] != NULL && + dawgs_[i]->type() == DAWG_TYPE_PUNCTUATION && + dawgs_[i]->word_in_dawg(new_word)) return true; + } + return false; +} + +// Returns the "dominant" script ID for the word. By "dominant", the script +// must account for at least half the characters. Otherwise, it returns 0. +// Note that for Japanese, Hiragana and Katakana are simply treated as Han. +int Dict::get_top_word_script(const BLOB_CHOICE_LIST_VECTOR &char_choices, + const UNICHARSET &unicharset) { + int max_script = unicharset.get_script_table_size(); + int *sid = new int[max_script]; + int x; + for (x = 0; x < max_script; x++) sid[x] = 0; + for (x = 0; x < char_choices.length(); ++x) { + BLOB_CHOICE_IT blob_choice_it(char_choices.get(x)); + sid[blob_choice_it.data()->script_id()]++; + } + if (unicharset.han_sid() != unicharset.null_sid()) { + // Add the Hiragana & Katakana counts to Han and zero them out. 
+ if (unicharset.hiragana_sid() != unicharset.null_sid()) { + sid[unicharset.han_sid()] += sid[unicharset.hiragana_sid()]; + sid[unicharset.hiragana_sid()] = 0; + } + if (unicharset.katakana_sid() != unicharset.null_sid()) { + sid[unicharset.han_sid()] += sid[unicharset.katakana_sid()]; + sid[unicharset.katakana_sid()] = 0; + } + } + // Note that high script ID overrides lower one on a tie, thus biasing + // towards non-Common script (if sorted that way in unicharset file). + int max_sid = 0; + for (x = 1; x < max_script; x++) + if (sid[x] >= sid[max_sid]) max_sid = x; + if (sid[max_sid] < char_choices.length() / 2) + max_sid = unicharset.null_sid(); + delete[] sid; + return max_sid; +} + } // namespace tesseract diff --git a/dict/dict.h b/dict/dict.h index edf5c795e8..01f2cccd9e 100644 --- a/dict/dict.h +++ b/dict/dict.h @@ -20,24 +20,18 @@ #define TESSERACT_DICT_DICT_H_ #include "ambigs.h" -#include "choices.h" -#include "choicearr.h" #include "dawg.h" +#include "host.h" #include "image.h" +#include "oldlist.h" #include "ratngs.h" #include "stopper.h" #include "trie.h" #include "unicharset.h" +#include "permute.h" -extern STRING_VAR_H(global_user_words_suffix, "user-words", - "A list of user-provided words."); -extern INT_VAR_H(hyphen_debug_level, 0, "Debug level for hyphenated words."); - -#define MAX_WERD_LENGTH (inT64) 40 +#define MAX_WERD_LENGTH (inT64) 128 #define NO_RATING -1 -#define FREQ_WERD 1.0 -#define GOOD_WERD 1.1 -#define OK_WERD 1.3125 /** Struct used to hold temporary information about fragments. */ struct CHAR_FRAGMENT_INFO { @@ -52,27 +46,54 @@ namespace tesseract { typedef GenericVector DawgVector; +// +// Constants +// +static const int kAnyWordLength = -1; +static const int kRatingPad = 4; +// TODO(daria): If hyphens are different in different languages and can be +// inferred from training data we should load their values dynamically. 
+static const char kHyphenSymbol[] = "-"; +static const int kMaxNumDawgEdgees = 2000000; +static const int kMaxDocDawgEdges = 250000; +static const int kMaxUserDawgEdges = 50000; +static const float kSimCertaintyScale = -10.0; // similarity matcher scaling +static const float kSimCertaintyOffset = -10.0; // similarity matcher offset +static const float kSimilarityFloor = 100.0; // worst E*L product to stop on +static const int kDocDictMaxRepChars = 4; + struct DawgArgs { - DawgArgs(DawgInfoVector *d, DawgInfoVector *c, - DawgInfoVector *ud, DawgInfoVector *uc, float r) : + DawgArgs(DawgInfoVector *d, DawgInfoVector *c, DawgInfoVector *ud, + DawgInfoVector *uc, float r, PermuterType p, int len, int e) : active_dawgs(d), constraints(c), updated_active_dawgs(ud), updated_constraints(uc), rating_margin(r) { for (int i = 0; i < MAX_WERD_LENGTH; ++i) { rating_array[i] = NO_RATING; } - permuter = NO_PERM; + permuter = p; + sought_word_length = len; + end_char_choice_index = e; } DawgInfoVector *active_dawgs; DawgInfoVector *constraints; DawgInfoVector *updated_active_dawgs; DawgInfoVector *updated_constraints; PermuterType permuter; + int sought_word_length; + + // TODO(daria): remove these fields when permdawg is deprecated. float rating_margin; /**< pruning margin ratio */ float rating_array[MAX_WERD_LENGTH]; + int end_char_choice_index; }; class Dict { public: + // Gain factor for ambiguity threshold. + static const float kStopperAmbiguityThresholdGain; + // Certainty offset for ambiguity threshold. 
+ static const float kStopperAmbiguityThresholdOffset; + Dict(Image* image_ptr); ~Dict(); Image* getImage() { @@ -85,10 +106,17 @@ class Dict { return getImage()->getCCUtil()->unichar_ambigs; } + inline bool compound_marker(UNICHAR_ID unichar_id) { + return (unichar_id == getUnicharset().unichar_to_id("-") || + unichar_id == getUnicharset().unichar_to_id("/")); + } + /* hyphen.cpp ************************************************************/ /// Returns true if we've recorded the beginning of a hyphenated word. - inline bool hyphenated() { return !last_word_on_line_ && hyphen_word_; } + inline bool hyphenated() { return + !last_word_on_line_ && hyphen_word_ && GetMaxFixedLengthDawgIndex() < 0; + } /// Size of the base word (the part on the line before) of a hyphenated word. inline int hyphen_base_size() { return this->hyphenated() ? hyphen_word_->length() : 0; @@ -112,10 +140,14 @@ class Dict { } } /// Check whether the word has a hyphen at the end. + inline bool has_hyphen_end(UNICHAR_ID unichar_id, bool first_pos) { + return (last_word_on_line_ && !first_pos && + unichar_id == hyphen_unichar_id_); + } + /// Same as above, but check the unichar at the end of the word. inline bool has_hyphen_end(const WERD_CHOICE &word) { int word_index = word.length() - 1; - return (last_word_on_line_ && word_index > 0 && - word.unichar_id(word_index) == hyphen_unichar_id_); + return has_hyphen_end(word.unichar_id(word_index), word_index == 0); } /// Unless the previous word was the last one on the line, and the current /// one is not (thus it is the first one on the line), erase hyphen_word_, @@ -128,106 +160,116 @@ class Dict { const DawgInfoVector &constraints); /* permdawg.cpp ************************************************************/ - /// If new_rating < best_choice->rating(), copy word int best_choice - /// and update rating and permuter of best_choice to the new given values. 
- inline void update_best_choice( - const WERD_CHOICE &word, WERD_CHOICE *best_choice) { - if (word.rating() < best_choice->rating()) { - *best_choice = word; - } + /// Copies word into best_choice if its rating is smaller + /// than that of best_choice. + inline void update_best_choice(const WERD_CHOICE &word, + WERD_CHOICE *best_choice) { + if (word.rating() < best_choice->rating()) *best_choice = word; } /// Fill the given active_dawgs vector with dawgs that could contain the /// beginning of the word. If hyphenated() returns true, copy the entries /// from hyphen_active_dawgs_ instead. - void init_active_dawgs(DawgInfoVector *active_dawgs); + void init_active_dawgs(int sought_word_length, + DawgInfoVector *active_dawgs, + bool ambigs_mode); /// If hyphenated() returns true, copy the entries from hyphen_constraints_ /// into the given constraints vector. void init_constraints(DawgInfoVector *constraints); + /// Returns true if we are operating in ambigs mode. + inline bool ambigs_mode(float rating_limit) { return rating_limit <= 0.0; } /// Recursively explore all the possible character combinations in /// the given char_choices. Use go_deeper_dawg_fxn() to explore all the /// dawgs in the dawgs_ vector in parallel and discard invalid words. /// /// Allocate and return a WERD_CHOICE with the best valid word found. WERD_CHOICE *dawg_permute_and_select( - const BLOB_CHOICE_LIST_VECTOR &char_choices, float rating_limit); - void adjust_word(WERD_CHOICE *best_choice, - float *certainty_array); + const BLOB_CHOICE_LIST_VECTOR &char_choices, float rating_limit, + int sought_word_length, int end_char_choice_index); + WERD_CHOICE *dawg_permute_and_select( + const BLOB_CHOICE_LIST_VECTOR &char_choices, float rating_limit) { + return dawg_permute_and_select(char_choices, rating_limit, + kAnyWordLength, 0); + } /// If the choice being composed so far could be a dictionary word /// and we have not reached the end of the word keep exploring the /// char_choices further. 
- /// /// Also: - /// - set hyphen word if needed - /// - if word_ending is true and word is better than best_choice - /// copy word to best_choice log new word choice + /// -- sets hyphen word if needed + /// -- if word_ending is true and the word is better than best_choice, + /// copies word to best_choice and logs new word choice void go_deeper_dawg_fxn( const char *debug, const BLOB_CHOICE_LIST_VECTOR &char_choices, int char_choice_index, const CHAR_FRAGMENT_INFO *prev_char_frag_info, bool word_ending, WERD_CHOICE *word, float certainties[], - float *limit, WERD_CHOICE *best_choice, void *void_more_args); + float *limit, WERD_CHOICE *best_choice, int *attempts_left, + void *void_more_args); /* permute.cpp *************************************************************/ - void add_document_word(const WERD_CHOICE &best_choice); - void init_permute(); + WERD_CHOICE *get_top_choice_word( + const BLOB_CHOICE_LIST_VECTOR &char_choices); WERD_CHOICE *permute_top_choice( const BLOB_CHOICE_LIST_VECTOR &char_choices, float* rating_limit, WERD_CHOICE *raw_choice, BOOL8 *any_alpha); - const char* choose_il1(const char *first_char, //< first choice - const char *second_char, //< second choice - const char *third_char, //< third choice - const char *prev_char, //< prev in word - const char *next_char, //< next in word - const char *next_next_char); //< after next next in word - /// @return NO_PERM for words with digits - int valid_word(const WERD_CHOICE &word) { - return valid_word(word, false); - } - /// @return NUMBER_PERM for valid numbers - int valid_word_or_number(const WERD_CHOICE &word) { - return valid_word(word, true); - } - int valid_word(const WERD_CHOICE &word, bool numbers_ok); - bool valid_punctuation(const WERD_CHOICE &word); + const char* choose_il1(const char *first_char, //first choice + const char *second_char, //second choice + const char *third_char, //third choice + const char *prev_char, //prev in word + const char *next_char, //next in word + const char 
*next_next_char); //after next next in word WERD_CHOICE *permute_all(const BLOB_CHOICE_LIST_VECTOR &char_choices, - float rating_limit, + const WERD_CHOICE *best_choice, WERD_CHOICE *raw_choice); void end_permute(); - void adjust_non_word(WERD_CHOICE *word, float *adjust_factor); void permute_subword(const BLOB_CHOICE_LIST_VECTOR &char_choices, float rating_limit, int start, int end, WERD_CHOICE *current_word); - void permute_characters(const BLOB_CHOICE_LIST_VECTOR &char_choices, - float limit, + bool permute_characters(const BLOB_CHOICE_LIST_VECTOR &char_choices, WERD_CHOICE *best_choice, WERD_CHOICE *raw_choice); WERD_CHOICE *permute_compound_words( const BLOB_CHOICE_LIST_VECTOR &char_choices, float rating_limit); - /// checks if the dominant word script, if there is one, is same as target. - bool word_script_eq(const BLOB_CHOICE_LIST_VECTOR &char_choices, - int target_script_id); + /// Find permutations matching a list of fixed-char-length dawgs + /// The bestchoice based on this permuter alone is returned. Alternatively, + /// non-conflicting changes can be combined through permuter_state. + WERD_CHOICE *permute_fixed_length_words( + const BLOB_CHOICE_LIST_VECTOR &char_choices, + PermuterState *permuter_state); /// Incoporate segmentation cost into word rating void incorporate_segcost(WERD_CHOICE* word); - /// checks for script-consistent permutations + /// Checks for script-consistent permutations. Similar to fixed-length + /// permuter, the best choice is returned by the function, but the combined + /// changes are also recorded into permuter_state. WERD_CHOICE *permute_script_words( - const BLOB_CHOICE_LIST_VECTOR &char_choices); + const BLOB_CHOICE_LIST_VECTOR &char_choices, + PermuterState *permuter_state); + /// checks for consistency in character property (eg. 
alpha, digit, punct
void (Dict::*go_deeper_fxn_)(const char *debug, @@ -264,73 +308,163 @@ class Dict { const CHAR_FRAGMENT_INFO *prev_char_frag_info, bool word_ending, WERD_CHOICE *word, float certainties[], float *limit, - WERD_CHOICE *best_choice, void *void_more_args); + WERD_CHOICE *best_choice, int *attempts_left, + void *void_more_args); /* stopper.cpp *************************************************************/ - int NoDangerousAmbig(WERD_CHOICE *BestChoice, - DANGERR *fixpt, - bool fix_replaceable, - BLOB_CHOICE_LIST_VECTOR *Choices, - bool *modified_blobs); + bool NoDangerousAmbig(WERD_CHOICE *BestChoice, + DANGERR *fixpt, + bool fix_replaceable, + BLOB_CHOICE_LIST_VECTOR *Choices, + bool *modified_blobs); + /// Replaces the corresponding wrong ngram in werd_choice with the correct + /// one. We indicate that this newly inserted ngram unichar is composed from + /// several fragments and modify the corresponding entries in blob_choices to + /// contain fragments of the correct ngram unichar instead of the original + /// unichars. Ratings and certainties of entries in blob_choices and + /// werd_choice are unichaged. E.g. for werd_choice mystring'' and ambiguity + /// ''->": werd_choice becomes mystring", first ' in blob_choices becomes + /// |"|0|2, second one is set to |"|1|2. void ReplaceAmbig(int wrong_ngram_begin_index, int wrong_ngram_size, UNICHAR_ID correct_ngram_id, WERD_CHOICE *werd_choice, BLOB_CHOICE_LIST_VECTOR *blob_choices, bool *modified_blobs); - inline void DisableChoiceAccum() { keep_word_choices_ = FALSE; } - inline void EnableChoiceAccum() { keep_word_choices_ = TRUE; } + inline void DisableChoiceAccum() { keep_word_choices_ = false; } + inline void EnableChoiceAccum() { keep_word_choices_ = true; } + inline bool ChoiceAccumEnabled() { return keep_word_choices_; } + /// Returns the length of the shortest alpha run in WordChoice. 
int LengthOfShortestAlphaRun(const WERD_CHOICE &WordChoice); + /// Allocates a new viable choice data structure, copies WordChoice, + /// Certainties, and current_segmentation_ into it, returns a pointer to + /// the newly created VIABLE_CHOICE. + /// WordChoice is a choice to be converted to a viable choice. + /// AdjustFactor is a factor used to adjust ratings for WordChoice. + /// Certainties contain certainty for each character in WordChoice. VIABLE_CHOICE NewViableChoice(const WERD_CHOICE &WordChoice, FLOAT32 AdjustFactor, const float Certainties[]); + /// Dumps a text representation of the specified Choice to File. void PrintViableChoice(FILE *File, const char *Label, VIABLE_CHOICE Choice); - int StringSameAs(const char *String, - const char *String_lengths, - VIABLE_CHOICE ViableChoice); + /// Compares unichar ids in word_choice to those in viable_choice, + /// returns true if they are the same. bool StringSameAs(const WERD_CHOICE &WordChoice, VIABLE_CHOICE ViableChoice); - int AcceptableChoice(BLOB_CHOICE_LIST_VECTOR *Choices, - WERD_CHOICE *BestChoice, - const WERD_CHOICE &RawChoice, - DANGERR *fixpt, - ACCEPTABLE_CHOICE_CALLER caller, - bool *modified_blobs); - int AcceptableResult(const WERD_CHOICE &BestChoice, - const WERD_CHOICE &RawChoice); + /// Compares String to ViableChoice and returns true if they are the same. + bool StringSameAs(const char *String, + const char *String_lengths, + VIABLE_CHOICE ViableChoice); + /// Returns true if the certainty of the BestChoice word is within a + /// reasonable range of the average certainties for the best choices for + /// each character in the segmentation. This test is used to catch words + /// in which one character is much worse than the other characters in the + /// word (i.e. false will be returned in that case). The algorithm computes + /// the mean and std deviation of the certainties in the word with the worst + /// certainty thrown out. 
+ int UniformCertainties(const BLOB_CHOICE_LIST_VECTOR &Choices, + const WERD_CHOICE &BestChoice); + /// Returns true if the given best_choice is good enough to stop. + bool AcceptableChoice(BLOB_CHOICE_LIST_VECTOR *Choices, + WERD_CHOICE *BestChoice, + DANGERR *fixpt, + ACCEPTABLE_CHOICE_CALLER caller, + bool *modified_blobs); + /// Returns false if the best choice for the current word is questionable + /// and should be tried again on the second pass or should be flagged to + /// the user. + bool AcceptableResult(const WERD_CHOICE &BestChoice); + /// Compares the corresponding strings of WordChoice and ViableChoice and + /// returns true if they are the same. int ChoiceSameAs(const WERD_CHOICE &WordChoice, VIABLE_CHOICE ViableChoice); - void LogNewChoice(const WERD_CHOICE &WordChoice, FLOAT32 AdjustFactor, - const float Certainties[], bool raw_choice); + /// Adds Choice to ChoicesList if the adjusted certainty for Choice is within + /// a reasonable range of the best choice in ChoicesList. The ChoicesList list + /// is kept in sorted order by rating. Duplicates are removed. + /// WordChoice is the new choice for current word. + /// AdjustFactor is an adjustment factor which was applied to choice. + /// Certainties are certainties for each char in new choice. + /// raw_choice indicates whether WordChoice is a raw or best choice. + void LogNewChoice(FLOAT32 AdjustFactor, const float Certainties[], + bool raw_choice, WERD_CHOICE *WordChoice); void EndDangerousAmbigs(); - int CurrentBestChoiceIs(const WERD_CHOICE &WordChoice); + /// Returns true if WordChoice is the same as the current best choice. + bool CurrentBestChoiceIs(const WERD_CHOICE &WordChoice); + /// Returns the adjustment factor for the best choice for the current word. FLOAT32 CurrentBestChoiceAdjustFactor(); - int CurrentWordAmbig(); + /// Returns true if there are multiple good choices for the current word. + bool CurrentWordAmbig(); + /// Prints the current choices for this word to stdout. 
void DebugWordChoices(); + /// Print all the choices in raw_choices_ list for non 1-1 ambiguities. void PrintAmbigAlternatives(FILE *file, const char *label, int label_num_unichars); + /// Fill ViableChoice with information from WordChoice, AChoice, AdjustFactor, + /// and Certainties. void FillViableChoice(const WERD_CHOICE &WordChoice, FLOAT32 AdjustFactor, const float Certainties[], bool SameString, VIABLE_CHOICE ViableChoice); - int AlternativeChoicesWorseThan(FLOAT32 Threshold); + /// Returns true if there are no alternative choices for the current word + /// or if all alternatives have an adjust factor worse than Threshold. + bool AlternativeChoicesWorseThan(FLOAT32 Threshold); + /// Removes from best_choices_ all choices which are not within a reasonable + /// range of the best choice. void FilterWordChoices(); + /// Compares the best choice for the current word to the best raw choice + /// to determine which characters were classified incorrectly by the + /// classifier. Then places a separate threshold into Thresholds for each + /// character in the word. If the classifier was correct, MaxRating is placed + /// into Thresholds. If the classifier was incorrect, the avg. match rating + /// (error percentage) of the classifier's incorrect choice minus some margin + /// is placed into thresholds. This can then be used by the caller to try to + /// create a new template for the desired class that will classify the + /// character with a rating better than the threshold value. The match rating + /// placed into Thresholds is never allowed to be below MinRating in order to + /// prevent trying to make overly tight templates. + /// MinRating limits how tight to make a template. + /// MaxRating limits how loose to make a template. + /// RatingMargin denotes the amount of margin to put in template. 
void FindClassifierErrors(FLOAT32 MinRating, - FLOAT32 MaxRating, - FLOAT32 RatingMargin, - FLOAT32 Thresholds[]); + FLOAT32 MaxRating, + FLOAT32 RatingMargin, + FLOAT32 Thresholds[]); + /// Initializes the data structures used to keep track of the good word choices + /// found for a word. void InitChoiceAccum(); + /// Clears best_choices_ list accumulated by the stopper. + void ClearBestChoiceAccum(); + /// Updates the blob widths in current_segmentation_ to be the same as + /// provided in BlobWidth. BlobWidth[] contains the number of chunks in each + /// blob in the current segmentation. void LogNewSegmentation(PIECES_STATE BlobWidth); + /// Given Blob (the index of the blob that was split), adds 1 chunk to the + /// specified blob for each choice in best_choices_ and for best_raw_choice_. void LogNewSplit(int Blob); + /// Increments the chunk count of the character in Choice which corresponds + /// to Blob (index of the blob being split). + void AddNewChunk(VIABLE_CHOICE Choice, int Blob); + /// Sets up stopper variables in preparation for the first pass. void SettupStopperPass1(); + /// Sets up stopper variables in preparation for the second pass. void SettupStopperPass2(); - /* choices.cpp *************************************************************/ - void print_word_string(const char* str); - void print_word_choice(const char *label, A_CHOICE* choice); - void print_choices(const char *label, - CHOICES rating); // List of (A_CHOICE*). + /* context.cpp *************************************************************/ + /// Check a string to see if it matches a set of lexical rules. + int case_ok(const WERD_CHOICE &word, const UNICHARSET &unicharset); + /// Returns true if the word looks like absolute garbage + /// (e.g. image mistakenly recognized as text). 
+ bool absolute_garbage(const WERD_CHOICE &word, const UNICHARSET &unicharset); /* permngram.cpp ***********************************************************/ - A_CHOICE *ngram_permute_and_select(CHOICES_LIST char_choices, - float rating_limit, - const Dawg *dawg); - /* dawg.cpp ****************************************************************/ + WERD_CHOICE *ngram_permute_and_select( + const BLOB_CHOICE_LIST_VECTOR &char_choices, + float rating_limit, float adjust_factor); + float compute_ngram_cost(float certainty, float denom, + const char *str, const char *context, + const UNICHARSET &unicharset); + + /* dict.cpp ****************************************************************/ + + /// Initialize Dict class - load dawgs from [lang].traineddata and + /// user-specified wordlist and pattern list. + void Load(); + void End(); /** * Returns the maximal permuter code (from ccstruct/ratngs.h) if in light @@ -394,24 +528,77 @@ class Dict { * corresponding dawg index */ - int def_letter_is_okay(void* void_dawg_args, int word_index, - const void* word, bool word_end); + // + int def_letter_is_okay(void* void_dawg_args, + UNICHAR_ID unichar_id, bool word_end); + + int new_letter_is_okay(void* void_dawg_args, + UNICHAR_ID unichar_id, bool word_end); + int (Dict::*letter_is_okay_)(void* void_dawg_args, + UNICHAR_ID unichar_id, bool word_end); + /// Calls letter_is_okay_ member function. + int LetterIsOkay(void* void_dawg_args, + UNICHAR_ID unichar_id, bool word_end) { + return (this->*letter_is_okay_)(void_dawg_args, unichar_id, word_end); + } + - int new_letter_is_okay(void* void_dawg_args, int word_index, - const void* word, bool word_end); - int (Dict::*letter_is_okay_)(void* void_dawg_args, int word_index, - const void *word, bool word_end); - /** Return the number of dawgs in the dawgs_ vector. */ + /// Probability in context function used by the ngram permuter. 
+ double (Dict::*probability_in_context_)(const char* context, + int context_bytes, + const char* character, + int character_bytes); + /// Calls probability_in_context_ member function. + double ProbabilityInContext(const char* context, + int context_bytes, + const char* character, + int character_bytes) { + return (this->*probability_in_context_)(context, context_bytes, + character, character_bytes); + } + + /// Default (no-op) implementation of probability in context function. + double def_probability_in_context( + const char* context, int context_bytes, + const char* character, int character_bytes) { + (void) context; + (void) context_bytes; + (void) character; + (void) character_bytes; + return 0.0; + } + double ngram_probability_in_context(const char* context, + int context_bytes, + const char* character, + int character_bytes); + + /// Return the number of dawgs in the dawgs_ vector. inline const int NumDawgs() const { return dawgs_.size(); } - /** Return i-th dawg pointer recorded in the dawgs_ vector. */ + /// Return i-th dawg pointer recorded in the dawgs_ vector. inline const Dawg *GetDawg(int index) const { return dawgs_[index]; } - /** - * At word ending make sure all the recorded constraints are satisfied. - * Each constraint signifies that we found a beginning pattern in a - * pattern dawg. Check that this pattern can end here (e.g. if some - * leading punctuation is found this would ensure that we are not - * expecting any particular trailing punctuation after the word). - */ + /// Return the points to the punctuation dawg. + inline const Dawg *GetPuncDawg() const { return punc_dawg_; } + /// Return the pointer to the Dawg that contains words of length word_length. 
+ inline const Dawg *GetFixedLengthDawg(int word_length) const { + if (word_length > max_fixed_length_dawgs_wdlen_) return NULL; + assert(dawgs_.size() > word_length); + return dawgs_[word_length]; + } + inline const int GetMaxFixedLengthDawgIndex() const { + return max_fixed_length_dawgs_wdlen_; + } + /// Returns the appropriate next node given the EDGE_REF. + static inline NODE_REF GetStartingNode(const Dawg *dawg, EDGE_REF edge_ref) { + if (edge_ref == NO_EDGE) return 0; // beginning to explore the dawg + NODE_REF node = dawg->next_node(edge_ref); + if (node == 0) node = NO_EDGE; // end of word + return node; + } + /// At word ending make sure all the recorded constraints are satisfied. + /// Each constraint signifies that we found a beginning pattern in a + /// pattern dawg. Check that this pattern can end here (e.g. if some + /// leading punctuation is found this would ensure that we are not + /// expecting any particular trailing punctuation after the word). inline bool ConstraintsOk(const DawgInfoVector &constraints, int word_end, DawgType current_dawg_type) { if (!word_end) return true; @@ -429,21 +616,78 @@ class Dict { } return true; } - /// Record the maximum of the two permuters in permuter. - static inline void UpdatePermuter(PermuterType new_permuter, - PermuterType *permuter) { - if (dawg_debug_level >= 3) tprintf("Letter found\n"); - if (new_permuter > *permuter) *permuter = new_permuter; - } - /* conversion.cpp **********************************************************/ - // TODO(daria): remove these function when conversion.cpp is deprecated - // and all the code is converted to work with unichar ids. 
- void LogNewWordChoice(A_CHOICE *a_choice, - FLOAT32 adjust_factor, - const float certainties[], - const UNICHARSET &unicharset); - int valid_word(const char *string); + /// For each of the character classes of the given unichar_id (and the + /// unichar_id itself) finds the corresponding outgoing node or self-loop + /// in the given dawg and (after checking that it is valid) records it in + /// dawg_args->updated_active_dawgs. Updates current_permuter if any valid + /// edges were found. + void ProcessPatternEdges(const Dawg *dawg, const DawgInfo &info, + UNICHAR_ID unichar_id, bool word_end, + DawgArgs *dawg_args, PermuterType *current_permuter); + + /// Read/Write/Access special purpose dawgs which contain words + /// only of a certain length (used for phrase search for + /// non-space-delimited languages). + + /// Reads a sequence of dawgs from the given file. + /// Appends the constructed dawgs to the given dawg_vec. + /// Fills the given table with indices of the dawgs in the + /// dawg_vec corresponding to the dawgs with words + /// of a particular length. + static void ReadFixedLengthDawgs(DawgType type, const STRING &lang, + PermuterType perm, int debug_level, + FILE *file, DawgVector *dawg_vec, + int *max_wdlen); + /// Writes the dawgs in the dawgs_vec to a file. Updates the given table with + /// the indices of dawgs in the dawg_vec for the corresponding word lengths. + static void WriteFixedLengthDawgs( + const GenericVector &dawg_vec, + int num_dawgs, int debug_level, FILE *output_file); + + /// Check all the DAWGs to see if this word is in any of them. 
+ inline bool valid_word_permuter(uinT8 perm, bool numbers_ok) { + return (perm == SYSTEM_DAWG_PERM || perm == FREQ_DAWG_PERM || + perm == DOC_DAWG_PERM || perm == USER_DAWG_PERM || + perm == USER_PATTERN_PERM || (numbers_ok && perm == NUMBER_PERM)); + } + int valid_word(const WERD_CHOICE &word, bool numbers_ok); + int valid_word(const WERD_CHOICE &word) { + return valid_word(word, false); // return NO_PERM for words with digits + } + int valid_word_or_number(const WERD_CHOICE &word) { + return valid_word(word, true); // return NUMBER_PERM for valid numbers + } + /// This function is used by api/tesseract_cube_combiner.cpp + int valid_word(const char *string) { + WERD_CHOICE word(string, getUnicharset()); + return valid_word(word); + } + /// Returns true if the word contains a valid punctuation pattern. + /// Note: Since the domains of punctuation symbols and symbols + /// used in numbers are not disjoint, a valid number might contain + /// an invalid punctuation pattern (e.g. .99). + bool valid_punctuation(const WERD_CHOICE &word); + /// Returns true if a good answer is found for the unknown blob rating. + int good_choice(const WERD_CHOICE &choice); + /// Adds a word found on this document to the document specific dictionary. + void add_document_word(const WERD_CHOICE &best_choice); + int get_top_word_script(const BLOB_CHOICE_LIST_VECTOR &char_choices, + const UNICHARSET &unicharset); + /// Adjusts the rating of the given word. + void adjust_word(WERD_CHOICE *word, float *certainty_array, + const BLOB_CHOICE_LIST_VECTOR *char_choices, + bool nonword, float additional_adjust, bool debug); + void adjust_word(WERD_CHOICE *word, float *certainty_array, bool debug) { + adjust_word(word, certainty_array, NULL, false, 0.0f, debug); + } + void adjust_non_word(WERD_CHOICE *word, float *certainty_array, bool debug) { + adjust_word(word, certainty_array, NULL, true, 0.0f, debug); + } + /// Set wordseg_rating_adjust_factor_ to the given value. 
+ inline void SetWordsegRatingAdjustFactor(float f) { + wordseg_rating_adjust_factor_ = f; + } private: /** Private member variables. */ @@ -461,7 +705,7 @@ class Dict { * Flag used to disable accumulation of word choices * during compound word permutation. */ - BOOL8 keep_word_choices_; + bool keep_word_choices_; /** Additional certainty padding allowed before a word is rejected. */ FLOAT32 reject_offset_; /** Current word segmentation. */ @@ -479,13 +723,130 @@ class Dict { // Dawgs. DawgVector dawgs_; SuccessorListsVector successors_; - Dawg *freq_dawg_; Trie *pending_words_; /// The following pointers are only cached for convenience. /// The dawgs will be deleted when dawgs_ vector is destroyed. // TODO(daria): need to support multiple languages in the future, // so maybe will need to maintain a list of dawgs of each kind. + Dawg *freq_dawg_; + Dawg *punc_dawg_; Trie *document_words_; + /// Maximum word length of fixed-length word dawgs. + /// A value < 1 indicates that no fixed-length dawgs are loaded. + int max_fixed_length_dawgs_wdlen_; + /// Current segmentation cost adjust factor for word rating. + /// See comments in incorporate_segcost. + float wordseg_rating_adjust_factor_; + + public: + /// Variable members. + /// These have to be declared and initialized after image_ptr_, which contains + /// the pointer to the params vector - the member of its base CCUtil class. + STRING_VAR_H(user_words_suffix, "", "A list of user-provided words."); + STRING_VAR_H(user_patterns_suffix, "", + "A list of user-provided patterns."); + BOOL_VAR_H(load_system_dawg, true, "Load system word dawg."); + BOOL_VAR_H(load_freq_dawg, true, "Load frequent word dawg."); + BOOL_VAR_H(load_punc_dawg, true, + "Load dawg with punctuation patterns."); + BOOL_VAR_H(load_number_dawg, true, "Load dawg with number patterns."); + BOOL_VAR_H(load_fixed_length_dawgs, true, "Load fixed length" + " dawgs (e.g. 
for non-space delimited languages)"); + double_VAR_H(segment_penalty_dict_frequent_word, 1.0, + "Score multiplier for word matches which have good case and" + "are frequent in the given language (lower is better)."); + + double_VAR_H(segment_penalty_dict_case_ok, 1.1, + "Score multiplier for word matches that have good case " + "(lower is better)."); + + double_VAR_H(segment_penalty_dict_case_bad, 1.3125, + "Default score multiplier for word matches, which may have " + "case issues (lower is better)."); + + // TODO(daria): remove this param when ngram permuter is deprecated. + double_VAR_H(segment_penalty_ngram_best_choice, 1.24, + "Multipler to for the best choice from the ngram model."); + + double_VAR_H(segment_penalty_dict_nonword, 1.25, + "Score multiplier for glyph fragment segmentations which " + "do not match a dictionary word (lower is better)."); + + double_VAR_H(segment_penalty_garbage, 1.50, + "Score multiplier for poorly cased strings that are not in" + " the dictionary and generally look like garbage (lower is" + " better)."); + INT_VAR_H(dawg_debug_level, 0, "Set to 1 for general debug info" + ", to 2 for more details, to 3 to see all the debug messages"); + INT_VAR_H(hyphen_debug_level, 0, "Debug level for hyphenated words."); + INT_VAR_H(ngram_permuter_debug_level, 0, + "Debug level for the ngram permuter."); + double_VAR_H(ngram_permuter_nonmatch_score, -40.0, + "Average classifier score of a non-matching unichar."); + INT_VAR_H(max_viterbi_list_size, 10, "Maximum size of viterbi list."); + BOOL_VAR_H(use_only_first_uft8_step, false, + "Use only the first UTF8 step of the given string" + " when computing log probabilities."); + double_VAR_H(ngram_model_scale_factor, 1.0, "Relative strength of the" + " ngram model relative to the character classifier "); + double_VAR_H(certainty_scale, 20.0, "Certainty scaling factor"); + double_VAR_H(stopper_nondict_certainty_base, -2.50, + "Certainty threshold for non-dict words"); + 
double_VAR_H(stopper_phase2_certainty_rejection_offset, 1.0, + "Reject certainty offset"); + INT_VAR_H(stopper_smallword_size, 2, + "Size of dict word to be treated as non-dict word"); + double_VAR_H(stopper_certainty_per_char, -0.50, + "Certainty to add for each dict char above small word size."); + double_VAR_H(stopper_allowable_character_badness, 3.0, + "Max certaintly variation allowed in a word (in sigma)"); + INT_VAR_H(stopper_debug_level, 0, "Stopper debug level"); + BOOL_VAR_H(stopper_no_acceptable_choices, false, + "Make AcceptableChoice() always return false. Useful" + " when there is a need to explore all segmentations"); + BOOL_VAR_H(save_raw_choices, false, "Save all explored raw choices"); + INT_VAR_H(tessedit_truncate_wordchoice_log, 10, "Max words to keep in list"); + STRING_VAR_H(word_to_debug, "", "Word for which stopper debug information" + " should be printed to stdout"); + STRING_VAR_H(word_to_debug_lengths, "", + "Lengths of unichars in word_to_debug"); + INT_VAR_H(fragments_debug, 0, "Debug character fragments"); + INT_VAR_H(segment_debug, 0, "Debug the whole segmentation process"); + BOOL_VAR_H(permute_debug, 0, "Debug char permutation process"); + double_VAR_H(bestrate_pruning_factor, 2.0, "Multiplying factor of" + " current best rate to prune other hypotheses"); + BOOL_VAR_H(permute_script_word, 0, + "Turn on word script consistency permuter"); + BOOL_VAR_H(segment_segcost_rating, 0, + "incorporate segmentation cost in word rating?"); + double_VAR_H(segment_reward_script, 0.95, + "Score multipler for script consistency within a word. " + "Being a 'reward' factor, it should be <= 1. " + "Smaller value implies bigger reward."); + BOOL_VAR_H(permute_fixed_length_dawg, 0, + "Turn on fixed-length phrasebook search permuter"); + BOOL_VAR_H(permute_chartype_word, 0, + "Turn on character type (property) consistency permuter"); + double_VAR_H(segment_reward_chartype, 0.97, + "Score multipler for char type consistency within a word. 
"); + // TODO(daria): remove this param when ngram permuter is deprecated. + double_VAR_H(segment_reward_ngram_best_choice, 0.99, + "Score multipler for ngram permuter's best choice" + " (only used in the Han script path)."); + BOOL_VAR_H(save_doc_words, 0, "Save Document Words"); + BOOL_VAR_H(doc_dict_enable, 1, "Enable Document Dictionary "); + double_VAR_H(doc_dict_pending_threshold, 0.0, + "Worst certainty for using pending dictionary"); + double_VAR_H(doc_dict_certainty_threshold, -2.25, "Worst certainty" + " for words that can be inserted into the document dictionary"); + BOOL_VAR_H(ngram_permuter_activated, false, + "Activate character-level n-gram-based permuter"); + INT_VAR_H(max_permuter_attempts, 10000, "Maximum number of different" + " character choices to consider during permutation." + " This limit is especially useful when user patterns" + " are specified, since overly generic patterns can result in" + " dawg search exploring an overly large number of options."); + BOOL_VAR_H(permute_only_top, false, "Run only the top choice permuter"); }; } // namespace tesseract diff --git a/dict/hyphen.cpp b/dict/hyphen.cpp index 31ee8d88f7..de5050f739 100644 --- a/dict/hyphen.cpp +++ b/dict/hyphen.cpp @@ -24,8 +24,6 @@ #include "dict.h" -INT_VAR(hyphen_debug_level, 0, "Debug level for hyphenated words."); - namespace tesseract { // Unless the previous word was the last one on the line, and the current diff --git a/dict/matchdefs.h b/dict/matchdefs.h index 0f375f9cf4..c2b321fe51 100644 --- a/dict/matchdefs.h +++ b/dict/matchdefs.h @@ -21,7 +21,7 @@ /**---------------------------------------------------------------------------- Include Files and Type Defines ----------------------------------------------------------------------------**/ -#include "general.h" +#include "host.h" #include #include "unichar.h" diff --git a/dict/permdawg.cpp b/dict/permdawg.cpp index 81d3a8eed6..1852c2e0dd 100644 --- a/dict/permdawg.cpp +++ b/dict/permdawg.cpp @@ -26,41 +26,20 @@ I n c 
l u d e s ----------------------------------------------------------------------*/ -#include "context.h" -#include "conversion.h" #include "cutil.h" #include "dawg.h" #include "freelist.h" #include "globals.h" #include "ndminx.h" -#include "permdawg.h" #include "permute.h" #include "stopper.h" -#include "tordvars.h" #include "tprintf.h" -#include "varable.h" +#include "params.h" #include #include "dict.h" #include "image.h" -/*---------------------------------------------------------------------- - V a r i a b l e s -----------------------------------------------------------------------*/ -BOOL_VAR(segment_dawg_debug, 0, "Debug mode for word segmentation"); - -double_VAR(segment_penalty_dict_case_bad, OK_WERD, - "Default score multiplier for word matches, which may have " - "case issues (lower is better)."); - -double_VAR(segment_penalty_dict_case_ok, GOOD_WERD, - "Score multiplier for word matches that have good case " - "(lower is better)."); - -double_VAR(segment_penalty_dict_frequent_word, FREQ_WERD, - "Score multiplier for word matches which have good case and are " - "frequent in the given language (lower is better)."); - /*---------------------------------------------------------------------- F u n c t i o n s ----------------------------------------------------------------------*/ @@ -68,52 +47,6 @@ namespace tesseract { static const float kPermDawgRatingPad = 5.0; -/** - * @name adjust_word - * - * Assign an adjusted value to a string that is a word. The value - * that this word choice has is based on case and punctuation rules. 
- */ -void Dict::adjust_word(WERD_CHOICE *word, - float *certainty_array) { - float adjust_factor; - float new_rating = word->rating(); - - if (segment_dawg_debug) { - tprintf("Word: %s %4.2f ", - word->debug_string(getUnicharset()).string(), word->rating()); - } - - new_rating += RATING_PAD; - if (Context::case_ok(*word, getUnicharset())) { - if (freq_dawg_ != NULL && freq_dawg_->word_in_dawg(*word)) { - word->set_permuter(FREQ_DAWG_PERM); - new_rating *= segment_penalty_dict_frequent_word; - adjust_factor = segment_penalty_dict_frequent_word; - if (segment_dawg_debug) - tprintf(", F, %4.2f ", (double)segment_penalty_dict_frequent_word); - } else { - new_rating *= segment_penalty_dict_case_ok; - adjust_factor = segment_penalty_dict_case_ok; - if (segment_dawg_debug) - tprintf(", %4.2f ", (double)segment_penalty_dict_case_ok); - } - } else { - new_rating *= segment_penalty_dict_case_bad; - adjust_factor = segment_penalty_dict_case_bad; - if (segment_dawg_debug) { - tprintf(", C %4.2f ", (double)segment_penalty_dict_case_bad); - } - } - new_rating -= RATING_PAD; - word->set_rating(new_rating); - - LogNewChoice(*word, adjust_factor, certainty_array, false); - - if (segment_dawg_debug) - tprintf(" --> %4.2f\n", new_rating); -} - /** * @name go_deeper_dawg_fxn * @@ -133,15 +66,14 @@ void Dict::adjust_word(WERD_CHOICE *word, */ void Dict::go_deeper_dawg_fxn( const char *debug, const BLOB_CHOICE_LIST_VECTOR &char_choices, - int char_choice_index, - const CHAR_FRAGMENT_INFO *prev_char_frag_info, - bool word_ending, WERD_CHOICE *word, float certainties[], - float *limit, WERD_CHOICE *best_choice, void *void_more_args) { + int char_choice_index, const CHAR_FRAGMENT_INFO *prev_char_frag_info, + bool word_ending, WERD_CHOICE *word, float certainties[], float *limit, + WERD_CHOICE *best_choice, int *attempts_left, void *void_more_args) { DawgArgs *more_args = reinterpret_cast(void_more_args); + word_ending = (char_choice_index == more_args->end_char_choice_index); int 
word_index = word->length() - 1; - bool ambigs_mode = (*limit <= 0.0); - if (ambigs_mode) { + if (ambigs_mode(*limit)) { if (best_choice->rating() < *limit) return; } else { // Prune bad subwords @@ -151,7 +83,7 @@ void Dict::go_deeper_dawg_fxn( float permdawg_limit = more_args->rating_array[word_index] * more_args->rating_margin + kPermDawgRatingPad; if (permdawg_limit < word->rating()) { - if (segment_dawg_debug) { + if (permute_debug && dawg_debug_level) { tprintf("early pruned word rating=%4.2f," " permdawg_limit=%4.2f, word=%s\n", word->rating(), permdawg_limit, word->debug_string(getUnicharset()).string()); @@ -161,15 +93,27 @@ void Dict::go_deeper_dawg_fxn( } } // Deal with hyphens - if (word_ending && has_hyphen_end(*word) && !ambigs_mode) { - if (segment_dawg_debug) - tprintf("new hyphen choice = %s\n", - word->debug_string(getUnicharset()).string()); - word->set_permuter(more_args->permuter); - adjust_word(word, certainties); - set_hyphen_word(*word, *(more_args->active_dawgs), - *(more_args->constraints)); - update_best_choice(*word, best_choice); + if (word_ending && more_args->sought_word_length == kAnyWordLength && + has_hyphen_end(*word) && !ambigs_mode(*limit)) { + // Copy more_args->active_dawgs to clean_active_dawgs removing + // dawgs of type DAWG_TYPE_PATTERN. 
+ DawgInfoVector clean_active_dawgs; + const DawgInfoVector &active_dawgs = *(more_args->active_dawgs); + for (int i = 0; i < active_dawgs.size(); ++i) { + if (dawgs_[active_dawgs[i].dawg_index]->type() != DAWG_TYPE_PATTERN) { + clean_active_dawgs += active_dawgs[i]; + } + } + if (clean_active_dawgs.size() > 0) { + if (permute_debug && dawg_debug_level) + tprintf("new hyphen choice = %s\n", + word->debug_string(getUnicharset()).string()); + word->set_permuter(more_args->permuter); + adjust_word(word, certainties, permute_debug); + set_hyphen_word(*word, *(more_args->active_dawgs), + *(more_args->constraints)); + update_best_choice(*word, best_choice); + } } else { // Look up char in DAWG // TODO(daria): update the rest of the code that specifies alternative // letter_is_okay_ functions (e.g. TessCharNgram class) to work with @@ -180,7 +124,7 @@ void Dict::go_deeper_dawg_fxn( UNICHAR_ID orig_uch_id = word->unichar_id(word_index); bool checked_unigrams = false; if (getUnicharset().get_isngram(orig_uch_id)) { - if (segment_dawg_debug) { + if (permute_debug && dawg_debug_level) { tprintf("checking unigrams in an ngram %s\n", getUnicharset().debug_str(orig_uch_id).string()); } @@ -196,10 +140,13 @@ void Dict::go_deeper_dawg_fxn( DawgInfoVector unigram_constraints = *(more_args->constraints); DawgInfoVector unigram_updated_active_dawgs; DawgInfoVector unigram_updated_constraints; - DawgArgs unigram_dawg_args(&unigram_active_dawgs, &unigram_constraints, + DawgArgs unigram_dawg_args(&unigram_active_dawgs, + &unigram_constraints, &unigram_updated_active_dawgs, - &unigram_updated_constraints, 0.0); - unigram_dawg_args.permuter = more_args->permuter; + &unigram_updated_constraints, 0.0, + more_args->permuter, + more_args->sought_word_length, + more_args->end_char_choice_index); // Check unigrams in the ngram with letter_is_okay(). 
while (unigrams_ok && ngram_ptr < ngram_str_end) { int step = getUnicharset().step(ngram_ptr); @@ -209,13 +156,14 @@ void Dict::go_deeper_dawg_fxn( ++num_unigrams; word->append_unichar_id(uch_id, 1, 0.0, 0.0); unigrams_ok = unigrams_ok && (this->*letter_is_okay_)( - &unigram_dawg_args, word_index+num_unigrams-1, word, + &unigram_dawg_args, + word->unichar_id(word_index+num_unigrams-1), word_ending && (ngram_ptr == ngram_str_end)); (*unigram_dawg_args.active_dawgs) = *(unigram_dawg_args.updated_active_dawgs); (*unigram_dawg_args.constraints) = *(unigram_dawg_args.updated_constraints); - if (segment_dawg_debug) { + if (permute_debug && dawg_debug_level) { tprintf("unigram %s is %s\n", getUnicharset().debug_str(uch_id).string(), unigrams_ok ? "OK" : "not OK"); @@ -235,26 +183,27 @@ void Dict::go_deeper_dawg_fxn( } } - // Check which dawgs from dawgs_ vector contain the word + // Check which dawgs from the dawgs_ vector contain the word // up to and including the current unichar. - if (checked_unigrams || - (this->*letter_is_okay_)(more_args, word_index, word, word_ending)) { + if (checked_unigrams || (this->*letter_is_okay_)( + more_args, word->unichar_id(word_index), word_ending)) { // Add a new word choice if (word_ending) { - if (segment_dawg_debug) { + if (permute_debug && dawg_debug_level) { tprintf("found word = %s\n", word->debug_string(getUnicharset()).string()); } WERD_CHOICE *adjusted_word = word; WERD_CHOICE hyphen_tail_word; - if (!ambigs_mode && hyphen_base_size() > 0) { + if (hyphen_base_size() > 0) { hyphen_tail_word = *word; remove_hyphen_head(&hyphen_tail_word); adjusted_word = &hyphen_tail_word; } adjusted_word->set_permuter(more_args->permuter); - if (!ambigs_mode) { - adjust_word(adjusted_word, &certainties[hyphen_base_size()]); + if (!ambigs_mode(*limit)) { + adjust_word(adjusted_word, &certainties[hyphen_base_size()], + permute_debug); } update_best_choice(*adjusted_word, best_choice); } else { // search the next letter @@ -267,7 +216,7 @@ void 
Dict::go_deeper_dawg_fxn( ++(more_args->constraints); permute_choices(debug, char_choices, char_choice_index + 1, prev_char_frag_info, word, certainties, limit, - best_choice, more_args); + best_choice, attempts_left, more_args); // Restore previous state to explore another letter in this position. --(more_args->updated_active_dawgs); --(more_args->updated_constraints); @@ -275,7 +224,7 @@ void Dict::go_deeper_dawg_fxn( --(more_args->constraints); } } else { - if (segment_dawg_debug) { + if (permute_debug && dawg_debug_level) { tprintf("last unichar not OK at index %d in %s\n", word_index, word->debug_string(getUnicharset()).string()); } @@ -290,22 +239,39 @@ void Dict::go_deeper_dawg_fxn( * the given char_choices. Use go_deeper_dawg_fxn() to search all the * dawgs in the dawgs_ vector in parallel and discard invalid words. * + * If sought_word_length is not kAnyWordLength, the function only searches + * for a valid word formed by the given char_choices in one fixed length + * dawg (that contains words of length sought_word_length) starting at the + * start_char_choice_index. + * * Allocate and return a WERD_CHOICE with the best valid word found. */ WERD_CHOICE *Dict::dawg_permute_and_select( - const BLOB_CHOICE_LIST_VECTOR &char_choices, float rating_limit) { + const BLOB_CHOICE_LIST_VECTOR &char_choices, float rating_limit, + int sought_word_length, int start_char_choice_index) { WERD_CHOICE *best_choice = new WERD_CHOICE(); best_choice->make_bad(); best_choice->set_rating(rating_limit); if (char_choices.length() == 0) return best_choice; DawgInfoVector *active_dawgs = new DawgInfoVector[char_choices.length() + 1]; DawgInfoVector *constraints = new DawgInfoVector[char_choices.length() + 1]; - init_active_dawgs(&(active_dawgs[0])); + init_active_dawgs(sought_word_length, &(active_dawgs[0]), + ambigs_mode(rating_limit)); init_constraints(&(constraints[0])); + int end_char_choice_index = (sought_word_length == kAnyWordLength) ? 
+ char_choices.length()-1 : start_char_choice_index+sought_word_length-1; + // Need to skip accumulating word choices if we are only searching a part of + // the word (e.g. for the phrase search in non-space delimited languages). + // Also need to skip accumulating choices if char_choices are expanded + // with ambiguities. + bool re_enable_choice_accum = ChoiceAccumEnabled(); + if (sought_word_length != kAnyWordLength || + ambigs_mode(rating_limit)) DisableChoiceAccum(); DawgArgs dawg_args(&(active_dawgs[0]), &(constraints[0]), &(active_dawgs[1]), &(constraints[1]), (segment_penalty_dict_case_bad / - segment_penalty_dict_case_ok)); + segment_penalty_dict_case_ok), + NO_PERM, sought_word_length, end_char_choice_index); WERD_CHOICE word(MAX_WERD_LENGTH); copy_hyphen_info(&word); // Discard rating and certainty of the hyphen base (if any). @@ -318,57 +284,16 @@ WERD_CHOICE *Dict::dawg_permute_and_select( } float certainties[MAX_WERD_LENGTH]; this->go_deeper_fxn_ = &tesseract::Dict::go_deeper_dawg_fxn; - permute_choices(segment_dawg_debug ? "segment_dawg_debug" : NULL, - char_choices, 0, NULL, &word, certainties, - &rating_limit, best_choice, &dawg_args); + int attempts_left = max_permuter_attempts; + permute_choices((permute_debug && dawg_debug_level) ? + "permute_dawg_debug" : NULL, + char_choices, start_char_choice_index, NULL, &word, + certainties, &rating_limit, best_choice, &attempts_left, + &dawg_args); delete[] active_dawgs; delete[] constraints; + if (re_enable_choice_accum) EnableChoiceAccum(); return best_choice; } -/** - * Fill the given active_dawgs vector with dawgs that could contain the - * beginning of the word. If hyphenated() returns true, copy the entries - * from hyphen_active_dawgs_ instead. 
- */ -void Dict::init_active_dawgs(DawgInfoVector *active_dawgs) { - int i; - if (hyphenated()) { - *active_dawgs = hyphen_active_dawgs_; - if (dawg_debug_level >= 3) { - for (i = 0; i < hyphen_active_dawgs_.size(); ++i) { - tprintf("Adding hyphen beginning dawg [%d, " REFFORMAT "]\n", - hyphen_active_dawgs_[i].dawg_index, - hyphen_active_dawgs_[i].ref); - } - } - } else { - for (i = 0; i < dawgs_.length(); ++i) { - if (kBeginningDawgsType[(dawgs_[i])->type()]) { - *active_dawgs += DawgInfo(i, NO_EDGE); - if (dawg_debug_level >= 3) { - tprintf("Adding beginning dawg [%d, " REFFORMAT "]\n", i, NO_EDGE); - } - } - } - } -} - -/** - * If hyphenated() returns true, copy the entries from hyphen_constraints_ - * into the given constraints vector. - */ -void Dict::init_constraints(DawgInfoVector *constraints) { - if (hyphenated()) { - *constraints = hyphen_constraints_; - if (dawg_debug_level >= 3) { - for (int i = 0; i < hyphen_constraints_.size(); ++i) { - tprintf("Adding hyphen constraint [%d, " REFFORMAT "]\n", - hyphen_constraints_[i].dawg_index, - hyphen_constraints_[i].ref); - } - } - } -} - } // namespace tesseract diff --git a/dict/permdawg.h b/dict/permdawg.h deleted file mode 100644 index c770fefced..0000000000 --- a/dict/permdawg.h +++ /dev/null @@ -1,63 +0,0 @@ -/* -*-C-*- - ******************************************************************************** - * - * File: permdawg.h (Formerly permdawg.h) - * Description: - * Author: Mark Seaman, SW Productivity - * Created: Fri Oct 16 14:37:00 1987 - * Modified: Mon May 20 16:45:29 1991 (Mark Seaman) marks@hpgrlt - * Language: C - * Package: N/A - * Status: Reusable Software Component - * - * (c) Copyright 1987, Hewlett-Packard Company. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. 
- ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. - * - *********************************************************************************/ -#ifndef PERMDAWG_H -#define PERMDAWG_H - -/*---------------------------------------------------------------------- - I n c l u d e s -----------------------------------------------------------------------*/ -#include "dawg.h" -#include "choices.h" -#include "choicearr.h" -#include "varable.h" -#include "permute.h" - -/*--------------------------------------------------------------------- - V a r i a b l e s -----------------------------------------------------------------------*/ -extern BOOL_VAR_H(segment_dawg_debug, 0, "Debug mode for word segmentation"); - -extern double_VAR_H(segment_penalty_dict_case_bad, 1.3125, - "Default score multiplier for word matches, which may have case or " - "punctuation issues (lower is better)."); - -extern double_VAR_H(segment_penalty_dict_case_ok, 1.1, - "Score multiplier for word matches that have good case " - "(lower is better)."); - -extern double_VAR_H(segment_penalty_dict_frequent_word, 1.0, - "Score multiplier for word matches which have good case and are " - "frequent in the given language (lower is better)."); - - -/*---------------------------------------------------------------------- - M a c r o s -----------------------------------------------------------------------*/ - -/*---------------------------------------------------------------------- - Public Function Prototypes -----------------------------------------------------------------------*/ - -#endif diff --git a/dict/permngram.cpp b/dict/permngram.cpp index 
f7ef248313..20a0ff3350 100644 --- a/dict/permngram.cpp +++ b/dict/permngram.cpp @@ -1,7 +1,7 @@ /////////////////////////////////////////////////////////////////////// // File: permngram.cpp // Description: Character n-gram permuter -// Author: Thomas Kielbus +// Author: Thomas Kielbus, modified by Daria Antonova // Created: Wed Sep 12 11:26:43 PDT 2007 // // (C) Copyright 2007, Google Inc. @@ -17,357 +17,307 @@ // /////////////////////////////////////////////////////////////////////// -#include "const.h" #include "permngram.h" -#include "permute.h" -#include "dawg.h" -#include "tordvars.h" -#include "stopper.h" -#include "globals.h" -#include "context.h" -#include "ndminx.h" + #include "dict.h" -#include "conversion.h" +#include "elst.h" +#include "genericvector.h" +#include "host.h" +#include "strngs.h" +#include "unichar.h" +#include "params.h" #include #include -// Ratio to control the relative importance of the classifier and the ngram -// in the final score of a classification unit. Must be >= 0 and <= 1. -// A value of 1.0 uses only the shape classifier score. -// A value of 0.0 uses only the ngram score. -double_VAR(classifier_score_ngram_score_ratio, - 0.7, - ""); - -// Rating adjustment multiplier for words not in the DAWG. Must be >= 1. -double_VAR(non_dawg_prefix_rating_adjustment, - 1.5, - ""); - -// HypothesisPrefix represents a word prefix during the search of the -// character-level n-gram model based permuter. -// It holds the data needed to create the corresponding A_CHOICE. -// Note that the string stored in the _word data member always begin with a -// space character. This is used by the n-gram model to score the word. -// HypothesisPrefix also contains the node in the DAWG that is reached when -// searching for the corresponding prefix. 
-class HypothesisPrefix { - public: - HypothesisPrefix(); - HypothesisPrefix(const HypothesisPrefix& prefix, - A_CHOICE* choice, - bool end_of_word, - const tesseract::Dawg *dawg, - tesseract::Dict* dict); - - double rating() const {return rating_;} - double certainty() const {return certainty_;} - const char* word() const {return word_;} - const char* unichar_lengths() const {return unichar_lengths_;} - const float* certainty_array() const {return certainty_array_;} - bool is_dawg_prefix() const {return is_dawg_prefix_;} - NODE_REF dawg_node() const {return dawg_node_;} - - private: - double rating_; - double certainty_; - char word_[UNICHAR_LEN * MAX_WERD_LENGTH + 2]; - char unichar_lengths_[MAX_WERD_LENGTH + 1]; - float certainty_array_[MAX_WERD_LENGTH + 1]; - NODE_REF dawg_node_; - bool is_dawg_prefix_; -}; - -// HypothesisPrefix is the class used as nodes in HypothesisPrefixLists -typedef HypothesisPrefix HypothesisPrefixListNode; - -// HypothesisPrefixList maintains a sorted list of HypothesisPrefixes. The size -// is bounded by the argument given to the constructor. -// For the sake of simplicity, current implementation is not as efficient as it -// could be. The list is represented by a static array of pointers to its -// elements. All nodes are stored in positions from 0 to (size() - 1). -class HypothesisPrefixList { - public: - HypothesisPrefixList(int size_bound); - ~HypothesisPrefixList(); - - void add_node(HypothesisPrefix* node); - int size() const {return _size;} - void clear(); - const HypothesisPrefix& node(int index) {return *_list_nodes[index];} +ELISTIZE(ViterbiEntry) - private: - HypothesisPrefix** _list_nodes; - int _size_bound; - int _size; -}; +// Permutes the given char_choices using a character level n-gram model and +// returns the best word choice found. +// This is performed with the help of Viterbi algorithm for finding the most +// likely string of all the possible ones represented by char_choices. 
+// The transition probabilities are given by the ngram model and are obtained +// by calling ProbabilityInContext() function. The emission probabilities are +// given by the normalized certainties supplied by tesseract character +// classifier. They are not true probabilities, but more or less depict +// the probability that the character choice is correct. +// All the computation is performed in log-space to avoid underflow. -// Return the classifier_score_ngram_score_ratio for a given choice string. -// The classification decision for characters like comma and period should -// be based only on shape rather than on shape and n-gram score. -// Return 1.0 for them, the default classifier_score_ngram_score_ratio -// otherwise. -static double get_classifier_score_ngram_score_ratio(const char* choice); - -// Permute the given char_choices using a character level n-gram model and -// return the best word choice found. -// This is performed by maintaining a HypothesisPrefixList of HypothesisPrefixes. -// For each character position, each possible character choice is appended to -// the best current prefixes to create the list of best prefixes at the next -// character position. 
namespace tesseract { -A_CHOICE *Dict::ngram_permute_and_select(CHOICES_LIST char_choices, - float rating_limit, - const Dawg *dawg) { - if (array_count (char_choices) <= MAX_WERD_LENGTH) { - CHOICES choices; - int char_index_max = array_count(char_choices); - HypothesisPrefixList list_1(20); - HypothesisPrefixList list_2(20); - HypothesisPrefixList* current_list = &list_1; - HypothesisPrefixList* next_list = &list_2; - HypothesisPrefix* initial_node = new HypothesisPrefix(); - current_list->add_node(initial_node); - for (int char_index = 0; char_index < char_index_max; ++char_index) { - iterate_list(choices, (CHOICES) array_index(char_choices, char_index)) { - A_CHOICE* choice = (A_CHOICE *) first_node(choices); - for (int node_index = 0; - node_index < current_list->size(); - ++node_index) { - // Append this choice to the current node - HypothesisPrefix* new_node = new HypothesisPrefix( - current_list->node(node_index), - choice, - char_index == char_index_max - 1, - dawg, this); - next_list->add_node(new_node); - } - } - // Clear current list and switch lists - current_list->clear(); - HypothesisPrefixList* temp_list = current_list; - current_list = next_list; - next_list = temp_list; - // Give up if the current best rating is worse than rating_limit - if (current_list->node(0).rating() > rating_limit) - return new_choice (NULL, NULL, MAXFLOAT, -MAXFLOAT, -1, NO_PERM); +static const float kSmallProbability = 0.0000001; + +// Computes -(log(prob(classifier)) + log(prob(ngram model))) +// for the given string (str_ptr) in the given context. +// If there are multiple unichars at one position - take the average of their +// probabilities. UNICHAR::utf8_step() is used to separate out individual +// UTF8 characters, since probability_in_context() can only handle one at +// a time (while unicharset might contain ngrams and glyphs composed from +// multiple UTF8 characters). 
+float Dict::compute_ngram_cost(float certainty, float denom, + const char *str, const char *context, + const UNICHARSET &unicharset) { + const char *context_ptr = context; + char *modified_context = NULL; + char *modified_context_end = NULL; + const char *str_ptr = str; + const char *str_end = str_ptr + strlen(str_ptr); + float prob = 0.0; + int num = 0; + int step = 0; + while (str_ptr < str_end && (step = UNICHAR::utf8_step(str_ptr))) { + if (ngram_permuter_debug_level > 1) { + tprintf("prob(%s | %s)=%g\n", str_ptr, context_ptr, + (this->*probability_in_context_)(context_ptr, -1, str_ptr, step)); + } + prob += (this->*probability_in_context_)(context_ptr, -1, str_ptr, step); + ++num; + if (use_only_first_uft8_step) break; + str_ptr += step; + // If there are multiple characters present in str, context is + // updated to include the previously examined characters from str, + // unless use_only_first_uft8_step is true. + if (str_ptr < str_end) { + if (modified_context == NULL) { + int context_len = strlen(context); + modified_context = new char[context_len + strlen(str_ptr) + step + 1]; + strcpy(modified_context, context); + modified_context_end = modified_context + context_len; + context_ptr = modified_context; + } + strncpy(modified_context_end, str_ptr - step, step); + modified_context_end += step; + *modified_context_end = '\0'; } - const HypothesisPrefix& best_word = current_list->node(0); - A_CHOICE* best_choice = new_choice (best_word.word() + 1, - best_word.unichar_lengths(), - best_word.rating(), - best_word.certainty(), -1, - valid_word(best_word.word() + 1) ? - SYSTEM_DAWG_PERM : TOP_CHOICE_PERM); - LogNewWordChoice(best_choice, best_word.is_dawg_prefix() ? 
- 1.0 : non_dawg_prefix_rating_adjustment, - const_cast(best_word.certainty_array()), - getUnicharset()); - return best_choice; - } else { - return new_choice (NULL, NULL, MAXFLOAT, -MAXFLOAT, -1, NO_PERM); } -} -} // namespace tesseract - -double get_classifier_score_ngram_score_ratio(const char* choice) { - if (!strcmp(",", choice) || - !strcmp(".", choice)) - return 1.0; - else - return classifier_score_ngram_score_ratio; -} - -// Initial HypothesisPrefix constructor used to create the first state of the -// search. -HypothesisPrefix::HypothesisPrefix() { - rating_ = 0; - certainty_ = MAXFLOAT; - strcpy(word_, " "); - unichar_lengths_[0] = '\0'; - dawg_node_ = 0; - is_dawg_prefix_ = true; + if (modified_context != NULL) delete[] modified_context; + if (prob < kSmallProbability) prob = kSmallProbability; + float cost = + -(log(-1.0 * certainty / denom) + + ngram_model_scale_factor * log(prob / static_cast(num))); + if (ngram_permuter_debug_level > 1) { + tprintf("-log p(%s | %s)=%g\n", str, context_ptr, cost); + } + return cost; } -// Main constructor to create a new HypothesisPrefix by appending a character -// choice (A_CHOICE) to an existing HypothesisPrefix. This constructor takes -// care of copying the original prefix's data members, appends the character -// choice to the word and updates its rating using a character-level n-gram -// model. The state in the DAWG is also updated. 
-HypothesisPrefix::HypothesisPrefix(const HypothesisPrefix& prefix, - A_CHOICE* choice, - bool end_of_word, - const tesseract::Dawg *dawg, - tesseract::Dict* dict) { - char* word_ptr = word_; - const char* prefix_word_ptr = prefix.word_; - - // Copy first space character - *(word_ptr++) = *(prefix_word_ptr++); - - // Copy existing word, unichar_lengths, certainty_array - int char_index; - for (char_index = 0; - prefix.unichar_lengths_[char_index] != '\0'; - ++char_index) { - for (int char_subindex = 0; - char_subindex < prefix.unichar_lengths_[char_index]; - ++char_subindex) { - *(word_ptr++) = *(prefix_word_ptr++); +WERD_CHOICE *Dict::ngram_permute_and_select( + const BLOB_CHOICE_LIST_VECTOR &char_choices, + float rating_limit, + float adjust_factor) { + if (char_choices.length() > MAX_WERD_LENGTH || + char_choices.length() <= 1) return NULL; + const UNICHARSET &unicharset = getUnicharset(); + if (ngram_permuter_debug_level > 1) { + tprintf("Starting ngram permuter\n"); + } + // Make sure there are no NULL choices and no empty choices lists. + // Compute the size of the largest choices list. + // Compute the normalization factors for the classifier confidences. + int i; + BLOB_CHOICE_IT bit; + int max_num_choices = 0; + float denoms[MAX_WERD_LENGTH]; + float denom; + for (i = 0; i < char_choices.length(); ++i) { + bit.set_to_list(char_choices[i]); + int len = 0; + denom = 0.0; + for (bit.mark_cycle_pt(); !bit.cycled_list(); bit.forward()) { + if (bit.data() == NULL) return NULL; + ++len; + denom += -1.0 / bit.data()->certainty(); } - unichar_lengths_[char_index] = prefix.unichar_lengths_[char_index]; - certainty_array_[char_index] = prefix.certainty_array_[char_index]; + if (len == 0) return NULL; + // The ideal situation would be to have the classifier scores for + // classifying each position as each of the characters in the unicharset. 
+ // Since we can not do this because of speed, we add a very crude estimate + // of what these scores for the "missing" classifications would sum up to. + denom += (unicharset.size() - len) * (-1.0 / ngram_permuter_nonmatch_score); + denoms[i] = denom; + if (max_num_choices < len) max_num_choices = len; } - - // If choice is empty, use a space character instead - const char* class_string_choice = *class_string(choice) == '\0' ? - " " : class_string(choice); - - // Update certainty - certainty_ = MIN(prefix.certainty_, class_certainty(choice)); - - // Apprend choice to the word - strcpy(word_ptr, class_string_choice); - unichar_lengths_[char_index] = strlen(class_string_choice); - unichar_lengths_[char_index + 1] = '\0'; - - // Append choice certainty to the certainty array - certainty_array_[char_index] = class_certainty(choice); - - // Copy DAWG node state - dawg_node_ = prefix.dawg_node_; - is_dawg_prefix_ = prefix.is_dawg_prefix_; - - // Verify DAWG and update dawg_node_ if the current prefix is already valid - if (is_dawg_prefix_) { - for (int char_subindex = 0; - class_string_choice[char_subindex] != '\0'; - ++char_subindex) { - - // TODO(daria): update this code (and the rest of ngram permuter code - // to deal with unichar ids, make use of the new parallel dawg search - // and use WERD_CHOICE, BLOB_CHOICE_LIST_VECTOR instead of the deprecated - // A_CHOICE. - tprintf("Error: ngram permuter functionality is not available\n"); - exit(1); - - // Verify each byte of the appended character. Note that word_ptr points - // to the first byte so (word_ptr - (word_ + 1)) is the index of the first - // new byte in the string that starts at (word_ + 1). 
- /* - int current_byte_index = word_ptr - (word_ + 1) + char_subindex; - if (!(dict->*dict->letter_is_okay_)( - dawg, &dawg_node_, current_byte_index, word_ + 1, - end_of_word && class_string_choice[char_subindex + 1] == '\0')) { - dawg_node_ = NO_EDGE; - is_dawg_prefix_ = false; - break; + // Compute the best string for each position of the char_chocies. + ViterbiEntry_LIST vlist1; + ViterbiEntry_LIST vlist2; + ViterbiEntry_LIST *tmp_vlist_ptr; + ViterbiEntry_LIST *curr_vlist_ptr = &vlist1; + ViterbiEntry_LIST *prev_vlist_ptr = &vlist2; + ViterbiEntry *ve = NULL; + for (i = 0; i < char_choices.length(); ++i) { + bit.set_to_list(char_choices[i]); + if (ngram_permuter_debug_level > 1) { + tprintf("Exploring char list of size %d at position %d\n", + char_choices[i]->length(), i); + } + denom = denoms[i]; + int curr_vlist_len = 0; + float max_cost = 0; + for (bit.mark_cycle_pt(); !bit.cycled_list(); bit.forward()) { + BLOB_CHOICE *b = bit.data(); + const CHAR_FRAGMENT *frag = unicharset.get_fragment(b->unichar_id()); + const char *curr_unichar = (frag != NULL) ? frag->get_unichar() : + unicharset.id_to_unichar(b->unichar_id()); + if (i == 0) { // initialize Viterbi entries for the 1st position + if (frag != NULL && !frag->is_beginning()) continue; + ve = new ViterbiEntry(); + ve->string_so_far[0] = '\0'; + ve->num_unichars = 0; + if (frag == NULL) ve->UpdateChars(curr_unichar, 1, b->unichar_id()); + ve->frag = frag; + // TODO(daria): check if we need to pass " " instead of "". + // TODO(daria): see if we can use the previous word. 
+ ve->cost = compute_ngram_cost(b->certainty(), denom, + curr_unichar, "", unicharset); + ve->ratings_sum = b->rating(); + curr_vlist_ptr->add_sorted(ViterbiEntry::compare, false, ve); + if (ngram_permuter_debug_level > 1) ve->Print(); + curr_vlist_len++; + } else { + ViterbiEntry_IT prev_vlist_it = prev_vlist_ptr; + for (prev_vlist_it.mark_cycle_pt(); !prev_vlist_it.cycled_list(); + prev_vlist_it.forward()) { + const ViterbiEntry &prev_ve = *(prev_vlist_it.data()); + // Drop this entry if it's ratings_sum * adjust_factor will be + // larger than rating_limit. + if ((prev_ve.ratings_sum + b->rating()) * adjust_factor > + rating_limit) { + if (ngram_permuter_debug_level > 1) { + tprintf("Discarded expensive ViterbiEntry with rating %g*%g" + ", last unichar %s, rating_limit=%g\n", + prev_ve.ratings_sum + b->rating(), + unicharset.id_to_unichar(b->unichar_id()), + adjust_factor, rating_limit); + } + continue; + } + + // If we are dealing with fragments make sure that the previous + // fragment matches the current one. + // For character composed from n fragments probability_in_context() + // is called n times total. + if (prev_ve.frag != NULL || frag != NULL) { + CHAR_FRAGMENT_INFO prev_char_frag_info; + prev_char_frag_info.fragment = prev_ve.frag; + CHAR_FRAGMENT_INFO char_frag_info; + if (!fragment_state_okay( + b->unichar_id(), 0.0, 0.0, &prev_char_frag_info, + (ngram_permuter_debug_level > 1) ? "ngram_debug" : NULL, + i+1 == char_choices.length(), &char_frag_info)) continue; + } + float cost = prev_ve.cost + + compute_ngram_cost(b->certainty(), denom, curr_unichar, + prev_ve.string_so_far, unicharset); + if (curr_vlist_len >= max_viterbi_list_size && cost >= max_cost) { + if (ngram_permuter_debug_level > 1) { + tprintf("Discarded ViterbiEntry with a high cost %g" + ", last unichar %s, max_cost=%g\n", + cost, unicharset.id_to_unichar(b->unichar_id()), + max_cost); + } + continue; + } + + // Add a new entry to the current ViterbiEntry_CLIST. 
+ ve = new ViterbiEntry(); + ve->CopyChars(prev_ve); + if (frag == NULL) { + ve->UpdateChars(curr_unichar, 1, b->unichar_id()); + } else if (frag->is_ending()) { + ve->UpdateChars("", frag->get_total(), + unicharset.unichar_to_id(frag->get_unichar())); + } + ve->frag = (frag != NULL && frag->is_ending()) ? NULL : frag; + ve->cost = cost; + ve->ratings_sum = prev_ve.ratings_sum + b->rating(); + curr_vlist_ptr->add_sorted(ViterbiEntry::compare, false, ve); + if (cost > max_cost) max_cost = cost; + if (ngram_permuter_debug_level > 1) ve->Print(); + curr_vlist_len++; + } } - */ } + if (curr_vlist_len == 0) { return NULL; } // no entries stored + // Leave only max_viterbi_list_size ViterbiEntries. + if (curr_vlist_len > max_viterbi_list_size) { + ViterbiEntry_IT curr_vlist_it = curr_vlist_ptr; + curr_vlist_len = 0; + for (curr_vlist_it.mark_cycle_pt(); !curr_vlist_it.cycled_list(); + curr_vlist_it.forward()) { + if (curr_vlist_len < max_viterbi_list_size) { + curr_vlist_len++; + } else { + delete curr_vlist_it.extract(); + } + } + } + // Clear prev_vlist, swap prev_vlist_ptr and curr_vlist_ptr + prev_vlist_ptr->clear(); + tmp_vlist_ptr = prev_vlist_ptr; + prev_vlist_ptr = curr_vlist_ptr; + curr_vlist_ptr = tmp_vlist_ptr; } - // Copy the prefix rating - rating_ = prefix.rating_; - - // Compute rating of current character - double probability = probability_in_context(prefix.word_, -1, - class_string_choice, -1); - - // If last character of the word, take the following space into account - if (end_of_word) - probability *= probability_in_context(word_, -1, " ", -1); - - double local_classifier_score_ngram_score_ratio = - get_classifier_score_ngram_score_ratio(class_string_choice); - - double classifier_rating = class_rating(choice); - double ngram_rating = -log(probability) / log(2.0); - double mixed_rating = - local_classifier_score_ngram_score_ratio * classifier_rating + - (1 - local_classifier_score_ngram_score_ratio) * ngram_rating; - - // If the current word is not a 
valid prefix, adjust the rating of the - // character being appended. If it used to be a valid prefix, compensate for - // previous adjustments. - if (!is_dawg_prefix_) { - if (prefix.is_dawg_prefix_) - rating_ *= non_dawg_prefix_rating_adjustment; - mixed_rating *= non_dawg_prefix_rating_adjustment; + float best_cost = MAX_FLOAT32; + ViterbiEntry *best_ve = NULL; + ViterbiEntry_IT prev_vlist_it = prev_vlist_ptr; + for (prev_vlist_it.mark_cycle_pt(); !prev_vlist_it.cycled_list(); + prev_vlist_it.forward()) { + ViterbiEntry *tmp_ve = prev_vlist_it.data(); + if (tmp_ve->cost < best_cost) { + best_ve = tmp_ve; + best_cost = tmp_ve->cost; + } } - // Update rating by adding the rating of the character being appended. - rating_ += mixed_rating; -} - -// Create an empty HypothesisPrefixList. Its maximum size is set to the given -// bound. -HypothesisPrefixList::HypothesisPrefixList(int size_bound): - _size_bound(size_bound), - _size(0) { - _list_nodes = new HypothesisPrefix*[_size_bound]; - for (int i = 0; i < _size_bound; ++i) - _list_nodes[i] = NULL; -} - -// Destroy a HypothesisPrefixList all contained nodes are deleted as well. -HypothesisPrefixList::~HypothesisPrefixList() { - this->clear(); - delete[] _list_nodes; -} - -// Add a node to the HypothesisPrefixList. Maintains the sorted list property. -// Note that the HypothesisPrefixList takes ownership of the given node and -// might delete it if needed. It must therefore have been allocated on the heap. -void HypothesisPrefixList::add_node(HypothesisPrefix* node) { - // Detect nodes that have a worst rating that the current maximum and treat - // them separately. - if (_size > 0 && _list_nodes[_size - 1]->rating() < node->rating()) { - if (_size == _size_bound) { - // The list is already full. This node will not be added - delete node; - } else { - // The list is not full. Add the node at the last position. 
- _list_nodes[_size] = node; - ++_size; + // Populate ngram_choice with the unichar ids from best_ve->unichar_ids. + // Get the certainties of the corresponding unichars from char_choices. + WERD_CHOICE *ngram_choice = + new WERD_CHOICE(best_ve->num_unichars); + float certainties[MAX_WERD_LENGTH]; + float min_certainty = 0.0; + int char_choices_index = 0; + const char *frag_uch = ""; + int j; + for (i = 0; i < best_ve->num_unichars; ++i) { + ngram_choice->append_unichar_id_space_allocated( + best_ve->unichar_ids[i], best_ve->fragment_lengths[i], 0.0, 0.0); + if (best_ve->fragment_lengths[i] > 1) { + frag_uch = unicharset.id_to_unichar(best_ve->fragment_lengths[i]); } - return; - } - // Find the correct position - int node_index_target = 0; - while (node_index_target < _size_bound && - _list_nodes[node_index_target] != NULL && - _list_nodes[node_index_target]->rating() < node->rating()) { - ++node_index_target; - } - if (node_index_target >= _size_bound) { - delete node; - } else { - // Move next states by 1. Starting from the last one. 
- int node_index_move = _size - 1; - while (node_index_move >= node_index_target) { - if (node_index_move == _size_bound - 1) - delete _list_nodes[node_index_move]; - else - _list_nodes[node_index_move + 1] = _list_nodes[node_index_move]; - _list_nodes[node_index_move] = NULL; - --node_index_move; + for (j = 0; j < best_ve->fragment_lengths[i]; ++j) { + UNICHAR_ID unichar_id_for_cert; + if (best_ve->fragment_lengths[i] > 1) { + unichar_id_for_cert = unicharset.unichar_to_id( + CHAR_FRAGMENT::to_string(frag_uch, j, + best_ve->fragment_lengths[i]).string()); + } else { + unichar_id_for_cert = best_ve->unichar_ids[i]; + } + bit.set_to_list(char_choices[char_choices_index]); + for (bit.mark_cycle_pt(); !bit.cycled_list(); bit.forward()) { + if (bit.data()->unichar_id() == unichar_id_for_cert) { + certainties[char_choices_index] = bit.data()->certainty(); + if (certainties[char_choices_index] < min_certainty) { + min_certainty = certainties[char_choices_index]; + } + break; + } + } + char_choices_index++; } - // Insert new node - _list_nodes[node_index_target] = node; - // Increment size if it has changed - if (_size < _size_bound) - ++_size; } -} - -// Delete all contained nodes and set the size of the HypothesisPrefixList to 0 -void HypothesisPrefixList::clear() { - for (int i = 0; i < _size_bound && _list_nodes[i] != NULL; ++i) { - delete _list_nodes[i]; - _list_nodes[i] = NULL; + ngram_choice->set_certainty(min_certainty); + ngram_choice->set_permuter(NGRAM_PERM); + ngram_choice->set_rating(best_ve->ratings_sum * adjust_factor); + LogNewChoice(adjust_factor, certainties, false, ngram_choice); + if (ngram_permuter_debug_level > 1) { + tprintf("Ngram model choice: %s\n", + ngram_choice->debug_string(getUnicharset()).string()); + ngram_choice->print(); } - _size = 0; + + return ngram_choice; } + +} // namespace tesseract diff --git a/dict/permngram.h b/dict/permngram.h index 8587e7b5fe..9b7c2a5f31 100644 --- a/dict/permngram.h +++ b/dict/permngram.h @@ -20,14 +20,63 @@ 
#ifndef PERMNGRAM_H #define PERMNGRAM_H -#include "choicearr.h" -#include "dawg.h" +#include "dict.h" +#include "clst.h" +#include "unichar.h" +#include "params.h" -// Permute the given char_choices using a character level n-gram model and -// return the best word choice found. The given dawg is used to determine -// which choices are contained in the dictionary. -/*A_CHOICE *ngram_permute_and_select(CHOICES_LIST char_choices, - float rating_limit, - const Dawg *dawg);*/ +class ViterbiEntry : public ELIST_LINK { + public: + ViterbiEntry() {} + ~ViterbiEntry() {} + // Comparator function for sorting ViterbiEntry_LISTs in + // non-increasing order of costs. + static int compare(const void *e1, const void *e2) { + const ViterbiEntry *ve1 = + *reinterpret_cast(e1); + const ViterbiEntry *ve2 = + *reinterpret_cast(e2); + if (ve1->cost == ve2->cost) return 0; + return (ve1->cost < ve2->cost) ? -1 : 1; + } + inline void CopyChars(const ViterbiEntry &src) { + strcpy(string_so_far, src.string_so_far); + num_unichars = src.num_unichars; + if (src.num_unichars > 0) { + memcpy(fragment_lengths, src.fragment_lengths, + src.num_unichars * sizeof(char)); + memcpy(unichar_ids, src.unichar_ids, + src.num_unichars * sizeof(UNICHAR_ID)); + } + } + inline void UpdateChars(const char *unichar, int unichar_length, + UNICHAR_ID unichar_id) { + char *string_so_far_end = string_so_far + strlen(string_so_far); + strcpy(string_so_far_end, unichar); + fragment_lengths[num_unichars] = unichar_length; + unichar_ids[num_unichars] = unichar_id; + num_unichars++; + } + + void Print() const { + tprintf("ViterbiEntry: string_so_far=%s cost=%g ratings_sum=%g" + " unichar_ids=[ ", string_so_far, cost, ratings_sum); + int i; + for (i = 0; i < num_unichars; ++i) tprintf("%d ", unichar_ids[i]); + tprintf("] unichar lengths=[ "); + for (i = 0; i < num_unichars; ++i) tprintf("%d ", fragment_lengths[i]); + tprintf("]\n"); + } + char string_so_far[MAX_WERD_LENGTH * UNICHAR_LEN + 1]; + int num_unichars; + char 
fragment_lengths[MAX_WERD_LENGTH]; + UNICHAR_ID unichar_ids[MAX_WERD_LENGTH]; + float cost; + float ratings_sum; + const CHAR_FRAGMENT *frag; +}; + +// Make ViterbiEntry listable. +ELISTIZEH(ViterbiEntry) #endif // PERMNGRAM_H diff --git a/dict/permute.cpp b/dict/permute.cpp index aeecc65fe0..486b45e0aa 100644 --- a/dict/permute.cpp +++ b/dict/permute.cpp @@ -42,91 +42,23 @@ #include "permute.h" #include "callcpp.h" -#include "context.h" -#include "conversion.h" +#include "ccutil.h" +#include "dict.h" #include "freelist.h" +#include "helpers.h" +#include "image.h" #include "globals.h" #include "ndminx.h" -#include "permdawg.h" #include "permngram.h" #include "ratngs.h" #include "stopper.h" -#include "tordvars.h" #include "tprintf.h" #include "trie.h" -#include "varable.h" +#include "params.h" #include "unicharset.h" -#include "dict.h" -#include "image.h" -#include "ccutil.h" - -int permutation_count; // Used in metrics.cpp. -/*---------------------------------------------------------------------- - V a r i a b l e s -----------------------------------------------------------------------*/ -// TODO(tkielbus) Choose a value for the MAX_NUM_EDGES constant -// (or make it dynamic) -#define MAX_NUM_EDGES 2000000 -#define MAX_DOC_EDGES 250000 -#define MAX_USER_EDGES 50000 - /* Weights for adjustment */ -#define NON_WERD 1.25 -#define GARBAGE_STRING 1.5 -#define MAX_PERM_LENGTH 128 - -// debugging flags -INT_VAR(fragments_debug, 0, "Debug character fragments"); - -BOOL_VAR(segment_debug, 0, "Debug the whole segmentation process"); - -BOOL_VAR(permute_debug, 0, "Debug char permutation process"); - - -// control parameters -double_VAR(bestrate_pruning_factor, 2.0, - "Multiplying factor of current best rate to prune other hypotheses"); - -BOOL_VAR(permute_script_word, 0, - "Turn on word script consistency permuter"); - -BOOL_VAR(segment_segcost_rating, 0, - "incorporate segmentation cost in word rating?"); - -double_VAR(segment_reward_script, 0.95, - "Score multipler for 
script consistency within a word. " - "Being a 'reward' factor, it should be <= 1. " - "Smaller value implies bigger reward."); - -double_VAR(segment_penalty_dict_nonword, NON_WERD, - "Score multiplier for glyph fragment segmentations which do not " - "match a dictionary word (lower is better)."); -double_VAR(segment_penalty_garbage, GARBAGE_STRING, - "Score multiplier for poorly cased strings that are not in the " - "dictionary and generally look like garbage (lower is better)."); -BOOL_VAR(save_doc_words, 0, "Save Document Words"); - -BOOL_VAR(doc_dict_enable, 1, "Enable Document Dictionary "); - -BOOL_VAR(ngram_permuter_activated, FALSE, - "Activate character-level n-gram-based permuter"); - -STRING_VAR(global_user_words_suffix, "user-words", "A list of user-provided words."); - -// This is an ugly way to incorporate segmentation cost in word rating. -// See comments in incorporate_segcost. -float wordseg_rating_adjust_factor; - -int permute_only_top = 0; - -#define SIM_CERTAINTY_SCALE -10.0 /*< Similarity matcher values */ -#define SIM_CERTAINTY_OFFSET -10.0 /*< Similarity matcher values */ -#define SIMILARITY_FLOOR 100.0 /*< Worst E*L product to stop on */ - -// TODO(daria): If hyphens are different in different languages and can be -// inferred from training data we should load their values dynamically. -static const char kHyphenSymbol[] = "-"; +namespace tesseract { /*---------------------------------------------------------------------- F u n c t i o n s @@ -153,314 +85,91 @@ WERD_CHOICE *get_best_delete_other(WERD_CHOICE *choice1, } } - -/** - * good_choice - * - * Return TRUE if a good answer is found for the unknown blob rating. 
- */ -int good_choice(const WERD_CHOICE &choice) { - register float certainty; - if (tord_similarity_enable) { - if ((choice.rating() + 1) * choice.certainty() > SIMILARITY_FLOOR) - return false; - certainty = - SIM_CERTAINTY_OFFSET + choice.rating() * SIM_CERTAINTY_SCALE; - } else { - certainty = choice.certainty(); - } - - return (certainty > tord_certainty_threshold) ? true : false; -} - - -namespace tesseract { /** - * add_document_word - * - * Add a word found on this document to the document specific - * dictionary. + * Returns the n-th choice in the given blob_list (top-K choices). + * If n > K, the last choice is returned. */ -void Dict::add_document_word(const WERD_CHOICE &best_choice) { - // Do not add hyphenated word parts to the document dawg. - // hyphen_word_ will be non-NULL after the set_hyphen_word() is - // called when the first part of the hyphenated word is - // discovered and while the second part of the word is recognized. - // hyphen_word_ is cleared in cc_recg() before the next word on - // the line is recognized. 
- if (hyphen_word_) return; - - char filename[CHARS_PER_LINE]; - FILE *doc_word_file; - int stringlen = best_choice.length(); - - if (!doc_dict_enable || valid_word(best_choice) || - CurrentWordAmbig() || stringlen < 2) - return; - - if (!good_choice(best_choice) || stringlen == 2) { - if (best_choice.certainty() < permuter_pending_threshold) - return; - - if (!pending_words_->word_in_dawg(best_choice)) { - if (stringlen > 2 || - (stringlen == 2 && - getUnicharset().get_isupper(best_choice.unichar_id(0)) && - getUnicharset().get_isupper(best_choice.unichar_id(1)))) { - pending_words_->add_word_to_dawg(best_choice); - } - return; - } - } - - if (save_doc_words) { - strcpy(filename, getImage()->getCCUtil()->imagefile.string()); - strcat (filename, ".doc"); - doc_word_file = open_file (filename, "a"); - fprintf (doc_word_file, "%s\n", - best_choice.debug_string(getUnicharset()).string()); - fclose(doc_word_file); - } - document_words_->add_word_to_dawg(best_choice); +BLOB_CHOICE* get_nth_choice(BLOB_CHOICE_LIST* blob_list, int n) { + BLOB_CHOICE_IT c_it(blob_list); + while (n-- > 0 && !c_it.at_last()) + c_it.forward(); + return c_it.data(); } - -/** - * adjust_non_word - * - * Assign an adjusted value to a string that is a non-word. The value - * that this word choice has is based on case and punctuation rules. - * The adjustment value applied is stored in adjust_factor upon return. 
- */ -void Dict::adjust_non_word(WERD_CHOICE *word, float *adjust_factor) { - float new_rating; - if (permute_debug) - cprintf("Non-word: %s %4.2f ", - word->debug_string(getUnicharset()).string(), word->rating()); - - new_rating = word->rating() + RATING_PAD; - if (Context::case_ok(*word, getUnicharset()) && valid_punctuation(*word)) { - new_rating *= segment_penalty_dict_nonword; - *adjust_factor = segment_penalty_dict_nonword; - if (permute_debug) tprintf(", W"); - } else { - new_rating *= segment_penalty_garbage; - *adjust_factor = segment_penalty_garbage; - if (permute_debug) { - if (!Context::case_ok(*word, getUnicharset())) tprintf(", C"); - if (!valid_punctuation(*word)) tprintf(", P"); - } - } - new_rating -= RATING_PAD; - word->set_rating(new_rating); - if (permute_debug) - cprintf (" %4.2f --> %4.2f\n", *adjust_factor, new_rating); +/** Returns the top choice char id. A helper function to make code cleaner. */ +UNICHAR_ID get_top_choice_uid(BLOB_CHOICE_LIST *blob_list) { + if (!blob_list) return INVALID_UNICHAR_ID; + BLOB_CHOICE_IT blob_choice_it(blob_list); + return (blob_choice_it.data()) ? blob_choice_it.data()->unichar_id() + : INVALID_UNICHAR_ID; } - /** - * init_permute - * - * Initialize anything that needs to be set up for the permute - * functions. + * Returns the rank (starting at 0) of a given unichar ID in the char + * choice list, or -1 if not found. */ -void Dict::init_permute() { - STRING name; - STRING &lang = getImage()->getCCUtil()->lang; - - if (dawgs_.length() != 0) end_permute(); - - hyphen_unichar_id_ = getUnicharset().unichar_to_id(kHyphenSymbol); - TessdataManager &tessdata_manager = - getImage()->getCCUtil()->tessdata_manager; - - // Load dawgs_. 
- if (global_load_punc_dawg && - tessdata_manager.SeekToStart(TESSDATA_PUNC_DAWG)) { - dawgs_ += new SquishedDawg(tessdata_manager.GetDataFilePtr(), - DAWG_TYPE_PUNCTUATION, lang, PUNC_PERM); - } - if (global_load_system_dawg && - tessdata_manager.SeekToStart(TESSDATA_SYSTEM_DAWG)) { - dawgs_ += new SquishedDawg(tessdata_manager.GetDataFilePtr(), - DAWG_TYPE_WORD, lang, SYSTEM_DAWG_PERM); - } - if (global_load_number_dawg && - tessdata_manager.SeekToStart(TESSDATA_NUMBER_DAWG)) { - dawgs_ += - new SquishedDawg(tessdata_manager.GetDataFilePtr(), - DAWG_TYPE_NUMBER, lang, NUMBER_PERM); - } - if (((STRING &)global_user_words_suffix).length() > 0) { - name = getImage()->getCCUtil()->language_data_path_prefix; - name += global_user_words_suffix; - if (exists_file(name.string())) { - Trie *trie_ptr = new Trie(DAWG_TYPE_WORD, lang, USER_DAWG_PERM, - MAX_USER_EDGES, getUnicharset().size()); - if (!trie_ptr->read_word_list(name.string(), getUnicharset())) { - tprintf("Error: failed to load %s\n", name.string()); - exit(1); - } - dawgs_ += trie_ptr; - } - } - document_words_ = new Trie(DAWG_TYPE_WORD, lang, DOC_DAWG_PERM, - MAX_DOC_EDGES, getUnicharset().size()); - dawgs_ += document_words_; - - // This dawg is temporary and should not be searched by letter_is_ok. - pending_words_ = new Trie(DAWG_TYPE_WORD, lang, NO_PERM, - MAX_DOC_EDGES, getUnicharset().size()); - - // The frequent words dawg is only searched when a word - // is found in any of the other dawgs. - if (tessdata_manager.SeekToStart(TESSDATA_FREQ_DAWG)) { - freq_dawg_ = new SquishedDawg(tessdata_manager.GetDataFilePtr(), - DAWG_TYPE_WORD, lang, FREQ_DAWG_PERM); - } - - // Construct a list of corresponding successors for each dawg. Each entry i - // in the successors_ vector is a vector of integers that represent the - // indices into the dawgs_ vector of the successors for dawg i. 
- successors_.reserve(dawgs_.length()); - for (int i = 0; i < dawgs_.length(); ++i) { - const Dawg *dawg = dawgs_[i]; - SuccessorList *lst = new SuccessorList(); - for (int j = 0; j < dawgs_.length(); ++j) { - const Dawg *other = dawgs_[j]; - if (dawg->lang() == other->lang() && - kDawgSuccessors[dawg->type()][other->type()]) *lst += j; - } - successors_ += lst; +int find_choice_by_uid(BLOB_CHOICE_LIST *blob_list, UNICHAR_ID target_uid) { + BLOB_CHOICE_IT c_it(blob_list); + int pos = 0; + while (1) { + if (c_it.data()->unichar_id() == target_uid) return pos; + if (c_it.at_last()) break; + c_it.forward(); + pos++; } + return -1; } -void Dict::end_permute() { - if (dawgs_.length() == 0) - return; // Not safe to call twice. - dawgs_.delete_data_pointers(); - successors_.delete_data_pointers(); - dawgs_.clear(); - successors_.clear(); - document_words_ = NULL; - if (pending_words_ != NULL) delete pending_words_; - pending_words_ = NULL; - if (freq_dawg_ != NULL) delete freq_dawg_; - freq_dawg_ = NULL; -} - - /** - * permute_all - * - * Permute all the characters together using all of the different types - * of permuters/selectors available. Each of the characters must have - * a non-NULL choice list. - * - * Note: order of applying permuters does matter, since the latter - * permuter will be recorded if the resulting word ratings are the same. + * Returns a WERD formed by taking the specified position (nth choice) string + * from char_choices starting at the given position. + * For example, if start_pos=2, pos_str="0121" will form a word using the + * 1st choice of char 3, 2nd choice of char 4, 3rd choice of char 5, 2nd choice + * of char 6. If n > number of choice, the closest (last) one is used. 
*/ -WERD_CHOICE *Dict::permute_all(const BLOB_CHOICE_LIST_VECTOR &char_choices, - float rating_limit, - WERD_CHOICE *raw_choice) { - WERD_CHOICE *result1; - WERD_CHOICE *result2 = NULL; - BOOL8 any_alpha; - float top_choice_rating_limit = rating_limit; - - // Initialize result1 from the result of permute_top_choice. - result1 = permute_top_choice(char_choices, &top_choice_rating_limit, - raw_choice, &any_alpha); - - // Enforce script consistency within a word on some scripts - if (permute_script_word && - !word_script_eq(char_choices, getUnicharset().common_sid()) && - !word_script_eq(char_choices, getUnicharset().latin_sid())) { - result2 = permute_script_words(char_choices); - // TODO(dsl): incorporate segmentation cost into word rating. - // This should only be turned on for scripts that we have a segmentation - // cost model for, such as CJK. - if (segment_segcost_rating) - incorporate_segcost(result2); - result1 = get_best_delete_other(result1, result2); +WERD_CHOICE* get_choice_from_posstr(const BLOB_CHOICE_LIST_VECTOR &char_choices, + int start_pos, + const char* pos_str, + float *certainties) { + int pos_str_len = strlen(pos_str); + WERD_CHOICE* wchoice = new WERD_CHOICE(); + if (start_pos + pos_str_len > char_choices.length()) { + wchoice->make_bad(); + return wchoice; } - - // Permute character fragments if necessary. - if (result1 == NULL || result1->fragment_mark()) { - result2 = top_fragments_permute_and_select(char_choices, - top_choice_rating_limit); - result1 = get_best_delete_other(result1, result2); + for (int x = 0; x < pos_str_len; x++) { + int pos = pos_str[x]-'0'; + if (pos < 0) pos = 0; // use the top choice by default, eg. '.' 
+ if (pos >= 10) + tprintf("PosStr[%d](%d)=%c %d\n", x, pos_str_len, pos_str[x], pos); + ASSERT_HOST(pos < 10); + BLOB_CHOICE* blob_it = get_nth_choice(char_choices.get(start_pos+x), pos); + wchoice->set_permuter(NO_PERM); + wchoice->append_unichar_id(blob_it->unichar_id(), 1, + blob_it->rating(), + blob_it->certainty()); + if (certainties != NULL) certainties[x] = blob_it->certainty(); } - - // TODO(daria): update ngram permuter code. - if (ngram_permuter_activated) { - tprintf("Error: ngram permuter functionality is not available\n"); - exit(1); - // A_CHOICE *ngram_choice = - // ngram_permute_and_select(old_char_choices, rating_limit, word_dawg_); - // return ngram_choice; - } - - if (result1 == NULL) - return (NULL); - if (permute_only_top) - return result1; - - result2 = dawg_permute_and_select(char_choices, rating_limit); - result1 = get_best_delete_other(result1, result2); - - result2 = permute_compound_words(char_choices, rating_limit); - result1 = get_best_delete_other(result1, result2); - - return (result1); -} - -/** Returns the top choice char id. A helper function to make code cleaner. */ -UNICHAR_ID get_top_choice_uid(BLOB_CHOICE_LIST *blob_list) { - BLOB_CHOICE_IT blob_choice_it; - blob_choice_it.set_to_list(blob_list); - return (blob_choice_it.data()) ? blob_choice_it.data()->unichar_id() - : INVALID_UNICHAR_ID; + return wchoice; } /** - * Return the "dominant" script ID for the word. By "dominant", the script - * must account for at least half the characters. Otherwise, it returns 0. + * Given a WERD_CHOICE, find the corresponding position string from + * char_choices. Pos_str must have been allocated already. + * This is the reverse of get_choice_from_posstr. 
*/ -int get_top_word_script(const BLOB_CHOICE_LIST_VECTOR &char_choices, - const UNICHARSET &unicharset) { - int max_script = unicharset.get_script_table_size(); - int *sid = new int[max_script]; - int x; - for (x = 0; x < max_script; x++) sid[x] = 0; - for (x = 0; x < char_choices.length(); ++x) { - BLOB_CHOICE_IT blob_choice_it; - blob_choice_it.set_to_list(char_choices.get(x)); - sid[blob_choice_it.data()->script_id()]++; +void get_posstr_from_choice(const BLOB_CHOICE_LIST_VECTOR &char_choices, + WERD_CHOICE* word_choice, + int start_pos, + char* pos_str) { + for (int i = 0; i < word_choice->length(); i++) { + UNICHAR_ID target_id = word_choice->unichar_id(i); + BLOB_CHOICE_LIST* blob_choice_list = char_choices.get(start_pos + i); + int pos = find_choice_by_uid(blob_choice_list, target_id); + if (pos < 0) pos = 0; + pos_str[i] = pos + '0'; } - // Note that high script ID overrides lower one on a tie, thus biasing - // towards non-Common script (if sorted that way in unicharset file). - int max_sid = 0; - for (x = 1; x < max_script; x++) - if (sid[x] >= sid[max_sid]) max_sid = x; - if (sid[max_sid] < char_choices.length() / 2) - max_sid = unicharset.null_sid(); - delete[] sid; - return max_sid; -} - -/** - * Checks whether the dominant word script, if there is one, matches - * the given target script ID. - */ -bool Dict::word_script_eq(const BLOB_CHOICE_LIST_VECTOR &char_choices, - int target_sid) { - int max_sid = get_top_word_script(char_choices, getUnicharset()); - // If "Latin" is not a loaded script, then latin_sid() would return 0. - // max_sid could also be 0 if there is no dominant script. - // This is faster than - // strcmp(getUnicharset().get_script_from_script_id(max_sid), "Latin") - return (max_sid > 0 && max_sid == target_sid); + pos_str[word_choice->length()] = '\0'; } /** @@ -470,27 +179,14 @@ bool Dict::word_script_eq(const BLOB_CHOICE_LIST_VECTOR &char_choices, * If not match is found, a NULL is returned. 
*/ BLOB_CHOICE* find_choice_by_type( - BLOB_CHOICE_LIST *char_choices, + BLOB_CHOICE_LIST *blob_choices, char target_type, const UNICHARSET &unicharset) { - BLOB_CHOICE_IT c_it; - c_it.set_to_list(char_choices); + BLOB_CHOICE_IT c_it(blob_choices); for (c_it.mark_cycle_pt(); !c_it.cycled_list(); c_it.forward()) { - bool found = false; - UNICHAR_ID unichar_id = c_it.data()->unichar_id(); - switch (target_type) { - case '*': found = true; break; - case 'A': found = unicharset.get_isupper(unichar_id); break; - case 'a': found = unicharset.get_islower(unichar_id); break; - case '0': found = unicharset.get_isdigit(unichar_id); break; - case 'p': found = unicharset.get_ispunctuation(unichar_id); break; - case 'x': found = !unicharset.get_isupper(unichar_id) && - !unicharset.get_islower(unichar_id) && - !unicharset.get_isdigit(unichar_id) && - !unicharset.get_ispunctuation(unichar_id); - break; - } - if (found) return c_it.data(); + if (c_it.data() && + unicharset.get_chartype(c_it.data()->unichar_id()) == target_type) + return c_it.data(); } return NULL; } @@ -508,12 +204,11 @@ BLOB_CHOICE* find_choice_by_type( * find_choice_by_script(cchoice, han_sid, 0, common_sid); */ BLOB_CHOICE* find_choice_by_script( - BLOB_CHOICE_LIST *char_choices, + BLOB_CHOICE_LIST *blob_choices, int target_sid, int backup_sid, int secondary_sid) { - BLOB_CHOICE_IT c_it; - c_it.set_to_list(char_choices); + BLOB_CHOICE_IT c_it(blob_choices); for (c_it.mark_cycle_pt(); !c_it.cycled_list(); c_it.forward()) { bool found = false; if (c_it.data()->script_id() == 0) continue; @@ -522,7 +217,7 @@ BLOB_CHOICE* find_choice_by_script( if (found) return c_it.data(); } if (secondary_sid > 0) { - c_it.set_to_list(char_choices); + c_it.set_to_list(blob_choices); for (c_it.mark_cycle_pt(); !c_it.cycled_list(); c_it.forward()) { if (c_it.data()->script_id() == 0) continue; if (c_it.data()->script_id() == secondary_sid) @@ -532,37 +227,466 @@ BLOB_CHOICE* find_choice_by_script( return NULL; } + 
+PermuterState::PermuterState() { + char_choices_ = NULL; + adjust_factor_ = 1.0f; + allow_collision_ = false; + word_length_ = 0; + debug_ = false; +} + +void PermuterState::Init(const BLOB_CHOICE_LIST_VECTOR& char_choices, + const UNICHARSET& unicharset, + float default_bias, + bool debug) { + ASSERT_HOST(char_choices.length() < MAX_PERM_LENGTH); + char_choices_ = &char_choices; + word_length_ = char_choices.length(); + for (int i = 0; i < word_length_; ++i) + perm_state_[i] = kPosFree; + perm_state_[word_length_] = '\0'; + // Skip fragment choice and the mark positions so they remain unchanged. + for (int i = 0; i < word_length_; ++i) { + UNICHAR_ID unichar_id = get_top_choice_uid(char_choices.get(i)); + if (unicharset.get_fragment(unichar_id) != NULL) + perm_state_[i] = '1'; + } + adjust_factor_ = default_bias; + allow_collision_ = false; + debug_ = debug; +} + +// Promote char positions specified in pos_str with given weight. +// For example, AddPreference(5, "234", 0.95f) +// would promote the 3rd, 4th and 5th choice for character 5, 6, 7 +// (starting at 0) in the word with a rating adjustment of 0.95. +void PermuterState::AddPreference(int start_pos, char* pos_str, float weight) { + ASSERT_HOST(char_choices_ != NULL); + ASSERT_HOST(start_pos + strlen(pos_str) - 1 < word_length_); + if (debug_) { + tprintf("Copy over %s -> %s @ %d ", pos_str, perm_state_, start_pos); + } + // copy over preferred position without the terminating null + if (!allow_collision_) { + int len = strlen(pos_str); + for (int i = 0; i < len; ++i) + if (position_marked(start_pos + i)) return; + } + strncpy(&perm_state_[start_pos], pos_str, strlen(pos_str)); + adjust_factor_ *= weight; + if (debug_) tprintf("==> %s %f\n", perm_state_, adjust_factor_); +} + +// Promote the input blob_choice at specified position with given weight. 
+void PermuterState::AddPreference(int char_pos, BLOB_CHOICE* blob_choice, + float weight) { + ASSERT_HOST(char_choices_ != NULL); + ASSERT_HOST(char_pos < word_length_); + // avoid collision (but this doesn't work if the first choice is favored! + if (!allow_collision_ && position_marked(char_pos)) return; + + if (debug_) { + tprintf("Set UID %d -> %s @ %d ", + blob_choice->unichar_id(), perm_state_, char_pos); + } + int pos = find_choice_by_uid(char_choices_->get(char_pos), + blob_choice->unichar_id()); + perm_state_[char_pos] = pos + '0'; + adjust_factor_ *= weight; + if (debug_) tprintf("==> %s %f\n", perm_state_, adjust_factor_); +} + +// Get the best word permutation so far. Caller should destroy WERD_CHOICE. +WERD_CHOICE* PermuterState::GetPermutedWord(float *certainties, + float *adjust_factor) { + ASSERT_HOST(char_choices_ != NULL); + WERD_CHOICE *word_choice = get_choice_from_posstr(*char_choices_, + 0, perm_state_, + certainties); + float rating = word_choice->rating() * adjust_factor_; + word_choice->set_rating(rating); + *adjust_factor = adjust_factor_; + return word_choice; +} + + +/********************************************************************** + * permute_all + * + * Permute all the characters together using all of the different types + * of permuters/selectors available. Each of the characters must have + * a non-NULL choice list. + * + * Note: order of applying permuters does matter, since the latter + * permuter will be recorded if the resulting word ratings are the same. + * + * Note: the function assumes that best_choice and raw_choice are not NULL. + * + * Note: Since permuter_all maybe called recursively (through permuter_ + * compound_words), there must be a separate instance of permuter_state + * for each invocation. 
+ **********************************************************************/ +WERD_CHOICE *Dict::permute_all(const BLOB_CHOICE_LIST_VECTOR &char_choices, + const WERD_CHOICE *best_choice, + WERD_CHOICE *raw_choice) { + WERD_CHOICE *result1 = NULL; + WERD_CHOICE *result2 = NULL; + BOOL8 any_alpha; + float top_choice_rating_limit = best_choice->rating(); + int word_script_id = get_top_word_script(char_choices, getUnicharset()); + + PermuterState permuter_state; + if (getUnicharset().han_sid() != getUnicharset().null_sid() && + word_script_id == getUnicharset().han_sid()) { + permuter_state.Init(char_choices, getUnicharset(), 1.0f, permute_debug); + + result1 = get_top_choice_word(char_choices); + + // Note that we no longer need the returned word from these permuters, + // except to delete the memory. The word choice from all permutations + // is returned by permuter_state.GetpermutedWord() at the end. + if (permute_fixed_length_dawg) { + result2 = permute_fixed_length_words(char_choices, &permuter_state); + delete result2; + } + if (permute_chartype_word) { + result2 = permute_chartype_words(char_choices, &permuter_state); + delete result2; + } + if (permute_script_word) { + result2 = permute_script_words(char_choices, &permuter_state); + delete result2; + } + + float certainties[MAX_PERM_LENGTH]; + float adjust_factor; + result2 = permuter_state.GetPermutedWord(certainties, &adjust_factor); + LogNewChoice(adjust_factor, certainties, false, result2); + result1 = get_best_delete_other(result1, result2); + + if (ngram_permuter_activated && + (best_choice->rating() == WERD_CHOICE::kBadRating || + best_choice->permuter() == TOP_CHOICE_PERM) && + result1->permuter() == TOP_CHOICE_PERM) { + result2 = ngram_permute_and_select(char_choices, best_choice->rating(), + segment_reward_ngram_best_choice); + result1 = get_best_delete_other(result1, result2); + } + + if (segment_segcost_rating) incorporate_segcost(result1); + } else { + result1 = permute_top_choice(char_choices, 
&top_choice_rating_limit, + raw_choice, &any_alpha); + if (result1 == NULL) + return (NULL); + if (permute_only_top) + return result1; + + if (permute_chartype_word) { + permuter_state.Init(char_choices, getUnicharset(), + segment_penalty_garbage, permute_debug); + result2 = permute_chartype_words(char_choices, &permuter_state); + result1 = get_best_delete_other(result1, result2); + } + + // Permute character fragments if necessary. + if (result1 == NULL || result1->fragment_mark()) { + result2 = top_fragments_permute_and_select(char_choices, + top_choice_rating_limit); + result1 = get_best_delete_other(result1, result2); + } + + result2 = dawg_permute_and_select(char_choices, best_choice->rating()); + result1 = get_best_delete_other(result1, result2); + + result2 = permute_compound_words(char_choices, best_choice->rating()); + result1 = get_best_delete_other(result1, result2); + + if (ngram_permuter_activated && + best_choice->permuter() < SYSTEM_DAWG_PERM && + result1->permuter() < SYSTEM_DAWG_PERM) { + result2 = ngram_permute_and_select(char_choices, best_choice->rating(), + segment_penalty_ngram_best_choice); + result1 = get_best_delete_other(result1, result2); + } + } + return result1; +} + /** * Incorporate segmentation cost into the word rating. This is done - * through a mutliplier wordseg_rating_adjust_factor which is determined + * through a multiplier wordseg_rating_adjust_factor_ which is determined * in bestfirst.cpp during state evaluation. This is not the cleanest * way to do this. It would be better to reorganize the SEARCH_STATE * to keep track of associated states, or do the rating adjustment * outside the permuter in evalaute_state. 
 */ void Dict::incorporate_segcost(WERD_CHOICE *word) { - if (!word || wordseg_rating_adjust_factor <= 0) return; + if (!word || wordseg_rating_adjust_factor_ <= 0) return; float old_rating = word->rating(); - float new_rating = old_rating * wordseg_rating_adjust_factor; + float new_rating = old_rating * wordseg_rating_adjust_factor_; word->set_rating(new_rating); if (permute_debug) tprintf("Permute segadjust %f * %f --> %f\n", - old_rating, wordseg_rating_adjust_factor, new_rating); + old_rating, wordseg_rating_adjust_factor_, new_rating); +} + + +/** + * Perform search on fixed-length dictionaries within a word. + * This is used for non-space delimited languages like CJK when a "word" + * corresponds to a "phrase" consisting of multiple short words. + * It iterates over every character position looking for longest matches + * against a set of fixed-length dawgs. Each dictionary hit is rewarded + * with a rating bonus. + * Note: this is very slow as it is performed on every segmentation state.
+ */ +WERD_CHOICE* Dict::permute_fixed_length_words( + const BLOB_CHOICE_LIST_VECTOR &char_choices, + PermuterState *permuter_state) { + if (permute_debug) + print_char_choices_list("\n\nPermute FixedLength Word", + char_choices, getUnicharset(), false); + WERD_CHOICE* best_choice = new WERD_CHOICE(char_choices.length()); + const int max_dict_len = max_fixed_length_dawgs_wdlen_; + const int min_dict_len = 2; + char posstr[256]; + int match_score = 0; + int anchor_pos = 0; + while (anchor_pos < char_choices.length()) { + // search from longest phrase to shortest, stop when we find a match + WERD_CHOICE* part_choice = NULL; + int step = max_dict_len; + while (step >= min_dict_len) { + int end_pos = anchor_pos + step - 1; + if (end_pos < char_choices.length()) { + part_choice = dawg_permute_and_select(char_choices, + 200.0, // rate limit + step, + anchor_pos); + if (part_choice->length() == step) { + if (permute_debug) + tprintf("match found at pos=%d len=%d\n%s\n", anchor_pos, step, + part_choice->unichar_string().string()); + break; + } + delete part_choice; + part_choice = NULL; + } + step--; + } + + if (part_choice && step > 1) { // found lexicon match + part_choice->populate_unichars(getUnicharset()); + get_posstr_from_choice(char_choices, part_choice, anchor_pos, posstr); + float adjust_factor = pow(0.95, 1.0 + step*2.0/char_choices.length()); + if (permuter_state) + permuter_state->AddPreference(anchor_pos, posstr, adjust_factor); + match_score += step - 1; // single chars don't count + if (permute_debug) + tprintf("Promote word rating %d-len%d\n%s\n", anchor_pos, step, + part_choice->unichar_string().string()); + } else { // no lexicon match + step = 1; + part_choice = + get_choice_from_posstr(char_choices, anchor_pos, "0", NULL); + if (permute_debug) + tprintf("Single char %d %s\n", anchor_pos, + part_choice->unichar_string().string()); + } + if (part_choice && part_choice->length() > 0) + (*best_choice) += (*part_choice); + if (part_choice) delete 
part_choice; + anchor_pos += step; + } + + if (match_score > 0) { + float adjust_factor = pow(0.8, // 1.0/segment_penalty_dict_nonword, + match_score * 2.0 / char_choices.length()); + float adjusted_score = best_choice->rating() * adjust_factor; + if (permute_debug) + tprintf("Adjusting score %f @ %d -> %f\n", + best_choice->rating(), match_score, adjusted_score); + best_choice->set_rating(adjusted_score); + } + best_choice->populate_unichars(getUnicharset()); + if (permute_debug) + tprintf("Found Best CJK word %f: %s\n", + best_choice->rating(), best_choice->unichar_string().string()); + return best_choice; +} + + +/********************************************************************** + * Returns the dominant chartype for the word. Only the "main" chartype + * of each character is used, and a consistent chartype is defined by + * the majority chartype from non-ambiguous character positions. + * If pos_chartypes is not NULL, then the "main" chartype at each pos + * is also returned. The caller must allocate and deallocate the space. + **********************************************************************/ +char Dict::top_word_chartype(const BLOB_CHOICE_LIST_VECTOR &char_choices, + char* pos_chartypes) { + const UNICHARSET &unicharset = getUnicharset(); + const int hist_size = 128; // to contain 'A','a','0','x','p' + int chprop[hist_size]; + int x; + for (x = 0; x < hist_size; x++) chprop[x] = 0; + for (x = 0; x < char_choices.length(); ++x) { + UNICHAR_ID unichar_id = get_top_choice_uid(char_choices.get(x)); + char ctype = unicharset.get_chartype(unichar_id); + if (pos_chartypes) pos_chartypes[x] = ctype; + if (ctype == 0 || ctype == 'p') continue; + if (getUnicharAmbigs().OneToOneDefiniteAmbigs(unichar_id) != NULL) continue; + chprop[ctype]++; + if (x == 0 && ctype == 'A') // first-cap also counts as lower + chprop['a']++; + } + int max_prop = 0; + for (x = 1; x < hist_size; x++) + if (chprop[x] >= chprop[max_prop]) max_prop = x; + return (chprop[max_prop] > 0) ? 
max_prop : 0; +} + +/********************************************************************** + * Promote consistent character type based on neighboring characters. + * For each character position, if the top choice property is inconsistent + * with that of the word or previous character, then its likely + * substitutions, as defined by DangAmbigs, will be examined and the one + * with a matching property will be selected. + **********************************************************************/ +WERD_CHOICE* Dict::permute_chartype_words( + const BLOB_CHOICE_LIST_VECTOR &char_choices, + PermuterState *permuter_state) { + + if (char_choices.length() >= MAX_PERM_LENGTH) + return NULL; + // Store main character property of top choice at every position + char pos_chartypes[MAX_PERM_LENGTH]; + char word_type = top_word_chartype(char_choices, pos_chartypes); + if (word_type == 0 || word_type == 'p') + return NULL; // skip if word type is punctuation or unknown + if (permute_debug) { + tprintf("\n\nPermuteCharType[%c]\n", word_type); + print_char_choices_list("", char_choices, getUnicharset(), true); + } + + WERD_CHOICE *current_word = new WERD_CHOICE(); + BLOB_CHOICE_IT blob_choice_it; + const UNICHARSET& unicharset = getUnicharset(); + bool replaced = false; // has any character choice been replaced + int prev_unambig_type = 0; // the last chartype of an unambiguous char + for (int x = 0; x < char_choices.length(); ++x) { + BLOB_CHOICE_LIST* pos_choice = char_choices.get(x); + UNICHAR_ID unichar_id = get_top_choice_uid(pos_choice); + if (unichar_id == 0) { + delete current_word; + return NULL; + } + blob_choice_it.set_to_list(pos_choice); + BLOB_CHOICE *first_choice = blob_choice_it.data(); + ASSERT_HOST(first_choice != NULL); + + const UnicharIdVector* ambig_uids = + getUnicharAmbigs().OneToOneDefiniteAmbigs(unichar_id); + bool is_ambiguous = (ambig_uids != NULL); + bool is_punct = unicharset.get_ispunctuation(unichar_id); + bool is_consistent = is_punct || + 
unicharset.get_chartype(unichar_id) == prev_unambig_type || + unicharset.get_chartype(unichar_id) == word_type; + bool is_fragment = getUnicharset().get_fragment(unichar_id) != NULL; + if (permute_debug) + tprintf("char[%d]:%s is_ambig %c is_punct %c is_consistent %c\n", + x, unicharset.id_to_unichar(unichar_id), + is_ambiguous?'T':'F', is_punct?'T':'F', is_consistent?'T':'F'); + + if (is_fragment) { + // Ignore any fragmented characters by skipping them to next choice + // (originally first choice). + first_choice = get_nth_choice(pos_choice, 1); + ASSERT_HOST(first_choice != NULL); + } else if (is_ambiguous && !is_consistent) { + // Check every confusable blob choice where the top choice is inconsistent + // with the character type of the previous unambiguous character. + if (permute_debug) { + tprintf("Checking %s r%g PrevCharType %c\n", + unicharset.id_to_unichar(unichar_id), + first_choice->rating(), prev_unambig_type); + print_ratings_list("\t", pos_choice, getUnicharset()); + } + BLOB_CHOICE* c_it = NULL; + if (c_it == NULL) { + c_it = find_choice_by_type(pos_choice, word_type, unicharset); + } + + // Prefer a character choice whose type is the same as the previous + // unambiguous character and the confusion appears in the ambig list. 
+ if (c_it == NULL && prev_unambig_type > 0) { + c_it = find_choice_by_type(pos_choice, prev_unambig_type, unicharset); + if (c_it && + UnicharIdArrayUtils::find_in(*ambig_uids, c_it->unichar_id()) < 0) + c_it = NULL; + } + + // Otherwise, prefer a punctuation + if (c_it == NULL) { + c_it = find_choice_by_type(pos_choice, 'p', unicharset); + if (c_it && + UnicharIdArrayUtils::find_in(*ambig_uids, c_it->unichar_id()) < 0) + c_it = NULL; + } + + // save any preference other than the top choice + if (c_it != NULL) { + if (permute_debug) { + tprintf("Replacing %s r%g ==> %s r%g\n", + unicharset.id_to_unichar(unichar_id), first_choice->rating(), + unicharset.id_to_unichar(c_it->unichar_id()), c_it->rating()); + tprintf("\n\nPermuteCharType[%c]\n", word_type); + print_char_choices_list("", char_choices, getUnicharset(), false); + } + if (permuter_state) + permuter_state->AddPreference(x, c_it, segment_reward_chartype); + first_choice = c_it; + replaced = true; + } + } else if (!is_ambiguous && !is_punct) { + // keep the last unambiguous character type + prev_unambig_type = pos_chartypes[x]; + } + current_word->append_unichar_id(first_choice->unichar_id(), 1, + first_choice->rating(), + first_choice->certainty()); + } + // All permuter choices should go through adjust_non_word so the choice + // rating would be adjusted on the same scale. + float certainties[MAX_PERM_LENGTH + 1]; + adjust_non_word(current_word, certainties, permute_debug); + current_word->populate_unichars(unicharset); + if (replaced) { + // Apply a reward multiplier on rating if a chartype permutation is made. + float rating = current_word->rating(); + current_word->set_rating(rating * segment_reward_chartype); + if (permute_debug) + current_word->print("<== permute_chartype_word **"); + } + return current_word; } /** * Try flipping characters in a word to get better script consistency. 
* Similar to how upper/lower case checking is done in top_choice_permuter, * this permuter tries to suggest a more script-consistent choice AND - * modifieds the rating. So it combines both the case_ok check and + * modifies the rating. So it combines both the case_ok check and * adjust_non_word functionality. However, instead of penalizing an * inconsistent word with a > 1 multiplier, we reward the script-consistent * choice with a < 1 multiplier. */ WERD_CHOICE* Dict::permute_script_words( - const BLOB_CHOICE_LIST_VECTOR &char_choices) { - if (char_choices.length() > MAX_WERD_LENGTH) + const BLOB_CHOICE_LIST_VECTOR &char_choices, + PermuterState *permuter_state) { + if (char_choices.length() >= MAX_WERD_LENGTH) return NULL; int word_sid = get_top_word_script(char_choices, getUnicharset()); @@ -576,19 +700,32 @@ WERD_CHOICE* Dict::permute_script_words( permute_debug > 1); } - WERD_CHOICE *current_word = new WERD_CHOICE(MAX_WERD_LENGTH); + WERD_CHOICE *current_word = new WERD_CHOICE(); BLOB_CHOICE_IT blob_choice_it; bool replaced = false; bool prev_is_consistent = false; for (int x = 0; x < char_choices.length(); ++x) { blob_choice_it.set_to_list(char_choices.get(x)); BLOB_CHOICE *first_choice = blob_choice_it.data(); - if (!first_choice) return NULL; + if (!first_choice) { + delete current_word; + return NULL; + } UNICHAR_ID unichar_id = first_choice->unichar_id(); - bool sid_consistent = (first_choice->script_id() == word_sid); - bool this_is_punct = getUnicharset().get_ispunctuation(unichar_id); - - if (!sid_consistent && !this_is_punct && prev_is_consistent) { + if (unichar_id == 0) { + delete current_word; + return NULL; + } + bool sid_consistent = (getUnicharset().get_script(unichar_id) == word_sid); + bool this_is_punct = getUnicharset().get_chartype(unichar_id) == 'p'; + bool is_fragment = getUnicharset().get_fragment(unichar_id) != NULL; + + if (is_fragment) { + // Ignore any fragmented characters by skipping them to next choice + // (originally first 
choice). + first_choice = get_nth_choice(char_choices.get(x), 1); + ASSERT_HOST(first_choice != NULL); + } else if (!sid_consistent && !this_is_punct && prev_is_consistent) { // If the previous char is CJK, we prefer a cjk over non-cjk char if (permute_debug) { tprintf("Checking %s r%g\n", getUnicharset().id_to_unichar(unichar_id), @@ -598,40 +735,40 @@ WERD_CHOICE* Dict::permute_script_words( // prefer a script consistent choice BLOB_CHOICE* c_it = find_choice_by_script(char_choices.get(x), word_sid, 0, 0); - // make this a separate check // otherwise, prefer a punctuation if (c_it == NULL) c_it = find_choice_by_type(char_choices.get(x), 'p', getUnicharset()); if (c_it != NULL) { if (permute_debug) - tprintf("Replacing %d r%g ==> %d r%g\n", - first_choice->unichar_id(), first_choice->rating(), - c_it->unichar_id(), c_it->rating()); + tprintf("Replacing %s r%g ==> %s r%g\n", + getUnicharset().id_to_unichar(unichar_id), + first_choice->rating(), + getUnicharset().id_to_unichar(c_it->unichar_id()), + c_it->rating()); + if (permuter_state) + permuter_state->AddPreference(x, c_it, segment_reward_script); first_choice = c_it; replaced = true; } } - current_word->append_unichar_id_space_allocated( - first_choice->unichar_id(), 1, - first_choice->rating(), first_choice->certainty()); + current_word->append_unichar_id(first_choice->unichar_id(), 1, + first_choice->rating(), + first_choice->certainty()); prev_is_consistent = sid_consistent; } + // All permuter choices should go through adjust_non_word so the choice + // rating would be adjusted on the same scale. + float certainties[MAX_PERM_LENGTH + 1]; + adjust_non_word(current_word, certainties, permute_debug); + current_word->populate_unichars(getUnicharset()); if (replaced) { - // When we replace a word choice (usually top choice) with - // another for the sake of script consistency, we need to improve its - // rating so that it will replace the best choice. 
How much we modify - // the rating determines how strong is the script consistency constraint. - // We need a more consistent solution for all contextual constraints - // like case, punct pattern, script, etc. Right now, this does the same - // thing as adjust_non_words for case and punctuation rules. + // Apply a reward multiplier on rating if a script permutation is made. float rating = current_word->rating(); - rating *= segment_reward_script; - current_word->set_rating(rating); + current_word->set_rating(rating * segment_reward_script); + if (permute_debug) + current_word->print("<== permute_script_word **"); } - current_word->populate_unichars(getUnicharset()); - if (permute_debug && replaced) - current_word->print("<== permute_script_word **"); return current_word; } @@ -640,28 +777,27 @@ WERD_CHOICE* Dict::permute_script_words( * * Permute these characters together according to each of the different * permuters that are enabled. + * Returns true if best_choice was updated. */ -void Dict::permute_characters(const BLOB_CHOICE_LIST_VECTOR &char_choices, - float limit, +bool Dict::permute_characters(const BLOB_CHOICE_LIST_VECTOR &char_choices, WERD_CHOICE *best_choice, WERD_CHOICE *raw_choice) { float old_raw_choice_rating = raw_choice->rating(); - permutation_count++; /* Global counter */ - if (tord_display_ratings > 1) { - cprintf("\nchar_choices in permute_characters:\n"); + if (permute_debug) { + tprintf("\n\n\n##### Permute_Characters #######\n"); print_char_choices_list("\n==> Input CharChoices", char_choices, - getUnicharset(), true); + getUnicharset(), segment_debug > 1); + tprintf("\n"); } if (char_choices.length() == 1 && - get_top_choice_uid(char_choices.get(0)) == 0) - return; - WERD_CHOICE *this_choice = permute_all(char_choices, limit, raw_choice); + get_top_choice_uid(char_choices.get(0)) == 0) return false; + WERD_CHOICE *this_choice = permute_all(char_choices, best_choice, raw_choice); if (raw_choice->rating() < old_raw_choice_rating) { // 
Populate unichars_ and unichar_lengths_ of raw_choice. This is // needed for various components that still work with unichars rather - // than unichar ids (e.g. AdaptToWord). + // than unichar ids (e.g. LearnWord). raw_choice->populate_unichars(getUnicharset()); } if (this_choice && this_choice->rating() < best_choice->rating()) { @@ -671,12 +807,16 @@ void Dict::permute_characters(const BLOB_CHOICE_LIST_VECTOR &char_choices, // than unichar ids (dawg, *_ok functions, various hard-coded hacks). best_choice->populate_unichars(getUnicharset()); - if (tord_display_ratings) { - cprintf("permute_characters: %s\n", + if (permute_debug) { + best_choice->print("\n**** Populate BestChoice"); + cprintf("populate best_choice\n\t%s\n", best_choice->debug_string(getUnicharset()).string()); } + delete this_choice; + return true; } delete this_choice; + return false; } /** @@ -728,7 +868,6 @@ WERD_CHOICE *Dict::permute_compound_words( current_word.rating() <= rating_limit) { permute_subword(char_choices, rating_limit, first_index, x - 1, ¤t_word); - current_word.populate_unichars(getUnicharset()); best_choice = new WERD_CHOICE(current_word); best_choice->set_permuter(COMPOUND_PERM); } @@ -743,7 +882,7 @@ WERD_CHOICE *Dict::permute_compound_words( * and the start and end of the word. Call the standard word permute * function on a set of choices covering only part of the original * word. When it is done reclaim the memory that was used in the - * excercise. + * exercise. 
*/ void Dict::permute_subword(const BLOB_CHOICE_LIST_VECTOR &char_choices, float rating_limit, @@ -765,13 +904,12 @@ void Dict::permute_subword(const BLOB_CHOICE_LIST_VECTOR &char_choices, } if (!subchoices.empty()) { - bool old_segment_dawg_debug = segment_dawg_debug; - if (segment_debug) segment_dawg_debug.set_value(true); - best_choice = permute_all(subchoices, rating_limit, &raw_choice); + WERD_CHOICE initial_choice; + initial_choice.make_bad(); + initial_choice.set_rating(rating_limit); + + best_choice = permute_all(subchoices, &initial_choice, &raw_choice); - if (segment_debug) { - segment_dawg_debug.set_value(old_segment_dawg_debug); - } if (best_choice && best_choice->length() > 0) { *current_word += *best_choice; } else { @@ -789,10 +927,32 @@ void Dict::permute_subword(const BLOB_CHOICE_LIST_VECTOR &char_choices, current_word->debug_string(getUnicharset()).string(), current_word->rating(), current_word->certainty()); } + current_word->populate_unichars(getUnicharset()); EnableChoiceAccum(); } +/** + * Return the top choice for each character as the choice for the word. 
+ */ +WERD_CHOICE *Dict::get_top_choice_word( + const BLOB_CHOICE_LIST_VECTOR &char_choices) { + WERD_CHOICE *top_word = new WERD_CHOICE(MAX_PERM_LENGTH); + float certainties[MAX_PERM_LENGTH]; + top_word->set_permuter(TOP_CHOICE_PERM); + for (int x = 0; x < char_choices.length(); x++) { + BLOB_CHOICE_IT blob_choice_it; + blob_choice_it.set_to_list(char_choices.get(x)); + BLOB_CHOICE *top_choice = blob_choice_it.data(); + top_word->append_unichar_id_space_allocated(top_choice->unichar_id(), 1, + top_choice->rating(), + top_choice->certainty()); + certainties[x] = top_choice->certainty(); + } + LogNewChoice(1.0, certainties, true, top_word); + return top_word; +} + /** * permute_top_choice * @@ -824,7 +984,6 @@ WERD_CHOICE *Dict::permute_top_choice( int x; BOOL8 char_alpha; float first_rating = 0; - float adjust_factor; float certainties[MAX_PERM_LENGTH + 1]; float lower_certainties[MAX_PERM_LENGTH + 1]; @@ -979,7 +1138,7 @@ WERD_CHOICE *Dict::permute_top_choice( if (word.rating() > bestrate_pruning_factor * *rating_limit) { if (permute_debug) - tprintf("\n***** Aborting high-cost word: %g > limit %g \n", + tprintf("\n***** Aborting high-cost word: %g > limit %g\n", word.rating(), bestrate_pruning_factor * *rating_limit); return (NULL); } @@ -993,23 +1152,16 @@ WERD_CHOICE *Dict::permute_top_choice( if (word.rating() < raw_choice->rating()) { *raw_choice = word; - LogNewChoice(*raw_choice, 1.0, certainties, true); + LogNewChoice(1.0, certainties, true, raw_choice); } - - if (ngram_permuter_activated) - return NULL; - float rating = word.rating(); - adjust_non_word(&word, &adjust_factor); - LogNewChoice(word, adjust_factor, certainties, false); + adjust_non_word(&word, certainties, permute_debug); float lower_rating = lower_word.rating(); - adjust_non_word(&lower_word, &adjust_factor); - LogNewChoice(lower_word, adjust_factor, lower_certainties, false); + adjust_non_word(&lower_word, lower_certainties, permute_debug); float upper_rating = capital_word.rating(); - 
adjust_non_word(&capital_word, &adjust_factor); - LogNewChoice(capital_word, adjust_factor, upper_certainties, false); + adjust_non_word(&capital_word, upper_certainties, permute_debug); WERD_CHOICE *best_choice = &word; *rating_limit = rating; @@ -1131,88 +1283,6 @@ const char* Dict::choose_il1(const char *first_char, return first_char; } -/** - * Check all the DAWGs to see if this word is in any of them. - */ -int Dict::valid_word(const WERD_CHOICE &word, bool numbers_ok) { - const WERD_CHOICE *word_ptr = &word; - WERD_CHOICE temp_word; - if (hyphenated()) { - copy_hyphen_info(&temp_word); - temp_word += word; - word_ptr = &temp_word; - } - if (word_ptr->length() == 0) return NO_PERM; - // Allocate vectors for holding current and updated - // active_dawgs and constraints and initialize them. - DawgInfoVector *active_dawgs = new DawgInfoVector[2]; - DawgInfoVector *constraints = new DawgInfoVector[2]; - init_active_dawgs(&(active_dawgs[0])); - init_constraints(&(constraints[0])); - DawgArgs dawg_args(&(active_dawgs[0]), &(constraints[0]), - &(active_dawgs[1]), &(constraints[1]), 0.0); - int last_index = word_ptr->length() - 1; - // Call leter_is_okay for each letter in the word. - for (int i = hyphen_base_size(); i <= last_index; ++i) { - if (!((this->*letter_is_okay_)(&dawg_args, i, word_ptr, - i == last_index))) break; - // Swap active_dawgs, constraints with the corresponding updated vector. 
- if (dawg_args.updated_active_dawgs == &(active_dawgs[1])) { - dawg_args.updated_active_dawgs = &(active_dawgs[0]); - dawg_args.updated_constraints = &(constraints[0]); - ++(dawg_args.active_dawgs); - ++(dawg_args.constraints); - } else { - ++(dawg_args.updated_active_dawgs); - ++(dawg_args.updated_constraints); - dawg_args.active_dawgs = &(active_dawgs[0]); - dawg_args.constraints = &(constraints[0]); - } - } - delete[] active_dawgs; - delete[] constraints; - if (dawg_args.permuter == SYSTEM_DAWG_PERM || - dawg_args.permuter == DOC_DAWG_PERM || - dawg_args.permuter == USER_DAWG_PERM || - (numbers_ok && dawg_args.permuter == NUMBER_PERM)){ - return dawg_args.permuter; - } else { - return NO_PERM; - } -} - -/** - * @return true if the word contains a valid punctuation pattern. - * - * @note Since the domains of punctuation symbols and symblos - * used in numbers are not disjoint, a valid number might contain - * an invalid punctuation pattern (e.g. .99). - */ -bool Dict::valid_punctuation(const WERD_CHOICE &word) { - if (word.length() == 0) return NO_PERM; - int i; - WERD_CHOICE new_word; - int last_index = word.length() - 1; - int new_len = 0; - for (i = 0; i <= last_index; ++i) { - UNICHAR_ID unichar_id = (word.unichar_id(i)); - if (getUnicharset().get_ispunctuation(unichar_id)) { - new_word.append_unichar_id(unichar_id, 1, 0.0, 0.0); - } else if (!getUnicharset().get_isalpha(unichar_id) && - !getUnicharset().get_isdigit(unichar_id)) { - return false; // neither punc, nor alpha, nor digit - } else if ((new_len = new_word.length()) == 0 || - new_word.unichar_id(new_len-1) != Dawg::kPatternUnicharID) { - new_word.append_unichar_id(Dawg::kPatternUnicharID, 1, 0.0, 0.0); - } - } - for (i = 0; i < dawgs_.size(); ++i) { - if (dawgs_[i]->type() == DAWG_TYPE_PUNCTUATION && - dawgs_[i]->word_in_dawg(new_word)) return true; - } - return false; -} - /** * @name fragment_state * @@ -1377,9 +1447,10 @@ WERD_CHOICE *Dict::top_fragments_permute_and_select( 
word.set_permuter(TOP_CHOICE_PERM); float certainties[MAX_PERM_LENGTH]; this->go_deeper_fxn_ = &tesseract::Dict::go_deeper_top_fragments_fxn; + int attempts_left = max_permuter_attempts; permute_choices((fragments_debug > 1) ? "fragments_debug" : NULL, frag_char_choices, 0, NULL, &word, certainties, - &rating_limit, best_choice, NULL); + &rating_limit, best_choice, &attempts_left, NULL); frag_char_choices.delete_data_pointers(); return best_choice; @@ -1400,10 +1471,11 @@ void Dict::permute_choices( float certainties[], float *limit, WERD_CHOICE *best_choice, + int *attempts_left, void *more_args) { if (debug) { tprintf("%s permute_choices: char_choice_index=%d" - " limit=%4.2f rating=%4.2f, certainty=%4.2f word=%s\n", + " limit=%g rating=%g, certainty=%g word=%s\n", debug, char_choice_index, *limit, word->rating(), word->certainty(), word->debug_string(getUnicharset()).string()); } @@ -1412,10 +1484,14 @@ void Dict::permute_choices( blob_choice_it.set_to_list(char_choices.get(char_choice_index)); for (blob_choice_it.mark_cycle_pt(); !blob_choice_it.cycled_list(); blob_choice_it.forward()) { + (*attempts_left)--; append_choices(debug, char_choices, *(blob_choice_it.data()), char_choice_index, prev_char_frag_info, word, - certainties, limit, best_choice, more_args); - + certainties, limit, best_choice, attempts_left, more_args); + if (*attempts_left <= 0) { + if (debug) tprintf("permute_choices(): attempts_left is 0\n"); + break; + } } } } @@ -1423,8 +1499,8 @@ void Dict::permute_choices( /** * append_choices * - * Check to see whether or not the next choice is worth appending to - * the word being generated. If so then keep going deeper into the word. + * Checks to see whether or not the next choice is worth appending to + * the word being generated. If so then keeps going deeper into the word. * * This function assumes that Dict::go_deeper_fxn_ is set. 
*/ @@ -1438,6 +1514,7 @@ void Dict::append_choices( float certainties[], float *limit, WERD_CHOICE *best_choice, + int *attempts_left, void *more_args) { int word_ending = (char_choice_index == char_choices.length() - 1) ? true : false; @@ -1453,7 +1530,7 @@ void Dict::append_choices( if (char_frag_info.unichar_id == INVALID_UNICHAR_ID) { permute_choices(debug, char_choices, char_choice_index + 1, &char_frag_info, word, certainties, limit, - best_choice, more_args); + best_choice, attempts_left, more_args); return; } @@ -1469,7 +1546,7 @@ void Dict::append_choices( // Explore the next unichar. (this->*go_deeper_fxn_)(debug, char_choices, char_choice_index, &char_frag_info, word_ending, word, certainties, - limit, best_choice, more_args); + limit, best_choice, attempts_left, more_args); // Remove the unichar we added to explore other choices in it's place. word->remove_last_unichar_id(); @@ -1481,15 +1558,16 @@ void Dict::append_choices( /** * go_deeper_top_fragments_fxn * - * If the choice being composed so far could be better - * than best_choice keep exploring choices. + * While the choice being composed so far could be better + * than best_choice keeps exploring char_choices. + * If the end of the word is reached and the word is better than + * best_choice, copies word to best_choice and logs the new word choice. 
*/ void Dict::go_deeper_top_fragments_fxn( const char *debug, const BLOB_CHOICE_LIST_VECTOR &char_choices, - int char_choice_index, - const CHAR_FRAGMENT_INFO *prev_char_frag_info, - bool word_ending, WERD_CHOICE *word, float certainties[], - float *limit, WERD_CHOICE *best_choice, void *more_args) { + int char_choice_index, const CHAR_FRAGMENT_INFO *prev_char_frag_info, + bool word_ending, WERD_CHOICE *word, float certainties[], float *limit, + WERD_CHOICE *best_choice, int *attempts_left, void *more_args) { if (word->rating() < *limit) { if (word_ending) { if (fragments_debug > 1) { @@ -1497,18 +1575,12 @@ void Dict::go_deeper_top_fragments_fxn( word->debug_string(getUnicharset()).string()); } *limit = word->rating(); - - float adjust_factor; - adjust_non_word(word, &adjust_factor); - LogNewChoice(*word, adjust_factor, certainties, false); - - if (word->rating() < best_choice->rating()) { - *best_choice = *word; - } + adjust_non_word(word, certainties, permute_debug); + update_best_choice(*word, best_choice); } else { // search the next letter permute_choices(debug, char_choices, char_choice_index + 1, prev_char_frag_info, word, certainties, limit, - best_choice, more_args); + best_choice, attempts_left, more_args); } } else { if (fragments_debug > 1) { diff --git a/dict/permute.h b/dict/permute.h index 711a8e9393..e3f9d1c98c 100644 --- a/dict/permute.h +++ b/dict/permute.h @@ -29,26 +29,25 @@ I n c l u d e s ----------------------------------------------------------------------*/ -#include "choicearr.h" -#include "choices.h" #include "ratngs.h" -#include "varable.h" +#include "params.h" +#include "unicharset.h" -/*---------------------------------------------------------------------- - T y p e s -----------------------------------------------------------------------*/ -#define RATING_PAD 4.0 +#define MAX_PERM_LENGTH 128 /*---------------------------------------------------------------------- V a r i a b l e s 
----------------------------------------------------------------------*/ extern INT_VAR_H(fragments_debug, 0, "Debug character fragments"); -extern BOOL_VAR_H(segment_debug, 0, "Debug the whole segmentation process"); +extern INT_VAR_H(segment_debug, 0, "Debug the whole segmentation process"); extern BOOL_VAR_H(permute_debug, 0, "char permutation debug"); extern BOOL_VAR_H(permute_script_word, 0, "Turn on word script consistency permuter"); +extern BOOL_VAR_H(permute_fixed_length_dawg, 0, + "Turn on fixed-length phrasebook search permuter"); + extern BOOL_VAR_H(segment_segcost_rating, 0, "incorporate segmentation cost in word rating?"); @@ -57,17 +56,26 @@ extern double_VAR_H(segment_reward_script, 0.95, "Being a 'reward' factor, it should be <= 1. " "Smaller value implies bigger reward."); -extern double_VAR_H(segment_penalty_garbage, 1.5, - "Score multiplier for poorly cased strings that are not " - "in the dictionary and generally look like garbage " - "(lower is better)."); +extern BOOL_VAR_H(permute_chartype_word, 0, + "Turn on character type (property) consistency permuter"); +extern double_VAR_H(segment_reward_chartype, 0.97, + "Score multipler for char type consistency within a word. "); + +extern double_VAR_H(segment_reward_ngram_best_choice, 0.99, + "Score multipler for ngram permuter's best choice" + " (only used in the Han script path)."); -extern double_VAR_H(segment_penalty_dict_nonword, 1.25, - "Score multiplier for glyph fragment segmentations which " - "do not match a dictionary word (lower is better)."); +extern BOOL_VAR_H(ngram_permuter_activated, false, + "Activate character-level n-gram-based permuter"); + +extern INT_VAR_H(max_permuter_attempts, 100000, + "Maximum number of different character choices to consider" + " during permutation. 
This limit is especially useful when" + " user patterns are specified, since overly generic patterns" + " can result in dawg search exploring an overly large number" + "of options."); extern int permute_only_top; -extern float wordseg_rating_adjust_factor; /*---------------------------------------------------------------------- F u n c t i o n s @@ -82,4 +90,62 @@ const char* choose_il1(const char *first_char, //first choice const char *next_char, //next in word const char *next_next_char); +namespace tesseract { + +// This is an awkward solution to allow "compounding" of permuter effects. +// Right now, each permuter generates a WERD_CHOICE with some modified +// rating which is compared to the current best choice, and the winner +// is saved. Therefore, independent permuter improvements, eg. from script +// consistency, dictionary check, and punctuation promoting, override each +// other and can not be combined. +// We need a trellis and some way to modify the path cost. Instead, we +// approximate by saving a permutation string, which records the preferred +// char choice [0-9] at each position [0..#chunks], and a cumulative reward +// factor. Non-conflicting changes can be accumulated and the combined +// result will be returned. +// Default_bias is the initial value for the base multiplier. In other words, +// it is the multiplier for raw choice rating if nothing is modified. +// This would be 1.0 when used with reward-based permuters in CJK-path, +// but it could be > 1 (eg. segment_penalty_garbage) to be compatible with +// penalty-based permuters in the Latin path. +// Note this class does not handle fragmented characters. It does so by +// setting the preferred position of fragmented characters to '1' at Init, +// which effectively skips the fragment choice. However, it can still be +// overridden if collision is allowed. It is the responsibility of the +// permuters to avoid permuting fragmented characters. 
+class PermuterState { + public: + PermuterState(); + + void Init(const BLOB_CHOICE_LIST_VECTOR& char_choices, + const UNICHARSET &unicharset, + float default_bias, + bool debug); + + void AddPreference(int start_pos, char* pos_str, float weight); + + void AddPreference(int char_pos, BLOB_CHOICE* blob_choice, float weight); + + WERD_CHOICE* GetPermutedWord(float *certainties, float *adjust_factor); + + void set_allow_collision(bool flag) { allow_collision_ = flag; } + void set_adjust_factor(float factor) { adjust_factor_ = factor; } + void set_debug(bool debug) { debug_ = debug; } + bool position_marked(int pos) { return perm_state_[pos] != kPosFree; } + + private: + static const char kPosFree = '.'; + + const BLOB_CHOICE_LIST_VECTOR *char_choices_; // reference pointer only + // does not need to be allocated or freed + char perm_state_[MAX_PERM_LENGTH]; // handles up to MAX_PERM_LENGTH-1 states + // stores preferred char choices, '0'..'9', or '.' + int word_length_; // the number of char positions in the word + bool allow_collision_; // can previously set preference to be overwritten? 
+ float adjust_factor_; // multiplying factor for rating adjustment + bool debug_; // whether debug statements should be printed +}; + +} // namespace tesseract + #endif diff --git a/dict/states.cpp b/dict/states.cpp index 1abf5cce80..35a06477ed 100644 --- a/dict/states.cpp +++ b/dict/states.cpp @@ -27,15 +27,12 @@ ----------------------------------------------------------------------*/ #include "states.h" #include "structures.h" -#include "tordvars.h" #include "callcpp.h" /*------------------------------------------------------------------------- Variables --------------------------------------------------------------------------*/ -#define STATEBLOCK 100 /* Cells per block */ -makestructure (newstate, free_state, printstate, STATE, -freestate, STATEBLOCK, "STATE", statecount); +makestructure(newstate, free_state, STATE); /*---------------------------------------------------------------------- F u n c t i o n s @@ -104,9 +101,6 @@ void bin_to_pieces(STATE *state, int num_joints, PIECES_STATE pieces) { unsigned int mask; /* Bit mask */ inT16 num_pieces = 0; /* Preset mask */ - if (tord_debug_8) - print_state ("bin_to_pieces = ", state, num_joints); - mask = ((num_joints > 32) ? (1 << (num_joints - 1 - 32)) : (1 << (num_joints - 1))); @@ -120,8 +114,6 @@ void bin_to_pieces(STATE *state, int num_joints, PIECES_STATE pieces) { ((state->part2 & mask) ? TRUE : FALSE) : ((state->part1 & mask) ? TRUE : FALSE)) { pieces[++num_pieces] = 0; - if (tord_debug_8) - cprintf ("[%d]=%d ", num_pieces - 1, pieces[num_pieces - 1]); } /* Next mask value */ mask = ((mask == 1) ? 
(1 << 31) : (mask >> 1)); @@ -129,8 +121,6 @@ void bin_to_pieces(STATE *state, int num_joints, PIECES_STATE pieces) { pieces[num_pieces]++; pieces[++num_pieces] = 0; ASSERT_HOST (num_pieces < MAX_NUM_CHUNKS + 2); - if (tord_debug_8) - new_line(); } @@ -269,116 +259,3 @@ void set_n_ones(STATE *state, int n) { state->part1 >>= 64 - n; } } - - -/** - * compare_states - * - * Compare the 2 states at the given blob index. Return 1 if the given - * blob is a fragment compared to reality, 2 if correct, 4 if a join, - * and 5 if both a join and a fragment. - * On return the blob index is set to the corresponding index in the - * correct string. - */ -int compare_states(STATE *true_state, STATE *this_state, int *blob_index) { - int blob_count; //number found - int true_index; //index of true blob - int index; //current - int result = 0; //return value - uinT32 mask; - - if (true_state->part1 == this_state->part1 - && true_state->part2 == this_state->part2) - return 2; - if (*blob_index == 0) { - if (bits_in_states > 32) { - for (mask = 1 << (bits_in_states - 33); mask != 0; mask >>= 1) { - if (this_state->part1 & mask) { - if (true_state->part1 & mask) - return 2; - else - return 1; - } - else if (true_state->part1 & mask) - return 4; - } - index = 31; - } - else - index = bits_in_states - 1; - for (mask = 1 << index; mask != 0; mask >>= 1) { - if (this_state->part2 & mask) { - if (true_state->part2 & mask) - return 2; - else - return 1; - } - else if (true_state->part2 & mask) - return 4; - } - return 2; - } - else { - blob_count = 0; - true_index = 0; - if (bits_in_states > 32) { - for (mask = 1 << (bits_in_states - 33); mask != 0; mask >>= 1) { - if (true_state->part1 & mask) - true_index++; - if (this_state->part1 & mask) { - blob_count++; - if (blob_count == *blob_index) { - if ((true_state->part1 & mask) == 0) - result = 1; - break; - } - } - } - if (blob_count == *blob_index) { - for (mask >>= 1; mask != 0; mask >>= 1) { - if (this_state->part1 & mask) { - if 
((true_state->part1 & mask) && result == 0) - return 2; - else - return result | 1; - } - else if (true_state->part1 & mask) - result |= 4; - } - } - index = 31; - } - else - index = bits_in_states - 1; - mask = 1 << index; - if (blob_count < *blob_index) { - for (; mask != 0; mask >>= 1) { - if (true_state->part2 & mask) - true_index++; - if (this_state->part2 & mask) { - blob_count++; - if (blob_count == *blob_index) { - if ((true_state->part2 & mask) == 0) - result = 1; - break; - } - } - } - if (blob_count != *blob_index) - return 2; - mask >>= 1; - } - *blob_index = true_index; - for (; mask != 0; mask >>= 1) { - if (this_state->part2 & mask) { - if ((true_state->part2 & mask) && result == 0) - return 2; - else - return result | 1; - } - else if (true_state->part2 & mask) - result |= 4; - } - return result == 0 ? 2 : result; - } -} diff --git a/dict/states.h b/dict/states.h index 877ab7c4d1..a478c39bae 100644 --- a/dict/states.h +++ b/dict/states.h @@ -28,7 +28,7 @@ /*---------------------------------------------------------------------- I n c l u d e s ----------------------------------------------------------------------*/ -#include "general.h" +#include "host.h" /*---------------------------------------------------------------------- T y p e s @@ -66,47 +66,6 @@ void print_state(const char *label, STATE *state, int num_joints); void set_n_ones(STATE *state, int n); -int compare_states(STATE *true_state, STATE *this_state, int *blob_index); - extern void free_state(STATE *); -/* -#if defined(__STDC__) || defined(__cplusplus) -# define _ARGS(s) s -#else -# define _ARGS(s) () -#endif*/ - -/* states.c -void insert_new_chunk - _ARGS((STATE *state, - int index)); - -SEARCH_STATE bin_to_chunks - _ARGS((STATE *state, - int num_joints)); - -STATE *new_state - _ARGS((STATE *oldstate)); - -int ones_in_state - _ARGS((STATE *state, - int num_joints)); - -void print_state - _ARGS((char *label, - STATE *state, - int num_joints)); - -void set_n_ones - _ARGS((STATE *state, 
- int n)); -int compare_states - _ARGS(( -STATE *true_state, -STATE *this_state, -int* blob_index)); - -#undef _ARGS -*/ #endif diff --git a/dict/stopper.cpp b/dict/stopper.cpp index 379b85d7cc..319da67126 100644 --- a/dict/stopper.cpp +++ b/dict/stopper.cpp @@ -15,22 +15,19 @@ ** See the License for the specific language governing permissions and ** limitations under the License. ******************************************************************************/ -/**---------------------------------------------------------------------------- - Include Files and Type Defines -----------------------------------------------------------------------------**/ + #include "stopper.h" #include "emalloc.h" #include "matchdefs.h" #include "callcpp.h" #include "permute.h" -#include "context.h" #include "danerror.h" #include "const.h" #include "freelist.h" #include "efio.h" #include "scanutils.h" #include "unichar.h" -#include "varable.h" +#include "params.h" #include "dict.h" #include "image.h" #include "ccutil.h" @@ -61,108 +58,91 @@ typedef struct UNICHAR_ID ChunkClass[MAX_NUM_CHUNKS]; } EXPANDED_CHOICE; -/**---------------------------------------------------------------------------- - Macros -----------------------------------------------------------------------------**/ -#define BestCertainty(Choices) (((VIABLE_CHOICE) first_node (Choices))->Certainty) -#define BestRating(Choices) (((VIABLE_CHOICE) first_node (Choices))->Rating) -#define BestFactor(Choices) (((VIABLE_CHOICE) first_node (Choices))->AdjustFactor) - -#define AmbigThreshold(F1,F2) (((F2) - (F1)) * stopper_ambiguity_threshold_gain - \ - stopper_ambiguity_threshold_offset) - -/*--------------------------------------------------------------------------- - Private Function Prototoypes -----------------------------------------------------------------------------*/ -void AddNewChunk(VIABLE_CHOICE Choice, int Blob); - -int CmpChoiceRatings(void *arg1, //VIABLE_CHOICE Choice1, - void *arg2); //VIABLE_CHOICE Choice2); - 
-void ExpandChoice(VIABLE_CHOICE Choice, EXPANDED_CHOICE *ExpandedChoice); - -int FreeBadChoice(void *item1, //VIABLE_CHOICE Choice, - void *item2); //EXPANDED_CHOICE *BestChoice); +#define BestCertainty(Choices) \ + (((VIABLE_CHOICE) first_node (Choices))->Certainty) -int UniformCertainties(const BLOB_CHOICE_LIST_VECTOR &Choices, - const WERD_CHOICE &BestChoice); - -/**---------------------------------------------------------------------- - V a r i a b l e s -----------------------------------------------------------------------**/ -double_VAR(certainty_scale, 20.0, "Certainty scaling factor"); - -double_VAR(stopper_nondict_certainty_base, -2.50, - "Certainty threshold for non-dict words"); - -double_VAR(stopper_phase2_certainty_rejection_offset, 1.0, - "Reject certainty offset"); - -INT_VAR(stopper_smallword_size, 2, - "Size of dict word to be treated as non-dict word"); - -double_VAR(stopper_certainty_per_char, -0.50, - "Certainty to add for each dict char above small word size."); - -double_VAR(stopper_allowable_character_badness, 3.0, - "Max certaintly variation allowed in a word (in sigma)"); - -INT_VAR(stopper_debug_level, 0, "Stopper debug level"); +#define BestRating(Choices) (((VIABLE_CHOICE) first_node (Choices))->Rating) -double_VAR(stopper_ambiguity_threshold_gain, 8.0, - "Gain factor for ambiguity threshold"); +#define BestFactor(Choices) \ + (((VIABLE_CHOICE) first_node (Choices))->AdjustFactor) -double_VAR(stopper_ambiguity_threshold_offset, 1.5, - "Certainty offset for ambiguity threshold"); +#define AmbigThreshold(F1,F2) \ + (((F2) - (F1)) * tesseract::Dict::kStopperAmbiguityThresholdGain - \ + tesseract::Dict::kStopperAmbiguityThresholdOffset) -BOOL_VAR(stopper_no_acceptable_choices, false, - "Make AcceptableChoice() always return false. 
Useful" - " when there is a need to explore all segmentations"); +/**---------------------------------------------------------------------------- + Private Code +----------------------------------------------------------------------------**/ +// Returns -1 if the rating for Choice1 is less than the rating for Choice2, +// otherwise returns 1. +static int CmpChoiceRatings(void *arg1, // VIABLE_CHOICE Choice1 + void *arg2) { // VIABLE_CHOICE Choice2 + float R1, R2; + VIABLE_CHOICE Choice1 = (VIABLE_CHOICE) arg1; + VIABLE_CHOICE Choice2 = (VIABLE_CHOICE) arg2; + R1 = Choice1->Rating; + R2 = Choice2->Rating; + return (R1 < R2) ? -1 : 1; +} -BOOL_VAR(save_raw_choices, false, "Save all explored raw choices"); +// Expands Choice and places the results in ExpandedChoice. The primary +// function of expansion is to create an two arrays, one which holds the +// corresponding certainty for each chunk in Choice, and one which holds +// the class for each chunk. +static void ExpandChoice(VIABLE_CHOICE Choice, + EXPANDED_CHOICE *ExpandedChoice) { + int i, j, Chunk; + ExpandedChoice->Choice = Choice; + for (i = 0, Chunk = 0; i < Choice->Length; i++) + for (j = 0; j < Choice->Blob[i].NumChunks; j++, Chunk++) { + ExpandedChoice->ChunkCertainty[Chunk] = Choice->Blob[i].Certainty; + ExpandedChoice->ChunkClass[Chunk] = Choice->Blob[i].Class; + } +} -INT_VAR (tessedit_truncate_wordchoice_log, 10, "Max words to keep in list"); +// If the certainty of any chunk in Choice (item1) is not ambiguous with the +// corresponding chunk in the best choice (item2), frees Choice and +// returns true. 
+static int FreeBadChoice(void *item1, // VIABLE_CHOICE Choice, + void *item2) { // EXPANDED_CHOICE *BestChoice + int i, j, Chunk; + FLOAT32 Threshold; + VIABLE_CHOICE Choice = reinterpret_cast(item1); + EXPANDED_CHOICE *BestChoice = reinterpret_cast(item2); + Threshold = AmbigThreshold(BestChoice->Choice->AdjustFactor, + Choice->AdjustFactor); + for (i = 0, Chunk = 0; i < Choice->Length; i++) { + for (j = 0; j < Choice->Blob[i].NumChunks; j++, Chunk++){ + if (Choice->Blob[i].Class != BestChoice->ChunkClass[Chunk] && + Choice->Blob[i].Certainty - BestChoice->ChunkCertainty[Chunk] < + Threshold) { + memfree(Choice); + return true; + } + } + } + return false; +} -STRING_VAR(word_to_debug, "", "Word for which stopper debug information" - " should be printed to stdout"); +namespace tesseract { -STRING_VAR(word_to_debug_lengths, "", "Lengths of unichars in word_to_debug"); +const float Dict::kStopperAmbiguityThresholdGain = 8.0; +const float Dict::kStopperAmbiguityThresholdOffset = 1.5; -/**---------------------------------------------------------------------------- - Public Code -----------------------------------------------------------------------------**/ -/*---------------------------------------------------------------------------*/ -namespace tesseract { -int Dict::AcceptableChoice(BLOB_CHOICE_LIST_VECTOR *Choices, - WERD_CHOICE *BestChoice, - const WERD_CHOICE &RawChoice, - DANGERR *fixpt, - ACCEPTABLE_CHOICE_CALLER caller, - bool *modified_blobs) { -/* - ** Parameters: - ** Choices choices for current segmentation - ** BestChoice best choice for current segmentation - ** RawChoice best raw choice for current segmentation - ** Variables Used: - ** stopper_nondict_certainty_base certainty for a non-dict word - ** stopper_smallword_size size of word to be treated as non-word - ** stopper_certainty_per_char certainty to add for each dict char - ** Operation: Return TRUE if the results from this segmentation are - ** good enough to stop. Otherwise return FALSE. 
- ** Return: TRUE or FALSE. - ** Exceptions: none - ** History: Mon Apr 29 14:57:32 1991, DSJ, Created. - */ +bool Dict::AcceptableChoice(BLOB_CHOICE_LIST_VECTOR *Choices, + WERD_CHOICE *BestChoice, + DANGERR *fixpt, + ACCEPTABLE_CHOICE_CALLER caller, + bool *modified_blobs) { float CertaintyThreshold = stopper_nondict_certainty_base; int WordSize; + if (modified_blobs != NULL) *modified_blobs = false; if (stopper_no_acceptable_choices) return false; - if (fixpt != NULL) - fixpt->index = -1; + if (fixpt != NULL) fixpt->clear(); if (BestChoice->length() == 0) - return (FALSE); + return false; if (caller == CHOPPER_CALLER && BestChoice->fragment_mark()) { if (stopper_debug_level >= 1) { cprintf("AcceptableChoice(): a choice with fragments beats BestChoice"); @@ -170,17 +150,21 @@ int Dict::AcceptableChoice(BLOB_CHOICE_LIST_VECTOR *Choices, return false; } - bool no_dang_ambigs = - NoDangerousAmbig(BestChoice, fixpt, true, Choices, modified_blobs); + bool no_dang_ambigs = (GetMaxFixedLengthDawgIndex() >= 0 || + NoDangerousAmbig(BestChoice, fixpt, true, + Choices, modified_blobs)); + bool is_valid_word = valid_word_permuter(BestChoice->permuter(), false); + bool is_case_ok = case_ok(*BestChoice, getUnicharset()); if (stopper_debug_level >= 1) tprintf("\nStopper: %s (word=%c, case=%c)\n", BestChoice->debug_string(getUnicharset()).string(), - (valid_word(*BestChoice) ? 'y' : 'n'), - (Context::case_ok(*BestChoice, getUnicharset()) ? 'y' : 'n')); + (is_valid_word ? 'y' : 'n'), + (is_case_ok ? 'y' : 'n')); - if (valid_word(*BestChoice) && - Context::case_ok(*BestChoice, getUnicharset())) { + // Do not accept invalid words in PASS1. 
+ if (reject_offset_ <= 0.0f && !is_valid_word) return false; + if (is_valid_word && is_case_ok) { WordSize = LengthOfShortestAlphaRun(*BestChoice); WordSize -= stopper_smallword_size; if (WordSize < 0) @@ -195,33 +179,20 @@ int Dict::AcceptableChoice(BLOB_CHOICE_LIST_VECTOR *Choices, if (no_dang_ambigs && BestChoice->certainty() > CertaintyThreshold && UniformCertainties(*Choices, *BestChoice)) { - return (TRUE); + return true; } else { - return (FALSE); + if (stopper_debug_level >= 2) { + tprintf("AcceptableChoice() returned false" + " (no_dang_ambig:%d cert:%g thresh:%g uniform:%d)\n", + no_dang_ambigs, BestChoice->certainty(), + CertaintyThreshold, + UniformCertainties(*Choices, *BestChoice)); + } + return false; } -} /* AcceptableChoice */ - - -/*---------------------------------------------------------------------------*/ -int Dict::AcceptableResult(const WERD_CHOICE &BestChoice, - const WERD_CHOICE &RawChoice) { -/* - ** Parameters: - ** BestChoice best choice for current word - ** RawChoice best raw choice for current word - ** Variables Used: - ** stopper_nondict_certainty_base certainty for a non-dict word - ** stopper_smallword_size size of word to be treated as non-word - ** stopper_certainty_per_char certainty to add for each dict char - ** best_choices_ list of all good choices found - ** reject_offset_ allowed offset before a word is rejected - ** Operation: Return FALSE if the best choice for the current word - ** is questionable and should be tried again on the second - ** pass or should be flagged to the user. - ** Return: TRUE or FALSE. - ** Exceptions: none - ** History: Thu May 9 14:05:05 1991, DSJ, Created. 
- */ +} + +bool Dict::AcceptableResult(const WERD_CHOICE &BestChoice) { float CertaintyThreshold = stopper_nondict_certainty_base - reject_offset_; int WordSize; @@ -229,20 +200,19 @@ int Dict::AcceptableResult(const WERD_CHOICE &BestChoice, tprintf("\nRejecter: %s (word=%c, case=%c, unambig=%c)\n", BestChoice.debug_string(getUnicharset()).string(), (valid_word(BestChoice) ? 'y' : 'n'), - (Context::case_ok(BestChoice, getUnicharset()) ? 'y' : 'n'), - ((rest (best_choices_) != NIL) ? 'n' : 'y')); + (case_ok(BestChoice, getUnicharset()) ? 'y' : 'n'), + ((list_rest (best_choices_) != NIL_LIST) ? 'n' : 'y')); } if (BestChoice.length() == 0 || CurrentWordAmbig()) - return (FALSE); + return false; if (BestChoice.fragment_mark()) { if (stopper_debug_level >= 1) { cprintf("AcceptableResult(): a choice with fragments beats BestChoice\n"); } return false; } - if (valid_word(BestChoice) && - Context::case_ok(BestChoice, getUnicharset())) { + if (valid_word(BestChoice) && case_ok(BestChoice, getUnicharset())) { WordSize = LengthOfShortestAlphaRun(BestChoice); WordSize -= stopper_smallword_size; if (WordSize < 0) @@ -258,115 +228,47 @@ int Dict::AcceptableResult(const WERD_CHOICE &BestChoice, !stopper_no_acceptable_choices) { if (stopper_debug_level >= 1) cprintf("ACCEPTED\n"); - return (TRUE); + return true; } else { if (stopper_debug_level >= 1) cprintf("REJECTED\n"); - return (FALSE); + return false; } -} /* AcceptableResult */ - - -/*---------------------------------------------------------------------------*/ -int Dict::AlternativeChoicesWorseThan(FLOAT32 Threshold) { -/* - ** Parameters: - ** Threshold minimum adjust factor for alternative choices - ** Variables Used: - ** best_choices_ alternative choices for current word - ** Operation: This routine returns TRUE if there are no alternative - ** choices for the current word OR if all alternatives have - ** an adjust factor worse than Threshold. - ** Return: TRUE or FALSE. 
- ** Exceptions: none - ** History: Mon Jun 3 09:36:31 1991, DSJ, Created. - */ +} + +bool Dict::AlternativeChoicesWorseThan(FLOAT32 Threshold) { LIST Alternatives; VIABLE_CHOICE Choice; - - Alternatives = rest (best_choices_); + Alternatives = list_rest (best_choices_); iterate(Alternatives) { Choice = (VIABLE_CHOICE) first_node (Alternatives); if (Choice->AdjustFactor <= Threshold) - return (FALSE); + return false; } + return true; +} - return (TRUE); - -} /* AlternativeChoicesWorseThan */ - - -/*---------------------------------------------------------------------------*/ -int Dict::CurrentBestChoiceIs(const WERD_CHOICE &WordChoice) { -/* - ** Parameters: - ** Word word that will be compared to the best choice - ** Variables Used: - ** best_choices_ set of best choices for current word - ** Operation: Returns TRUE if Word is the same as the current best - ** choice, FALSE otherwise. - ** Return: TRUE or FALSE - ** Exceptions: none - ** History: Thu May 30 14:44:22 1991, DSJ, Created. - */ - return (best_choices_ != NIL && +bool Dict::CurrentBestChoiceIs(const WERD_CHOICE &WordChoice) { + return (best_choices_ != NIL_LIST && StringSameAs(WordChoice, (VIABLE_CHOICE)first_node(best_choices_))); -} /* CurrentBestChoiceIs */ - +} -/*---------------------------------------------------------------------------*/ FLOAT32 Dict::CurrentBestChoiceAdjustFactor() { -/* - ** Parameters: none - ** Variables Used: - ** best_choices_ set of best choices for current word - ** Operation: Return the adjustment factor for the best choice for - ** the current word. - ** Return: Adjust factor for current best choice. - ** Exceptions: none - ** History: Thu May 30 14:48:24 1991, DSJ, Created. 
- */ VIABLE_CHOICE BestChoice; - - if (best_choices_ == NIL) + if (best_choices_ == NIL_LIST) return (MAX_FLOAT32); - BestChoice = (VIABLE_CHOICE) first_node (best_choices_); return (BestChoice->AdjustFactor); +} -} /* CurrentBestChoiceAdjustFactor */ - - -/*---------------------------------------------------------------------------*/ -int Dict::CurrentWordAmbig() { -/* - ** Parameters: none - ** Variables Used: - ** best_choices_ set of best choices for current word - ** Operation: This routine returns TRUE if there are multiple good - ** choices for the current word and FALSE otherwise. - ** Return: TRUE or FALSE - ** Exceptions: none - ** History: Wed May 22 15:38:38 1991, DSJ, Created. - */ - return (rest (best_choices_) != NIL); -} /* CurrentWordAmbig */ +bool Dict::CurrentWordAmbig() { + return (list_rest (best_choices_) != NIL_LIST); +} -/*---------------------------------------------------------------------------*/ void Dict::DebugWordChoices() { -/* - ** Parameters: none - ** Variables Used: - ** best_raw_choice_ - ** best_choices_ - ** Operation: Print the current choices for this word to stdout. - ** Return: none - ** Exceptions: none - ** History: Wed May 15 13:52:08 1991, DSJ, Created. - */ LIST Choices; int i; char LabelString[80]; @@ -392,9 +294,8 @@ void Dict::DebugWordChoices() { i++; } } -} /* DebugWordChoices */ +} -// Print all the choices in raw_choices_ list for non 1-1 ambiguities. void Dict::PrintAmbigAlternatives(FILE *file, const char *label, int label_num_unichars) { iterate(raw_choices_) { @@ -412,68 +313,30 @@ void Dict::PrintAmbigAlternatives(FILE *file, const char *label, } } -/*---------------------------------------------------------------------------*/ void Dict::FilterWordChoices() { -/* - ** Parameters: none - ** Variables Used: - ** best_choices_ set of choices for current word - ** Operation: This routine removes from best_choices_ all choices which - ** are not within a reasonable range of the best choice. 
- ** Return: none - ** Exceptions: none - ** History: Wed May 15 13:08:24 1991, DSJ, Created. - */ EXPANDED_CHOICE BestChoice; - if (best_choices_ == NIL || second_node (best_choices_) == NIL) + if (best_choices_ == NIL_LIST || second_node (best_choices_) == NIL_LIST) return; - /* compute certainties and class for each chunk in best choice */ + // Compute certainties and class for each chunk in best choice. ExpandChoice((VIABLE_CHOICE_STRUCT *)first_node(best_choices_), &BestChoice); - set_rest (best_choices_, delete_d (rest (best_choices_), - &BestChoice, FreeBadChoice)); - -} /* FilterWordChoices */ + set_rest (best_choices_, delete_d(list_rest (best_choices_), + &BestChoice, FreeBadChoice)); +} -/*---------------------------------------------------------------------------*/ void Dict::FindClassifierErrors(FLOAT32 MinRating, FLOAT32 MaxRating, FLOAT32 RatingMargin, FLOAT32 Thresholds[]) { -/* - ** Parameters: - ** MinRating limits how tight to make a template - ** MaxRating limits how loose to make a template - ** RatingMargin amount of margin to put in template - ** Thresholds[] place to put error thresholds - ** Operation: This routine compares the best choice for the current - ** word to the best raw choice to determine which characters - ** were classified incorrectly by the classifier. It then - ** places a separate threshold into Thresholds for each - ** character in the word. If the classifier was correct, - ** MaxRating is placed into Thresholds. If the - ** classifier was incorrect, the avg. match rating (error - ** percentage) of the classifier's incorrect choice minus - ** some margin is - ** placed into thresholds. This can then be used by the - ** caller to try to create a new template for the desired - ** class that will classify the character with a rating better - ** than the threshold value. The match rating placed into - ** Thresholds is never allowed to be below MinRating in order - ** to prevent trying to make overly tight templates. 
- ** Return: none (results are placed in Thresholds) - ** Exceptions: none - ** History: Fri May 31 16:02:57 1991, DSJ, Created. - */ EXPANDED_CHOICE BestRaw; VIABLE_CHOICE Choice; int i, j, Chunk; FLOAT32 AvgRating; int NumErrorChunks; - assert (best_choices_ != NIL); + assert (best_choices_ != NIL_LIST); assert (best_raw_choice_ != NULL); ExpandChoice(best_raw_choice_, &BestRaw); @@ -502,19 +365,9 @@ void Dict::FindClassifierErrors(FLOAT32 MinRating, if (*Thresholds < MinRating) *Thresholds = MinRating; } -} /* FindClassifierErrors */ - +} -/*---------------------------------------------------------------------------*/ void Dict::InitChoiceAccum() { -/* - ** Parameters: none - ** Operation: This routine initializes the data structures used to - ** keep track the good word choices found for a word. - ** Return: none - ** Exceptions: none - ** History: Fri May 17 07:59:00 1991, DSJ, Created. - */ BLOB_WIDTH *BlobWidth, *End; if (best_raw_choice_) @@ -523,11 +376,11 @@ void Dict::InitChoiceAccum() { if (best_choices_) destroy_nodes(best_choices_, memfree); - best_choices_ = NIL; + best_choices_ = NIL_LIST; if (raw_choices_) destroy_nodes(raw_choices_, memfree); - raw_choices_ = NIL; + raw_choices_ = NIL_LIST; EnableChoiceAccum(); @@ -535,52 +388,24 @@ void Dict::InitChoiceAccum() { End = current_segmentation_ + MAX_NUM_CHUNKS; BlobWidth < End; *BlobWidth++ = 1); -} /* InitChoiceAccum */ +} +void Dict::ClearBestChoiceAccum() { + if (best_choices_) destroy_nodes(best_choices_, memfree); + best_choices_ = NIL_LIST; +} -/*---------------------------------------------------------------------------*/ void Dict::LogNewSegmentation(PIECES_STATE BlobWidth) { -/* - ** Parameters: - ** BlobWidth[] number of chunks in each blob in segmentation - ** Variables Used: - ** current_segmentation blob widths for current segmentation - ** Operation: This routine updates the blob widths in current_segmentation - ** to be the same as provided in BlobWidth. 
- ** Return: none - ** Exceptions: none - ** History: Mon May 20 11:52:26 1991, DSJ, Created. - */ BLOB_WIDTH *Segmentation; - for (Segmentation = current_segmentation_; *BlobWidth != 0; BlobWidth++, Segmentation++) *Segmentation = *BlobWidth; *Segmentation = 0; +} -} /* LogNewSegmentation */ - - -/*---------------------------------------------------------------------------*/ void Dict::LogNewSplit(int Blob) { -/* - ** Parameters: - ** Blob index of blob that was split - ** Variables Used: - ** best_raw_choice_ current best raw choice - ** best_choices_ list of best choices found so far - ** Operation: This routine adds 1 chunk to the specified blob for each - ** choice in best_choices_ and for the best_raw_choice_. - ** Return: none - ** Exceptions: none - ** History: Mon May 20 11:38:56 1991, DSJ, Created. - */ LIST Choices; - - if (best_raw_choice_) { - AddNewChunk(best_raw_choice_, Blob); - } - + if (best_raw_choice_) AddNewChunk(best_raw_choice_, Blob); Choices = best_choices_; iterate(Choices) { AddNewChunk ((VIABLE_CHOICE) first_node (Choices), Blob); @@ -589,31 +414,12 @@ void Dict::LogNewSplit(int Blob) { iterate(Choices) { AddNewChunk ((VIABLE_CHOICE) first_node (Choices), Blob); } -} /* LogNewSplit */ - +} -/*---------------------------------------------------------------------------*/ -void Dict::LogNewChoice(const WERD_CHOICE &WordChoice, - FLOAT32 AdjustFactor, +void Dict::LogNewChoice(FLOAT32 AdjustFactor, const float Certainties[], - bool raw_choice) { -/* - ** Parameters: - ** Choice new choice for current word - ** AdjustFactor adjustment factor which was applied to choice - ** Certainties certainties for each char in new choice - ** ChoicesList list with choices seen so far - ** Variables Used: - ** best_raw_choice_ best raw choice so far for current word - ** Operation: This routine adds Choice to ChoicesList if the - ** adjusted certainty for Choice is within a reasonable range - ** of the best choice in ChoicesList. 
The ChoicesList - ** list is kept in sorted order by rating. Duplicates are - ** removed. - ** Return: none - ** Exceptions: none - ** History: Wed May 15 09:57:19 1991, DSJ, Created. - */ + bool raw_choice, + WERD_CHOICE *WordChoice) { VIABLE_CHOICE NewChoice; LIST ChoicesList; LIST Choices; @@ -624,15 +430,15 @@ void Dict::LogNewChoice(const WERD_CHOICE &WordChoice, if (raw_choice) { if (!best_raw_choice_) - best_raw_choice_ = NewViableChoice(WordChoice, AdjustFactor, Certainties); - else if (WordChoice.rating() < best_raw_choice_->Rating) { - if (ChoiceSameAs(WordChoice, best_raw_choice_)) - FillViableChoice(WordChoice, AdjustFactor, Certainties, true, + best_raw_choice_ = NewViableChoice(*WordChoice, AdjustFactor, Certainties); + else if (WordChoice->rating() < best_raw_choice_->Rating) { + if (ChoiceSameAs(*WordChoice, best_raw_choice_)) + FillViableChoice(*WordChoice, AdjustFactor, Certainties, true, best_raw_choice_); else { memfree(best_raw_choice_); best_raw_choice_ = - NewViableChoice(WordChoice, AdjustFactor, Certainties); + NewViableChoice(*WordChoice, AdjustFactor, Certainties); } } if (!save_raw_choices) return; @@ -641,22 +447,31 @@ void Dict::LogNewChoice(const WERD_CHOICE &WordChoice, ChoicesList = best_choices_; } - /* throw out obviously bad choices to save some work */ - if (ChoicesList != NIL) { + // Throw out obviously bad choices to save some work. + if (ChoicesList != NIL_LIST) { Threshold = AmbigThreshold (BestFactor (ChoicesList), AdjustFactor); - if (Threshold > -stopper_ambiguity_threshold_offset) - Threshold = -stopper_ambiguity_threshold_offset; - if (WordChoice.certainty() - BestCertainty (ChoicesList) < Threshold) + if (Threshold > -kStopperAmbiguityThresholdOffset) + Threshold = -kStopperAmbiguityThresholdOffset; + if (WordChoice->certainty() - BestCertainty (ChoicesList) < Threshold) { + // Set the rating of the word to be terrible, so that it does not + // get chosen as the best choice. 
+ if (stopper_debug_level >= 2) { + tprintf("Discarding a choice with an overly low certainty" + " %.4f vs best choice certainty %.4f\n", + WordChoice->certainty(), BestCertainty(ChoicesList)); + } + WordChoice->set_rating(WERD_CHOICE::kBadRating); return; + } } - /* see if a choice with the same text string has already been found */ + // See if a choice with the same text string has already been found. NewChoice = NULL; Choices = ChoicesList; iterate(Choices) { - if (ChoiceSameAs (WordChoice, (VIABLE_CHOICE) first_node (Choices))) { - if (WordChoice.rating() < BestRating (Choices)) { + if (ChoiceSameAs (*WordChoice, (VIABLE_CHOICE) first_node (Choices))) { + if (WordChoice->rating() < BestRating (Choices)) { NewChoice = (VIABLE_CHOICE) first_node (Choices); } else { return; @@ -665,11 +480,11 @@ void Dict::LogNewChoice(const WERD_CHOICE &WordChoice, } if (NewChoice) { - FillViableChoice(WordChoice, AdjustFactor, Certainties, true, NewChoice); + FillViableChoice(*WordChoice, AdjustFactor, Certainties, true, NewChoice); ChoicesList = delete_d(ChoicesList, NewChoice, is_same_node); } else { - NewChoice = NewViableChoice (WordChoice, AdjustFactor, Certainties); + NewChoice = NewViableChoice (*WordChoice, AdjustFactor, Certainties); } ChoicesList = s_adjoin (ChoicesList, NewChoice, CmpChoiceRatings); @@ -679,8 +494,8 @@ void Dict::LogNewChoice(const WERD_CHOICE &WordChoice, if (count (ChoicesList) > tessedit_truncate_wordchoice_log) { Choices = (LIST) nth_cell (ChoicesList, tessedit_truncate_wordchoice_log); - destroy_nodes (rest (Choices), Efree); - set_rest(Choices, NIL); + destroy_nodes (list_rest (Choices), Efree); + set_rest(Choices, NIL_LIST); } // Update raw_choices_/best_choices_ pointer. 
@@ -689,15 +504,13 @@ void Dict::LogNewChoice(const WERD_CHOICE &WordChoice, } else { best_choices_ = ChoicesList; } -} /* LogNewChoice */ - +} -/*---------------------------------------------------------------------------*/ -int Dict::NoDangerousAmbig(WERD_CHOICE *best_choice, - DANGERR *fix_pt, - bool fix_replaceable, - BLOB_CHOICE_LIST_VECTOR *blob_choices, - bool *modified_blobs) { +bool Dict::NoDangerousAmbig(WERD_CHOICE *best_choice, + DANGERR *fixpt, + bool fix_replaceable, + BLOB_CHOICE_LIST_VECTOR *blob_choices, + bool *modified_blobs) { if (stopper_debug_level > 2) { tprintf("\nRunning NoDangerousAmbig() for %s\n", best_choice->debug_string(getUnicharset()).string()); @@ -724,8 +537,8 @@ int Dict::NoDangerousAmbig(WERD_CHOICE *best_choice, // // Note that during the execution of the for loop (on the first pass) // if replacements are made the length of best_choice might change. - for (int pass = 0; pass < 2; ++pass) { - bool replace = (pass == 0); + for (int pass = 0; pass < (fix_replaceable ? 2 : 1); ++pass) { + bool replace = (fix_replaceable && pass == 0); const UnicharAmbigsVector &table = replace ? 
getUnicharAmbigs().replace_ambigs() : getUnicharAmbigs().dang_ambigs(); if (!replace) { @@ -737,14 +550,16 @@ int Dict::NoDangerousAmbig(WERD_CHOICE *best_choice, BLOB_CHOICE_LIST *lst = new BLOB_CHOICE_LIST(); BLOB_CHOICE_IT lst_it(lst); lst_it.add_to_end(new BLOB_CHOICE(best_choice->unichar_id(i), - 0.0, 0.0, 0, -1)); + 0.0, 0.0, -1, -1, -1)); ambig_blob_choices.push_back(lst); } } UNICHAR_ID wrong_ngram[MAX_AMBIG_SIZE + 1]; int wrong_ngram_index; int next_index; + int blob_index = 0; for (i = 0; i < best_choice->length(); ++i) { + if (i > 0) blob_index += best_choice->fragment_length(i-1); UNICHAR_ID curr_unichar_id = best_choice->unichar_id(i); if (stopper_debug_level > 2) { tprintf("Looking for %s ngrams starting with %s:\n", @@ -772,6 +587,19 @@ int Dict::NoDangerousAmbig(WERD_CHOICE *best_choice, tprintf("comparison result: %d\n", compare); } if (compare == 0) { + // Record the place where we found an ambiguity. + if (fixpt != NULL) { + fixpt->push_back(DANGERR_INFO( + blob_index, blob_index+wrong_ngram_index, replace, + getUnicharset().get_isngram(ambig_spec->correct_ngram_id))); + if (stopper_debug_level > 1) { + tprintf("fixpt+=(%d %d %d %d)\n", blob_index, + blob_index+wrong_ngram_index, false, + getUnicharset().get_isngram( + ambig_spec->correct_ngram_id)); + } + } + if (replace) { if (stopper_debug_level > 2) { tprintf("replace ambiguity with: "); @@ -801,7 +629,8 @@ int Dict::NoDangerousAmbig(WERD_CHOICE *best_choice, // choice is found. 
BLOB_CHOICE_IT bc_it(ambig_blob_choices[i+tmp_index]); bc_it.add_to_end(new BLOB_CHOICE( - ambig_spec->correct_fragments[tmp_index], -1.0, 0.0, 0, -1)); + ambig_spec->correct_fragments[tmp_index], -1.0, 0.0, + -1, -1, -1)); } } spec_it.forward(); @@ -834,9 +663,31 @@ int Dict::NoDangerousAmbig(WERD_CHOICE *best_choice, } WERD_CHOICE *alt_word = dawg_permute_and_select(ambig_blob_choices, 0.0); ambigs_found = (alt_word->rating() < 0.0); - if (ambigs_found && stopper_debug_level >= 1) { - tprintf ("Stopper: Possible ambiguous word = %s\n", - alt_word->debug_string(getUnicharset()).string()); + if (ambigs_found) { + if (stopper_debug_level >= 1) { + tprintf ("Stopper: Possible ambiguous word = %s\n", + alt_word->debug_string(getUnicharset()).string()); + } + if (fixpt != NULL) { + // Note: Currently character choices combined from fragments can only + // be generated by NoDangrousAmbigs(). This code should be updated if + // the capability to produce classifications combined from character + // fragments is added to other functions. 
+ int orig_i = 0; + for (i = 0; i < alt_word->length(); ++i) { + if (alt_word->fragment_length(i) > 1) { + fixpt->push_back(DANGERR_INFO( + orig_i, orig_i+alt_word->fragment_length(i)-1, true, + getUnicharset().get_isngram(alt_word->unichar_id(i)))); + if (stopper_debug_level > 1) { + tprintf("fixpt->dangerous+=(%d %d %d %d)\n", orig_i, + (orig_i+alt_word->fragment_length(i)-1), true, + getUnicharset().get_isngram(alt_word->unichar_id(i))); + } + } + orig_i += alt_word->fragment_length(i); + } + } } delete alt_word; } @@ -846,56 +697,16 @@ int Dict::NoDangerousAmbig(WERD_CHOICE *best_choice, void Dict::EndDangerousAmbigs() {} -/*---------------------------------------------------------------------------*/ void Dict::SettupStopperPass1() { -/* - ** Parameters: none - ** Variables Used: - ** reject_offset_ offset allowed before word is rejected - ** Operation: This routine performs any settup of stopper variables - ** that is needed in preparation for the first pass. - ** Return: none - ** Exceptions: none - ** History: Mon Jun 3 12:32:00 1991, DSJ, Created. - */ reject_offset_ = 0.0; -} /* SettupStopperPass1 */ - +} -/*---------------------------------------------------------------------------*/ void Dict::SettupStopperPass2() { -/* - ** Parameters: none - ** Variables Used: - ** reject_offset_ offset allowed before word is rejected - ** Operation: This routine performs any settup of stopper variables - ** that is needed in preparation for the second pass. - ** Return: none - ** Exceptions: none - ** History: Mon Jun 3 12:32:00 1991, DSJ, Created. 
- */ reject_offset_ = stopper_phase2_certainty_rejection_offset; -} /* SettupStopperPass2 */ -} // namespace tesseract - +} -/**---------------------------------------------------------------------------- - Private Code -----------------------------------------------------------------------------**/ -/*---------------------------------------------------------------------------*/ -void AddNewChunk(VIABLE_CHOICE Choice, int Blob) { -/* - ** Parameters: - ** Choice choice to add a new chunk to - ** Blob index of blob being split - ** Operation: This routine increments the chunk count of the character - ** in Choice which corresponds to Blob. - ** Return: none - ** Exceptions: none - ** History: Mon May 20 11:43:27 1991, DSJ, Created. - */ +void Dict::AddNewChunk(VIABLE_CHOICE Choice, int Blob) { int i, LastChunk; - for (i = 0, LastChunk = 0; i < Choice->Length; i++) { LastChunk += Choice->Blob[i].NumChunks; if (Blob < LastChunk) { @@ -906,21 +717,9 @@ void AddNewChunk(VIABLE_CHOICE Choice, int Blob) { mem_tidy (1); cprintf ("AddNewChunk failed:Choice->Length=%d, LastChunk=%d, Blob=%d\n", Choice->Length, LastChunk, Blob); - assert(FALSE); /* this should never get executed */ - -} /* AddNewChunk */ - + assert(false); // this should never get executed +} -/*---------------------------------------------------------------------------*/ -namespace tesseract { -// Replaces the corresponding wrong ngram in werd_choice with the correct one. -// We indicate that this newly inserted ngram unichar is composed from several -// fragments and modify the corresponding entries in blob_choices to contain -// fragments of the correct ngram unichar instead of the original unichars. -// Ratings and certainties of entries in blob_choices and werd_choice are -// unichaged. E.g. for werd_choice mystring'' and ambiguity ''->": -// werd_choice becomes mystring", first ' in blob_choices becomes |"|0|2, -// second one is set to |"|1|2. 
void Dict::ReplaceAmbig(int wrong_ngram_begin_index, int wrong_ngram_size, UNICHAR_ID correct_ngram_id, WERD_CHOICE *werd_choice, BLOB_CHOICE_LIST_VECTOR *blob_choices, @@ -987,216 +786,54 @@ void Dict::ReplaceAmbig(int wrong_ngram_begin_index, int wrong_ngram_size, werd_choice->remove_unichar_id(wrong_ngram_begin_index); } } - if (stopper_debug_level >= 1) { - tprintf("ReplaceAmbigs() modified werd_choice: %s\n", - werd_choice->debug_string(getUnicharset()).string()); - werd_choice->print(); - if (modified_blobs != NULL && *modified_blobs && blob_choices != NULL) { + if (stopper_debug_level >= 1 && modified_blobs != NULL && + *modified_blobs && blob_choices != NULL) { + werd_choice->print("ReplaceAmbig() "); tprintf("Modified blob_choices: "); for (int i = 0; i < blob_choices->size(); ++i) { print_ratings_list("\n", blob_choices->get(i), getUnicharset()); - } } } } - -/*---------------------------------------------------------------------------*/ int Dict::ChoiceSameAs(const WERD_CHOICE &WordChoice, VIABLE_CHOICE ViableChoice) { -/* - ** Parameters: - ** Choice choice to compare to ViableChoice - ** ViableChoice viable choice to compare to Choice - ** Operation: This routine compares the corresponding strings of - ** Choice and ViableChoice and returns TRUE if they are the - ** same, FALSE otherwise. - ** Return: TRUE or FALSE. - ** Exceptions: none - ** History: Fri May 17 08:48:04 1991, DSJ, Created. - */ return (StringSameAs(WordChoice, ViableChoice)); +} -} /* ChoiceSameAs */ -} // namespace tesseract - - -/*---------------------------------------------------------------------------*/ -int CmpChoiceRatings(void *arg1, //VIABLE_CHOICE Choice1, - void *arg2) { //VIABLE_CHOICE Choice2) -/* - ** Parameters: - ** Choice1, Choice2 choices to compare ratings for - ** Operation: Return -1 if the rating for Choice1 is less than the - ** rating for Choice2, otherwise return (1). 
- ** Return: -1 or 1 - ** Exceptions: none - ** History: Wed May 15 13:02:37 1991, DSJ, Created. - */ - float R1, R2; - VIABLE_CHOICE Choice1 = (VIABLE_CHOICE) arg1; - VIABLE_CHOICE Choice2 = (VIABLE_CHOICE) arg2; - - R1 = Choice1->Rating; - R2 = Choice2->Rating; - - if (R1 < R2) - return (-1); - else - return (1); - -} /* CmpChoiceRatings */ - - -/*---------------------------------------------------------------------------*/ -void ExpandChoice(VIABLE_CHOICE Choice, EXPANDED_CHOICE *ExpandedChoice) { -/* - ** Parameters: - ** Choice choice to be expanded - ** ExpandedChoice place to put resulting expanded choice - ** Operation: This routine expands Choice and places the results - ** in ExpandedChoice. The primary function of expansion - ** is to create an two arrays, one which holds the corresponding - ** certainty for each chunk in Choice, and one which holds - ** the class for each chunk. - ** Return: none (results are placed in ExpandedChoice) - ** Exceptions: none - ** History: Fri May 31 15:21:57 1991, DSJ, Created. - */ - int i, j, Chunk; - - ExpandedChoice->Choice = Choice; - for (i = 0, Chunk = 0; i < Choice->Length; i++) - for (j = 0; j < Choice->Blob[i].NumChunks; j++, Chunk++) { - ExpandedChoice->ChunkCertainty[Chunk] = Choice->Blob[i].Certainty; - ExpandedChoice->ChunkClass[Chunk] = Choice->Blob[i].Class; - } -} /* ExpandChoice */ - -/*---------------------------------------------------------------------------*/ -int FreeBadChoice(void *item1, //VIABLE_CHOICE Choice, - void *item2) { //EXPANDED_CHOICE *BestChoice) -/* - ** Parameters: - ** Choice choice to be tested - ** BestChoice best choice found - ** Variables Used: - ** stopper_ambiguity_threshold_gain - ** stopper_ambiguity_threshold_offset - ** Operation: If the certainty of any chunk in Choice is not ambiguous - ** with the corresponding chunk in the best choice, free - ** Choice and return TRUE. Otherwise, return FALSE. - ** Return: TRUE or FALSE. 
- ** Exceptions: none - ** History: Wed May 15 13:20:26 1991, DSJ, Created. - */ - int i, j, Chunk; - FLOAT32 Threshold; - VIABLE_CHOICE Choice; - EXPANDED_CHOICE *BestChoice; - - Choice = (VIABLE_CHOICE) item1; - BestChoice = (EXPANDED_CHOICE *) item2; - - Threshold = AmbigThreshold (BestChoice->Choice->AdjustFactor, - Choice->AdjustFactor); - - for (i = 0, Chunk = 0; i < Choice->Length; i++) - for (j = 0; j < Choice->Blob[i].NumChunks; j++, Chunk++) - if (Choice->Blob[i].Class != BestChoice->ChunkClass[Chunk] && - Choice->Blob[i].Certainty - BestChoice->ChunkCertainty[Chunk] < - Threshold) { - memfree(Choice); - return (TRUE); - } - - return (FALSE); - -} /* FreeBadChoice */ - - -/*---------------------------------------------------------------------------*/ -namespace tesseract { int Dict::LengthOfShortestAlphaRun(const WERD_CHOICE &WordChoice) { -/* - ** Parameters: - ** Word word to be tested - ** Operation: Return the length of the shortest alpha run in Word. - ** Return: Return the length of the shortest alpha run in Word. - ** Exceptions: none - ** History: Tue May 14 07:50:45 1991, DSJ, Created. 
- */ - register int Shortest = MAX_INT32; - register int Length; - int x; - int y; - - for (x = 0; x < WordChoice.length(); ++x) { - if (getUnicharset().get_isalpha(WordChoice.unichar_id(x))) { - for (y = x + 1, Length = 1; - y < WordChoice.length() && - getUnicharset().get_isalpha(WordChoice.unichar_id(y)); - ++y, ++Length); - if (Length < Shortest) { - Shortest = Length; - } - if (y == WordChoice.length()) { - break; - } + int shortest = MAX_INT32; + int curr_len = 0; + for (int w = 0; w < WordChoice.length(); ++w) { + if (getUnicharset().get_isalpha(WordChoice.unichar_id(w))) { + curr_len++; + } else if (curr_len > 0) { + if (curr_len < shortest) shortest = curr_len; + curr_len = 0; } } - if (Shortest == MAX_INT32) - Shortest = 0; - - return (Shortest); - -} /* LengthOfShortestAlphaRun */ - + if (curr_len > 0 && curr_len < shortest) { + shortest = curr_len; + } else if (shortest == MAX_INT32) { + shortest = 0; + } + return shortest; +} -/*---------------------------------------------------------------------------*/ VIABLE_CHOICE Dict::NewViableChoice(const WERD_CHOICE &WordChoice, FLOAT32 AdjustFactor, const float Certainties[]) { -/* - ** Parameters: - ** Choice choice to be converted to a viable choice - ** AdjustFactor factor used to adjust ratings for Choice - ** Certainties certainty for each character in Choice - ** Variables Used: - ** current_segmentation segmentation corresponding to Choice - ** Operation: Allocate a new viable choice data structure, copy - ** Choice, Certainties, and current_segmentation_ into it, - ** and return a pointer to it. - ** Return: Ptr to new viable choice. - ** Exceptions: none - ** History: Thu May 16 15:28:29 1991, DSJ, Created. 
- */ int Length = WordChoice.length(); assert (Length <= MAX_NUM_CHUNKS && Length > 0); VIABLE_CHOICE NewChoice = (VIABLE_CHOICE) Emalloc ( sizeof (VIABLE_CHOICE_STRUCT) + (Length - 1) * sizeof (CHAR_CHOICE)); FillViableChoice(WordChoice, AdjustFactor, Certainties, false, NewChoice); return (NewChoice); -} /* NewViableChoice */ - +} -/*---------------------------------------------------------------------------*/ void Dict::PrintViableChoice(FILE *File, const char *Label, VIABLE_CHOICE Choice) { -/* - ** Parameters: - ** File open text file to print Choice to - ** Label text label to be printed with Choice - ** Choice choice to be printed - ** Operation: This routine dumps a text representation of the - ** specified Choice to File. - ** Return: none - ** Exceptions: none - ** History: Mon May 20 11:16:44 1991, DSJ, Created. - */ int i, j; - fprintf (File, "%s", Label); - fprintf(File, "(R=%5.1f, C=%4.1f, F=%4.2f, Frag=%d) ", Choice->Rating, Choice->Certainty, Choice->AdjustFactor, Choice->ComposedFromCharFragments); @@ -1223,30 +860,11 @@ void Dict::PrintViableChoice(FILE *File, const char *Label, VIABLE_CHOICE Choice fprintf(File, "%3d ", Choice->Blob[i].NumChunks); } fprintf(File, "\n"); -} /* PrintViableChoice */ - +} -/*---------------------------------------------------------------------------*/ void Dict::FillViableChoice(const WERD_CHOICE &WordChoice, FLOAT32 AdjustFactor, const float Certainties[], bool SameString, VIABLE_CHOICE ViableChoice) { -/* - ** Parameters: - ** WordChoice a choice with info that will be copied - ** AdjustFactor factor used to adjust ratings for AChoice - ** Certainties certainty for each character in AChoice - ** SameString if true the string in the viable choice - ** will not be changed - ** ViableChoice existing viable choice to fill in - ** Variables Used: - ** current_segmentation_ segmentation for NewChoice - ** Operation: - ** Fill ViableChoice with information from AChoice, - ** AdjustFactor, and Certainties. 
- ** Return: none - ** Exceptions: none - ** History: Fri May 17 13:35:58 1991, DSJ, Created. - */ CHAR_CHOICE *NewChar; BLOB_WIDTH *BlobWidth; int x; @@ -1275,11 +893,8 @@ void Dict::FillViableChoice(const WERD_CHOICE &WordChoice, ViableChoice->ComposedFromCharFragments = true; } } -} /* FillViableChoice */ - +} -// Compares unichar ids in word_choice to those in viable_choice, -// returns true if they are the same, false otherwise. bool Dict::StringSameAs(const WERD_CHOICE &WordChoice, VIABLE_CHOICE ViableChoice) { if (WordChoice.length() != ViableChoice->Length) { @@ -1296,21 +911,9 @@ bool Dict::StringSameAs(const WERD_CHOICE &WordChoice, return true; } -/*---------------------------------------------------------------------------*/ -int Dict::StringSameAs(const char *String, - const char *String_lengths, - VIABLE_CHOICE ViableChoice) { -/* - ** Parameters: - ** String string to compare to ViableChoice - ** String_lengths lengths of unichars in String - ** ViableChoice viable choice to compare to String - ** Operation: This routine compares String to ViableChoice and - ** returns TRUE if they are the same, FALSE otherwise. - ** Return: TRUE or FALSE. - ** Exceptions: none - ** History: Fri May 17 08:48:04 1991, DSJ, Created. - */ +bool Dict::StringSameAs(const char *String, + const char *String_lengths, + VIABLE_CHOICE ViableChoice) { CHAR_CHOICE *Char; int i; int current_unichar_length; @@ -1322,39 +925,13 @@ int Dict::StringSameAs(const char *String, if (current_unichar_length != *String_lengths || strncmp(String, getUnicharset().id_to_unichar(Char->Class), current_unichar_length) != 0) - return (FALSE); + return false; } + return (*String == 0) ? 
true : false; +} - if (*String == 0) - return (TRUE); - else - return (FALSE); - -} /* StringSameAs */ -} // namespace tesseract - -/*---------------------------------------------------------------------------*/ -int UniformCertainties(const BLOB_CHOICE_LIST_VECTOR &Choices, - const WERD_CHOICE &BestChoice) { -/* - ** Parameters: - ** Choices choices for current segmentation - ** BestChoice best choice for current segmentation - ** Variables Used: - ** stopper_allowable_character_badness - ** max allowed certainty variation - ** Operation: This routine returns TRUE if the certainty of the - ** BestChoice word is within a reasonable range of the average - ** certainties for the best choices for each character in - ** the segmentation. This test is used to catch words in which - ** one character is much worse than the other characters in - ** the word (i.e. FALSE will be returned in that case). - ** The algorithm computes the mean and std deviation of the - ** certainties in the word with the worst certainty thrown out. - ** Return: TRUE or FALSE. - ** Exceptions: none - ** History: Tue May 14 08:23:21 1991, DSJ, Created. - */ +int Dict::UniformCertainties(const BLOB_CHOICE_LIST_VECTOR &Choices, + const WERD_CHOICE &BestChoice) { float Certainty; float WorstCertainty = MAX_FLOAT32; float CertaintyThreshold; @@ -1366,7 +943,7 @@ int UniformCertainties(const BLOB_CHOICE_LIST_VECTOR &Choices, WordLength = Choices.length(); if (WordLength < 3) - return (TRUE); + return true; TotalCertainty = TotalCertaintySquared = 0.0; BLOB_CHOICE_IT BlobChoiceIt; @@ -1379,7 +956,7 @@ int UniformCertainties(const BLOB_CHOICE_LIST_VECTOR &Choices, WorstCertainty = Certainty; } - /* subtract off worst certainty from statistics */ + // Subtract off worst certainty from statistics. 
WordLength--; TotalCertainty -= WorstCertainty; TotalCertaintySquared -= WorstCertainty * WorstCertainty; @@ -1401,8 +978,10 @@ int UniformCertainties(const BLOB_CHOICE_LIST_VECTOR &Choices, cprintf("Stopper: Non-uniform certainty = %4.1f" " (m=%4.1f, s=%4.1f, t=%4.1f)\n", BestChoice.certainty(), Mean, StdDev, CertaintyThreshold); - return (FALSE); + return false; } else { - return (TRUE); + return true; } -} /* UniformCertainties */ +} + +} // namespace tesseract diff --git a/dict/stopper.h b/dict/stopper.h index 2fc5a3437e..d9993c4be3 100644 --- a/dict/stopper.h +++ b/dict/stopper.h @@ -22,59 +22,43 @@ Include Files and Type Defines ----------------------------------------------------------------------------**/ +#include "genericvector.h" +#include "params.h" #include "states.h" #include "unichar.h" -#include "varable.h" typedef uinT8 BLOB_WIDTH; -typedef struct -{ - inT16 index; - unsigned bad_length:8; - unsigned good_length:8; -} DANGERR; +struct DANGERR_INFO { + DANGERR_INFO() : + begin(-1), end(-1), dangerous(false), correct_is_ngram(false) {} + DANGERR_INFO(int b, int e, bool d, bool n) : + begin(b), end(e), dangerous(d), correct_is_ngram(n) {} + int begin; + int end; + bool dangerous; + bool correct_is_ngram; +}; + +typedef GenericVector DANGERR; enum ACCEPTABLE_CHOICE_CALLER { CHOPPER_CALLER, ASSOCIATOR_CALLER }; -typedef struct -{ + +struct CHAR_CHOICE { UNICHAR_ID Class; uinT16 NumChunks; float Certainty; -} - +}; -CHAR_CHOICE; - -typedef struct -{ +struct VIABLE_CHOICE_STRUCT { float Rating; float Certainty; FLOAT32 AdjustFactor; int Length; bool ComposedFromCharFragments; CHAR_CHOICE Blob[1]; -} VIABLE_CHOICE_STRUCT; -typedef VIABLE_CHOICE_STRUCT *VIABLE_CHOICE; - -/*--------------------------------------------------------------------------- - Variables ----------------------------------------------------------------------------*/ -extern double_VAR_H(stopper_certainty_per_char, -0.50, - "Certainty to add for each dict char above small word size."); 
+}; -extern double_VAR_H(stopper_nondict_certainty_base, -2.50, - "Certainty threshold for non-dict words"); - -extern double_VAR_H(stopper_phase2_certainty_rejection_offset, 1.0, - "Reject certainty offset"); - -extern INT_VAR_H(stopper_debug_level, 0, "Stopper debug level"); - -extern BOOL_VAR_H(stopper_no_acceptable_choices, false, - "Make AcceptableChoice() always return false. Useful" - " when there is a need to explore all segmentations"); - -extern BOOL_VAR_H(save_raw_choices, false, "Save all explored raw choices"); +typedef VIABLE_CHOICE_STRUCT *VIABLE_CHOICE; #endif diff --git a/dict/trie.cpp b/dict/trie.cpp index 1e2ecf1450..763fd45740 100644 --- a/dict/trie.cpp +++ b/dict/trie.cpp @@ -35,14 +35,22 @@ #include "dawg.h" #include "dict.h" #include "freelist.h" +#include "genericvector.h" #include "helpers.h" namespace tesseract { +const char Trie::kAlphaPatternUnicode[] = "\u2000"; +const char Trie::kDigitPatternUnicode[] = "\u2001"; +const char Trie::kAlphanumPatternUnicode[] = "\u2002"; +const char Trie::kPuncPatternUnicode[] = "\u2003"; +const char Trie::kLowerPatternUnicode[] = "\u2004"; +const char Trie::kUpperPatternUnicode[] = "\u2005"; + bool Trie::edge_char_of(NODE_REF node_ref, NODE_REF next_node, int direction, bool word_end, UNICHAR_ID unichar_id, EDGE_RECORD **edge_ptr, EDGE_INDEX *edge_index) const { - if (dawg_debug_level == 3) { + if (debug_level_ == 3) { tprintf("edge_char_of() given node_ref " REFFORMAT " next_node " REFFORMAT " direction %d word_end %d unichar_id %d, exploring node:\n", node_ref, next_node, direction, word_end, unichar_id); @@ -90,8 +98,9 @@ bool Trie::edge_char_of(NODE_REF node_ref, NODE_REF next_node, return false; // not found } -bool Trie::add_edge_linkage(NODE_REF node1, NODE_REF node2, int direction, - bool word_end, UNICHAR_ID unichar_id) { +bool Trie::add_edge_linkage(NODE_REF node1, NODE_REF node2, bool marker_flag, + int direction, bool word_end, + UNICHAR_ID unichar_id) { if (num_edges_ == max_num_edges_) 
return false; EDGE_VECTOR *vec = (direction == FORWARD_EDGE) ? &(nodes_[node1]->forward_edges) : &(nodes_[node1]->backward_edges); @@ -107,13 +116,13 @@ bool Trie::add_edge_linkage(NODE_REF node1, NODE_REF node2, int direction, search_index = vec->size(); // add is unsorted, so index does not matter } EDGE_RECORD edge_rec; - link_edge(&edge_rec, node2, direction, word_end, unichar_id); + link_edge(&edge_rec, node2, marker_flag, direction, word_end, unichar_id); if (search_index < vec->size()) { vec->insert(edge_rec, search_index); } else { vec->push_back(edge_rec); } - if (dawg_debug_level > 1) { + if (debug_level_ > 1) { tprintf("new edge in nodes_[" REFFORMAT "]: ", node1); print_edge_rec(edge_rec); tprintf("\n"); @@ -124,22 +133,30 @@ bool Trie::add_edge_linkage(NODE_REF node1, NODE_REF node2, int direction, void Trie::add_word_ending(EDGE_RECORD *edge_ptr, NODE_REF the_next_node, + bool marker_flag, UNICHAR_ID unichar_id) { EDGE_RECORD *back_edge_ptr; EDGE_INDEX back_edge_index; ASSERT_HOST(edge_char_of(the_next_node, NO_EDGE, BACKWARD_EDGE, false, unichar_id, &back_edge_ptr, &back_edge_index)); + if (marker_flag) { + *back_edge_ptr |= (MARKER_FLAG << flag_start_bit_); + *edge_ptr |= (MARKER_FLAG << flag_start_bit_); + } // Mark both directions as end of word. 
*back_edge_ptr |= (WERD_END_FLAG << flag_start_bit_); *edge_ptr |= (WERD_END_FLAG << flag_start_bit_); } -void Trie::add_word_to_dawg(const WERD_CHOICE &word) { +void Trie::add_word_to_dawg(const WERD_CHOICE &word, + const GenericVector *repetitions) { if (word.length() <= 0) return; // can't add empty words + if (repetitions != NULL) ASSERT_HOST(repetitions->size() == word.length()); EDGE_RECORD *edge_ptr; NODE_REF last_node = 0; NODE_REF the_next_node; + bool marker_flag = false; EDGE_INDEX edge_index; int i; inT32 still_finding_chars = true; @@ -147,16 +164,17 @@ void Trie::add_word_to_dawg(const WERD_CHOICE &word) { bool add_failed = false; bool found; - if (dawg_debug_level > 1) word.print("\nAdding word: "); + if (debug_level_ > 1) word.print("\nAdding word: "); UNICHAR_ID unichar_id; for (i = 0; i < word.length() - 1; ++i) { unichar_id = word.unichar_id(i); - if (dawg_debug_level > 1) tprintf("Adding letter %d\n", unichar_id); + marker_flag = (repetitions != NULL) ? (*repetitions)[i] : false; + if (debug_level_ > 1) tprintf("Adding letter %d\n", unichar_id); if (still_finding_chars) { found = edge_char_of(last_node, NO_EDGE, FORWARD_EDGE, word_end, unichar_id, &edge_ptr, &edge_index); - if (found && dawg_debug_level > 1) { + if (found && debug_level_ > 1) { tprintf("exploring edge " REFFORMAT " in node " REFFORMAT "\n", edge_index, last_node); } @@ -167,18 +185,20 @@ void Trie::add_word_to_dawg(const WERD_CHOICE &word) { still_finding_chars = false; remove_edge(last_node, 0, word_end, unichar_id); } else { + if (marker_flag) set_marker_flag_in_edge_rec(edge_ptr); last_node = next_node_from_edge_rec(*edge_ptr); } } if (!still_finding_chars) { the_next_node = new_dawg_node(); - if (dawg_debug_level > 1) + if (debug_level_ > 1) tprintf("adding node " REFFORMAT "\n", the_next_node); if (the_next_node == 0) { add_failed = true; break; } - if (!add_new_edge(last_node, the_next_node, word_end, unichar_id)) { + if (!add_new_edge(last_node, the_next_node, + 
marker_flag, word_end, unichar_id)) { add_failed = true; break; } @@ -188,16 +208,18 @@ void Trie::add_word_to_dawg(const WERD_CHOICE &word) { } the_next_node = 0; unichar_id = word.unichar_id(i); - if (dawg_debug_level > 1) tprintf("Adding letter %d\n", unichar_id); + marker_flag = (repetitions != NULL) ? (*repetitions)[i] : false; + if (debug_level_ > 1) tprintf("Adding letter %d\n", unichar_id); if (still_finding_chars && edge_char_of(last_node, NO_EDGE, FORWARD_EDGE, false, unichar_id, &edge_ptr, &edge_index)) { // An extension of this word already exists in the trie, so we // only have to add the ending flags in both directions. - add_word_ending(edge_ptr, next_node_from_edge_rec(*edge_ptr), unichar_id); + add_word_ending(edge_ptr, next_node_from_edge_rec(*edge_ptr), + marker_flag, unichar_id); } else { if (!add_failed && - !add_new_edge(last_node, the_next_node, true, unichar_id)) + !add_new_edge(last_node, the_next_node, marker_flag, true, unichar_id)) add_failed = true; } if (add_failed) { @@ -218,7 +240,7 @@ NODE_REF Trie::new_dawg_node() { bool Trie::read_word_list(const char *filename, const UNICHARSET &unicharset) { FILE *word_file; - char string [CHARS_PER_LINE]; + char string[CHARS_PER_LINE]; int word_count = 0; word_file = open_file (filename, "r"); @@ -227,7 +249,7 @@ bool Trie::read_word_list(const char *filename, chomp_string(string); // remove newline WERD_CHOICE word(string, unicharset); ++word_count; - if (dawg_debug_level && word_count % 10000 == 0) + if (debug_level_ && word_count % 10000 == 0) tprintf("Read %d words so far\n", word_count); if (word.length() != 0 && !word.contains_unichar_id(INVALID_UNICHAR_ID)) { if (!this->word_in_dawg(word)) { @@ -237,24 +259,162 @@ bool Trie::read_word_list(const char *filename, return false; } } - } else if (dawg_debug_level) { + } else if (debug_level_) { tprintf("Skipping invalid word %s\n", string); - if (dawg_debug_level >= 3) word.print(); + if (debug_level_ >= 3) word.print(); } } - if 
(dawg_debug_level) + if (debug_level_) tprintf("Read %d words total.\n", word_count); fclose(word_file); return true; } +void Trie::initialize_patterns(UNICHARSET *unicharset) { + unicharset->unichar_insert(kAlphaPatternUnicode); + alpha_pattern_ = unicharset->unichar_to_id(kAlphaPatternUnicode); + unicharset->unichar_insert(kDigitPatternUnicode); + digit_pattern_ = unicharset->unichar_to_id(kDigitPatternUnicode); + unicharset->unichar_insert(kAlphanumPatternUnicode); + alphanum_pattern_ = unicharset->unichar_to_id(kAlphanumPatternUnicode); + unicharset->unichar_insert(kPuncPatternUnicode); + punc_pattern_ = unicharset->unichar_to_id(kPuncPatternUnicode); + unicharset->unichar_insert(kLowerPatternUnicode); + lower_pattern_ = unicharset->unichar_to_id(kLowerPatternUnicode); + unicharset->unichar_insert(kUpperPatternUnicode); + upper_pattern_ = unicharset->unichar_to_id(kUpperPatternUnicode); + initialized_patterns_ = true; +} + +void Trie::unichar_id_to_patterns(UNICHAR_ID unichar_id, + const UNICHARSET &unicharset, + GenericVector *vec) const { + bool is_alpha = unicharset.get_isalpha(unichar_id); + if (is_alpha) { + vec->push_back(alpha_pattern_); + vec->push_back(alphanum_pattern_); + if (unicharset.get_islower(unichar_id)) { + vec->push_back(lower_pattern_); + } else if (unicharset.get_isupper(unichar_id)) { + vec->push_back(upper_pattern_); + } + } + if (unicharset.get_isdigit(unichar_id)) { + vec->push_back(digit_pattern_); + if (!is_alpha) vec->push_back(alphanum_pattern_); + } + if (unicharset.get_ispunctuation(unichar_id)) { + vec->push_back(punc_pattern_); + } +} + +UNICHAR_ID Trie::character_class_to_pattern(char ch) { + if (ch == 'c') { + return alpha_pattern_; + } else if (ch == 'd') { + return digit_pattern_; + } else if (ch == 'n') { + return alphanum_pattern_; + } else if (ch == 'p') { + return punc_pattern_; + } else if (ch == 'a') { + return lower_pattern_; + } else if (ch == 'A') { + return upper_pattern_; + } else { + return INVALID_UNICHAR_ID; + 
} +} + +bool Trie::read_pattern_list(const char *filename, + const UNICHARSET &unicharset) { + if (!initialized_patterns_) { + tprintf("please call initialize_patterns() before read_pattern_list()\n"); + return false; + } + + FILE *pattern_file = open_file (filename, "r"); + if (pattern_file == NULL) { + tprintf("Error opening pattern file %s\n", filename); + return false; + } + + int pattern_count = 0; + char string[CHARS_PER_LINE]; + while (fgets(string, CHARS_PER_LINE, pattern_file) != NULL) { + chomp_string(string); // remove newline + // Parse the pattern and construct a unichar id vector. + // Record the number of repetitions of each unichar in the parallel vector. + WERD_CHOICE word; + GenericVector repetitions_vec; + const char *str_ptr = string; + int step = unicharset.step(str_ptr); + bool failed = false; + while (step > 0) { + UNICHAR_ID curr_unichar_id = INVALID_UNICHAR_ID; + if (step == 1 && *str_ptr == '\\') { + ++str_ptr; + if (*str_ptr == '\\') { // regular '\' unichar that was escaped + curr_unichar_id = unicharset.unichar_to_id(str_ptr, step); + } else { + if (word.length() < kSaneNumConcreteChars) { + tprintf("Please provide at least %d concrete characters at the" + " beginning of the pattern\n", kSaneNumConcreteChars); + failed = true; + break; + } + // Parse character class from expression. + curr_unichar_id = character_class_to_pattern(*str_ptr); + } + } else { + curr_unichar_id = unicharset.unichar_to_id(str_ptr, step); + } + if (curr_unichar_id == INVALID_UNICHAR_ID) { + failed = true; + break; // failed to parse this pattern + } + word.append_unichar_id(curr_unichar_id, 1, 0.0, 0.0); + repetitions_vec.push_back(false); + str_ptr += step; + step = unicharset.step(str_ptr); + // Check if there is a repetition pattern specified after this unichar. 
+ if (step == 1 && *str_ptr == '\\' && *(str_ptr+1) == '*') { + repetitions_vec[repetitions_vec.size()-1] = true; + str_ptr += 2; + step = unicharset.step(str_ptr); + } + } + if (failed) { + tprintf("Invalid user pattern %s\n", string); + continue; + } + // Insert the pattern into the trie. + if (debug_level_ > 2) { + tprintf("Inserting expanded user pattern %s\n", + word.debug_string(unicharset).string()); + } + if (!this->word_in_dawg(word)) { + this->add_word_to_dawg(word, &repetitions_vec); + if (!this->word_in_dawg(word)) { + tprintf("Error: failed to insert pattern '%s'\n", string); + } + } + ++pattern_count; + } + if (debug_level_) { + tprintf("Read %d valid patterns from %s\n", pattern_count, filename); + } + fclose(pattern_file); + return true; +} + void Trie::remove_edge_linkage(NODE_REF node1, NODE_REF node2, int direction, bool word_end, UNICHAR_ID unichar_id) { - EDGE_RECORD *edge_ptr; - EDGE_INDEX edge_index; + EDGE_RECORD *edge_ptr = NULL; + EDGE_INDEX edge_index = 0; ASSERT_HOST(edge_char_of(node1, node2, direction, word_end, unichar_id, &edge_ptr, &edge_index)); - if (dawg_debug_level > 1) { + if (debug_level_ > 1) { tprintf("removed edge in nodes_[" REFFORMAT "]: ", node1); print_edge_rec(*edge_ptr); tprintf("\n"); @@ -268,7 +428,7 @@ void Trie::remove_edge_linkage(NODE_REF node1, NODE_REF node2, int direction, } SquishedDawg *Trie::trie_to_dawg() { - if (dawg_debug_level > 2) { + if (debug_level_ > 2) { print_all("Before reduction:", MAX_NODE_EDGES_DISPLAY); } NODE_MARKER reduced_nodes = new bool[nodes_.size()]; @@ -276,7 +436,7 @@ SquishedDawg *Trie::trie_to_dawg() { this->reduce_node_input(0, reduced_nodes); delete[] reduced_nodes; - if (dawg_debug_level > 2) { + if (debug_level_ > 2) { print_all("After reduction:", MAX_NODE_EDGES_DISPLAY); } // Build a translation map from node indices in nodes_ vector to @@ -302,22 +462,22 @@ SquishedDawg *Trie::trie_to_dawg() { NODE_REF node_ref = next_node_from_edge_rec(edge_rec); ASSERT_HOST(node_ref < 
nodes_.size()); UNICHAR_ID unichar_id = unichar_id_from_edge_rec(edge_rec); - link_edge(edge_array_ptr, node_ref_map[node_ref], FORWARD_EDGE, + link_edge(edge_array_ptr, node_ref_map[node_ref], false, FORWARD_EDGE, end_of_word_from_edge_rec(edge_rec), unichar_id); - if (j == end - 1) set_last_flag_in_edge_rec(edge_array_ptr); + if (j == end - 1) set_marker_flag_in_edge_rec(edge_array_ptr); ++edge_array_ptr; } } delete[] node_ref_map; - return new SquishedDawg(edge_array, num_forward_edges, - type_, lang_, perm_, unicharset_size_); + return new SquishedDawg(edge_array, num_forward_edges, type_, lang_, + perm_, unicharset_size_, debug_level_); } bool Trie::eliminate_redundant_edges(NODE_REF node, const EDGE_RECORD &edge1, const EDGE_RECORD &edge2) { - if (dawg_debug_level > 1) { + if (debug_level_ > 1) { tprintf("\nCollapsing node %d:\n", node); print_node(node, MAX_NODE_EDGES_DISPLAY); tprintf("Candidate edges: "); @@ -330,7 +490,7 @@ bool Trie::eliminate_redundant_edges(NODE_REF node, NODE_REF next_node2 = next_node_from_edge_rec(edge2); TRIE_NODE_RECORD *next_node2_ptr = nodes_[next_node2]; // Translate all edges going to/from next_node2 to go to/from next_node1. - EDGE_RECORD *edge_ptr; + EDGE_RECORD *edge_ptr = NULL; EDGE_INDEX edge_index; int i; // Remove the backward link in node to next_node2. 
@@ -344,7 +504,8 @@ bool Trie::eliminate_redundant_edges(NODE_REF node, NODE_REF curr_next_node = next_node_from_edge_rec(bkw_edge); UNICHAR_ID curr_unichar_id = unichar_id_from_edge_rec(bkw_edge); int curr_word_end = end_of_word_from_edge_rec(bkw_edge); - add_edge_linkage(next_node1, curr_next_node, BACKWARD_EDGE, + bool marker_flag = marker_flag_from_edge_rec(bkw_edge); + add_edge_linkage(next_node1, curr_next_node, marker_flag, BACKWARD_EDGE, curr_word_end, curr_unichar_id); // Relocate the corresponding forward edge in curr_next_node ASSERT_HOST(edge_char_of(curr_next_node, next_node2, FORWARD_EDGE, @@ -354,7 +515,7 @@ bool Trie::eliminate_redundant_edges(NODE_REF node, } int next_node2_num_edges = (next_node2_ptr->forward_edges.size() + next_node2_ptr->backward_edges.size()); - if (dawg_debug_level > 1) { + if (debug_level_ > 1) { tprintf("removed %d edges from node " REFFORMAT "\n", next_node2_num_edges, next_node2); } @@ -369,7 +530,7 @@ bool Trie::reduce_lettered_edges(EDGE_INDEX edge_index, NODE_REF node, const EDGE_VECTOR &backward_edges, NODE_MARKER reduced_nodes) { - if (dawg_debug_level > 1) + if (debug_level_ > 1) tprintf("reduce_lettered_edges(edge=" REFFORMAT ")\n", edge_index); // Compare each of the edge pairs with the given unichar_id. 
bool did_something = false; @@ -418,7 +579,7 @@ void Trie::sort_edges(EDGE_VECTOR *edges) { void Trie::reduce_node_input(NODE_REF node, NODE_MARKER reduced_nodes) { - if (dawg_debug_level > 1) { + if (debug_level_ > 1) { tprintf("reduce_node_input(node=" REFFORMAT ")\n", node); print_node(node, MAX_NODE_EDGES_DISPLAY); } @@ -436,7 +597,7 @@ void Trie::reduce_node_input(NODE_REF node, } reduced_nodes[node] = true; // mark as reduced - if (dawg_debug_level > 1) { + if (debug_level_ > 1) { tprintf("Node " REFFORMAT " after reduction:\n", node); print_node(node, MAX_NODE_EDGES_DISPLAY); } diff --git a/dict/trie.h b/dict/trie.h index 886ebe5327..e4c85d52dc 100644 --- a/dict/trie.h +++ b/dict/trie.h @@ -27,6 +27,7 @@ #include "dawg.h" #include "cutil.h" +#include "genericvector.h" class UNICHARSET; @@ -35,6 +36,11 @@ class UNICHARSET; // and address indices. This does not seem to be needed immediately, // since currently the largest number of edges limit used by tesseract // (kMaxNumEdges in wordlist2dawg.cpp) is far less than max int32. +// There are also int casts below to satisfy the WIN32 compiler that would +// need to be changed. +// It might be cleanest to change the types of most of the Trie/Dawg related +// typedefs to int and restrict the casts to extracting these values from +// the 64 bit EDGE_RECORD. typedef inT64 EDGE_INDEX; // index of an edge in a given node typedef bool *NODE_MARKER; typedef GenericVector EDGE_VECTOR; @@ -55,17 +61,30 @@ namespace tesseract { */ class Trie : public Dawg { public: + // Minimum number of concrete characters at the beginning of user patterns. + static const int kSaneNumConcreteChars = 4; + // Various unicode whitespace characters are used to denote unichar patterns, + // (character classifier would never produce these whitespace characters as a + // valid classification). 
+ static const char kAlphaPatternUnicode[]; + static const char kDigitPatternUnicode[]; + static const char kAlphanumPatternUnicode[]; + static const char kPuncPatternUnicode[]; + static const char kLowerPatternUnicode[]; + static const char kUpperPatternUnicode[]; + // max_num_edges argument allows limiting the amount of memory this // Trie can consume (if a new word insert would cause the Trie to // contain more edges than max_num_edges, all the edges are cleared // so that new inserts can proceed). Trie(DawgType type, const STRING &lang, PermuterType perm, - uinT64 max_num_edges, int unicharset_size) { - init(type, lang, perm, unicharset_size); + uinT64 max_num_edges, int unicharset_size, int debug_level) { + init(type, lang, perm, unicharset_size, debug_level); num_edges_ = 0; max_num_edges_ = max_num_edges; deref_node_index_mask_ = ~letter_mask_; new_dawg_node(); // need to allocate node 0 + initialized_patterns_ = false; } ~Trie() { nodes_.delete_data_pointers(); } @@ -84,7 +103,8 @@ class Trie : public Dawg { * corresponding EDGE_REFs) for which there is an edge out of this node. */ void unichar_ids_of(NODE_REF node, NodeChildVector *vec) const { - const EDGE_VECTOR &forward_edges = nodes_[(int)node]->forward_edges; + const EDGE_VECTOR &forward_edges = + nodes_[static_cast(node)]->forward_edges; for (int i = 0; i < forward_edges.size(); ++i) { vec->push_back(NodeChild(unichar_id_from_edge_rec(forward_edges[i]), make_edge_ref(node, i))); @@ -129,8 +149,84 @@ class Trie : public Dawg { bool read_word_list(const char *filename, const UNICHARSET &unicharset); + // Inserts the list of patterns from the given file into the Trie. + // The pattern list file should contain one pattern per line in UTF-8 format. + // + // Each pattern can contain any non-whitespace characters, however only the + // patterns that contain characters from the unicharset of the corresponding + // language will be useful. + // The only meta character is '\'. 
To be used in a pattern as an ordinary + // string it should be escaped with '\' (e.g. string "C:\Documents" should + // be written in the patterns file as "C:\\Documents"). + // This function supports a very limited regular expression syntax. One can + // express a character, a certain character class and a number of times the + // entity should be repeated in the pattern. + // + // To denote a character class use one of: + // \c - unichar for which UNICHARSET::get_isalpha() is true (character) + // \d - unichar for which UNICHARSET::get_isdigit() is true + // \n - unichar for which UNICHARSET::get_isdigit() and + // UNICHARSET::isalpha() are true + // \p - unichar for which UNICHARSET::get_ispunct() is true + // \a - unichar for which UNICHARSET::get_islower() is true + // \A - unichar for which UNICHARSET::get_isupper() is true + // + // \* could be specified after each character or pattern to indicate that + // the character/pattern can be repeated any number of times before the next + // character/pattern occurs. + // + // Examples: + // 1-8\d\d-GOOG-411 will be expanded to strings: + // 1-800-GOOG-411, 1-801-GOOG-411, ... 1-899-GOOG-411. + // + // http://www.\n\*.com will be expanded to strings like: + // http://www.a.com http://www.a123.com ... http://www.ABCDefgHIJKLMNop.com + // + // Note: In choosing which patterns to include please be aware of the fact + // providing very generic patterns will make tesseract run slower. + // For example \n\* at the beginning of the pattern will make Tesseract + // consider all the combinations of proposed character choices for each + // of the segmentations, which will be unacceptably slow. + // Because of potential problems with speed that could be difficult to + // identify, each user pattern has to have at least kSaneNumConcreteChars + // concrete characters from the unicharset at the beginning. 
+ bool read_pattern_list(const char *filename, const UNICHARSET &unicharset); + + // Initializes the values of *_pattern_ unichar ids. + // This function should be called before calling read_pattern_list(). + void initialize_patterns(UNICHARSET *unicharset); + + // Fills in the given unichar id vector with the unichar ids that represent + // the patterns of the character classes of the given unichar_id. + void unichar_id_to_patterns(UNICHAR_ID unichar_id, + const UNICHARSET &unicharset, + GenericVector *vec) const; + + // Returns the given EDGE_REF if the EDGE_RECORD that it points to has + // a self loop and the given unichar_id matches the unichar_id stored in the + // EDGE_RECORD, returns NO_EDGE otherwise. + virtual EDGE_REF pattern_loop_edge(EDGE_REF edge_ref, + UNICHAR_ID unichar_id, + bool word_end) const { + if (edge_ref == NO_EDGE) return NO_EDGE; + EDGE_RECORD *edge_rec = deref_edge_ref(edge_ref); + return (marker_flag_from_edge_rec(*edge_rec) && + unichar_id == unichar_id_from_edge_rec(*edge_rec) && + word_end == end_of_word_from_edge_rec(*edge_rec)) ? + edge_ref : NO_EDGE; + } + // Adds a word to the Trie (creates the necessary nodes and edges). - void add_word_to_dawg(const WERD_CHOICE &word); + // + // If repetitions vector is not NULL, each entry in the vector indicates + // whether the unichar id with the corresponding index in the word is allowed + // to repeat an unlimited number of times. For each entry that is true, MARKER + // flag of the corresponding edge created for this unichar id is set to true). 
+ void add_word_to_dawg(const WERD_CHOICE &word, + const GenericVector *repetitions); + void add_word_to_dawg(const WERD_CHOICE &word) { + add_word_to_dawg(word, NULL); + } protected: // The structure of an EDGE_REF for Trie edges is as follows: @@ -141,7 +237,7 @@ class Trie : public Dawg { // With this arrangement there are enough bits to represent edge indices // (each node can have at most unicharset_size_ forward edges and // the position of flag_start_bit is set to be log2(unicharset_size_)). - // It is also possible to accomodate a maximum number of nodes that is at + // It is also possible to accommodate a maximum number of nodes that is at // least as large as that of the SquishedDawg representation (in SquishedDawg // each EDGE_RECORD has 32-(flag_start_bit+NUM_FLAG_BITS) bits to represent // the next node index). @@ -151,11 +247,12 @@ class Trie : public Dawg { // of the edge from the information in the given EDGE_REF. // This function assumes that EDGE_REF holds valid node/edge indices. inline EDGE_RECORD *deref_edge_ref(EDGE_REF edge_ref) const { - uinT64 edge_index = (edge_ref & letter_mask_) >> LETTER_START_BIT; - uinT64 node_index = - (edge_ref & deref_node_index_mask_) >> flag_start_bit_; - TRIE_NODE_RECORD *node_rec = nodes_[(int)node_index]; - return &(node_rec->forward_edges[(int)edge_index]); + int edge_index = static_cast( + (edge_ref & letter_mask_) >> LETTER_START_BIT); + int node_index = static_cast( + (edge_ref & deref_node_index_mask_) >> flag_start_bit_); + TRIE_NODE_RECORD *node_rec = nodes_[node_index]; + return &(node_rec->forward_edges[edge_index]); } /** Constructs EDGE_REF from the given node_index and edge_index. */ inline EDGE_REF make_edge_ref(NODE_REF node_index, @@ -164,9 +261,10 @@ class Trie : public Dawg { (edge_index << LETTER_START_BIT)); } /** Sets up this edge record to the requested values. 
*/ - inline void link_edge(EDGE_RECORD *edge, NODE_REF nxt, int direction, - bool word_end, UNICHAR_ID unichar_id) { + inline void link_edge(EDGE_RECORD *edge, NODE_REF nxt, bool repeats, + int direction, bool word_end, UNICHAR_ID unichar_id) { EDGE_RECORD flags = 0; + if (repeats) flags |= MARKER_FLAG; if (word_end) flags |= WERD_END_FLAG; if (direction == BACKWARD_EDGE) flags |= DIRECTION_FLAG; *edge = ((nxt << next_node_start_bit_) | @@ -175,7 +273,8 @@ class Trie : public Dawg { } /** Prints the given EDGE_RECORD. */ inline void print_edge_rec(const EDGE_RECORD &edge_rec) const { - tprintf("|" REFFORMAT "|%s%s|%d|", next_node_from_edge_rec(edge_rec), + tprintf("|" REFFORMAT "|%s%s%s|%d|", next_node_from_edge_rec(edge_rec), + marker_flag_from_edge_rec(edge_rec) ? "R," : "", (direction_from_edge_rec(edge_rec) == FORWARD_EDGE) ? "F" : "B", end_of_word_from_edge_rec(edge_rec) ? ",E" : "", unichar_id_from_edge_rec(edge_rec)); @@ -185,7 +284,7 @@ class Trie : public Dawg { inline bool can_be_eliminated(const EDGE_RECORD &edge_rec) { NODE_REF node_ref = next_node_from_edge_rec(edge_rec); return (node_ref != NO_EDGE && - nodes_[(int)node_ref]->forward_edges.size() == 1); + nodes_[static_cast(node_ref)]->forward_edges.size() == 1); } // Prints the contents of the Trie. @@ -206,16 +305,17 @@ class Trie : public Dawg { // Adds an single edge linkage between node1 and node2 in the direction // indicated by direction argument. - bool add_edge_linkage(NODE_REF node1, NODE_REF node2, int direction, - bool word_end, UNICHAR_ID unichar_id); + bool add_edge_linkage(NODE_REF node1, NODE_REF node2, bool repeats, + int direction, bool word_end, + UNICHAR_ID unichar_id); // Adds forward edge linkage from node1 to node2 and the corresponding // backward edge linkage in the other direction. 
bool add_new_edge(NODE_REF node1, NODE_REF node2, - bool word_end, UNICHAR_ID unichar_id) { - return (add_edge_linkage(node1, node2, FORWARD_EDGE, + bool repeats, bool word_end, UNICHAR_ID unichar_id) { + return (add_edge_linkage(node1, node2, repeats, FORWARD_EDGE, word_end, unichar_id) && - add_edge_linkage(node2, node1, BACKWARD_EDGE, + add_edge_linkage(node2, node1, repeats, BACKWARD_EDGE, word_end, unichar_id)); } @@ -223,6 +323,7 @@ class Trie : public Dawg { // Returns true on success. void add_word_ending(EDGE_RECORD *edge, NODE_REF the_next_node, + bool repeats, UNICHAR_ID unichar_id); // Allocates space for a new node in the Trie. @@ -269,6 +370,8 @@ class Trie : public Dawg { /** Eliminates any redundant edges from this node in the Trie. */ void reduce_node_input(NODE_REF node, NODE_MARKER reduced_nodes); + // Returns the pattern unichar id for the given character class code. + UNICHAR_ID character_class_to_pattern(char ch); // Member variables TRIE_NODES nodes_; ///< vector of nodes in the Trie @@ -276,6 +379,15 @@ class Trie : public Dawg { uinT64 max_num_edges_; ///< maximum number of edges allowed uinT64 deref_direction_mask_; ///< mask for EDGE_REF to extract direction uinT64 deref_node_index_mask_; ///< mask for EDGE_REF to extract node index + // Variables for translating character class codes denoted in user patterns + // file to the unichar ids used to represent them in a Trie. 
+ bool initialized_patterns_; + UNICHAR_ID alpha_pattern_; + UNICHAR_ID digit_pattern_; + UNICHAR_ID alphanum_pattern_; + UNICHAR_ID punc_pattern_; + UNICHAR_ID lower_pattern_; + UNICHAR_ID upper_pattern_; }; } // namespace tesseract diff --git a/image/imgs.h b/image/imgs.h index ca8426c00d..92816d2677 100644 --- a/image/imgs.h +++ b/image/imgs.h @@ -21,7 +21,7 @@ #define IMGS_H #include "img.h" -#include "varable.h" +#include "params.h" extern INT_VAR_H (image_default_resolution, 300, "Image resolution dpi"); diff --git a/neural_networks/runtime/Makefile.am b/neural_networks/runtime/Makefile.am new file mode 100644 index 0000000000..bf5c6f89ae --- /dev/null +++ b/neural_networks/runtime/Makefile.am @@ -0,0 +1,15 @@ +SUBDIRS = +AM_CPPFLAGS = \ + -DUSE_STD_NAMESPACE \ + -I$(top_srcdir)/cutil -I$(top_srcdir)/ccutil \ + -I$(top_srcdir)/ccstruct -I$(top_srcdir)/dict \ + -I$(top_srcdir)/image -I$(top_srcdir)/viewer + +include_HEADERS = \ + input_file_buffer.h neural_net.h neuron.h + +lib_LTLIBRARIES = libtesseract_neural.la +libtesseract_neural_la_SOURCES = \ + input_file_buffer.cpp neural_net.cpp neuron.cpp sigmoid_table.cpp + +libtesseract_neural_la_LDFLAGS = -version-info $(GENERIC_LIBRARY_VERSION) diff --git a/neural_networks/runtime/Makefile.in b/neural_networks/runtime/Makefile.in new file mode 100644 index 0000000000..c4d8f1732c --- /dev/null +++ b/neural_networks/runtime/Makefile.in @@ -0,0 +1,742 @@ +# Makefile.in generated by automake 1.11.1 from Makefile.am. +# @configure_input@ + +# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, +# 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation, +# Inc. +# This Makefile.in is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. 
+ +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY, to the extent permitted by law; without +# even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. + +@SET_MAKE@ + + +VPATH = @srcdir@ +pkgdatadir = $(datadir)/@PACKAGE@ +pkgincludedir = $(includedir)/@PACKAGE@ +pkglibdir = $(libdir)/@PACKAGE@ +pkglibexecdir = $(libexecdir)/@PACKAGE@ +am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd +install_sh_DATA = $(install_sh) -c -m 644 +install_sh_PROGRAM = $(install_sh) -c +install_sh_SCRIPT = $(install_sh) -c +INSTALL_HEADER = $(INSTALL_DATA) +transform = $(program_transform_name) +NORMAL_INSTALL = : +PRE_INSTALL = : +POST_INSTALL = : +NORMAL_UNINSTALL = : +PRE_UNINSTALL = : +POST_UNINSTALL = : +build_triplet = @build@ +host_triplet = @host@ +subdir = neural_networks/runtime +DIST_COMMON = $(include_HEADERS) $(srcdir)/Makefile.am \ + $(srcdir)/Makefile.in +ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 +am__aclocal_m4_deps = $(top_srcdir)/acinclude.m4 \ + $(top_srcdir)/configure.ac +am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ + $(ACLOCAL_M4) +mkinstalldirs = $(SHELL) $(top_srcdir)/config/mkinstalldirs +CONFIG_HEADER = $(top_builddir)/config_auto.h +CONFIG_CLEAN_FILES = +CONFIG_CLEAN_VPATH_FILES = +am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`; +am__vpath_adj = case $$p in \ + $(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \ + *) f=$$p;; \ + esac; +am__strip_dir = f=`echo $$p | sed -e 's|^.*/||'`; +am__install_max = 40 +am__nobase_strip_setup = \ + srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*|]/\\\\&/g'` +am__nobase_strip = \ + for p in $$list; do echo "$$p"; done | sed -e "s|$$srcdirstrip/||" +am__nobase_list = $(am__nobase_strip_setup); \ + for p in $$list; do echo "$$p $$p"; done | \ + sed "s| $$srcdirstrip/| |;"' / .*\//!s/ .*/ ./; s,\( .*\)/[^/]*$$,\1,' | \ + $(AWK) 'BEGIN { files["."] = "" } { files[$$2] = files[$$2] " " $$1; \ 
+ if (++n[$$2] == $(am__install_max)) \ + { print $$2, files[$$2]; n[$$2] = 0; files[$$2] = "" } } \ + END { for (dir in files) print dir, files[dir] }' +am__base_list = \ + sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \ + sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g' +am__installdirs = "$(DESTDIR)$(libdir)" "$(DESTDIR)$(includedir)" +LTLIBRARIES = $(lib_LTLIBRARIES) +libtesseract_neural_la_LIBADD = +am_libtesseract_neural_la_OBJECTS = input_file_buffer.lo neural_net.lo \ + neuron.lo sigmoid_table.lo +libtesseract_neural_la_OBJECTS = $(am_libtesseract_neural_la_OBJECTS) +libtesseract_neural_la_LINK = $(LIBTOOL) --tag=CXX $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=link $(CXXLD) $(AM_CXXFLAGS) \ + $(CXXFLAGS) $(libtesseract_neural_la_LDFLAGS) $(LDFLAGS) -o $@ +DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir) +depcomp = $(SHELL) $(top_srcdir)/config/depcomp +am__depfiles_maybe = depfiles +am__mv = mv -f +CXXCOMPILE = $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \ + $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CXXFLAGS) $(CXXFLAGS) +LTCXXCOMPILE = $(LIBTOOL) --tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \ + --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \ + $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CXXFLAGS) $(CXXFLAGS) +CXXLD = $(CXX) +CXXLINK = $(LIBTOOL) --tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \ + --mode=link $(CXXLD) $(AM_CXXFLAGS) $(CXXFLAGS) $(AM_LDFLAGS) \ + $(LDFLAGS) -o $@ +SOURCES = $(libtesseract_neural_la_SOURCES) +DIST_SOURCES = $(libtesseract_neural_la_SOURCES) +RECURSIVE_TARGETS = all-recursive check-recursive dvi-recursive \ + html-recursive info-recursive install-data-recursive \ + install-dvi-recursive install-exec-recursive \ + install-html-recursive install-info-recursive \ + install-pdf-recursive install-ps-recursive install-recursive \ + installcheck-recursive installdirs-recursive pdf-recursive \ + ps-recursive uninstall-recursive +HEADERS = $(include_HEADERS) +RECURSIVE_CLEAN_TARGETS = mostlyclean-recursive clean-recursive \ + distclean-recursive 
maintainer-clean-recursive +AM_RECURSIVE_TARGETS = $(RECURSIVE_TARGETS:-recursive=) \ + $(RECURSIVE_CLEAN_TARGETS:-recursive=) tags TAGS ctags CTAGS \ + distdir +ETAGS = etags +CTAGS = ctags +DIST_SUBDIRS = $(SUBDIRS) +DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) +am__relativize = \ + dir0=`pwd`; \ + sed_first='s,^\([^/]*\)/.*$$,\1,'; \ + sed_rest='s,^[^/]*/*,,'; \ + sed_last='s,^.*/\([^/]*\)$$,\1,'; \ + sed_butlast='s,/*[^/]*$$,,'; \ + while test -n "$$dir1"; do \ + first=`echo "$$dir1" | sed -e "$$sed_first"`; \ + if test "$$first" != "."; then \ + if test "$$first" = ".."; then \ + dir2=`echo "$$dir0" | sed -e "$$sed_last"`/"$$dir2"; \ + dir0=`echo "$$dir0" | sed -e "$$sed_butlast"`; \ + else \ + first2=`echo "$$dir2" | sed -e "$$sed_first"`; \ + if test "$$first2" = "$$first"; then \ + dir2=`echo "$$dir2" | sed -e "$$sed_rest"`; \ + else \ + dir2="../$$dir2"; \ + fi; \ + dir0="$$dir0"/"$$first"; \ + fi; \ + fi; \ + dir1=`echo "$$dir1" | sed -e "$$sed_rest"`; \ + done; \ + reldir="$$dir2" +ACLOCAL = @ACLOCAL@ +AMTAR = @AMTAR@ +AR = @AR@ +AUTOCONF = @AUTOCONF@ +AUTOHEADER = @AUTOHEADER@ +AUTOMAKE = @AUTOMAKE@ +AWK = @AWK@ +CC = @CC@ +CCDEPMODE = @CCDEPMODE@ +CFLAGS = @CFLAGS@ +CPP = @CPP@ +CPPFLAGS = @CPPFLAGS@ +CXX = @CXX@ +CXXCPP = @CXXCPP@ +CXXDEPMODE = @CXXDEPMODE@ +CXXFLAGS = @CXXFLAGS@ +CYGPATH_W = @CYGPATH_W@ +DEFS = @DEFS@ +DEPDIR = @DEPDIR@ +DSYMUTIL = @DSYMUTIL@ +DUMPBIN = @DUMPBIN@ +ECHO_C = @ECHO_C@ +ECHO_N = @ECHO_N@ +ECHO_T = @ECHO_T@ +EGREP = @EGREP@ +EXEEXT = @EXEEXT@ +FGREP = @FGREP@ +GENERIC_API_VERSION = @GENERIC_API_VERSION@ +GENERIC_LIBRARY_NAME = @GENERIC_LIBRARY_NAME@ +GENERIC_LIBRARY_VERSION = @GENERIC_LIBRARY_VERSION@ +GENERIC_MAJOR_VERSION = @GENERIC_MAJOR_VERSION@ +GENERIC_RELEASE = @GENERIC_RELEASE@ +GENERIC_VERSION = @GENERIC_VERSION@ +GETTEXT_MACRO_VERSION = @GETTEXT_MACRO_VERSION@ +GMSGFMT = @GMSGFMT@ +GMSGFMT_015 = @GMSGFMT_015@ +GREP = @GREP@ +INSTALL = @INSTALL@ +INSTALL_DATA = @INSTALL_DATA@ 
+INSTALL_PROGRAM = @INSTALL_PROGRAM@ +INSTALL_SCRIPT = @INSTALL_SCRIPT@ +INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +INTLLIBS = @INTLLIBS@ +INTL_MACOSX_LIBS = @INTL_MACOSX_LIBS@ +LD = @LD@ +LDFLAGS = @LDFLAGS@ +LIBICONV = @LIBICONV@ +LIBINTL = @LIBINTL@ +LIBOBJS = @LIBOBJS@ +LIBS = @LIBS@ +LIBTIFF_CFLAGS = @LIBTIFF_CFLAGS@ +LIBTIFF_LIBS = @LIBTIFF_LIBS@ +LIBTOOL = @LIBTOOL@ +LIPO = @LIPO@ +LN_S = @LN_S@ +LTLIBICONV = @LTLIBICONV@ +LTLIBINTL = @LTLIBINTL@ +LTLIBOBJS = @LTLIBOBJS@ +MAINT = @MAINT@ +MAKEINFO = @MAKEINFO@ +MKDIR_P = @MKDIR_P@ +MSGFMT = @MSGFMT@ +MSGFMT_015 = @MSGFMT_015@ +MSGMERGE = @MSGMERGE@ +NM = @NM@ +NMEDIT = @NMEDIT@ +OBJDUMP = @OBJDUMP@ +OBJEXT = @OBJEXT@ +OTOOL = @OTOOL@ +OTOOL64 = @OTOOL64@ +PACKAGE = @PACKAGE@ +PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ +PACKAGE_DATE = @PACKAGE_DATE@ +PACKAGE_NAME = @PACKAGE_NAME@ +PACKAGE_STRING = @PACKAGE_STRING@ +PACKAGE_TARNAME = @PACKAGE_TARNAME@ +PACKAGE_URL = @PACKAGE_URL@ +PACKAGE_VERSION = @PACKAGE_VERSION@ +PACKAGE_YEAR = @PACKAGE_YEAR@ +PATH_SEPARATOR = @PATH_SEPARATOR@ +POSUB = @POSUB@ +RANLIB = @RANLIB@ +SED = @SED@ +SET_MAKE = @SET_MAKE@ +SHELL = @SHELL@ +STRIP = @STRIP@ +USE_NLS = @USE_NLS@ +VERSION = @VERSION@ +XGETTEXT = @XGETTEXT@ +XGETTEXT_015 = @XGETTEXT_015@ +XGETTEXT_EXTRA_OPTIONS = @XGETTEXT_EXTRA_OPTIONS@ +abs_builddir = @abs_builddir@ +abs_srcdir = @abs_srcdir@ +abs_top_builddir = @abs_top_builddir@ +abs_top_srcdir = @abs_top_srcdir@ +ac_ct_CC = @ac_ct_CC@ +ac_ct_CXX = @ac_ct_CXX@ +ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ +am__include = @am__include@ +am__leading_dot = @am__leading_dot@ +am__quote = @am__quote@ +am__tar = @am__tar@ +am__untar = @am__untar@ +bindir = @bindir@ +build = @build@ +build_alias = @build_alias@ +build_cpu = @build_cpu@ +build_os = @build_os@ +build_vendor = @build_vendor@ +builddir = @builddir@ +datadir = @datadir@ +datarootdir = @datarootdir@ +docdir = @docdir@ +dvidir = @dvidir@ +exec_prefix = @exec_prefix@ +host = @host@ +host_alias = @host_alias@ +host_cpu = 
@host_cpu@ +host_os = @host_os@ +host_vendor = @host_vendor@ +htmldir = @htmldir@ +includedir = @includedir@ +infodir = @infodir@ +install_sh = @install_sh@ +libdir = @libdir@ +libexecdir = @libexecdir@ +localedir = @localedir@ +localstatedir = @localstatedir@ +lt_ECHO = @lt_ECHO@ +mandir = @mandir@ +mkdir_p = @mkdir_p@ +oldincludedir = @oldincludedir@ +pdfdir = @pdfdir@ +prefix = @prefix@ +program_transform_name = @program_transform_name@ +psdir = @psdir@ +sbindir = @sbindir@ +sharedstatedir = @sharedstatedir@ +srcdir = @srcdir@ +sysconfdir = @sysconfdir@ +target_alias = @target_alias@ +top_build_prefix = @top_build_prefix@ +top_builddir = @top_builddir@ +top_srcdir = @top_srcdir@ +SUBDIRS = +AM_CPPFLAGS = \ + -DUSE_STD_NAMESPACE \ + -I$(top_srcdir)/cutil -I$(top_srcdir)/ccutil \ + -I$(top_srcdir)/ccstruct -I$(top_srcdir)/dict \ + -I$(top_srcdir)/image -I$(top_srcdir)/viewer + +include_HEADERS = \ + input_file_buffer.h neural_net.h neuron.h + +lib_LTLIBRARIES = libtesseract_neural.la +libtesseract_neural_la_SOURCES = \ + input_file_buffer.cpp neural_net.cpp neuron.cpp sigmoid_table.cpp + +libtesseract_neural_la_LDFLAGS = -version-info $(GENERIC_LIBRARY_VERSION) +all: all-recursive + +.SUFFIXES: +.SUFFIXES: .cpp .lo .o .obj +$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(am__configure_deps) + @for dep in $?; do \ + case '$(am__configure_deps)' in \ + *$$dep*) \ + ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ + && { if test -f $@; then exit 0; else break; fi; }; \ + exit 1;; \ + esac; \ + done; \ + echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu neural_networks/runtime/Makefile'; \ + $(am__cd) $(top_srcdir) && \ + $(AUTOMAKE) --gnu neural_networks/runtime/Makefile +.PRECIOUS: Makefile +Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status + @case '$?' 
in \ + *config.status*) \ + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ + *) \ + echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \ + cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \ + esac; + +$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh + +$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(am__aclocal_m4_deps): +install-libLTLIBRARIES: $(lib_LTLIBRARIES) + @$(NORMAL_INSTALL) + test -z "$(libdir)" || $(MKDIR_P) "$(DESTDIR)$(libdir)" + @list='$(lib_LTLIBRARIES)'; test -n "$(libdir)" || list=; \ + list2=; for p in $$list; do \ + if test -f $$p; then \ + list2="$$list2 $$p"; \ + else :; fi; \ + done; \ + test -z "$$list2" || { \ + echo " $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL) $(INSTALL_STRIP_FLAG) $$list2 '$(DESTDIR)$(libdir)'"; \ + $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL) $(INSTALL_STRIP_FLAG) $$list2 "$(DESTDIR)$(libdir)"; \ + } + +uninstall-libLTLIBRARIES: + @$(NORMAL_UNINSTALL) + @list='$(lib_LTLIBRARIES)'; test -n "$(libdir)" || list=; \ + for p in $$list; do \ + $(am__strip_dir) \ + echo " $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=uninstall rm -f '$(DESTDIR)$(libdir)/$$f'"; \ + $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=uninstall rm -f "$(DESTDIR)$(libdir)/$$f"; \ + done + +clean-libLTLIBRARIES: + -test -z "$(lib_LTLIBRARIES)" || rm -f $(lib_LTLIBRARIES) + @list='$(lib_LTLIBRARIES)'; for p in $$list; do \ + dir="`echo $$p | sed -e 's|/[^/]*$$||'`"; \ + test "$$dir" != "$$p" || dir=.; \ + echo "rm -f \"$${dir}/so_locations\""; \ + rm -f "$${dir}/so_locations"; \ + done 
+libtesseract_neural.la: $(libtesseract_neural_la_OBJECTS) $(libtesseract_neural_la_DEPENDENCIES) + $(libtesseract_neural_la_LINK) -rpath $(libdir) $(libtesseract_neural_la_OBJECTS) $(libtesseract_neural_la_LIBADD) $(LIBS) + +mostlyclean-compile: + -rm -f *.$(OBJEXT) + +distclean-compile: + -rm -f *.tab.c + +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/input_file_buffer.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/neural_net.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/neuron.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/sigmoid_table.Plo@am__quote@ + +.cpp.o: +@am__fastdepCXX_TRUE@ $(CXXCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $< +@am__fastdepCXX_TRUE@ $(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(CXXCOMPILE) -c -o $@ $< + +.cpp.obj: +@am__fastdepCXX_TRUE@ $(CXXCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ `$(CYGPATH_W) '$<'` +@am__fastdepCXX_TRUE@ $(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(CXXCOMPILE) -c -o $@ `$(CYGPATH_W) '$<'` + +.cpp.lo: +@am__fastdepCXX_TRUE@ $(LTCXXCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $< +@am__fastdepCXX_TRUE@ $(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Plo +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(LTCXXCOMPILE) -c -o $@ $< + +mostlyclean-libtool: + -rm -f *.lo + +clean-libtool: + -rm -rf .libs _libs +install-includeHEADERS: $(include_HEADERS) + @$(NORMAL_INSTALL) + test -z 
"$(includedir)" || $(MKDIR_P) "$(DESTDIR)$(includedir)" + @list='$(include_HEADERS)'; test -n "$(includedir)" || list=; \ + for p in $$list; do \ + if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \ + echo "$$d$$p"; \ + done | $(am__base_list) | \ + while read files; do \ + echo " $(INSTALL_HEADER) $$files '$(DESTDIR)$(includedir)'"; \ + $(INSTALL_HEADER) $$files "$(DESTDIR)$(includedir)" || exit $$?; \ + done + +uninstall-includeHEADERS: + @$(NORMAL_UNINSTALL) + @list='$(include_HEADERS)'; test -n "$(includedir)" || list=; \ + files=`for p in $$list; do echo $$p; done | sed -e 's|^.*/||'`; \ + test -n "$$files" || exit 0; \ + echo " ( cd '$(DESTDIR)$(includedir)' && rm -f" $$files ")"; \ + cd "$(DESTDIR)$(includedir)" && rm -f $$files + +# This directory's subdirectories are mostly independent; you can cd +# into them and run `make' without going through this Makefile. +# To change the values of `make' variables: instead of editing Makefiles, +# (1) if the variable is set in `config.status', edit `config.status' +# (which will cause the Makefiles to be regenerated when you run `make'); +# (2) otherwise, pass the desired values on the `make' command line. 
+$(RECURSIVE_TARGETS): + @fail= failcom='exit 1'; \ + for f in x $$MAKEFLAGS; do \ + case $$f in \ + *=* | --[!k]*);; \ + *k*) failcom='fail=yes';; \ + esac; \ + done; \ + dot_seen=no; \ + target=`echo $@ | sed s/-recursive//`; \ + list='$(SUBDIRS)'; for subdir in $$list; do \ + echo "Making $$target in $$subdir"; \ + if test "$$subdir" = "."; then \ + dot_seen=yes; \ + local_target="$$target-am"; \ + else \ + local_target="$$target"; \ + fi; \ + ($(am__cd) $$subdir && $(MAKE) $(AM_MAKEFLAGS) $$local_target) \ + || eval $$failcom; \ + done; \ + if test "$$dot_seen" = "no"; then \ + $(MAKE) $(AM_MAKEFLAGS) "$$target-am" || exit 1; \ + fi; test -z "$$fail" + +$(RECURSIVE_CLEAN_TARGETS): + @fail= failcom='exit 1'; \ + for f in x $$MAKEFLAGS; do \ + case $$f in \ + *=* | --[!k]*);; \ + *k*) failcom='fail=yes';; \ + esac; \ + done; \ + dot_seen=no; \ + case "$@" in \ + distclean-* | maintainer-clean-*) list='$(DIST_SUBDIRS)' ;; \ + *) list='$(SUBDIRS)' ;; \ + esac; \ + rev=''; for subdir in $$list; do \ + if test "$$subdir" = "."; then :; else \ + rev="$$subdir $$rev"; \ + fi; \ + done; \ + rev="$$rev ."; \ + target=`echo $@ | sed s/-recursive//`; \ + for subdir in $$rev; do \ + echo "Making $$target in $$subdir"; \ + if test "$$subdir" = "."; then \ + local_target="$$target-am"; \ + else \ + local_target="$$target"; \ + fi; \ + ($(am__cd) $$subdir && $(MAKE) $(AM_MAKEFLAGS) $$local_target) \ + || eval $$failcom; \ + done && test -z "$$fail" +tags-recursive: + list='$(SUBDIRS)'; for subdir in $$list; do \ + test "$$subdir" = . || ($(am__cd) $$subdir && $(MAKE) $(AM_MAKEFLAGS) tags); \ + done +ctags-recursive: + list='$(SUBDIRS)'; for subdir in $$list; do \ + test "$$subdir" = . 
|| ($(am__cd) $$subdir && $(MAKE) $(AM_MAKEFLAGS) ctags); \ + done + +ID: $(HEADERS) $(SOURCES) $(LISP) $(TAGS_FILES) + list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | \ + $(AWK) '{ files[$$0] = 1; nonempty = 1; } \ + END { if (nonempty) { for (i in files) print i; }; }'`; \ + mkid -fID $$unique +tags: TAGS + +TAGS: tags-recursive $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \ + $(TAGS_FILES) $(LISP) + set x; \ + here=`pwd`; \ + if ($(ETAGS) --etags-include --version) >/dev/null 2>&1; then \ + include_option=--etags-include; \ + empty_fix=.; \ + else \ + include_option=--include; \ + empty_fix=; \ + fi; \ + list='$(SUBDIRS)'; for subdir in $$list; do \ + if test "$$subdir" = .; then :; else \ + test ! -f $$subdir/TAGS || \ + set "$$@" "$$include_option=$$here/$$subdir/TAGS"; \ + fi; \ + done; \ + list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | \ + $(AWK) '{ files[$$0] = 1; nonempty = 1; } \ + END { if (nonempty) { for (i in files) print i; }; }'`; \ + shift; \ + if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \ + test -n "$$unique" || unique=$$empty_fix; \ + if test $$# -gt 0; then \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + "$$@" $$unique; \ + else \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + $$unique; \ + fi; \ + fi +ctags: CTAGS +CTAGS: ctags-recursive $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \ + $(TAGS_FILES) $(LISP) + list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | \ + $(AWK) '{ files[$$0] = 1; nonempty = 1; } \ + END { if (nonempty) { for (i in files) print i; }; }'`; \ + test -z "$(CTAGS_ARGS)$$unique" \ + || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \ + $$unique + +GTAGS: + 
here=`$(am__cd) $(top_builddir) && pwd` \ + && $(am__cd) $(top_srcdir) \ + && gtags -i $(GTAGS_ARGS) "$$here" + +distclean-tags: + -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags + +distdir: $(DISTFILES) + @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + list='$(DISTFILES)'; \ + dist_files=`for file in $$list; do echo $$file; done | \ + sed -e "s|^$$srcdirstrip/||;t" \ + -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ + case $$dist_files in \ + */*) $(MKDIR_P) `echo "$$dist_files" | \ + sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ + sort -u` ;; \ + esac; \ + for file in $$dist_files; do \ + if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ + if test -d $$d/$$file; then \ + dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ + if test -d "$(distdir)/$$file"; then \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ + cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ + find "$(distdir)/$$file" -type d ! 
-perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ + else \ + test -f "$(distdir)/$$file" \ + || cp -p $$d/$$file "$(distdir)/$$file" \ + || exit 1; \ + fi; \ + done + @list='$(DIST_SUBDIRS)'; for subdir in $$list; do \ + if test "$$subdir" = .; then :; else \ + test -d "$(distdir)/$$subdir" \ + || $(MKDIR_P) "$(distdir)/$$subdir" \ + || exit 1; \ + fi; \ + done + @list='$(DIST_SUBDIRS)'; for subdir in $$list; do \ + if test "$$subdir" = .; then :; else \ + dir1=$$subdir; dir2="$(distdir)/$$subdir"; \ + $(am__relativize); \ + new_distdir=$$reldir; \ + dir1=$$subdir; dir2="$(top_distdir)"; \ + $(am__relativize); \ + new_top_distdir=$$reldir; \ + echo " (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) top_distdir="$$new_top_distdir" distdir="$$new_distdir" \\"; \ + echo " am__remove_distdir=: am__skip_length_check=: am__skip_mode_fix=: distdir)"; \ + ($(am__cd) $$subdir && \ + $(MAKE) $(AM_MAKEFLAGS) \ + top_distdir="$$new_top_distdir" \ + distdir="$$new_distdir" \ + am__remove_distdir=: \ + am__skip_length_check=: \ + am__skip_mode_fix=: \ + distdir) \ + || exit 1; \ + fi; \ + done +check-am: all-am +check: check-recursive +all-am: Makefile $(LTLIBRARIES) $(HEADERS) +installdirs: installdirs-recursive +installdirs-am: + for dir in "$(DESTDIR)$(libdir)" "$(DESTDIR)$(includedir)"; do \ + test -z "$$dir" || $(MKDIR_P) "$$dir"; \ + done +install: install-recursive +install-exec: install-exec-recursive +install-data: install-data-recursive +uninstall: uninstall-recursive + +install-am: all-am + @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am + +installcheck: installcheck-recursive +install-strip: + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + `test -z '$(STRIP)' || \ + echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install +mostlyclean-generic: + +clean-generic: + +distclean-generic: + -test -z "$(CONFIG_CLEAN_FILES)" || rm -f 
$(CONFIG_CLEAN_FILES) + -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) + +maintainer-clean-generic: + @echo "This command is intended for maintainers to use" + @echo "it deletes files that may require special tools to rebuild." +clean: clean-recursive + +clean-am: clean-generic clean-libLTLIBRARIES clean-libtool \ + mostlyclean-am + +distclean: distclean-recursive + -rm -rf ./$(DEPDIR) + -rm -f Makefile +distclean-am: clean-am distclean-compile distclean-generic \ + distclean-tags + +dvi: dvi-recursive + +dvi-am: + +html: html-recursive + +html-am: + +info: info-recursive + +info-am: + +install-data-am: install-includeHEADERS + +install-dvi: install-dvi-recursive + +install-dvi-am: + +install-exec-am: install-libLTLIBRARIES + +install-html: install-html-recursive + +install-html-am: + +install-info: install-info-recursive + +install-info-am: + +install-man: + +install-pdf: install-pdf-recursive + +install-pdf-am: + +install-ps: install-ps-recursive + +install-ps-am: + +installcheck-am: + +maintainer-clean: maintainer-clean-recursive + -rm -rf ./$(DEPDIR) + -rm -f Makefile +maintainer-clean-am: distclean-am maintainer-clean-generic + +mostlyclean: mostlyclean-recursive + +mostlyclean-am: mostlyclean-compile mostlyclean-generic \ + mostlyclean-libtool + +pdf: pdf-recursive + +pdf-am: + +ps: ps-recursive + +ps-am: + +uninstall-am: uninstall-includeHEADERS uninstall-libLTLIBRARIES + +.MAKE: $(RECURSIVE_CLEAN_TARGETS) $(RECURSIVE_TARGETS) ctags-recursive \ + install-am install-strip tags-recursive + +.PHONY: $(RECURSIVE_CLEAN_TARGETS) $(RECURSIVE_TARGETS) CTAGS GTAGS \ + all all-am check check-am clean clean-generic \ + clean-libLTLIBRARIES clean-libtool ctags ctags-recursive \ + distclean distclean-compile distclean-generic \ + distclean-libtool distclean-tags distdir dvi dvi-am html \ + html-am info info-am install install-am install-data \ + install-data-am install-dvi install-dvi-am install-exec \ + 
install-exec-am install-html install-html-am \ + install-includeHEADERS install-info install-info-am \ + install-libLTLIBRARIES install-man install-pdf install-pdf-am \ + install-ps install-ps-am install-strip installcheck \ + installcheck-am installdirs installdirs-am maintainer-clean \ + maintainer-clean-generic mostlyclean mostlyclean-compile \ + mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \ + tags tags-recursive uninstall uninstall-am \ + uninstall-includeHEADERS uninstall-libLTLIBRARIES + + +# Tell versions [3.59,3.63) of GNU make to not export all variables. +# Otherwise a system limit (for SysV at least) may be exceeded. +.NOEXPORT: diff --git a/neural_networks/runtime/input_file_buffer.cpp b/neural_networks/runtime/input_file_buffer.cpp new file mode 100644 index 0000000000..c3ca67b604 --- /dev/null +++ b/neural_networks/runtime/input_file_buffer.cpp @@ -0,0 +1,36 @@ +// Copyright 2008 Google Inc. +// All Rights Reserved. +// Author: ahmadab@google.com (Ahmad Abdulkader) +// +// input_file_buffer.h: Declarations of a class for an object that +// represents an input file buffer. 
+ +#include +#include "input_file_buffer.h" + +namespace tesseract { +// default and only contsructor +InputFileBuffer::InputFileBuffer(const string &file_name) + : file_name_(file_name) { + fp_ = NULL; +} + +// virtual destructor +InputFileBuffer::~InputFileBuffer() { + if (fp_ != NULL) { + fclose(fp_); + } +} + +// Read the specified number of bytes to the specified input buffer +int InputFileBuffer::Read(void *buffer, int bytes_to_read) { + // open the file if necessary + if (fp_ == NULL) { + fp_ = fopen(file_name_.c_str(), "rb"); + if (fp_ == NULL) { + return 0; + } + } + return fread(buffer, 1, bytes_to_read, fp_); +} +} diff --git a/neural_networks/runtime/input_file_buffer.h b/neural_networks/runtime/input_file_buffer.h new file mode 100644 index 0000000000..5aa7465c41 --- /dev/null +++ b/neural_networks/runtime/input_file_buffer.h @@ -0,0 +1,31 @@ +// Copyright 2008 Google Inc. +// All Rights Reserved. +// Author: ahmadab@google.com (Ahmad Abdulkader) +// +// input_file_buffer.h: Declarations of a class for an object that +// represents an input file buffer. +// + +#ifndef INPUT_FILE_BUFFER_H +#define INPUT_FILE_BUFFER_H + +#include +#include +#ifdef USE_STD_NAMESPACE +using std::string; +#endif + +namespace tesseract { +class InputFileBuffer { + public: + explicit InputFileBuffer(const string &file_name); + virtual ~InputFileBuffer(); + int Read(void *buffer, int bytes_to_read); + + protected: + string file_name_; + FILE *fp_; +}; +} + +#endif // INPUT_FILE_BUFFER_H__ diff --git a/neural_networks/runtime/neural_net.cpp b/neural_networks/runtime/neural_net.cpp new file mode 100644 index 0000000000..67bc2decf1 --- /dev/null +++ b/neural_networks/runtime/neural_net.cpp @@ -0,0 +1,305 @@ +// Copyright 2008 Google Inc. +// All Rights Reserved. 
+// Author: ahmadab@google.com (Ahmad Abdulkader) +// +// neural_net.cpp: Declarations of a class for an object that +// represents an arbitrary network of neurons +// +#include +#include +#include "neural_net.h" +#include "input_file_buffer.h" + +namespace tesseract { + +NeuralNet::NeuralNet() { + Init(); +} + +NeuralNet::~NeuralNet() { + // clean up the wts chunks vector + for(int vec = 0; vec < wts_vec_.size(); vec++) { + delete wts_vec_[vec]; + } + // clean up neurons + delete []neurons_; + // clean up nodes + for (int node_idx = 0; node_idx < neuron_cnt_; node_idx++) { + delete []fast_nodes_[node_idx].inputs; + } + +} + +// Initiaization function +void NeuralNet::Init() { + read_only_ = true; + auto_encoder_ = false; + alloc_wgt_cnt_ = 0; + wts_cnt_ = 0; + neuron_cnt_ = 0; + in_cnt_ = 0; + out_cnt_ = 0; + wts_vec_.clear(); + neurons_ = NULL; + inputs_mean_.clear(); + inputs_std_dev_.clear(); + inputs_min_.clear(); + inputs_max_.clear(); +} + +// Does a fast feedforward for read_only nets +// Templatized for float and double Types +template bool NeuralNet::FastFeedForward(const Type *inputs, + Type *outputs) { + int node_idx = 0; + Node *node = &fast_nodes_[0]; + // feed inputs in and offset them by the pre-computed bias + for (node_idx = 0; node_idx < in_cnt_; node_idx++, node++) { + node->out = inputs[node_idx] - node->bias; + } + // compute nodes activations and outputs + for (;node_idx < neuron_cnt_; node_idx++, node++) { + double activation = -node->bias; + for (int fan_in_idx = 0; fan_in_idx < node->fan_in_cnt; fan_in_idx++) { + activation += (node->inputs[fan_in_idx].input_weight * + node->inputs[fan_in_idx].input_node->out); + } + node->out = Neuron::Sigmoid(activation); + } + // copy the outputs to the output buffers + node = &fast_nodes_[neuron_cnt_ - out_cnt_]; + for (node_idx = 0; node_idx < out_cnt_; node_idx++, node++) { + outputs[node_idx] = node->out; + } + return true; +} + +// Performs a feedforward for general nets. 
Used mainly in training mode +// Templatized for float and double Types +template bool NeuralNet::FeedForward(const Type *inputs, + Type *outputs) { + // call the fast version in case of readonly nets + if (read_only_) { + return FastFeedForward(inputs, outputs); + } + // clear all neurons + Clear(); + // for auto encoders, apply no input normalization + if (auto_encoder_) { + for (int in = 0; in < in_cnt_; in++) { + neurons_[in].set_output(inputs[in]); + } + } else { + // Input normalization : subtract mean and divide by stddev + for (int in = 0; in < in_cnt_; in++) { + neurons_[in].set_output((inputs[in] - inputs_min_[in]) / + (inputs_max_[in] - inputs_min_[in])); + neurons_[in].set_output((neurons_[in].output() - inputs_mean_[in]) / + inputs_std_dev_[in]); + } + } + // compute the net outputs: follow a pull model each output pulls the + // outputs of its input nodes and so on + for (int out = neuron_cnt_ - out_cnt_; out < neuron_cnt_; out++) { + neurons_[out].FeedForward(); + // copy the values to the output buffer + outputs[out] = neurons_[out].output(); + } + return true; +} + +// Sets a connection between two neurons +bool NeuralNet::SetConnection(int from, int to) { + // allocate the wgt + float *wts = AllocWgt(1); + if (wts == NULL) { + return false; + } + // register the connection + neurons_[to].AddFromConnection(neurons_ + from, wts, 1); + return true; +} + +// Create a fast readonly version of the net +bool NeuralNet::CreateFastNet() { + fast_nodes_.resize(neuron_cnt_); + // build the node structures + int wts_cnt = 0; + for (int node_idx = 0; node_idx < neuron_cnt_; node_idx++) { + Node *node = &fast_nodes_[node_idx]; + if (neurons_[node_idx].node_type() == Neuron::Input) { + // Input neurons have no fan-in + node->fan_in_cnt = 0; + node->inputs = NULL; + // Input bias is the normalization offset computed from + // training input stats + if (fabs(inputs_max_[node_idx] - inputs_min_[node_idx]) < + kMinInputRange) { + // if the range approaches zero, the 
stdev is not defined, + // this indicates that this input does not change. + // Set the bias to zero + node->bias = 0.0f; + } else { + node->bias = inputs_min_[node_idx] + (inputs_mean_[node_idx] * + (inputs_max_[node_idx] - inputs_min_[node_idx])); + } + } else { + node->bias = neurons_[node_idx].bias(); + node->fan_in_cnt = neurons_[node_idx].fan_in_cnt(); + // allocate memory for fan-in nodes + node->inputs = new WeightedNode[node->fan_in_cnt]; + if (node->inputs == NULL) { + return false; + } + for (int fan_in = 0; fan_in < node->fan_in_cnt; fan_in++) { + // identify fan-in neuron + const int id = neurons_[node_idx].fan_in(fan_in)->id(); + // Feedback connections are not allowed and should never happen + if (id >= node_idx) { + return false; + } + // add the the fan-in neuron and its wgt + node->inputs[fan_in].input_node = &fast_nodes_[id]; + float wgt_val = neurons_[node_idx].fan_in_wts(fan_in); + // for input neurons normalize the wgt by the input scaling + // values to save time during feedforward + if (neurons_[node_idx].fan_in(fan_in)->node_type() == Neuron::Input) { + // if the range approaches zero, the stdev is not defined, + // this indicates that this input does not change. 
+ // Set the weight to zero + if (fabs(inputs_max_[id] - inputs_min_[id]) < kMinInputRange) { + wgt_val = 0.0f; + } else { + wgt_val /= ((inputs_max_[id] - inputs_min_[id]) * + inputs_std_dev_[id]); + } + } + node->inputs[fan_in].input_weight = wgt_val; + } + // incr wgt count to validate against at the end + wts_cnt += node->fan_in_cnt; + } + } + // sanity check + return wts_cnt_ == wts_cnt; +} + +// returns a pointer to the requested set of weights +// Allocates in chunks +float * NeuralNet::AllocWgt(int wgt_cnt) { + // see if need to allocate a new chunk of wts + if (wts_vec_.size() == 0 || (alloc_wgt_cnt_ + wgt_cnt) > kWgtChunkSize) { + // add the new chunck to the wts_chunks vector + wts_vec_.push_back(new vector (kWgtChunkSize)); + alloc_wgt_cnt_ = 0; + } + float *ret_ptr = &((*wts_vec_.back())[alloc_wgt_cnt_]); + // incr usage counts + alloc_wgt_cnt_ += wgt_cnt; + wts_cnt_ += wgt_cnt; + return ret_ptr; +} + +// create a new net object using an input file as a source +NeuralNet *NeuralNet::FromFile(const string file_name) { + // open the file + InputFileBuffer input_buff(file_name); + // create a new net object using input buffer + NeuralNet *net_obj = FromInputBuffer(&input_buff); + return net_obj; +} + +// create a net object from an input buffer +NeuralNet *NeuralNet::FromInputBuffer(InputFileBuffer *ib) { + // create a new net object + NeuralNet *net_obj = new NeuralNet(); + if (net_obj == NULL) { + return NULL; + } + // load the net + if (!net_obj->ReadBinary(ib)) { + delete net_obj; + net_obj = NULL; + } + return net_obj; +} + +// Compute the output of a specific output node. 
+// This function is useful for application that are interested in a single +// output of the net and do not want to waste time on the rest +// This is the fast-read-only version of this function +template bool NeuralNet::FastGetNetOutput(const Type *inputs, + int output_id, + Type *output) { + // feed inputs in and offset them by the pre-computed bias + int node_idx = 0; + Node *node = &fast_nodes_[0]; + for (node_idx = 0; node_idx < in_cnt_; node_idx++, node++) { + node->out = inputs[node_idx] - node->bias; + } + + // compute nodes' activations and outputs for hidden nodes if any + int hidden_node_cnt = neuron_cnt_ - out_cnt_; + for (;node_idx < hidden_node_cnt; node_idx++, node++) { + double activation = -node->bias; + for (int fan_in_idx = 0; fan_in_idx < node->fan_in_cnt; fan_in_idx++) { + activation += (node->inputs[fan_in_idx].input_weight * + node->inputs[fan_in_idx].input_node->out); + } + node->out = Neuron::Sigmoid(activation); + } + + // compute the output of the required output node + node += output_id; + double activation = -node->bias; + for (int fan_in_idx = 0; fan_in_idx < node->fan_in_cnt; fan_in_idx++) { + activation += (node->inputs[fan_in_idx].input_weight * + node->inputs[fan_in_idx].input_node->out); + } + (*output) = Neuron::Sigmoid(activation); + return true; +} + +// Performs a feedforward for general nets. 
Used mainly in training mode +// Templatized for float and double Types +template bool NeuralNet::GetNetOutput(const Type *inputs, + int output_id, + Type *output) { + // validate output id + if (output_id < 0 || output_id >= out_cnt_) { + return false; + } + + // call the fast version in case of readonly nets + if (read_only_) { + return FastGetNetOutput(inputs, output_id, output); + } + + // For the slow version, we'll just call FeedForward and return the + // appropriate output + vector outputs(out_cnt_); + if (!FeedForward(inputs, &outputs[0])) { + return false; + } + (*output) = outputs[output_id]; + + return true; +} + +// Instantiate all supported templates now that the functions have been defined. +template bool NeuralNet::FeedForward(const float *inputs, float *outputs); +template bool NeuralNet::FeedForward(const double *inputs, double *outputs); +template bool NeuralNet::FastFeedForward(const float *inputs, float *outputs); +template bool NeuralNet::FastFeedForward(const double *inputs, + double *outputs); +template bool NeuralNet::GetNetOutput(const float *inputs, int output_id, + float *output); +template bool NeuralNet::GetNetOutput(const double *inputs, int output_id, + double *output); +template bool NeuralNet::FastGetNetOutput(const float *inputs, int output_id, + float *output); +template bool NeuralNet::FastGetNetOutput(const double *inputs, int output_id, + double *output); +template bool NeuralNet::ReadBinary(InputFileBuffer *input_buffer); + +} diff --git a/neural_networks/runtime/neural_net.h b/neural_networks/runtime/neural_net.h new file mode 100644 index 0000000000..91d0d68a24 --- /dev/null +++ b/neural_networks/runtime/neural_net.h @@ -0,0 +1,246 @@ +// Copyright 2008 Google Inc. +// All Rights Reserved. 
+// Author: ahmadab@google.com (Ahmad Abdulkader) +// +// neural_net.h: Declarations of a class for an object that +// represents an arbitrary network of neurons +// + +#ifndef NEURAL_NET_H +#define NEURAL_NET_H + +#include +#include +#include "neuron.h" +#include "input_file_buffer.h" + +namespace tesseract { + +// Minimum input range below which we set the input weight to zero +static const float kMinInputRange = 1e-6f; + +class NeuralNet { + public: + NeuralNet(); + virtual ~NeuralNet(); + // create a net object from a file. Uses stdio + static NeuralNet *FromFile(const string file_name); + // create a net object from an input buffer + static NeuralNet *FromInputBuffer(InputFileBuffer *ib); + // Different flavors of feed forward function + template bool FeedForward(const Type *inputs, + Type *outputs); + // Compute the output of a specific output node. + // This function is useful for application that are interested in a single + // output of the net and do not want to waste time on the rest + template bool GetNetOutput(const Type *inputs, + int output_id, + Type *output); + // Accessor functions + int in_cnt() const { return in_cnt_; } + int out_cnt() const { return out_cnt_; } + + protected: + struct Node; + // A node-weight pair + struct WeightedNode { + Node *input_node; + float input_weight; + }; + // node struct used for fast feedforward in + // Read only nets + struct Node { + float out; + float bias; + int fan_in_cnt; + WeightedNode *inputs; + }; + // Read-Only flag (no training: On by default) + // will presumeably be set to false by + // the inherting TrainableNeuralNet class + bool read_only_; + // input count + int in_cnt_; + // output count + int out_cnt_; + // Total neuron count (including inputs) + int neuron_cnt_; + // count of unique weights + int wts_cnt_; + // Neuron vector + Neuron *neurons_; + // size of allocated weight chunk (in weights) + // This is basically the size of the biggest network + // that I have trained. 
However, the class will allow + // a bigger sized net if desired + static const int kWgtChunkSize = 0x10000; + // Magic number expected at the beginning of the NN + // binary file + static const unsigned int kNetSignature = 0xFEFEABD0; + // count of allocated wgts in the last chunk + int alloc_wgt_cnt_; + // vector of weights buffers + vector *>wts_vec_; + // Is the net an auto-encoder type + bool auto_encoder_; + // vector of input max values + vector inputs_max_; + // vector of input min values + vector inputs_min_; + // vector of input mean values + vector inputs_mean_; + // vector of input standard deviation values + vector inputs_std_dev_; + // vector of input offsets used by fast read-only + // feedforward function + vector fast_nodes_; + // Network Initialization function + void Init(); + // Clears all neurons + void Clear() { + for (int node = 0; node < neuron_cnt_; node++) { + neurons_[node].Clear(); + } + } + // Reads the net from an input buffer + template bool ReadBinary(ReadBuffType *input_buff) { + // Init vars + Init(); + // is this an autoencoder + unsigned int read_val; + unsigned int auto_encode; + // read and verify signature + if (input_buff->Read(&read_val, sizeof(read_val)) != sizeof(read_val)) { + return false; + } + if (read_val != kNetSignature) { + return false; + } + if (input_buff->Read(&auto_encode, sizeof(auto_encode)) != + sizeof(auto_encode)) { + return false; + } + auto_encoder_ = auto_encode; + // read and validate total # of nodes + if (input_buff->Read(&read_val, sizeof(read_val)) != sizeof(read_val)) { + return false; + } + neuron_cnt_ = read_val; + if (neuron_cnt_ <= 0) { + return false; + } + // set the size of the neurons vector + neurons_ = new Neuron[neuron_cnt_]; + if (neurons_ == NULL) { + return false; + } + // read & validate inputs + if (input_buff->Read(&read_val, sizeof(read_val)) != sizeof(read_val)) { + return false; + } + in_cnt_ = read_val; + if (in_cnt_ <= 0) { + return false; + } + // read outputs + if 
(input_buff->Read(&read_val, sizeof(read_val)) != sizeof(read_val)) { + return false; + } + out_cnt_ = read_val; + if (out_cnt_ <= 0) { + return false; + } + // set neuron ids and types + for (int idx = 0; idx < neuron_cnt_; idx++) { + neurons_[idx].set_id(idx); + // input type + if (idx < in_cnt_) { + neurons_[idx].set_node_type(Neuron::Input); + } else if (idx >= (neuron_cnt_ - out_cnt_)) { + neurons_[idx].set_node_type(Neuron::Output); + } else { + neurons_[idx].set_node_type(Neuron::Hidden); + } + } + // read the connections + for (int node_idx = 0; node_idx < neuron_cnt_; node_idx++) { + // read fanout + if (input_buff->Read(&read_val, sizeof(read_val)) != sizeof(read_val)) { + return false; + } + // read the neuron's info + int fan_out_cnt = read_val; + for (int fan_out_idx = 0; fan_out_idx < fan_out_cnt; fan_out_idx++) { + // read the neuron id + if (input_buff->Read(&read_val, sizeof(read_val)) != sizeof(read_val)) { + return false; + } + // create the connection + if (!SetConnection(node_idx, read_val)) { + return false; + } + } + } + // read all the neurons' fan-in connections + for (int node_idx = 0; node_idx < neuron_cnt_; node_idx++) { + // read + if (!neurons_[node_idx].ReadBinary(input_buff)) { + return false; + } + } + // size input stats vector to expected input size + inputs_mean_.resize(in_cnt_); + inputs_std_dev_.resize(in_cnt_); + inputs_min_.resize(in_cnt_); + inputs_max_.resize(in_cnt_); + // read stats + if (input_buff->Read(&(inputs_mean_.front()), + sizeof(inputs_mean_[0]) * in_cnt_) != + sizeof(inputs_mean_[0]) * in_cnt_) { + return false; + } + if (input_buff->Read(&(inputs_std_dev_.front()), + sizeof(inputs_std_dev_[0]) * in_cnt_) != + sizeof(inputs_std_dev_[0]) * in_cnt_) { + return false; + } + if (input_buff->Read(&(inputs_min_.front()), + sizeof(inputs_min_[0]) * in_cnt_) != + sizeof(inputs_min_[0]) * in_cnt_) { + return false; + } + if (input_buff->Read(&(inputs_max_.front()), + sizeof(inputs_max_[0]) * in_cnt_) != + 
sizeof(inputs_max_[0]) * in_cnt_) { + return false; + } + // create a readonly version for fast feedforward + if (read_only_) { + return CreateFastNet(); + } + return true; + } + + // creates a connection between two nodes + bool SetConnection(int from, int to); + // Create a read only version of the net that + // has faster feedforward performance + bool CreateFastNet(); + // internal function to allocate a new set of weights + // Centralized weight allocation attempts to increase + // weights locality of reference making it more cache friendly + float *AllocWgt(int wgt_cnt); + // different flavors read-only feedforward function + template bool FastFeedForward(const Type *inputs, + Type *outputs); + // Compute the output of a specific output node. + // This function is useful for application that are interested in a single + // output of the net and do not want to waste time on the rest + // This is the fast-read-only version of this function + template bool FastGetNetOutput(const Type *inputs, + int output_id, + Type *output); +}; +} + +#endif // NEURAL_NET_H__ diff --git a/neural_networks/runtime/neuron.cpp b/neural_networks/runtime/neuron.cpp new file mode 100644 index 0000000000..36309082d9 --- /dev/null +++ b/neural_networks/runtime/neuron.cpp @@ -0,0 +1,94 @@ +// Copyright 2008 Google Inc. +// All Rights Reserved. 
+// Author: ahmadab@google.com (Ahmad Abdulkader) +// +// neuron.cpp: The implementation of a class for an object +// that represents a single neuron in a neural network + +#include "neuron.h" +#include "input_file_buffer.h" + +namespace tesseract { + +// Instantiate all supported templates +template bool Neuron::ReadBinary(InputFileBuffer *input_buffer); + +// default and only constructor +Neuron::Neuron() { + Init(); +} + +// virtual destructor +Neuron::~Neuron() { +} + +// Initializer +void Neuron::Init() { + id_ = -1; + frwd_dirty_ = false; + fan_in_.clear(); + fan_in_weights_.clear(); + activation_ = 0.0f; + output_ = 0.0f; + bias_ = 0.0f; + node_type_ = Unknown; +} + +// Computes the activation and output of the neuron if not fresh +// by pulling the outputs of all fan-in neurons +void Neuron::FeedForward() { + if (!frwd_dirty_ ) { + return; + } + // nothing to do for input nodes: just pass the input to the o/p + // otherwise, pull the output of all fan-in neurons + if (node_type_ != Input) { + int fan_in_cnt = fan_in_.size(); + // sum out the activation + activation_ = -bias_; + for (int in = 0; in < fan_in_cnt; in++) { + if (fan_in_[in]->frwd_dirty_) { + fan_in_[in]->FeedForward(); + } + activation_ += ((*(fan_in_weights_[in])) * fan_in_[in]->output_); + } + // sigmoid it + output_ = Sigmoid(activation_); + } + frwd_dirty_ = false; +} + +// set the type of the neuron +void Neuron::set_node_type(NeuronTypes Type) { + node_type_ = Type; +} + +// Adds new connections *to* this neuron *From* +// a target neuron using specfied params +// Note that what is actually copied in this function are pointers to the +// specified Neurons and weights and not the actualt values. This is by +// design to centralize the alloction of neurons and weights and so +// increase the locality of reference and improve cache-hits resulting +// in a faster net. 
This technique resulted in a 2X-10X speedup +// (depending on network size and processor) +void Neuron::AddFromConnection(Neuron *neurons, + float *wts_offset, + int from_cnt) { + for (int in = 0; in < from_cnt; in++) { + fan_in_.push_back(neurons + in); + fan_in_weights_.push_back(wts_offset + in); + } +} + +// fast computation of sigmoid function using a lookup table +// defined in sigmoid_table.cpp +float Neuron::Sigmoid(float activation) { + if (activation <= -10.0f) { + return 0.0f; + } else if (activation >= 10.0f) { + return 1.0f; + } else { + return kSigmoidTable[static_cast(100 * (activation + 10.0))]; + } +} +} diff --git a/neural_networks/runtime/neuron.h b/neural_networks/runtime/neuron.h new file mode 100644 index 0000000000..a13d4a2eb7 --- /dev/null +++ b/neural_networks/runtime/neuron.h @@ -0,0 +1,147 @@ +// Copyright 2008 Google Inc. +// All Rights Reserved. +// Author: ahmadab@google.com (Ahmad Abdulkader) +// +// neuron.h: Declarations of a class for an object that +// represents a single neuron in a neural network +// + +#ifndef NEURON_H +#define NEURON_H + +#include +#include + +#ifdef USE_STD_NAMESPACE +using std::vector; +#endif + +namespace tesseract { + +// Input Node bias values +static const float kInputNodeBias = 0.0f; + +class Neuron { + public: + // Types of nodes + enum NeuronTypes { + Unknown = 0, + Input, + Hidden, + Output + }; + Neuron(); + ~Neuron(); + // set the forward dirty flag indicating that the + // activation of the net is not fresh + void Clear() { + frwd_dirty_ = true; + } + // Read a binary representation of the neuron info from + // an input buffer. 
+ template bool ReadBinary(BuffType *input_buff) { + float val; + if (input_buff->Read(&val, sizeof(val)) != sizeof(val)) { + return false; + } + // input nodes should have no biases + if (node_type_ == Input) { + bias_ = kInputNodeBias; + } else { + bias_ = val; + } + // read fanin count + int fan_in_cnt; + if (input_buff->Read(&fan_in_cnt, sizeof(fan_in_cnt)) != + sizeof(fan_in_cnt)) { + return false; + } + // validate fan-in cnt + if (fan_in_cnt != fan_in_.size()) { + return false; + } + // read the weights + for (int in = 0; in < fan_in_cnt; in++) { + if (input_buff->Read(&val, sizeof(val)) != sizeof(val)) { + return false; + } + *(fan_in_weights_[in]) = val; + } + return true; + } + + // Add a new connection from this neuron *From* + // a target neuron using specfied params + // Note that what is actually copied in this function are pointers to the + // specified Neurons and weights and not the actualt values. This is by + // design to centralize the alloction of neurons and weights and so + // increase the locality of reference and improve cache-hits resulting + // in a faster net. 
This technique resulted in a 2X-10X speedup + // (depending on network size and processor) + void AddFromConnection(Neuron *neuron_vec, + float *wts_offset, + int from_cnt); + // Set the type of a neuron + void set_node_type(NeuronTypes type); + // Computes the output of the node by + // "pulling" the output of the fan-in nodes + void FeedForward(); + // fast computation of sigmoid function using a lookup table + // defined in sigmoid_table.cpp + static float Sigmoid(float activation); + // Accessor functions + float output() const { + return output_; + } + void set_output(float out_val) { + output_ = out_val; + } + int id() const { + return id_; + } + int fan_in_cnt() const { + return fan_in_.size(); + } + Neuron * fan_in(int idx) const { + return fan_in_[idx]; + } + float fan_in_wts(int idx) const { + return *(fan_in_weights_[idx]); + } + void set_id(int id) { + id_ = id; + } + float bias() const { + return bias_; + } + Neuron::NeuronTypes node_type() const { + return node_type_; + } + + protected: + // Type of Neuron + NeuronTypes node_type_; + // unqique id of the neuron + int id_; + // node bias + float bias_; + // node net activation + float activation_; + // node output + float output_; + // pointers to fanin nodes + vector fan_in_; + // pointers to fanin weights + vector fan_in_weights_; + // Sigmoid function lookup table used for fast computation + // of sigmoid function + static const float kSigmoidTable[]; + // flag determining if the activation of the node + // is fresh or not (dirty) + bool frwd_dirty_; + // Initializer + void Init(); +}; +} + +#endif // NEURON_H__ diff --git a/neural_networks/runtime/sigmoid_table.cpp b/neural_networks/runtime/sigmoid_table.cpp new file mode 100644 index 0000000000..f170a10844 --- /dev/null +++ b/neural_networks/runtime/sigmoid_table.cpp @@ -0,0 +1,514 @@ +// Copyright 2007 Google Inc. +// All Rights Reserved. 
+// Author: ahmadab@google.com (Ahmad Abdulkader) +// +// sigmoid_table.cpp: Sigmoid function lookup table + +#include "neuron.h" + +namespace tesseract { + +const float Neuron::kSigmoidTable[] = { + 4.53979E-05f, 4.58541E-05f, 4.63149E-05f, 4.67804E-05f, + 4.72505E-05f, 4.77254E-05f, 4.8205E-05f, 4.86894E-05f, + 4.91787E-05f, 4.9673E-05f, 5.01722E-05f, 5.06764E-05f, + 5.11857E-05f, 5.17001E-05f, 5.22196E-05f, 5.27444E-05f, + 5.32745E-05f, 5.38099E-05f, 5.43506E-05f, 5.48968E-05f, + 5.54485E-05f, 5.60058E-05f, 5.65686E-05f, 5.71371E-05f, + 5.77113E-05f, 5.82913E-05f, 5.88771E-05f, 5.94688E-05f, + 6.00664E-05f, 6.067E-05f, 6.12797E-05f, 6.18956E-05f, + 6.25176E-05f, 6.31459E-05f, 6.37805E-05f, 6.44214E-05f, + 6.50688E-05f, 6.57227E-05f, 6.63832E-05f, 6.70503E-05f, + 6.77241E-05f, 6.84047E-05f, 6.90922E-05f, 6.97865E-05f, + 7.04878E-05f, 7.11962E-05f, 7.19117E-05f, 7.26343E-05f, + 7.33643E-05f, 7.41016E-05f, 7.48462E-05f, 7.55984E-05f, + 7.63581E-05f, 7.71255E-05f, 7.79005E-05f, 7.86834E-05f, + 7.94741E-05f, 8.02728E-05f, 8.10794E-05f, 8.18942E-05f, + 8.27172E-05f, 8.35485E-05f, 8.43881E-05f, 8.52361E-05f, + 8.60927E-05f, 8.69579E-05f, 8.78317E-05f, 8.87144E-05f, + 8.96059E-05f, 9.05064E-05f, 9.14159E-05f, 9.23345E-05f, + 9.32624E-05f, 9.41996E-05f, 9.51463E-05f, 9.61024E-05f, + 9.70682E-05f, 9.80436E-05f, 9.90289E-05f, 0.000100024f, + 0.000101029f, 0.000102044f, 0.00010307f, 0.000104106f, + 0.000105152f, 0.000106209f, 0.000107276f, 0.000108354f, + 0.000109443f, 0.000110542f, 0.000111653f, 0.000112775f, + 0.000113909f, 0.000115053f, 0.000116209f, 0.000117377f, + 0.000118557f, 0.000119748f, 0.000120951f, 0.000122167f, + 0.000123395f, 0.000124635f, 0.000125887f, 0.000127152f, + 0.00012843f, 0.00012972f, 0.000131024f, 0.000132341f, + 0.00013367f, 0.000135014f, 0.00013637f, 0.000137741f, + 0.000139125f, 0.000140523f, 0.000141935f, 0.000143361f, + 0.000144802f, 0.000146257f, 0.000147727f, 0.000149211f, + 0.00015071f, 0.000152225f, 0.000153754f, 0.000155299f, + 
0.00015686f, 0.000158436f, 0.000160028f, 0.000161636f, + 0.000163261f, 0.000164901f, 0.000166558f, 0.000168232f, + 0.000169922f, 0.00017163f, 0.000173354f, 0.000175096f, + 0.000176856f, 0.000178633f, 0.000180428f, 0.000182241f, + 0.000184072f, 0.000185922f, 0.00018779f, 0.000189677f, + 0.000191583f, 0.000193508f, 0.000195452f, 0.000197416f, + 0.0001994f, 0.000201403f, 0.000203427f, 0.000205471f, + 0.000207536f, 0.000209621f, 0.000211727f, 0.000213855f, + 0.000216003f, 0.000218174f, 0.000220366f, 0.00022258f, + 0.000224817f, 0.000227076f, 0.000229357f, 0.000231662f, + 0.00023399f, 0.000236341f, 0.000238715f, 0.000241114f, + 0.000243537f, 0.000245984f, 0.000248455f, 0.000250951f, + 0.000253473f, 0.00025602f, 0.000258592f, 0.00026119f, + 0.000263815f, 0.000266465f, 0.000269143f, 0.000271847f, + 0.000274578f, 0.000277337f, 0.000280123f, 0.000282938f, + 0.000285781f, 0.000288652f, 0.000291552f, 0.000294481f, + 0.00029744f, 0.000300429f, 0.000303447f, 0.000306496f, + 0.000309575f, 0.000312685f, 0.000315827f, 0.000319f, + 0.000322205f, 0.000325442f, 0.000328712f, 0.000332014f, + 0.00033535f, 0.000338719f, 0.000342122f, 0.00034556f, + 0.000349031f, 0.000352538f, 0.00035608f, 0.000359657f, + 0.00036327f, 0.00036692f, 0.000370606f, 0.000374329f, + 0.00037809f, 0.000381888f, 0.000385725f, 0.0003896f, + 0.000393514f, 0.000397467f, 0.00040146f, 0.000405494f, + 0.000409567f, 0.000413682f, 0.000417838f, 0.000422035f, + 0.000426275f, 0.000430557f, 0.000434882f, 0.000439251f, + 0.000443664f, 0.000448121f, 0.000452622f, 0.000457169f, + 0.000461762f, 0.0004664f, 0.000471085f, 0.000475818f, + 0.000480597f, 0.000485425f, 0.000490301f, 0.000495226f, + 0.000500201f, 0.000505226f, 0.000510301f, 0.000515427f, + 0.000520604f, 0.000525833f, 0.000531115f, 0.00053645f, + 0.000541839f, 0.000547281f, 0.000552779f, 0.000558331f, + 0.000563939f, 0.000569604f, 0.000575325f, 0.000581104f, + 0.00058694f, 0.000592836f, 0.00059879f, 0.000604805f, + 0.000610879f, 0.000617015f, 0.000623212f, 
0.000629472f, + 0.000635794f, 0.00064218f, 0.00064863f, 0.000655144f, + 0.000661724f, 0.00066837f, 0.000675083f, 0.000681863f, + 0.000688711f, 0.000695628f, 0.000702614f, 0.00070967f, + 0.000716798f, 0.000723996f, 0.000731267f, 0.000738611f, + 0.000746029f, 0.000753521f, 0.000761088f, 0.000768731f, + 0.000776451f, 0.000784249f, 0.000792124f, 0.000800079f, + 0.000808113f, 0.000816228f, 0.000824425f, 0.000832703f, + 0.000841065f, 0.000849511f, 0.000858041f, 0.000866657f, + 0.00087536f, 0.000884149f, 0.000893027f, 0.000901994f, + 0.000911051f, 0.000920199f, 0.000929439f, 0.000938771f, + 0.000948197f, 0.000957717f, 0.000967333f, 0.000977045f, + 0.000986855f, 0.000996763f, 0.001006771f, 0.001016879f, + 0.001027088f, 0.0010374f, 0.001047815f, 0.001058334f, + 0.00106896f, 0.001079691f, 0.00109053f, 0.001101478f, + 0.001112536f, 0.001123705f, 0.001134985f, 0.001146379f, + 0.001157887f, 0.00116951f, 0.00118125f, 0.001193108f, + 0.001205084f, 0.001217181f, 0.001229399f, 0.001241739f, + 0.001254203f, 0.001266792f, 0.001279507f, 0.00129235f, + 0.001305321f, 0.001318423f, 0.001331655f, 0.001345021f, + 0.00135852f, 0.001372155f, 0.001385926f, 0.001399835f, + 0.001413884f, 0.001428073f, 0.001442405f, 0.00145688f, + 0.001471501f, 0.001486267f, 0.001501182f, 0.001516247f, + 0.001531462f, 0.001546829f, 0.001562351f, 0.001578028f, + 0.001593862f, 0.001609855f, 0.001626008f, 0.001642323f, + 0.001658801f, 0.001675444f, 0.001692254f, 0.001709233f, + 0.001726381f, 0.001743701f, 0.001761195f, 0.001778864f, + 0.00179671f, 0.001814734f, 0.001832939f, 0.001851326f, + 0.001869898f, 0.001888655f, 0.0019076f, 0.001926735f, + 0.001946061f, 0.001965581f, 0.001985296f, 0.002005209f, + 0.00202532f, 0.002045634f, 0.00206615f, 0.002086872f, + 0.002107801f, 0.00212894f, 0.00215029f, 0.002171854f, + 0.002193633f, 0.002215631f, 0.002237849f, 0.002260288f, + 0.002282953f, 0.002305844f, 0.002328964f, 0.002352316f, + 0.002375901f, 0.002399721f, 0.002423781f, 0.00244808f, + 0.002472623f, 0.002497411f, 
0.002522447f, 0.002547734f, + 0.002573273f, 0.002599068f, 0.00262512f, 0.002651433f, + 0.002678009f, 0.002704851f, 0.002731961f, 0.002759342f, + 0.002786996f, 0.002814927f, 0.002843137f, 0.002871629f, + 0.002900406f, 0.00292947f, 0.002958825f, 0.002988472f, + 0.003018416f, 0.003048659f, 0.003079205f, 0.003110055f, + 0.003141213f, 0.003172683f, 0.003204467f, 0.003236568f, + 0.00326899f, 0.003301735f, 0.003334807f, 0.00336821f, + 0.003401946f, 0.003436018f, 0.003470431f, 0.003505187f, + 0.00354029f, 0.003575744f, 0.003611551f, 0.003647715f, + 0.00368424f, 0.003721129f, 0.003758387f, 0.003796016f, + 0.00383402f, 0.003872403f, 0.00391117f, 0.003950322f, + 0.003989865f, 0.004029802f, 0.004070138f, 0.004110875f, + 0.004152019f, 0.004193572f, 0.00423554f, 0.004277925f, + 0.004320734f, 0.004363968f, 0.004407633f, 0.004451734f, + 0.004496273f, 0.004541256f, 0.004586687f, 0.004632571f, + 0.004678911f, 0.004725713f, 0.00477298f, 0.004820718f, + 0.004868931f, 0.004917624f, 0.004966802f, 0.005016468f, + 0.005066629f, 0.005117289f, 0.005168453f, 0.005220126f, + 0.005272312f, 0.005325018f, 0.005378247f, 0.005432006f, + 0.005486299f, 0.005541132f, 0.005596509f, 0.005652437f, + 0.005708921f, 0.005765966f, 0.005823577f, 0.005881761f, + 0.005940522f, 0.005999867f, 0.006059801f, 0.006120331f, + 0.006181461f, 0.006243198f, 0.006305547f, 0.006368516f, + 0.006432108f, 0.006496332f, 0.006561193f, 0.006626697f, + 0.006692851f, 0.006759661f, 0.006827132f, 0.006895273f, + 0.006964089f, 0.007033587f, 0.007103774f, 0.007174656f, + 0.00724624f, 0.007318533f, 0.007391541f, 0.007465273f, + 0.007539735f, 0.007614933f, 0.007690876f, 0.00776757f, + 0.007845023f, 0.007923242f, 0.008002235f, 0.008082009f, + 0.008162571f, 0.00824393f, 0.008326093f, 0.008409068f, + 0.008492863f, 0.008577485f, 0.008662944f, 0.008749246f, + 0.0088364f, 0.008924415f, 0.009013299f, 0.009103059f, + 0.009193705f, 0.009285246f, 0.009377689f, 0.009471044f, + 0.009565319f, 0.009660523f, 0.009756666f, 0.009853756f, + 
0.009951802f, 0.010050814f, 0.010150801f, 0.010251772f, + 0.010353738f, 0.010456706f, 0.010560688f, 0.010665693f, + 0.01077173f, 0.01087881f, 0.010986943f, 0.011096138f, + 0.011206406f, 0.011317758f, 0.011430203f, 0.011543752f, + 0.011658417f, 0.011774206f, 0.011891132f, 0.012009204f, + 0.012128435f, 0.012248835f, 0.012370415f, 0.012493186f, + 0.012617161f, 0.012742349f, 0.012868764f, 0.012996417f, + 0.013125318f, 0.013255481f, 0.013386918f, 0.01351964f, + 0.013653659f, 0.013788989f, 0.01392564f, 0.014063627f, + 0.014202961f, 0.014343656f, 0.014485724f, 0.014629178f, + 0.014774032f, 0.014920298f, 0.01506799f, 0.015217121f, + 0.015367706f, 0.015519757f, 0.015673288f, 0.015828314f, + 0.015984848f, 0.016142905f, 0.016302499f, 0.016463645f, + 0.016626356f, 0.016790648f, 0.016956536f, 0.017124033f, + 0.017293157f, 0.01746392f, 0.01763634f, 0.017810432f, + 0.01798621f, 0.018163691f, 0.018342891f, 0.018523825f, + 0.01870651f, 0.018890962f, 0.019077197f, 0.019265233f, + 0.019455085f, 0.01964677f, 0.019840306f, 0.020035709f, + 0.020232997f, 0.020432187f, 0.020633297f, 0.020836345f, + 0.021041347f, 0.021248323f, 0.02145729f, 0.021668266f, + 0.021881271f, 0.022096322f, 0.022313439f, 0.022532639f, + 0.022753943f, 0.02297737f, 0.023202938f, 0.023430668f, + 0.023660578f, 0.023892689f, 0.024127021f, 0.024363594f, + 0.024602428f, 0.024843544f, 0.025086962f, 0.025332703f, + 0.025580788f, 0.025831239f, 0.026084075f, 0.02633932f, + 0.026596994f, 0.026857119f, 0.027119717f, 0.027384811f, + 0.027652422f, 0.027922574f, 0.028195288f, 0.028470588f, + 0.028748496f, 0.029029036f, 0.029312231f, 0.029598104f, + 0.02988668f, 0.030177981f, 0.030472033f, 0.030768859f, + 0.031068484f, 0.031370932f, 0.031676228f, 0.031984397f, + 0.032295465f, 0.032609455f, 0.032926395f, 0.033246309f, + 0.033569223f, 0.033895164f, 0.034224158f, 0.03455623f, + 0.034891409f, 0.035229719f, 0.035571189f, 0.035915846f, + 0.036263716f, 0.036614828f, 0.036969209f, 0.037326887f, + 0.037687891f, 0.038052247f, 0.038419986f, 
0.038791134f, + 0.039165723f, 0.03954378f, 0.039925334f, 0.040310415f, + 0.040699054f, 0.041091278f, 0.041487119f, 0.041886607f, + 0.042289772f, 0.042696644f, 0.043107255f, 0.043521635f, + 0.043939815f, 0.044361828f, 0.044787703f, 0.045217473f, + 0.045651171f, 0.046088827f, 0.046530475f, 0.046976146f, + 0.047425873f, 0.04787969f, 0.048337629f, 0.048799723f, + 0.049266006f, 0.049736512f, 0.050211273f, 0.050690325f, + 0.051173701f, 0.051661435f, 0.052153563f, 0.052650118f, + 0.053151136f, 0.053656652f, 0.0541667f, 0.054681317f, + 0.055200538f, 0.055724398f, 0.056252934f, 0.056786181f, + 0.057324176f, 0.057866955f, 0.058414556f, 0.058967013f, + 0.059524366f, 0.06008665f, 0.060653903f, 0.061226163f, + 0.061803466f, 0.062385851f, 0.062973356f, 0.063566018f, + 0.064163876f, 0.064766969f, 0.065375333f, 0.065989009f, + 0.066608036f, 0.067232451f, 0.067862294f, 0.068497604f, + 0.06913842f, 0.069784783f, 0.070436731f, 0.071094304f, + 0.071757542f, 0.072426485f, 0.073101173f, 0.073781647f, + 0.074467945f, 0.075160109f, 0.07585818f, 0.076562197f, + 0.077272202f, 0.077988235f, 0.078710337f, 0.079438549f, + 0.080172912f, 0.080913467f, 0.081660255f, 0.082413318f, + 0.083172696f, 0.083938432f, 0.084710566f, 0.085489139f, + 0.086274194f, 0.087065772f, 0.087863915f, 0.088668663f, + 0.089480059f, 0.090298145f, 0.091122961f, 0.09195455f, + 0.092792953f, 0.093638212f, 0.094490369f, 0.095349465f, + 0.096215542f, 0.097088641f, 0.097968804f, 0.098856073f, + 0.099750489f, 0.100652094f, 0.101560928f, 0.102477033f, + 0.103400451f, 0.104331223f, 0.10526939f, 0.106214992f, + 0.10716807f, 0.108128667f, 0.109096821f, 0.110072574f, + 0.111055967f, 0.112047039f, 0.11304583f, 0.114052381f, + 0.115066732f, 0.116088922f, 0.117118991f, 0.118156978f, + 0.119202922f, 0.120256862f, 0.121318838f, 0.122388887f, + 0.123467048f, 0.124553358f, 0.125647857f, 0.12675058f, + 0.127861566f, 0.128980852f, 0.130108474f, 0.131244469f, + 0.132388874f, 0.133541723f, 0.134703052f, 0.135872897f, + 0.137051293f, 
0.138238273f, 0.139433873f, 0.140638126f, + 0.141851065f, 0.143072723f, 0.144303134f, 0.145542329f, + 0.14679034f, 0.148047198f, 0.149312935f, 0.15058758f, + 0.151871164f, 0.153163716f, 0.154465265f, 0.15577584f, + 0.157095469f, 0.158424179f, 0.159761997f, 0.16110895f, + 0.162465063f, 0.163830361f, 0.16520487f, 0.166588614f, + 0.167981615f, 0.169383897f, 0.170795482f, 0.172216392f, + 0.173646647f, 0.175086268f, 0.176535275f, 0.177993686f, + 0.179461519f, 0.180938793f, 0.182425524f, 0.183921727f, + 0.185427419f, 0.186942614f, 0.188467325f, 0.190001566f, + 0.191545349f, 0.193098684f, 0.194661584f, 0.196234056f, + 0.197816111f, 0.199407757f, 0.201009f, 0.202619846f, + 0.204240302f, 0.205870372f, 0.207510059f, 0.209159365f, + 0.210818293f, 0.212486844f, 0.214165017f, 0.215852811f, + 0.217550224f, 0.219257252f, 0.220973892f, 0.222700139f, + 0.224435986f, 0.226181426f, 0.227936451f, 0.229701051f, + 0.231475217f, 0.233258936f, 0.235052196f, 0.236854984f, + 0.238667285f, 0.240489083f, 0.242320361f, 0.244161101f, + 0.246011284f, 0.247870889f, 0.249739894f, 0.251618278f, + 0.253506017f, 0.255403084f, 0.257309455f, 0.259225101f, + 0.261149994f, 0.263084104f, 0.265027401f, 0.266979851f, + 0.268941421f, 0.270912078f, 0.272891784f, 0.274880502f, + 0.276878195f, 0.278884822f, 0.280900343f, 0.282924715f, + 0.284957894f, 0.286999837f, 0.289050497f, 0.291109827f, + 0.293177779f, 0.295254302f, 0.297339346f, 0.299432858f, + 0.301534784f, 0.30364507f, 0.30576366f, 0.307890496f, + 0.310025519f, 0.312168669f, 0.314319886f, 0.316479106f, + 0.318646266f, 0.320821301f, 0.323004144f, 0.325194727f, + 0.327392983f, 0.32959884f, 0.331812228f, 0.334033073f, + 0.336261303f, 0.338496841f, 0.340739612f, 0.342989537f, + 0.345246539f, 0.347510538f, 0.349781451f, 0.352059198f, + 0.354343694f, 0.356634854f, 0.358932594f, 0.361236825f, + 0.36354746f, 0.365864409f, 0.368187582f, 0.370516888f, + 0.372852234f, 0.375193526f, 0.377540669f, 0.379893568f, + 0.382252125f, 0.384616244f, 0.386985824f, 
0.389360766f, + 0.391740969f, 0.394126332f, 0.39651675f, 0.398912121f, + 0.40131234f, 0.403717301f, 0.406126897f, 0.408541022f, + 0.410959566f, 0.413382421f, 0.415809477f, 0.418240623f, + 0.420675748f, 0.423114739f, 0.425557483f, 0.428003867f, + 0.430453776f, 0.432907095f, 0.435363708f, 0.437823499f, + 0.440286351f, 0.442752145f, 0.445220765f, 0.44769209f, + 0.450166003f, 0.452642382f, 0.455121108f, 0.457602059f, + 0.460085115f, 0.462570155f, 0.465057055f, 0.467545694f, + 0.470035948f, 0.472527696f, 0.475020813f, 0.477515175f, + 0.48001066f, 0.482507142f, 0.485004498f, 0.487502604f, + 0.490001333f, 0.492500562f, 0.495000167f, 0.497500021f, + 0.5f, 0.502499979f, 0.504999833f, 0.507499438f, + 0.509998667f, 0.512497396f, 0.514995502f, 0.517492858f, + 0.51998934f, 0.522484825f, 0.524979187f, 0.527472304f, + 0.529964052f, 0.532454306f, 0.534942945f, 0.537429845f, + 0.539914885f, 0.542397941f, 0.544878892f, 0.547357618f, + 0.549833997f, 0.55230791f, 0.554779235f, 0.557247855f, + 0.559713649f, 0.562176501f, 0.564636292f, 0.567092905f, + 0.569546224f, 0.571996133f, 0.574442517f, 0.576885261f, + 0.579324252f, 0.581759377f, 0.584190523f, 0.586617579f, + 0.589040434f, 0.591458978f, 0.593873103f, 0.596282699f, + 0.59868766f, 0.601087879f, 0.60348325f, 0.605873668f, + 0.608259031f, 0.610639234f, 0.613014176f, 0.615383756f, + 0.617747875f, 0.620106432f, 0.622459331f, 0.624806474f, + 0.627147766f, 0.629483112f, 0.631812418f, 0.634135591f, + 0.63645254f, 0.638763175f, 0.641067406f, 0.643365146f, + 0.645656306f, 0.647940802f, 0.650218549f, 0.652489462f, + 0.654753461f, 0.657010463f, 0.659260388f, 0.661503159f, + 0.663738697f, 0.665966927f, 0.668187772f, 0.67040116f, + 0.672607017f, 0.674805273f, 0.676995856f, 0.679178699f, + 0.681353734f, 0.683520894f, 0.685680114f, 0.687831331f, + 0.689974481f, 0.692109504f, 0.69423634f, 0.69635493f, + 0.698465216f, 0.700567142f, 0.702660654f, 0.704745698f, + 0.706822221f, 0.708890173f, 0.710949503f, 0.713000163f, + 0.715042106f, 0.717075285f, 
0.719099657f, 0.721115178f, + 0.723121805f, 0.725119498f, 0.727108216f, 0.729087922f, + 0.731058579f, 0.733020149f, 0.734972599f, 0.736915896f, + 0.738850006f, 0.740774899f, 0.742690545f, 0.744596916f, + 0.746493983f, 0.748381722f, 0.750260106f, 0.752129111f, + 0.753988716f, 0.755838899f, 0.757679639f, 0.759510917f, + 0.761332715f, 0.763145016f, 0.764947804f, 0.766741064f, + 0.768524783f, 0.770298949f, 0.772063549f, 0.773818574f, + 0.775564014f, 0.777299861f, 0.779026108f, 0.780742748f, + 0.782449776f, 0.784147189f, 0.785834983f, 0.787513156f, + 0.789181707f, 0.790840635f, 0.792489941f, 0.794129628f, + 0.795759698f, 0.797380154f, 0.798991f, 0.800592243f, + 0.802183889f, 0.803765944f, 0.805338416f, 0.806901316f, + 0.808454651f, 0.809998434f, 0.811532675f, 0.813057386f, + 0.814572581f, 0.816078273f, 0.817574476f, 0.819061207f, + 0.820538481f, 0.822006314f, 0.823464725f, 0.824913732f, + 0.826353353f, 0.827783608f, 0.829204518f, 0.830616103f, + 0.832018385f, 0.833411386f, 0.83479513f, 0.836169639f, + 0.837534937f, 0.83889105f, 0.840238003f, 0.841575821f, + 0.842904531f, 0.84422416f, 0.845534735f, 0.846836284f, + 0.848128836f, 0.84941242f, 0.850687065f, 0.851952802f, + 0.85320966f, 0.854457671f, 0.855696866f, 0.856927277f, + 0.858148935f, 0.859361874f, 0.860566127f, 0.861761727f, + 0.862948707f, 0.864127103f, 0.865296948f, 0.866458277f, + 0.867611126f, 0.868755531f, 0.869891526f, 0.871019148f, + 0.872138434f, 0.87324942f, 0.874352143f, 0.875446642f, + 0.876532952f, 0.877611113f, 0.878681162f, 0.879743138f, + 0.880797078f, 0.881843022f, 0.882881009f, 0.883911078f, + 0.884933268f, 0.885947619f, 0.88695417f, 0.887952961f, + 0.888944033f, 0.889927426f, 0.890903179f, 0.891871333f, + 0.89283193f, 0.893785008f, 0.89473061f, 0.895668777f, + 0.896599549f, 0.897522967f, 0.898439072f, 0.899347906f, + 0.900249511f, 0.901143927f, 0.902031196f, 0.902911359f, + 0.903784458f, 0.904650535f, 0.905509631f, 0.906361788f, + 0.907207047f, 0.90804545f, 0.908877039f, 0.909701855f, + 
0.910519941f, 0.911331337f, 0.912136085f, 0.912934228f, + 0.913725806f, 0.914510861f, 0.915289434f, 0.916061568f, + 0.916827304f, 0.917586682f, 0.918339745f, 0.919086533f, + 0.919827088f, 0.920561451f, 0.921289663f, 0.922011765f, + 0.922727798f, 0.923437803f, 0.92414182f, 0.924839891f, + 0.925532055f, 0.926218353f, 0.926898827f, 0.927573515f, + 0.928242458f, 0.928905696f, 0.929563269f, 0.930215217f, + 0.93086158f, 0.931502396f, 0.932137706f, 0.932767549f, + 0.933391964f, 0.934010991f, 0.934624667f, 0.935233031f, + 0.935836124f, 0.936433982f, 0.937026644f, 0.937614149f, + 0.938196534f, 0.938773837f, 0.939346097f, 0.93991335f, + 0.940475634f, 0.941032987f, 0.941585444f, 0.942133045f, + 0.942675824f, 0.943213819f, 0.943747066f, 0.944275602f, + 0.944799462f, 0.945318683f, 0.9458333f, 0.946343348f, + 0.946848864f, 0.947349882f, 0.947846437f, 0.948338565f, + 0.948826299f, 0.949309675f, 0.949788727f, 0.950263488f, + 0.950733994f, 0.951200277f, 0.951662371f, 0.95212031f, + 0.952574127f, 0.953023854f, 0.953469525f, 0.953911173f, + 0.954348829f, 0.954782527f, 0.955212297f, 0.955638172f, + 0.956060185f, 0.956478365f, 0.956892745f, 0.957303356f, + 0.957710228f, 0.958113393f, 0.958512881f, 0.958908722f, + 0.959300946f, 0.959689585f, 0.960074666f, 0.96045622f, + 0.960834277f, 0.961208866f, 0.961580014f, 0.961947753f, + 0.962312109f, 0.962673113f, 0.963030791f, 0.963385172f, + 0.963736284f, 0.964084154f, 0.964428811f, 0.964770281f, + 0.965108591f, 0.96544377f, 0.965775842f, 0.966104836f, + 0.966430777f, 0.966753691f, 0.967073605f, 0.967390545f, + 0.967704535f, 0.968015603f, 0.968323772f, 0.968629068f, + 0.968931516f, 0.969231141f, 0.969527967f, 0.969822019f, + 0.97011332f, 0.970401896f, 0.970687769f, 0.970970964f, + 0.971251504f, 0.971529412f, 0.971804712f, 0.972077426f, + 0.972347578f, 0.972615189f, 0.972880283f, 0.973142881f, + 0.973403006f, 0.97366068f, 0.973915925f, 0.974168761f, + 0.974419212f, 0.974667297f, 0.974913038f, 0.975156456f, + 0.975397572f, 0.975636406f, 
0.975872979f, 0.976107311f, + 0.976339422f, 0.976569332f, 0.976797062f, 0.97702263f, + 0.977246057f, 0.977467361f, 0.977686561f, 0.977903678f, + 0.978118729f, 0.978331734f, 0.97854271f, 0.978751677f, + 0.978958653f, 0.979163655f, 0.979366703f, 0.979567813f, + 0.979767003f, 0.979964291f, 0.980159694f, 0.98035323f, + 0.980544915f, 0.980734767f, 0.980922803f, 0.981109038f, + 0.98129349f, 0.981476175f, 0.981657109f, 0.981836309f, + 0.98201379f, 0.982189568f, 0.98236366f, 0.98253608f, + 0.982706843f, 0.982875967f, 0.983043464f, 0.983209352f, + 0.983373644f, 0.983536355f, 0.983697501f, 0.983857095f, + 0.984015152f, 0.984171686f, 0.984326712f, 0.984480243f, + 0.984632294f, 0.984782879f, 0.98493201f, 0.985079702f, + 0.985225968f, 0.985370822f, 0.985514276f, 0.985656344f, + 0.985797039f, 0.985936373f, 0.98607436f, 0.986211011f, + 0.986346341f, 0.98648036f, 0.986613082f, 0.986744519f, + 0.986874682f, 0.987003583f, 0.987131236f, 0.987257651f, + 0.987382839f, 0.987506814f, 0.987629585f, 0.987751165f, + 0.987871565f, 0.987990796f, 0.988108868f, 0.988225794f, + 0.988341583f, 0.988456248f, 0.988569797f, 0.988682242f, + 0.988793594f, 0.988903862f, 0.989013057f, 0.98912119f, + 0.98922827f, 0.989334307f, 0.989439312f, 0.989543294f, + 0.989646262f, 0.989748228f, 0.989849199f, 0.989949186f, + 0.990048198f, 0.990146244f, 0.990243334f, 0.990339477f, + 0.990434681f, 0.990528956f, 0.990622311f, 0.990714754f, + 0.990806295f, 0.990896941f, 0.990986701f, 0.991075585f, + 0.9911636f, 0.991250754f, 0.991337056f, 0.991422515f, + 0.991507137f, 0.991590932f, 0.991673907f, 0.99175607f, + 0.991837429f, 0.991917991f, 0.991997765f, 0.992076758f, + 0.992154977f, 0.99223243f, 0.992309124f, 0.992385067f, + 0.992460265f, 0.992534727f, 0.992608459f, 0.992681467f, + 0.99275376f, 0.992825344f, 0.992896226f, 0.992966413f, + 0.993035911f, 0.993104727f, 0.993172868f, 0.993240339f, + 0.993307149f, 0.993373303f, 0.993438807f, 0.993503668f, + 0.993567892f, 0.993631484f, 0.993694453f, 0.993756802f, + 0.993818539f, 
0.993879669f, 0.993940199f, 0.994000133f, + 0.994059478f, 0.994118239f, 0.994176423f, 0.994234034f, + 0.994291079f, 0.994347563f, 0.994403491f, 0.994458868f, + 0.994513701f, 0.994567994f, 0.994621753f, 0.994674982f, + 0.994727688f, 0.994779874f, 0.994831547f, 0.994882711f, + 0.994933371f, 0.994983532f, 0.995033198f, 0.995082376f, + 0.995131069f, 0.995179282f, 0.99522702f, 0.995274287f, + 0.995321089f, 0.995367429f, 0.995413313f, 0.995458744f, + 0.995503727f, 0.995548266f, 0.995592367f, 0.995636032f, + 0.995679266f, 0.995722075f, 0.99576446f, 0.995806428f, + 0.995847981f, 0.995889125f, 0.995929862f, 0.995970198f, + 0.996010135f, 0.996049678f, 0.99608883f, 0.996127597f, + 0.99616598f, 0.996203984f, 0.996241613f, 0.996278871f, + 0.99631576f, 0.996352285f, 0.996388449f, 0.996424256f, + 0.99645971f, 0.996494813f, 0.996529569f, 0.996563982f, + 0.996598054f, 0.99663179f, 0.996665193f, 0.996698265f, + 0.99673101f, 0.996763432f, 0.996795533f, 0.996827317f, + 0.996858787f, 0.996889945f, 0.996920795f, 0.996951341f, + 0.996981584f, 0.997011528f, 0.997041175f, 0.99707053f, + 0.997099594f, 0.997128371f, 0.997156863f, 0.997185073f, + 0.997213004f, 0.997240658f, 0.997268039f, 0.997295149f, + 0.997321991f, 0.997348567f, 0.99737488f, 0.997400932f, + 0.997426727f, 0.997452266f, 0.997477553f, 0.997502589f, + 0.997527377f, 0.99755192f, 0.997576219f, 0.997600279f, + 0.997624099f, 0.997647684f, 0.997671036f, 0.997694156f, + 0.997717047f, 0.997739712f, 0.997762151f, 0.997784369f, + 0.997806367f, 0.997828146f, 0.99784971f, 0.99787106f, + 0.997892199f, 0.997913128f, 0.99793385f, 0.997954366f, + 0.99797468f, 0.997994791f, 0.998014704f, 0.998034419f, + 0.998053939f, 0.998073265f, 0.9980924f, 0.998111345f, + 0.998130102f, 0.998148674f, 0.998167061f, 0.998185266f, + 0.99820329f, 0.998221136f, 0.998238805f, 0.998256299f, + 0.998273619f, 0.998290767f, 0.998307746f, 0.998324556f, + 0.998341199f, 0.998357677f, 0.998373992f, 0.998390145f, + 0.998406138f, 0.998421972f, 0.998437649f, 0.998453171f, + 
0.998468538f, 0.998483753f, 0.998498818f, 0.998513733f, + 0.998528499f, 0.99854312f, 0.998557595f, 0.998571927f, + 0.998586116f, 0.998600165f, 0.998614074f, 0.998627845f, + 0.99864148f, 0.998654979f, 0.998668345f, 0.998681577f, + 0.998694679f, 0.99870765f, 0.998720493f, 0.998733208f, + 0.998745797f, 0.998758261f, 0.998770601f, 0.998782819f, + 0.998794916f, 0.998806892f, 0.99881875f, 0.99883049f, + 0.998842113f, 0.998853621f, 0.998865015f, 0.998876295f, + 0.998887464f, 0.998898522f, 0.99890947f, 0.998920309f, + 0.99893104f, 0.998941666f, 0.998952185f, 0.9989626f, + 0.998972912f, 0.998983121f, 0.998993229f, 0.999003237f, + 0.999013145f, 0.999022955f, 0.999032667f, 0.999042283f, + 0.999051803f, 0.999061229f, 0.999070561f, 0.999079801f, + 0.999088949f, 0.999098006f, 0.999106973f, 0.999115851f, + 0.99912464f, 0.999133343f, 0.999141959f, 0.999150489f, + 0.999158935f, 0.999167297f, 0.999175575f, 0.999183772f, + 0.999191887f, 0.999199921f, 0.999207876f, 0.999215751f, + 0.999223549f, 0.999231269f, 0.999238912f, 0.999246479f, + 0.999253971f, 0.999261389f, 0.999268733f, 0.999276004f, + 0.999283202f, 0.99929033f, 0.999297386f, 0.999304372f, + 0.999311289f, 0.999318137f, 0.999324917f, 0.99933163f, + 0.999338276f, 0.999344856f, 0.99935137f, 0.99935782f, + 0.999364206f, 0.999370528f, 0.999376788f, 0.999382985f, + 0.999389121f, 0.999395195f, 0.99940121f, 0.999407164f, + 0.99941306f, 0.999418896f, 0.999424675f, 0.999430396f, + 0.999436061f, 0.999441669f, 0.999447221f, 0.999452719f, + 0.999458161f, 0.99946355f, 0.999468885f, 0.999474167f, + 0.999479396f, 0.999484573f, 0.999489699f, 0.999494774f, + 0.999499799f, 0.999504774f, 0.999509699f, 0.999514575f, + 0.999519403f, 0.999524182f, 0.999528915f, 0.9995336f, + 0.999538238f, 0.999542831f, 0.999547378f, 0.999551879f, + 0.999556336f, 0.999560749f, 0.999565118f, 0.999569443f, + 0.999573725f, 0.999577965f, 0.999582162f, 0.999586318f, + 0.999590433f, 0.999594506f, 0.99959854f, 0.999602533f, + 0.999606486f, 0.9996104f, 0.999614275f, 
0.999618112f, + 0.99962191f, 0.999625671f, 0.999629394f, 0.99963308f, + 0.99963673f, 0.999640343f, 0.99964392f, 0.999647462f, + 0.999650969f, 0.99965444f, 0.999657878f, 0.999661281f, + 0.99966465f, 0.999667986f, 0.999671288f, 0.999674558f, + 0.999677795f, 0.999681f, 0.999684173f, 0.999687315f, + 0.999690425f, 0.999693504f, 0.999696553f, 0.999699571f, + 0.99970256f, 0.999705519f, 0.999708448f, 0.999711348f, + 0.999714219f, 0.999717062f, 0.999719877f, 0.999722663f, + 0.999725422f, 0.999728153f, 0.999730857f, 0.999733535f, + 0.999736185f, 0.99973881f, 0.999741408f, 0.99974398f, + 0.999746527f, 0.999749049f, 0.999751545f, 0.999754016f, + 0.999756463f, 0.999758886f, 0.999761285f, 0.999763659f, + 0.99976601f, 0.999768338f, 0.999770643f, 0.999772924f, + 0.999775183f, 0.99977742f, 0.999779634f, 0.999781826f, + 0.999783997f, 0.999786145f, 0.999788273f, 0.999790379f, + 0.999792464f, 0.999794529f, 0.999796573f, 0.999798597f, + 0.9998006f, 0.999802584f, 0.999804548f, 0.999806492f, + 0.999808417f, 0.999810323f, 0.99981221f, 0.999814078f, + 0.999815928f, 0.999817759f, 0.999819572f, 0.999821367f, + 0.999823144f, 0.999824904f, 0.999826646f, 0.99982837f, + 0.999830078f, 0.999831768f, 0.999833442f, 0.999835099f, + 0.999836739f, 0.999838364f, 0.999839972f, 0.999841564f, + 0.99984314f, 0.999844701f, 0.999846246f, 0.999847775f, + 0.99984929f, 0.999850789f, 0.999852273f, 0.999853743f, + 0.999855198f, 0.999856639f, 0.999858065f, 0.999859477f, + 0.999860875f, 0.999862259f, 0.99986363f, 0.999864986f, + 0.99986633f, 0.999867659f, 0.999868976f, 0.99987028f, + 0.99987157f, 0.999872848f, 0.999874113f, 0.999875365f, + 0.999876605f, 0.999877833f, 0.999879049f, 0.999880252f, + 0.999881443f, 0.999882623f, 0.999883791f, 0.999884947f, + 0.999886091f, 0.999887225f, 0.999888347f, 0.999889458f, + 0.999890557f, 0.999891646f, 0.999892724f, 0.999893791f, + 0.999894848f, 0.999895894f, 0.99989693f, 0.999897956f, + 0.999898971f, 0.999899976f, 0.999900971f, 0.999901956f, + 0.999902932f, 0.999903898f, 
0.999904854f, 0.9999058f, + 0.999906738f, 0.999907665f, 0.999908584f, 0.999909494f, + 0.999910394f, 0.999911286f, 0.999912168f, 0.999913042f, + 0.999913907f, 0.999914764f, 0.999915612f, 0.999916452f, + 0.999917283f, 0.999918106f, 0.999918921f, 0.999919727f, + 0.999920526f, 0.999921317f, 0.999922099f, 0.999922875f, + 0.999923642f, 0.999924402f, 0.999925154f, 0.999925898f, + 0.999926636f, 0.999927366f, 0.999928088f, 0.999928804f, + 0.999929512f, 0.999930213f, 0.999930908f, 0.999931595f, + 0.999932276f, 0.99993295f, 0.999933617f, 0.999934277f, + 0.999934931f, 0.999935579f, 0.99993622f, 0.999936854f, + 0.999937482f, 0.999938104f, 0.99993872f, 0.99993933f, + 0.999939934f, 0.999940531f, 0.999941123f, 0.999941709f, + 0.999942289f, 0.999942863f, 0.999943431f, 0.999943994f, + 0.999944551f, 0.999945103f, 0.999945649f, 0.99994619f, + 0.999946726f, 0.999947256f, 0.99994778f, 0.9999483f, + 0.999948814f, 0.999949324f, 0.999949828f, 0.999950327f, + 0.999950821f, 0.999951311f, 0.999951795f, 0.999952275f, + 0.999952749f, 0.99995322f, 0.999953685f, 0.999954146f, + 0.999954602f +}; +} // namespace tesseract diff --git a/po/tesseract.pot b/po/tesseract.pot index deee172806..6f11edfcb8 100644 --- a/po/tesseract.pot +++ b/po/tesseract.pot @@ -5,9 +5,9 @@ #, fuzzy msgid "" msgstr "" -"Project-Id-Version: tesseract 3.00\n" +"Project-Id-Version: tesseract 3.01\n" "Report-Msgid-Bugs-To: joregan@gmail.com\n" -"POT-Creation-Date: 2010-07-19 03:05+0100\n" +"POT-Creation-Date: 2010-09-23 18:46-0700\n" "PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" "Last-Translator: FULL NAME \n" "Language-Team: LANGUAGE \n" @@ -15,69 +15,51 @@ msgstr "" "Content-Type: text/plain; charset=CHARSET\n" "Content-Transfer-Encoding: 8bit\n" -#: api/tesseractmain.cpp:148 +#: api/tesseractmain.cpp:127 #, c-format msgid "Error: Must have a unlv zone file %s to read!\n" msgstr "" -#: api/tesseractmain.cpp:187 +#: api/tesseractmain.cpp:169 #, c-format msgid "Error: -l must be arg3, not %d\n" msgstr "" -#: 
api/tesseractmain.cpp:221 -msgid "Tesseract Open Source OCR Engine" +#: api/tesseractmain.cpp:202 +msgid "Tesseract Open Source OCR Engine with Leptonica\n" msgstr "" -#: api/tesseractmain.cpp:223 -msgid " with Leptonica\n" -msgstr "" - -#: api/tesseractmain.cpp:225 -msgid " with LibTiff\n" -msgstr "" - -#: api/tesseractmain.cpp:237 +#: api/tesseractmain.cpp:213 #, c-format msgid "Image file %s cannot be opened!\n" msgstr "" -#: api/tesseractmain.cpp:248 +#: api/tesseractmain.cpp:222 #, c-format msgid "Error reading file %s!\n" msgstr "" -#: api/tesseractmain.cpp:261 api/tesseractmain.cpp:325 +#: api/tesseractmain.cpp:233 #, c-format msgid "Page %d\n" msgstr "" -#: api/tesseractmain.cpp:280 +#: api/tesseractmain.cpp:252 #, c-format msgid "File %s cannot be opened!\n" msgstr "" -#: api/tesseractmain.cpp:289 +#: api/tesseractmain.cpp:261 #, c-format msgid "Image file %s cannot be read!\n" msgstr "" -#: api/tesseractmain.cpp:293 +#: api/tesseractmain.cpp:265 #, c-format msgid "Page %d : %s\n" msgstr "" -#: api/tesseractmain.cpp:321 api/tesseractmain.cpp:349 -#, c-format -msgid "Read of file %s failed.\n" -msgstr "" - -#: api/tesseractmain.cpp:353 -#, c-format -msgid "Read of image %s" -msgstr "" - -#: api/tesseractmain.cpp:370 +#: api/tesseractmain.cpp:285 #, c-format msgid "Cannot create output file %s\n" msgstr "" diff --git a/tessdata/Makefile.am b/tessdata/Makefile.am index 7b1ae202fe..f15a3190a6 100644 --- a/tessdata/Makefile.am +++ b/tessdata/Makefile.am @@ -1,4 +1,14 @@ -#datadir = @datadir@/tessdata +datadir = @datadir@/tessdata +data_DATA = eng.cube.bigrams \ + eng.cube.lm \ + eng.cube.params \ + eng.cube.word-freq \ + eng.traineddata \ + eng.cube.fold \ + eng.cube.nn \ + eng.cube.size \ + eng.tesseract_cube.nn + #data_DATA = bul.traineddata \ # cat.traineddata \ # ces.traineddata \ diff --git a/tessdata/Makefile.in b/tessdata/Makefile.in index 23fb0c149d..22ad213a46 100644 --- a/tessdata/Makefile.in +++ b/tessdata/Makefile.in @@ -15,7 +15,18 @@ @SET_MAKE@ 
-#datadir = @datadir@/tessdata +datadir = @datadir@/tessdata + +data_DATA = eng.cube.bigrams \ + eng.cube.lm \ + eng.cube.params \ + eng.cube.word-freq \ + eng.traineddata \ + eng.cube.fold \ + eng.cube.nn \ + eng.cube.size \ + eng.tesseract_cube.nn + #data_DATA = bul.traineddata \ # cat.traineddata \ # ces.traineddata \ diff --git a/tessdata/configs/Makefile.am b/tessdata/configs/Makefile.am index 08b0e5f980..c3477dbf7c 100644 --- a/tessdata/configs/Makefile.am +++ b/tessdata/configs/Makefile.am @@ -1,3 +1,3 @@ datadir = @datadir@/tessdata/configs -data_DATA = inter makebox box.train unlv ambigs.train api_config kannada box.train.stderr logfile digits hocr -EXTRA_DIST = inter makebox box.train unlv ambigs.train api_config kannada box.train.stderr logfile digits hocr +data_DATA = inter makebox box.train unlv ambigs.train api_config kannada box.train.stderr logfile digits hocr linebox rebox strokewidth +EXTRA_DIST = inter makebox box.train unlv ambigs.train api_config kannada box.train.stderr logfile digits hocr linebox rebox strokewidth diff --git a/tessdata/configs/Makefile.in b/tessdata/configs/Makefile.in index e232bf05d1..c88d964cc4 100644 --- a/tessdata/configs/Makefile.in +++ b/tessdata/configs/Makefile.in @@ -104,14 +104,21 @@ GENERIC_LIBRARY_VERSION = @GENERIC_LIBRARY_VERSION@ GENERIC_MAJOR_VERSION = @GENERIC_MAJOR_VERSION@ GENERIC_RELEASE = @GENERIC_RELEASE@ GENERIC_VERSION = @GENERIC_VERSION@ +GETTEXT_MACRO_VERSION = @GETTEXT_MACRO_VERSION@ +GMSGFMT = @GMSGFMT@ +GMSGFMT_015 = @GMSGFMT_015@ GREP = @GREP@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +INTLLIBS = @INTLLIBS@ +INTL_MACOSX_LIBS = @INTL_MACOSX_LIBS@ LD = @LD@ LDFLAGS = @LDFLAGS@ +LIBICONV = @LIBICONV@ +LIBINTL = @LIBINTL@ LIBOBJS = @LIBOBJS@ LIBS = @LIBS@ LIBTIFF_CFLAGS = @LIBTIFF_CFLAGS@ @@ -119,10 +126,15 @@ LIBTIFF_LIBS = @LIBTIFF_LIBS@ LIBTOOL = @LIBTOOL@ LIPO = @LIPO@ 
LN_S = @LN_S@ +LTLIBICONV = @LTLIBICONV@ +LTLIBINTL = @LTLIBINTL@ LTLIBOBJS = @LTLIBOBJS@ MAINT = @MAINT@ MAKEINFO = @MAKEINFO@ MKDIR_P = @MKDIR_P@ +MSGFMT = @MSGFMT@ +MSGFMT_015 = @MSGFMT_015@ +MSGMERGE = @MSGMERGE@ NM = @NM@ NMEDIT = @NMEDIT@ OBJDUMP = @OBJDUMP@ @@ -139,12 +151,17 @@ PACKAGE_URL = @PACKAGE_URL@ PACKAGE_VERSION = @PACKAGE_VERSION@ PACKAGE_YEAR = @PACKAGE_YEAR@ PATH_SEPARATOR = @PATH_SEPARATOR@ +POSUB = @POSUB@ RANLIB = @RANLIB@ SED = @SED@ SET_MAKE = @SET_MAKE@ SHELL = @SHELL@ STRIP = @STRIP@ +USE_NLS = @USE_NLS@ VERSION = @VERSION@ +XGETTEXT = @XGETTEXT@ +XGETTEXT_015 = @XGETTEXT_015@ +XGETTEXT_EXTRA_OPTIONS = @XGETTEXT_EXTRA_OPTIONS@ abs_builddir = @abs_builddir@ abs_srcdir = @abs_srcdir@ abs_top_builddir = @abs_top_builddir@ @@ -198,8 +215,8 @@ target_alias = @target_alias@ top_build_prefix = @top_build_prefix@ top_builddir = @top_builddir@ top_srcdir = @top_srcdir@ -data_DATA = inter makebox box.train unlv ambigs.train api_config kannada box.train.stderr logfile digits hocr -EXTRA_DIST = inter makebox box.train unlv ambigs.train api_config kannada box.train.stderr logfile digits hocr +data_DATA = inter makebox box.train unlv ambigs.train api_config kannada box.train.stderr logfile digits hocr linebox rebox strokewidth +EXTRA_DIST = inter makebox box.train unlv ambigs.train api_config kannada box.train.stderr logfile digits hocr linebox rebox strokewidth all: all-am .SUFFIXES: diff --git a/tessdata/configs/ambigs.train b/tessdata/configs/ambigs.train index 3cdacf371f..aa0ba0a85f 100644 --- a/tessdata/configs/ambigs.train +++ b/tessdata/configs/ambigs.train @@ -1,2 +1,6 @@ -global_tessedit_ambigs_training 1 -global_ambigs_debug_level 3 +tessedit_ambigs_training 1 +load_freq_dawg 0 +load_punc_dawg 0 +load_system_dawg 0 +load_number_dawg 0 +ambigs_debug_level 3 diff --git a/tessdata/configs/box.train b/tessdata/configs/box.train index cf5699b112..50c19a6c46 100644 --- a/tessdata/configs/box.train +++ b/tessdata/configs/box.train @@ -1,5 +1,4 @@ 
file_type .bl -tessedit_use_nn F textord_fast_pitch_test T tessedit_single_match 0 tessedit_zero_rejection T @@ -11,6 +10,4 @@ edges_childarea 0.65 edges_boxarea 0.9 tessedit_resegment_from_boxes T tessedit_train_from_boxes T -textord_repeat_extraction F textord_no_rejects T -debug_file tesseract.log diff --git a/tessdata/configs/inter b/tessdata/configs/inter index 7523fa3ea1..d4e0f2d946 100755 --- a/tessdata/configs/inter +++ b/tessdata/configs/inter @@ -1,4 +1,3 @@ interactive_mode T -edit_variables T tessedit_draw_words T tessedit_draw_outwords T diff --git a/tessdata/configs/linebox b/tessdata/configs/linebox new file mode 100644 index 0000000000..bd9c114df6 --- /dev/null +++ b/tessdata/configs/linebox @@ -0,0 +1,2 @@ +tessedit_resegment_from_line_boxes 1 +tessedit_make_boxes_from_boxes 1 diff --git a/tessdata/configs/rebox b/tessdata/configs/rebox new file mode 100644 index 0000000000..f8342b4c2c --- /dev/null +++ b/tessdata/configs/rebox @@ -0,0 +1,2 @@ +tessedit_resegment_from_boxes 1 +tessedit_make_boxes_from_boxes 1 diff --git a/tessdata/configs/strokewidth b/tessdata/configs/strokewidth new file mode 100644 index 0000000000..e95b59263d --- /dev/null +++ b/tessdata/configs/strokewidth @@ -0,0 +1,12 @@ +textord_show_blobs 0 +textord_debug_tabfind 3 +textord_tabfind_show_partitions 1 +textord_tabfind_show_initial_partitions 1 +textord_tabfind_show_columns 1 +textord_tabfind_show_blocks 1 +textord_tabfind_show_initialtabs 1 +textord_tabfind_show_finaltabs 1 +textord_tabfind_show_strokewidths 1 +textord_tabfind_show_vlines 0 +textord_tabfind_show_images 1 +tessedit_dump_pageseg_images 0 diff --git a/tessdata/tessconfigs/Makefile.in b/tessdata/tessconfigs/Makefile.in index e89f1f251e..9d6a1e91a4 100644 --- a/tessdata/tessconfigs/Makefile.in +++ b/tessdata/tessconfigs/Makefile.in @@ -104,14 +104,21 @@ GENERIC_LIBRARY_VERSION = @GENERIC_LIBRARY_VERSION@ GENERIC_MAJOR_VERSION = @GENERIC_MAJOR_VERSION@ GENERIC_RELEASE = @GENERIC_RELEASE@ GENERIC_VERSION = 
@GENERIC_VERSION@ +GETTEXT_MACRO_VERSION = @GETTEXT_MACRO_VERSION@ +GMSGFMT = @GMSGFMT@ +GMSGFMT_015 = @GMSGFMT_015@ GREP = @GREP@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +INTLLIBS = @INTLLIBS@ +INTL_MACOSX_LIBS = @INTL_MACOSX_LIBS@ LD = @LD@ LDFLAGS = @LDFLAGS@ +LIBICONV = @LIBICONV@ +LIBINTL = @LIBINTL@ LIBOBJS = @LIBOBJS@ LIBS = @LIBS@ LIBTIFF_CFLAGS = @LIBTIFF_CFLAGS@ @@ -119,10 +126,15 @@ LIBTIFF_LIBS = @LIBTIFF_LIBS@ LIBTOOL = @LIBTOOL@ LIPO = @LIPO@ LN_S = @LN_S@ +LTLIBICONV = @LTLIBICONV@ +LTLIBINTL = @LTLIBINTL@ LTLIBOBJS = @LTLIBOBJS@ MAINT = @MAINT@ MAKEINFO = @MAKEINFO@ MKDIR_P = @MKDIR_P@ +MSGFMT = @MSGFMT@ +MSGFMT_015 = @MSGFMT_015@ +MSGMERGE = @MSGMERGE@ NM = @NM@ NMEDIT = @NMEDIT@ OBJDUMP = @OBJDUMP@ @@ -139,12 +151,17 @@ PACKAGE_URL = @PACKAGE_URL@ PACKAGE_VERSION = @PACKAGE_VERSION@ PACKAGE_YEAR = @PACKAGE_YEAR@ PATH_SEPARATOR = @PATH_SEPARATOR@ +POSUB = @POSUB@ RANLIB = @RANLIB@ SED = @SED@ SET_MAKE = @SET_MAKE@ SHELL = @SHELL@ STRIP = @STRIP@ +USE_NLS = @USE_NLS@ VERSION = @VERSION@ +XGETTEXT = @XGETTEXT@ +XGETTEXT_015 = @XGETTEXT_015@ +XGETTEXT_EXTRA_OPTIONS = @XGETTEXT_EXTRA_OPTIONS@ abs_builddir = @abs_builddir@ abs_srcdir = @abs_srcdir@ abs_top_builddir = @abs_top_builddir@ diff --git a/tessdata/tessconfigs/msdemo b/tessdata/tessconfigs/msdemo index 9683d58323..a1af21fe61 100644 --- a/tessdata/tessconfigs/msdemo +++ b/tessdata/tessconfigs/msdemo @@ -10,4 +10,4 @@ wordrec_display_splits 0 wordrec_display_all_words 1 wordrec_display_all_blobs 1 wordrec_display_segmentations 2 -tord_display_ratings 1 +classify_debug_level 1 diff --git a/tessdata/tessconfigs/segdemo b/tessdata/tessconfigs/segdemo index fb66358813..d7d90ae690 100755 --- a/tessdata/tessconfigs/segdemo +++ b/tessdata/tessconfigs/segdemo @@ -6,4 +6,5 @@ wordrec_display_splits 0 wordrec_display_all_words 1 wordrec_display_all_blobs 1 
wordrec_display_segmentations 2 -tord_display_ratings 1 +classify_debug_level 1 +stopper_debug_level 1 diff --git a/textord/Makefile.am b/textord/Makefile.am index a3bc2e9e6e..54a1e8d055 100644 --- a/textord/Makefile.am +++ b/textord/Makefile.am @@ -5,26 +5,28 @@ AM_CPPFLAGS = \ -I$(top_srcdir)/ccmain -I$(top_srcdir)/wordrec -I$(top_srcdir)/api \ -I$(top_srcdir)/cutil -I$(top_srcdir)/classify -I$(top_srcdir)/dict -EXTRA_DIST = textord.vcproj - include_HEADERS = \ - alignedblob.h bbgrid.h blkocc.h blobcmpl.h \ + alignedblob.h bbgrid.h blkocc.h \ colfind.h colpartition.h colpartitionset.h \ + colpartitiongrid.h \ drawedg.h drawtord.h edgblob.h edgloop.h \ fpchop.h gap_map.h imagefind.h linefind.h makerow.h oldbasel.h \ pithsync.h pitsync1.h scanedg.h sortflts.h strokewidth.h \ - tabfind.h tablefind.h tabvector.h tessout.h \ - topitch.h tordmain.h tospace.h tovars.h \ + tabfind.h tablefind.h tabvector.h \ + tablerecog.h textord.h \ + topitch.h tordmain.h tovars.h \ underlin.h wordseg.h workingpartset.h lib_LTLIBRARIES = libtesseract_textord.la libtesseract_textord_la_SOURCES = \ alignedblob.cpp bbgrid.cpp blkocc.cpp \ colfind.cpp colpartition.cpp colpartitionset.cpp \ + colpartitiongrid.cpp \ drawedg.cpp drawtord.cpp edgblob.cpp edgloop.cpp \ fpchop.cpp gap_map.cpp imagefind.cpp linefind.cpp makerow.cpp oldbasel.cpp \ - pagesegmain.cpp pithsync.cpp pitsync1.cpp scanedg.cpp sortflts.cpp strokewidth.cpp \ + pithsync.cpp pitsync1.cpp scanedg.cpp sortflts.cpp strokewidth.cpp \ tabfind.cpp tablefind.cpp tabvector.cpp \ + tablerecog.cpp textord.cpp \ topitch.cpp tordmain.cpp tospace.cpp tovars.cpp \ underlin.cpp wordseg.cpp workingpartset.cpp diff --git a/textord/Makefile.in b/textord/Makefile.in index 62b0c6910b..abbd7a3720 100644 --- a/textord/Makefile.in +++ b/textord/Makefile.in @@ -73,12 +73,12 @@ LTLIBRARIES = $(lib_LTLIBRARIES) libtesseract_textord_la_LIBADD = am_libtesseract_textord_la_OBJECTS = alignedblob.lo bbgrid.lo \ blkocc.lo colfind.lo colpartition.lo 
colpartitionset.lo \ - drawedg.lo drawtord.lo edgblob.lo edgloop.lo fpchop.lo \ - gap_map.lo imagefind.lo linefind.lo makerow.lo oldbasel.lo \ - pagesegmain.lo pithsync.lo pitsync1.lo scanedg.lo sortflts.lo \ - strokewidth.lo tabfind.lo tablefind.lo tabvector.lo topitch.lo \ - tordmain.lo tospace.lo tovars.lo underlin.lo wordseg.lo \ - workingpartset.lo + colpartitiongrid.lo drawedg.lo drawtord.lo edgblob.lo \ + edgloop.lo fpchop.lo gap_map.lo imagefind.lo linefind.lo \ + makerow.lo oldbasel.lo pithsync.lo pitsync1.lo scanedg.lo \ + sortflts.lo strokewidth.lo tabfind.lo tablefind.lo \ + tabvector.lo tablerecog.lo textord.lo topitch.lo tordmain.lo \ + tospace.lo tovars.lo underlin.lo wordseg.lo workingpartset.lo libtesseract_textord_la_OBJECTS = \ $(am_libtesseract_textord_la_OBJECTS) libtesseract_textord_la_LINK = $(LIBTOOL) --tag=CXX $(AM_LIBTOOLFLAGS) \ @@ -253,7 +253,6 @@ libdir = @libdir@ libexecdir = @libexecdir@ localedir = @localedir@ localstatedir = @localstatedir@ -lt_ECHO = @lt_ECHO@ mandir = @mandir@ mkdir_p = @mkdir_p@ oldincludedir = @oldincludedir@ @@ -276,25 +275,28 @@ AM_CPPFLAGS = \ -I$(top_srcdir)/ccmain -I$(top_srcdir)/wordrec -I$(top_srcdir)/api \ -I$(top_srcdir)/cutil -I$(top_srcdir)/classify -I$(top_srcdir)/dict -EXTRA_DIST = textord.vcproj include_HEADERS = \ - alignedblob.h bbgrid.h blkocc.h blobcmpl.h \ + alignedblob.h bbgrid.h blkocc.h \ colfind.h colpartition.h colpartitionset.h \ + colpartitiongrid.h \ drawedg.h drawtord.h edgblob.h edgloop.h \ fpchop.h gap_map.h imagefind.h linefind.h makerow.h oldbasel.h \ pithsync.h pitsync1.h scanedg.h sortflts.h strokewidth.h \ - tabfind.h tablefind.h tabvector.h tessout.h \ - topitch.h tordmain.h tospace.h tovars.h \ + tabfind.h tablefind.h tabvector.h \ + tablerecog.h textord.h \ + topitch.h tordmain.h tovars.h \ underlin.h wordseg.h workingpartset.h lib_LTLIBRARIES = libtesseract_textord.la libtesseract_textord_la_SOURCES = \ alignedblob.cpp bbgrid.cpp blkocc.cpp \ colfind.cpp colpartition.cpp 
colpartitionset.cpp \ + colpartitiongrid.cpp \ drawedg.cpp drawtord.cpp edgblob.cpp edgloop.cpp \ fpchop.cpp gap_map.cpp imagefind.cpp linefind.cpp makerow.cpp oldbasel.cpp \ - pagesegmain.cpp pithsync.cpp pitsync1.cpp scanedg.cpp sortflts.cpp strokewidth.cpp \ + pithsync.cpp pitsync1.cpp scanedg.cpp sortflts.cpp strokewidth.cpp \ tabfind.cpp tablefind.cpp tabvector.cpp \ + tablerecog.cpp textord.cpp \ topitch.cpp tordmain.cpp tospace.cpp tovars.cpp \ underlin.cpp wordseg.cpp workingpartset.cpp @@ -378,6 +380,7 @@ distclean-compile: @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/blkocc.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/colfind.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/colpartition.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/colpartitiongrid.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/colpartitionset.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/drawedg.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/drawtord.Plo@am__quote@ @@ -389,7 +392,6 @@ distclean-compile: @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/linefind.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/makerow.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/oldbasel.Plo@am__quote@ -@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/pagesegmain.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/pithsync.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/pitsync1.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/scanedg.Plo@am__quote@ @@ -397,7 +399,9 @@ distclean-compile: @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/strokewidth.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/tabfind.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/tablefind.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/tablerecog.Plo@am__quote@ @AMDEP_TRUE@@am__include@ 
@am__quote@./$(DEPDIR)/tabvector.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/textord.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/topitch.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/tordmain.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/tospace.Plo@am__quote@ diff --git a/textord/alignedblob.cpp b/textord/alignedblob.cpp index 13c63c1604..80f7e3348d 100644 --- a/textord/alignedblob.cpp +++ b/textord/alignedblob.cpp @@ -449,6 +449,9 @@ BLOBNBOX* AlignedBlob::FindAlignedBlob(const AlignedBlobParams& p, tprintf("gutter\n"); return NULL; } + if ((p.right_tab && neighbour->leader_on_right()) || + (!p.right_tab && neighbour->leader_on_left())) + continue; // Neigbours of leaders are not allowed to be used. if (n_x <= x_at_n_y + p.r_align_tolerance && n_x >= x_at_n_y - p.l_align_tolerance) { // Aligned so keep it. If it is a marked tab save it as result, diff --git a/textord/bbgrid.cpp b/textord/bbgrid.cpp index 0c988b5ba1..47c0809691 100644 --- a/textord/bbgrid.cpp +++ b/textord/bbgrid.cpp @@ -19,10 +19,141 @@ /////////////////////////////////////////////////////////////////////// #include "bbgrid.h" +#include "helpers.h" #include "ocrblock.h" namespace tesseract { +/////////////////////////////////////////////////////////////////////// +// BBGrid IMPLEMENTATION. +/////////////////////////////////////////////////////////////////////// +GridBase::GridBase() { +} + +GridBase::GridBase(int gridsize, const ICOORD& bleft, const ICOORD& tright) { + Init(gridsize, bleft, tright); +} + +GridBase::~GridBase() { +} + +// (Re)Initialize the grid. The gridsize is the size in pixels of each cell, +// and bleft, tright are the bounding box of everything to go in it. 
+void GridBase::Init(int gridsize, const ICOORD& bleft, const ICOORD& tright) { + gridsize_ = gridsize; + bleft_ = bleft; + tright_ = tright; + if (gridsize_ == 0) + gridsize_ = 1; + gridwidth_ = (tright.x() - bleft.x() + gridsize_ - 1) / gridsize_; + gridheight_ = (tright.y() - bleft.y() + gridsize_ - 1) / gridsize_; + gridbuckets_ = gridwidth_ * gridheight_; +} + +// Compute the given grid coordinates from image coords. +void GridBase::GridCoords(int x, int y, int* grid_x, int* grid_y) const { + *grid_x = (x - bleft_.x()) / gridsize_; + *grid_y = (y - bleft_.y()) / gridsize_; + ClipGridCoords(grid_x, grid_y); +} + +// Clip the given grid coordinates to fit within the grid. +void GridBase::ClipGridCoords(int* x, int* y) const { + *x = ClipToRange(*x, 0, gridwidth_ - 1); + *y = ClipToRange(*y, 0, gridheight_ - 1); +} + +IntGrid::IntGrid() { + grid_ = NULL; +} + +IntGrid::IntGrid(int gridsize, const ICOORD& bleft, const ICOORD& tright) + : grid_(NULL) { + Init(gridsize, bleft, tright); +} + +IntGrid::~IntGrid() { + if (grid_ != NULL) + delete [] grid_; +} + +// (Re)Initialize the grid. The gridsize is the size in pixels of each cell, +// and bleft, tright are the bounding box of everything to go in it. +void IntGrid::Init(int gridsize, const ICOORD& bleft, const ICOORD& tright) { + GridBase::Init(gridsize, bleft, tright); + if (grid_ != NULL) + delete [] grid_; + grid_ = new int[gridbuckets_]; + Clear(); +} + +// Clear all the ints in the grid to zero. +void IntGrid::Clear() { + for (int i = 0; i < gridbuckets_; ++i) { + grid_[i] = 0; + } +} + +// Rotate the grid by rotation, keeping cell contents. +// rotation must be a multiple of 90 degrees. +// NOTE: due to partial cells, cell coverage in the rotated grid will be +// inexact. This is why there is no Rotate for the generic BBGrid. +// TODO(rays) investigate fixing this inaccuracy by moving the origin after +// rotation. 
+void IntGrid::Rotate(const FCOORD& rotation) { + ASSERT_HOST(rotation.x() == 0.0f || rotation.y() == 0.0f); + ICOORD old_bleft(bleft()); + ICOORD old_tright(tright()); + int old_width = gridwidth(); + int old_height = gridheight(); + TBOX box(bleft(), tright()); + box.rotate(rotation); + int* old_grid = grid_; + grid_ = NULL; + Init(gridsize(), box.botleft(), box.topright()); + // Iterate over the old grid, copying data to the rotated position in the new. + int oldi = 0; + FCOORD x_step(rotation); + x_step *= gridsize(); + for (int oldy = 0; oldy < old_height; ++oldy) { + FCOORD line_pos(old_bleft.x(), old_bleft.y() + gridsize() * oldy); + line_pos.rotate(rotation); + for (int oldx = 0; oldx < old_width; ++oldx, line_pos += x_step, ++oldi) { + int grid_x, grid_y; + GridCoords(static_cast(line_pos.x() + 0.5), + static_cast(line_pos.y() + 0.5), + &grid_x, &grid_y); + grid_[grid_y * gridwidth() + grid_x] = old_grid[oldi]; + } + } + delete [] old_grid; +} + +// Returns a new IntGrid containing values equal to the sum of all the +// neighbouring cells. The returned grid must be deleted after use. +// For ease of implementation, edge cells are double counted, to make them +// have the same range as the non-edge cells. +IntGrid* IntGrid::NeighbourhoodSum() const { + IntGrid* sumgrid = new IntGrid(gridsize(), bleft(), tright()); + for (int y = 0; y < gridheight(); ++y) { + for (int x = 0; x < gridwidth(); ++x) { + int cell_count = 0; + for (int yoffset = -1; yoffset <= 1; ++yoffset) { + for (int xoffset = -1; xoffset <= 1; ++xoffset) { + int grid_x = x + xoffset; + int grid_y = y + yoffset; + ClipGridCoords(&grid_x, &grid_y); + cell_count += GridCellValue(grid_x, grid_y); + } + } + if (GridCellValue(x, y) > 1) + sumgrid->SetGridCell(x, y, cell_count); + } + } + return sumgrid; +} + + #ifdef HAVE_LIBLEPT // Make a Pix of the correct scaled size for the TraceOutline functions. 
Pix* GridReducedPix(const TBOX& box, int gridsize, diff --git a/textord/bbgrid.h b/textord/bbgrid.h index e50bfde6fd..75f5f2be2a 100644 --- a/textord/bbgrid.h +++ b/textord/bbgrid.h @@ -55,6 +55,91 @@ Pix* TraceBlockOnReducedPix(BLOCK* block, int gridsize, template class GridSearch; +// The GridBase class is the base class for BBGrid and IntGrid. +// It holds the geometry and scale of the grid. +class GridBase { + public: + GridBase(); + GridBase(int gridsize, const ICOORD& bleft, const ICOORD& tright); + virtual ~GridBase(); + + // (Re)Initialize the grid. The gridsize is the size in pixels of each cell, + // and bleft, tright are the bounding box of everything to go in it. + void Init(int gridsize, const ICOORD& bleft, const ICOORD& tright); + + // Simple accessors. + int gridsize() const { + return gridsize_; + } + int gridwidth() const { + return gridwidth_; + } + int gridheight() const { + return gridheight_; + } + const ICOORD& bleft() const { + return bleft_; + } + const ICOORD& tright() const { + return tright_; + } + // Compute the given grid coordinates from image coords. + void GridCoords(int x, int y, int* grid_x, int* grid_y) const; + + // Clip the given grid coordinates to fit within the grid. + void ClipGridCoords(int* x, int* y) const; + + protected: + // TODO(rays) Make these private and migrate to the accessors in subclasses. + int gridsize_; // Pixel size of each grid cell. + int gridwidth_; // Size of the grid in cells. + int gridheight_; + int gridbuckets_; // Total cells in grid. + ICOORD bleft_; // Pixel coords of bottom-left of grid. + ICOORD tright_; // Pixel coords of top-right of grid. + + private: +}; + +// The IntGrid maintains a single int for each cell in a grid. +class IntGrid : public GridBase { + public: + IntGrid(); + IntGrid(int gridsize, const ICOORD& bleft, const ICOORD& tright); + virtual ~IntGrid(); + + // (Re)Initialize the grid. 
The gridsize is the size in pixels of each cell, + // and bleft, tright are the bounding box of everything to go in it. + void Init(int gridsize, const ICOORD& bleft, const ICOORD& tright); + + // Clear all the ints in the grid to zero. + void Clear(); + + // Rotate the grid by rotation, keeping cell contents. + // rotation must be a multiple of 90 degrees. + // NOTE: due to partial cells, cell coverage in the rotated grid will be + // inexact. This is why there is no Rotate for the generic BBGrid. + void Rotate(const FCOORD& rotation); + + // Returns a new IntGrid containing values equal to the sum of all the + // neighbouring cells. The returned grid must be deleted after use. + IntGrid* NeighbourhoodSum() const; + + int GridCellValue(int grid_x, int grid_y) const { + ASSERT_HOST(grid_x >= 0 && grid_x < gridwidth()); + ASSERT_HOST(grid_y >= 0 && grid_y < gridheight()); + return grid_[grid_y * gridwidth_ + grid_x]; + } + void SetGridCell(int grid_x, int grid_y, int value) { + ASSERT_HOST(grid_x >= 0 && grid_x < gridwidth()); + ASSERT_HOST(grid_y >= 0 && grid_y < gridheight()); + grid_[grid_y * gridwidth_ + grid_x] = value; + } + + private: + int* grid_; // 2-d array of ints. +}; + // The BBGrid class holds C_LISTs of template classes BBC (bounding box class) // in a grid for fast neighbour access. // The BBC class must have a member const TBOX& bounding_box() const. @@ -69,7 +154,8 @@ template class GridSearch; // thereby making most of the ugly template notation go away. // The friend class GridSearch, with the same template arguments, is // used to search a grid efficiently in one of several search patterns. -template class BBGrid { +template class BBGrid + : public GridBase { friend class GridSearch; public: BBGrid(); @@ -86,23 +172,6 @@ template class BBGrid { // intact. void ClearGridData(void (*free_method)(BBC*)); - // Simple accessors. 
- int gridsize() const { - return gridsize_; - } - int gridwidth() const { - return gridwidth_; - } - int gridheight() const { - return gridheight_; - } - ICOORD bleft() const { - return bleft_; - } - ICOORD tright() const { - return tright_; - } - // Insert a bbox into the appropriate place in the grid. // If h_spread, then all cells covered horizontally by the box are // used, otherwise, just the bottom-left. Similarly for v_spread. @@ -126,11 +195,12 @@ template class BBGrid { // If a GridSearch is operating, call GridSearch::RemoveBBox() instead. void RemoveBBox(BBC* bbox); - // Compute the given grid coordinates from image coords. - void GridCoords(int x, int y, int* grid_x, int* grid_y); + // Returns true if the given rectangle has no overlapping elements. + bool RectangleEmpty(const TBOX& rect); - // Clip the given grid coordinates to fit within the grid. - void ClipGridCoords(int* x, int* y); + // Returns an IntGrid showing the number of elements in each cell. + // Returned IntGrid must be deleted after use. + IntGrid* CountCellElements(); // Make a window of an appropriate size to display things in the grid. ScrollView* MakeWindow(int x, int y, const char* window_name); @@ -147,12 +217,6 @@ template class BBGrid { virtual void HandleClick(int x, int y); protected: - int gridsize_; // Pixel size of each grid cell. - int gridwidth_; // Size of the grid in cells. - int gridheight_; - int gridbuckets_; // Total cells in grid. - ICOORD bleft_; // Pixel coords of bottom-left of grid. - ICOORD tright_; // Pixel coords of top-right of grid. BBC_CLIST* grid_; // 2-d array of CLISTS of BBC elements. private: @@ -162,7 +226,8 @@ template class BBGrid { template class GridSearch { public: GridSearch(BBGrid* grid) - : grid_(grid), previous_return_(NULL), next_return_(NULL) { + : grid_(grid), unique_mode_(false), + previous_return_(NULL), next_return_(NULL) { } // Get the grid x, y coords of the most recently returned BBC. 
@@ -172,9 +237,19 @@ template class GridSearch { int GridY() const { return y_; } + + // Sets the search mode to return a box only once. + // Efficiency warning: Implementation currently uses a squared-order + // search in the number of returned elements. Use only where a small + // number of elements are spread over a wide area, eg ColPartitions. + void SetUniqueMode(bool mode) { + unique_mode_ = mode; + } + // TODO(rays) Replace calls to ReturnedSeedElement with SetUniqueMode. + // It only works if the search includes the bottom-left corner. // Apart from full search, all other searches return a box several // times if the box is inserted with h_spread or v_spread. - // This method will return true for only one occurrance of each box + // This method will return true for only one occurrence of each box // that was inserted with both h_spread and v_spread as true. // It will usually return false for boxes that were not inserted with // both h_spread=true and v_spread=true @@ -195,6 +270,9 @@ template class GridSearch { // match the search conditions, since they return everything in the // covered grid cells. It is up to the caller to check for // appropriateness. + // TODO(rays) NextRectSearch only returns valid elements. Make the other + // searches test before return also and remove the tests from code + // that uses GridSearch. // Start a new full search. Will iterate all stored blobs, from the top. // If the blobs have been inserted using InsertBBox, (not InsertPixPtBBox) @@ -271,10 +349,13 @@ template class GridSearch { TBOX rect_; int x_; // The current location in grid coords, of the current search. int y_; + bool unique_mode_; BBC* previous_return_; // Previous return from Next*. BBC* next_return_; // Current value of it_.data() used for repositioning. // An iterator over the list at (x_, y_) in the grid_. BBC_C_IT it_; + // List of unique returned elements used when unique_mode_ is true. 
+ BBC_CLIST returns_; }; // Sort function to sort a BBC by bounding_box().left(). @@ -283,7 +364,34 @@ int SortByBoxLeft(const void* void1, const void* void2) { // The void*s are actually doubly indirected, so get rid of one level. const BBC* p1 = *reinterpret_cast(void1); const BBC* p2 = *reinterpret_cast(void2); - return p1->bounding_box().left() - p2->bounding_box().left(); + int result = p1->bounding_box().left() - p2->bounding_box().left(); + if (result != 0) + return result; + result = p1->bounding_box().right() - p2->bounding_box().right(); + if (result != 0) + return result; + result = p1->bounding_box().bottom() - p2->bounding_box().bottom(); + if (result != 0) + return result; + return p1->bounding_box().top() - p2->bounding_box().top(); +} + +// Sort function to sort a BBC by bounding_box().bottom(). +template +int SortByBoxBottom(const void* void1, const void* void2) { + // The void*s are actually doubly indirected, so get rid of one level. + const BBC* p1 = *reinterpret_cast(void1); + const BBC* p2 = *reinterpret_cast(void2); + int result = p1->bounding_box().bottom() - p2->bounding_box().bottom(); + if (result != 0) + return result; + result = p1->bounding_box().top() - p2->bounding_box().top(); + if (result != 0) + return result; + result = p1->bounding_box().left() - p2->bounding_box().left(); + if (result != 0) + return result; + return p1->bounding_box().right() - p2->bounding_box().right(); } /////////////////////////////////////////////////////////////////////// @@ -312,16 +420,9 @@ template void BBGrid::Init(int gridsize, const ICOORD& bleft, const ICOORD& tright) { - gridsize_ = gridsize; - bleft_ = bleft; - tright_ = tright; + GridBase::Init(gridsize, bleft, tright); if (grid_ != NULL) delete [] grid_; - if (gridsize_ == 0) - gridsize_ = 1; - gridwidth_ = (tright.x() - bleft.x() + gridsize_ - 1) / gridsize_; - gridheight_ = (tright.y() - bleft.y() + gridsize_ - 1) / gridsize_; - gridbuckets_ = gridwidth_ * gridheight_; grid_ = new 
BBC_CLIST[gridbuckets_]; } @@ -424,24 +525,29 @@ void BBGrid::RemoveBBox(BBC* bbox) { } } -// Compute the given grid coordinates from image coords. +// Returns true if the given rectangle has no overlapping elements. template -void BBGrid::GridCoords(int x, int y, - int* grid_x, int* grid_y) { - *grid_x = (x - bleft_.x()) / gridsize_; - *grid_y = (y - bleft_.y()) / gridsize_; - ClipGridCoords(grid_x, grid_y); +bool BBGrid::RectangleEmpty(const TBOX& rect) { + GridSearch rsearch(this); + rsearch.StartRectSearch(rect); + return rsearch.NextRectSearch() == NULL; } -// Clip the given grid coordinates to fit within the grid. +// Returns an IntGrid showing the number of elements in each cell. +// Returned IntGrid must be deleted after use. template -void BBGrid::ClipGridCoords(int* x, int* y) { - if (*x < 0) *x = 0; - if (*x >= gridwidth_) *x = gridwidth_ - 1; - if (*y < 0) *y = 0; - if (*y >= gridheight_) *y = gridheight_ - 1; +IntGrid* BBGrid::CountCellElements() { + IntGrid* intgrid = new IntGrid(gridsize(), bleft(), tright()); + for (int y = 0; y < gridheight(); ++y) { + for (int x = 0; x < gridwidth(); ++x) { + int cell_count = grid_[y * gridwidth() + x].length(); + intgrid->SetGridCell(x, y, cell_count); + } + } + return intgrid; } + template class TabEventHandler : public SVEventHandler { public: explicit TabEventHandler(G* grid) : grid_(grid) { @@ -472,7 +578,7 @@ ScrollView* BBGrid::MakeWindow( new TabEventHandler >(this); tab_win->AddEventHandler(handler); tab_win->Pen(ScrollView::GREY); - tab_win->Rectangle(0, 0, tright_.x(), tright_.y()); + tab_win->Rectangle(0, 0, tright_.x() - bleft_.x(), tright_.y() - bleft_.y()); #endif return tab_win; } @@ -583,28 +689,32 @@ void GridSearch::StartRadSearch(int x, int y, // maximum radius has been reached. 
template BBC* GridSearch::NextRadSearch() { - while (it_.cycled_list()) { - ++rad_index_; - if (rad_index_ >= radius_) { - ++rad_dir_; - rad_index_ = 0; - if (rad_dir_ >= 4) { - ++radius_; - if (radius_ > max_radius_) - return CommonEnd(); - rad_dir_ = 0; + do { + while (it_.cycled_list()) { + ++rad_index_; + if (rad_index_ >= radius_) { + ++rad_dir_; + rad_index_ = 0; + if (rad_dir_ >= 4) { + ++radius_; + if (radius_ > max_radius_) + return CommonEnd(); + rad_dir_ = 0; + } } + ICOORD offset = C_OUTLINE::chain_step(rad_dir_); + offset *= radius_ - rad_index_; + offset += C_OUTLINE::chain_step(rad_dir_ + 1) * rad_index_; + x_ = x_origin_ + offset.x(); + y_ = y_origin_ + offset.y(); + if (x_ >= 0 && x_ < grid_->gridwidth_ && + y_ >= 0 && y_ < grid_->gridheight_) + SetIterator(); } - ICOORD offset = C_OUTLINE::chain_step(rad_dir_); - offset *= radius_ - rad_index_; - offset += C_OUTLINE::chain_step(rad_dir_ + 1) * rad_index_; - x_ = x_origin_ + offset.x(); - y_ = y_origin_ + offset.y(); - if (x_ >= 0 && x_ < grid_->gridwidth_ && - y_ >= 0 && y_ < grid_->gridheight_) - SetIterator(); - } - return CommonNext(); + CommonNext(); + } while (unique_mode_ && + !returns_.add_sorted(SortByBoxLeft, true, previous_return_)); + return previous_return_; } // Start a new left or right-looking search. Will search to the side @@ -626,22 +736,26 @@ void GridSearch::StartSideSearch(int x, // according to the flag. 
template BBC* GridSearch::NextSideSearch(bool right_to_left) { - while (it_.cycled_list()) { - ++rad_index_; - if (rad_index_ > radius_) { - if (right_to_left) - --x_; - else - ++x_; - rad_index_ = 0; - if (x_ < 0 || x_ >= grid_->gridwidth_) - return CommonEnd(); + do { + while (it_.cycled_list()) { + ++rad_index_; + if (rad_index_ > radius_) { + if (right_to_left) + --x_; + else + ++x_; + rad_index_ = 0; + if (x_ < 0 || x_ >= grid_->gridwidth_) + return CommonEnd(); + } + y_ = y_origin_ - rad_index_; + if (y_ >= 0 && y_ < grid_->gridheight_) + SetIterator(); } - y_ = y_origin_ - rad_index_; - if (y_ >= 0 && y_ < grid_->gridheight_) - SetIterator(); - } - return CommonNext(); + CommonNext(); + } while (unique_mode_ && + !returns_.add_sorted(SortByBoxLeft, true, previous_return_)); + return previous_return_; } // Start a vertical-looking search. Will search up or down @@ -663,22 +777,26 @@ void GridSearch::StartVerticalSearch(int xmin, template BBC* GridSearch::NextVerticalSearch( bool top_to_bottom) { - while (it_.cycled_list()) { - ++rad_index_; - if (rad_index_ > radius_) { - if (top_to_bottom) - --y_; - else - ++y_; - rad_index_ = 0; - if (y_ < 0 || y_ >= grid_->gridheight_) - return CommonEnd(); + do { + while (it_.cycled_list()) { + ++rad_index_; + if (rad_index_ > radius_) { + if (top_to_bottom) + --y_; + else + ++y_; + rad_index_ = 0; + if (y_ < 0 || y_ >= grid_->gridheight_) + return CommonEnd(); + } + x_ = x_origin_ + rad_index_; + if (x_ >= 0 && x_ < grid_->gridwidth_) + SetIterator(); } - x_ = x_origin_ + rad_index_; - if (x_ >= 0 && x_ < grid_->gridwidth_) - SetIterator(); - } - return CommonNext(); + CommonNext(); + } while (unique_mode_ && + !returns_.add_sorted(SortByBoxLeft, true, previous_return_)); + return previous_return_; } // Start a rectangular search. Will search for a box that overlaps the @@ -697,17 +815,22 @@ void GridSearch::StartRectSearch(const TBOX& rect) { // Return the next bbox in the rectangular search or NULL if complete. 
template BBC* GridSearch::NextRectSearch() { - while (it_.cycled_list()) { - ++x_; - if (x_ > max_radius_) { - --y_; - x_ = x_origin_; - if (y_ < y_origin_) - return CommonEnd(); + do { + while (it_.cycled_list()) { + ++x_; + if (x_ > max_radius_) { + --y_; + x_ = x_origin_; + if (y_ < y_origin_) + return CommonEnd(); + } + SetIterator(); } - SetIterator(); - } - return CommonNext(); + CommonNext(); + } while (!rect_.overlap(previous_return_->bounding_box()) || + (unique_mode_ && + !returns_.add_sorted(SortByBoxLeft, true, previous_return_))); + return previous_return_; } // Remove the last returned BBC. Will not invalidate this. May invalidate @@ -745,6 +868,13 @@ void GridSearch::RepositionIterator() { // If the previous_return_ is no longer in the list, then // next_return_ serves as a backup. it_.move_to_first(); + // Special case, the first element was removed and reposition + // iterator was called. In this case, the data is fine, but the + // cycle point is not. Detect it and return. + if (!it_.empty() && it_.data() == next_return_) { + it_.mark_cycle_pt(); + return; + } for (it_.mark_cycle_pt(); !it_.cycled_list(); it_.forward()) { if (it_.data() == previous_return_ || it_.data_relative(1) == next_return_) { @@ -766,6 +896,7 @@ void GridSearch::CommonStart(int x, int y) { SetIterator(); previous_return_ = NULL; next_return_ = it_.empty() ? NULL : it_.data(); + returns_.shallow_clear(); } // Factored out helper to complete a next search. 
diff --git a/textord/blkocc.cpp b/textord/blkocc.cpp index 94306cb7b3..a2bba2261c 100644 --- a/textord/blkocc.cpp +++ b/textord/blkocc.cpp @@ -29,187 +29,21 @@ */ #include "mfcpch.h" -#include -#include -#include "errcode.h" -#include "drawtord.h" -#include "blkocc.h" -#include "notdll.h" +#include +#include +#include "errcode.h" +#include "drawtord.h" +#include "blkocc.h" +#include "helpers.h" +#include "notdll.h" -const ERRCODE BLOCKOCC = "BlockOcc"; - -ELISTIZE (REGION_OCC) -#define EXTERN -EXTERN BOOL_VAR (blockocc_show_result, FALSE, -"Show intermediate results"); - -/* The values given here should reflect the values of bln_x_height and - * bln_baseline_offset. These are defined as part of the word class - * definition */ - -EXTERN INT_VAR (blockocc_desc_height, 0, -"Descender height after normalisation"); -EXTERN INT_VAR (blockocc_asc_height, 255, -"Ascender height after normalisation"); - -EXTERN INT_VAR (blockocc_band_count, 4, "Number of bands used"); - -EXTERN double_VAR (textord_underline_threshold, 0.5, -"Fraction of width occupied"); +double_VAR(textord_underline_threshold, 0.5, "Fraction of width occupied"); // Forward declarations of static functions - -//project outlines -static void horizontal_cblob_projection(C_BLOB *blob, //blob to project - STATS *stats //output - ); -static void horizontal_coutline_projection( //project outlines - C_OUTLINE *outline, //outline to project - STATS *stats //output - ); -static void set_bands( //init from varibles - float baseline, //top of bottom band - float xheight //height of split band - ); - //blob to do -static void find_transitions(PBLOB *blob, REGION_OCC_LIST *region_occ_list); -static void record_region( //add region on list - inT16 band, - float new_min, - float new_max, - inT16 region_type, - REGION_OCC_LIST *region_occ_list); -static inT16 find_containing_maximal_band( //find range's band - float y1, - float y2, - BOOL8 *doubly_contained); -static void find_significant_line(POLYPT_IT it, inT16 *band); 
-static inT16 find_overlapping_minimal_band( //find range's band - float y1, - float y2); -static inT16 find_region_type(inT16 entry_band, - inT16 current_band, - inT16 exit_band, - float entry_x, - float exit_x); -static void find_trans_point(POLYPT_IT *pt_it, - inT16 current_band, - inT16 next_band, - FCOORD *transition_pt); -static void next_region(POLYPT_IT *start_pt_it, - inT16 start_band, - inT16 *to_band, - float *min_x, - float *max_x, - inT16 *increment, - FCOORD *exit_pt); -static inT16 find_band( // find POINT's band - float y); -static void compress_region_list( // join open regions - REGION_OCC_LIST *region_occ_list); -static void find_fbox(OUTLINE_IT *out_it, - float *min_x, - float *min_y, - float *max_x, - float *max_y); -static void maintain_limits(float *min_x, float *max_x, float x); - - -/** -A note on transitions. - -We want to record occupancy in various bands. In general we need to consider -7 situations: - -@verbatim -(1) (2) (3) (4) - \ / \ / \ / -__\_____/_____\_________/_____\_________/______ Upper Limit - \ / \ / \ / - / \ \-->--/ \--<--/ /-----\ - v ^ / \(7) - \ \ \ / - \ \ /--<--\ /-->--\ \-----/ -____\______\____/_______\____/_______\_________ Lower Limit - \ \ / \ / \ - (5) (6) -@endverbatim - -We know that following "next" pointers around an outline keeps the black area -on the LEFT. We only need be concerned with situations 1,2,3,5 and 7. -4 and 6 can be ignored as they represent small incursions into a large black -region which will be recorded elsewhere. Situations 3 and 5 define encloseed -areas bounded by the upper and lower limits respectively. Situation 1 is open -to the right, awaiting a closure by a situation 2 which is open to the right. -Situation 7 is entirely enclosed within the band. - -The situations are refered to as region types and are determined by -find_region_type. - -An empty region type is used to denote entry to an adjacent band and return -to the original band at the same x location. 
-***********************************************************************/ - -#define REGION_TYPE_EMPTY 0 -#define REGION_TYPE_OPEN_RIGHT 1 -#define REGION_TYPE_OPEN_LEFT 2 -#define REGION_TYPE_UPPER_BOUND 3 -#define REGION_TYPE_UPPER_UNBOUND 4 -#define REGION_TYPE_LOWER_BOUND 5 -#define REGION_TYPE_LOWER_UNBOUND 6 -#define REGION_TYPE_ENCLOSED 7 - -BAND bands[MAX_NUM_BANDS + 1]; // band defns - -/** - * test_underline - * - * Check to see if the blob is an underline. - * Return TRUE if it is. - */ - -BOOL8 test_underline( //look for underlines - BOOL8 testing_on, //< drawing blob - PBLOB *blob, //< blob to test - float baseline, //< coords of baseline - float xheight //< height of line - ) { - inT16 occ; - inT16 blob_width; //width of blob - TBOX blob_box; //bounding box - float occs[MAX_NUM_BANDS + 1]; //total occupancy - - blob_box = blob->bounding_box (); - set_bands(baseline, xheight); //setup block occ - blob_width = blob->bounding_box ().width (); - if (testing_on) { - // blob->plot(to_win,GOLDENROD,GOLDENROD); - // line_color_index(to_win,GOLDENROD); - // move2d(to_win,blob_box.left(),baseline); - // draw2d(to_win,blob_box.right(),baseline); - // move2d(to_win,blob_box.left(),baseline+xheight); - // draw2d(to_win,blob_box.right(),baseline+xheight); - tprintf - ("Testing underline on blob at (%d,%d)->(%d,%d), base=%g\nOccs:", - blob->bounding_box ().left (), blob->bounding_box ().bottom (), - blob->bounding_box ().right (), blob->bounding_box ().top (), - baseline); - } - block_occ(blob, occs); - if (testing_on) { - for (occ = 0; occ <= MAX_NUM_BANDS; occ++) - tprintf ("%g ", occs[occ]); - tprintf ("\n"); - } - if (occs[1] > occs[2] + occs[2] && occs[1] > occs[3] + occs[3] - && occs[1] > blob_width * textord_underline_threshold) - return TRUE; //real underline - if (occs[4] > occs[2] + occs[2] - && occs[4] > blob_width * textord_underline_threshold) - return TRUE; //overline - return FALSE; //neither -} - +static void horizontal_cblob_projection(C_BLOB *blob, 
// blob to project + STATS *stats); // output +static void horizontal_coutline_projection(C_OUTLINE *outline, + STATS *stats); // output /** * test_underline @@ -336,535 +170,3 @@ static void horizontal_coutline_projection( //project outlin horizontal_coutline_projection (out_it.data (), stats); } } - - -static void set_bands( //init from varibles - float baseline, //top of bottom band - float xheight //height of split band - ) { - inT16 int_bl, int_xh; //for band.set - - bands[DOT_BAND].set (0, 0, 0, 0, 0, 0); - - int_bl = (inT16) baseline; - int_xh = (inT16) xheight; - bands[1].set (int_bl, int_bl, int_bl, - NO_LOWER_LIMIT, NO_LOWER_LIMIT, NO_LOWER_LIMIT); - - bands[2].set (int_bl + int_xh / 2, int_bl + int_xh / 2, int_bl + int_xh / 2, - int_bl, int_bl, int_bl); - - bands[3].set (int_bl + int_xh, int_bl + int_xh, int_bl + int_xh, - int_bl + int_xh / 2, int_bl + int_xh / 2, - int_bl + int_xh / 2); - - bands[4].set (NO_UPPER_LIMIT, NO_UPPER_LIMIT, NO_UPPER_LIMIT, - int_bl + int_xh, int_bl + int_xh, int_bl + int_xh); -} - - -void -block_occ (PBLOB * blob, //blob to do -float occs[] //output histogram -) { - int band_index; //current band - REGION_OCC *region; //current segment - REGION_OCC_LIST region_occ_list[MAX_NUM_BANDS + 1]; - REGION_OCC_IT region_it; //region iterator - - find_transitions(blob, region_occ_list); - compress_region_list(region_occ_list); - for (band_index = 0; band_index <= MAX_NUM_BANDS; band_index++) { - occs[band_index] = 0.0f; - region_it.set_to_list (®ion_occ_list[band_index]); - for (region_it.mark_cycle_pt (); !region_it.cycled_list (); - region_it.forward ()) { - region = region_it.data (); - occs[band_index] += region->max_x - region->min_x; - } - } -} - - -void find_transitions(PBLOB *blob, //blob to do - REGION_OCC_LIST *region_occ_list) { - OUTLINE_IT outline_it; - TBOX box; - POLYPT_IT pt_it; - FCOORD point1; - FCOORD point2; - FCOORD *entry_pt = &point1; - FCOORD *exit_pt = &point2; - FCOORD *temp_pt; - inT16 increment; - inT16 
prev_band; - inT16 band; - inT16 next_band; - float min_x; - float max_x; - float min_y; - float max_y; - BOOL8 doubly_contained; - - outline_it = blob->out_list (); - for (outline_it.mark_cycle_pt (); !outline_it.cycled_list (); - outline_it.forward ()) { - find_fbox(&outline_it, &min_x, &min_y, &max_x, &max_y); - - if (bands[DOT_BAND].range_in_nominal (max_y, min_y)) { - record_region(DOT_BAND, - min_x, - max_x, - REGION_TYPE_ENCLOSED, - region_occ_list); - } - else { - band = find_containing_maximal_band (max_y, min_y, - &doubly_contained); - if (band != UNDEFINED_BAND) { - //No transitions - if (!doubly_contained) - record_region(band, - min_x, - max_x, - REGION_TYPE_ENCLOSED, - region_occ_list); - else { - // if (wordocc_debug_on && blockocc_show_result) - // { - // fprintf( db_win, - // "Ignoring doubly contained outline (%d, %d) (%d, %d)\n", - // box.left(), box.top(), - // box.right(), box.bottom()); - // fprintf( db_win, "\tContained in bands %d and %d\n", - // band, band + 1 ); - // } - } - } - else { - //There are transitns - /* - Determining a good start point for recognising transitions between bands - is complicated by error limits on bands. We need to find a line which - significantly occupies a band. - - Having found such a point, we need to find a significant transition out of - its band and start the walk around the outline from there. - - Note that we are relying on having recognised and dealt with elsewhere, - outlines which do not significantly occupy more than one region. A - particularly nasty case of this are outlines which do not significantly - occupy ANY band. I.e. they lie entirely within the error limits. - Given this condition, all remaining outlines must contain at least one - significant line. 
*/ - - pt_it = outline_it.data ()->polypts (); - - find_significant_line(pt_it, &band); - *entry_pt = pt_it.data ()->pos; - next_region(&pt_it, - band, - &next_band, - &min_x, - &max_x, - &increment, - exit_pt); - pt_it.mark_cycle_pt (); - - // Found the first real transition, so start walking the outline from here. - - do { - prev_band = band; - band = band + increment; - - while (band != next_band) { - temp_pt = entry_pt; - entry_pt = exit_pt; - exit_pt = temp_pt; - min_x = max_x = entry_pt->x (); - - find_trans_point (&pt_it, band, band + increment, - exit_pt); - maintain_limits (&min_x, &max_x, exit_pt->x ()); - - record_region (band, - min_x, - max_x, - find_region_type (prev_band, - band, - band + increment, - entry_pt->x (), - exit_pt->x ()), - region_occ_list); - prev_band = band; - band = band + increment; - } - - temp_pt = entry_pt; - entry_pt = exit_pt; - exit_pt = temp_pt; - min_x = max_x = entry_pt->x (); - next_region(&pt_it, - band, - &next_band, - &min_x, - &max_x, - &increment, - exit_pt); - - record_region (band, - min_x, - max_x, - find_region_type (prev_band, - band, - band + increment, - entry_pt->x (), - exit_pt->x ()), - region_occ_list); - } - while (!pt_it.cycled_list ()); - } - } - } -} - - -static void record_region( //add region on list - inT16 band, - float new_min, - float new_max, - inT16 region_type, - REGION_OCC_LIST *region_occ_list) { - REGION_OCC_IT it (&(region_occ_list[band])); - - // if (wordocc_debug_on && blockocc_show_result) - // fprintf( db_win, "\nBand %d, region type %d, from %f to %f", - // band, region_type, new_min, new_max ); - - if ((region_type == REGION_TYPE_UPPER_UNBOUND) || - (region_type == REGION_TYPE_LOWER_UNBOUND) || - (region_type == REGION_TYPE_EMPTY)) - return; - - if (it.empty ()) { - it.add_after_stay_put (new REGION_OCC (new_min, new_max, region_type)); - } - else { - - /* Insert in sorted order of average limit */ - - while ((new_min + new_max > it.data ()->min_x + it.data ()->max_x) && - 
(!it.at_last ())) - it.forward (); - - if ((it.at_last ()) && //at the end - (new_min + new_max > it.data ()->min_x + it.data ()->max_x)) - //new range > current - it.add_after_stay_put (new REGION_OCC (new_min, - new_max, region_type)); - else { - it.add_before_stay_put (new REGION_OCC (new_min, - new_max, region_type)); - } - } -} - - -static inT16 find_containing_maximal_band( //find range's band - float y1, - float y2, - BOOL8 *doubly_contained) { - inT16 band; - - *doubly_contained = FALSE; - - for (band = 1; band <= blockocc_band_count; band++) { - if (bands[band].range_in_maximal (y1, y2)) { - if ((band < blockocc_band_count) && - (bands[band + 1].range_in_maximal (y1, y2))) - *doubly_contained = TRUE; - return band; - } - } - return UNDEFINED_BAND; -} - - -static void find_significant_line(POLYPT_IT it, inT16 *band) { - - /* Look for a line which significantly occupies at least one band. I.e. part - of the line is in the non-margin part of the band. */ - - *band = find_overlapping_minimal_band (it.data ()->pos.y (), - it.data ()->pos.y () + - it.data ()->vec.y ()); - - while (*band == UNDEFINED_BAND) { - it.forward (); - *band = find_overlapping_minimal_band (it.data ()->pos.y (), - it.data ()->pos.y () + - it.data ()->vec.y ()); - } -} - - -static inT16 find_overlapping_minimal_band( //find range's band - float y1, - float y2) { - inT16 band; - - for (band = 1; band <= blockocc_band_count; band++) { - if (bands[band].range_overlaps_minimal (y1, y2)) - return band; - } - return UNDEFINED_BAND; -} - - -static inT16 find_region_type(inT16 entry_band, - inT16 current_band, - inT16 exit_band, - float entry_x, - float exit_x) { - if (entry_band > exit_band) - return REGION_TYPE_OPEN_RIGHT; - if (entry_band < exit_band) - return REGION_TYPE_OPEN_LEFT; - if (entry_x == exit_x) - return REGION_TYPE_EMPTY; - if (entry_band > current_band) { - if (entry_x < exit_x) - return REGION_TYPE_UPPER_BOUND; - else - return REGION_TYPE_UPPER_UNBOUND; - } - else { - if (entry_x 
> exit_x) - return REGION_TYPE_LOWER_BOUND; - else - return REGION_TYPE_LOWER_UNBOUND; - } -} - - -static void find_trans_point(POLYPT_IT *pt_it, - inT16 current_band, - inT16 next_band, - FCOORD *transition_pt) { - float x1, x2, y1, y2; // points of edge - float gradient; // m in y = mx + c - float offset; // c in y = mx + c - - if (current_band < next_band) - transition_pt->set_y (bands[current_band].max); - //going up - else - transition_pt->set_y (bands[current_band].min); - //going down - - x1 = pt_it->data ()->pos.x (); - x2 = x1 + pt_it->data ()->vec.x (); - y1 = pt_it->data ()->pos.y (); - y2 = y1 + pt_it->data ()->vec.y (); - - if (x1 == x2) - transition_pt->set_x (x1); //avoid div by 0 - else { - if (y1 == y2) //avoid div by 0 - transition_pt->set_x ((x1 + x2) / 2.0); - else { - gradient = (y1 - y2) / (float) (x1 - x2); - offset = y1 - x1 * gradient; - transition_pt->set_x ((transition_pt->y () - offset) / gradient); - } - } -} - - -static void next_region(POLYPT_IT *start_pt_it, - inT16 start_band, - inT16 *to_band, - float *min_x, - float *max_x, - inT16 *increment, - FCOORD *exit_pt) { - /* - Given an edge and a band which the edge significantly occupies, find the - significant end of the region containing the band. I.e. find an edge which - points to another band such that the outline subsequetly moves significantly - out of the starting band. - - Note that we can assume that we are significantly inside the current band to - start with because the edges passed will be from previous calls to this - routine apart from the first - the result of which is only used to establish - the start of the first region. 
- */ - - inT16 band; //band of current edge - inT16 prev_band = start_band; //band of prev edge - //edge crossing out - POLYPT_IT last_transition_out_it; - //band it pts to - inT16 last_trans_out_to_band = 0; - float ext_min_x = 0.0f; - float ext_max_x = 0.0f; - - start_pt_it->forward (); - band = find_band (start_pt_it->data ()->pos.y ()); - - while ((band == start_band) || - bands[start_band].in_maximal (start_pt_it->data ()->pos.y ())) { - if (band == start_band) { - //Return to start band - if (prev_band != start_band) { - *min_x = ext_min_x; - *max_x = ext_max_x; - } - maintain_limits (min_x, max_x, start_pt_it->data ()->pos.x ()); - } - else { - if (prev_band == start_band) { - //Exit from start band - //so remember edge - last_transition_out_it = *start_pt_it; - //before we left - last_transition_out_it.backward (); - //and band it pts to - last_trans_out_to_band = band; - ext_min_x = *min_x; - ext_max_x = *max_x; - } - maintain_limits (&ext_min_x, &ext_max_x, - start_pt_it->data ()->pos.x ()); - } - prev_band = band; - start_pt_it->forward (); - band = find_band (start_pt_it->data ()->pos.y ()); - } - - if (prev_band == start_band) { //exit from start band - *to_band = band; - //so remember edge - last_transition_out_it = *start_pt_it; - //before we left - last_transition_out_it.backward (); - } - else { - *to_band = last_trans_out_to_band; - } - - if (*to_band > start_band) - *increment = 1; - else - *increment = -1; - - find_trans_point (&last_transition_out_it, start_band, - start_band + *increment, exit_pt); - maintain_limits (min_x, max_x, exit_pt->x ()); - *start_pt_it = last_transition_out_it; -} - - -static inT16 find_band( // find POINT's band - float y) { - inT16 band; - - for (band = 1; band <= blockocc_band_count; band++) { - if (bands[band].in_nominal (y)) - return band; - } - BLOCKOCC.error ("find_band", ABORT, "Cant find band for %d", y); - return 0; -} - - -static void compress_region_list( // join open regions - REGION_OCC_LIST 
*region_occ_list) { - REGION_OCC_IT it (&(region_occ_list[0])); - REGION_OCC *open_right = NULL; - - inT16 i = 0; - - for (i = 0; i <= blockocc_band_count; i++) { - it.set_to_list (&(region_occ_list[i])); - if (!it.empty ()) { - /* First check for left right pairs. Merge them into the open right and delete - the open left. */ - open_right = NULL; - for (it.mark_cycle_pt (); !it.cycled_list (); it.forward ()) { - switch (it.data ()->region_type) { - case REGION_TYPE_OPEN_RIGHT: - { - if (open_right != NULL) - BLOCKOCC.error ("compress_region_list", ABORT, - "unmatched right"); - else - open_right = it.data (); - break; - } - case REGION_TYPE_OPEN_LEFT: - { - if (open_right == NULL) - BLOCKOCC.error ("compress_region_list", ABORT, - "unmatched left"); - else { - open_right->max_x = it.data ()->max_x; - open_right = NULL; - delete it.extract (); - } - break; - } - default: - break; - } - } - if (open_right != NULL) - BLOCKOCC.error ("compress_region_list", ABORT, - "unmatched right remaining"); - - /* Now cycle the list again, merging and deleting any redundant regions */ - it.move_to_first (); - open_right = it.data (); - while (!it.at_last ()) { - it.forward (); - if (it.data ()->min_x <= open_right->max_x) { - // Overlaps - if (it.data ()->max_x > open_right->max_x) - open_right->max_x = it.data ()->max_x; - // Extend - delete it.extract (); - } - else - open_right = it.data (); - } - } - } -} - - -static void find_fbox(OUTLINE_IT *out_it, - float *min_x, - float *min_y, - float *max_x, - float *max_y) { - POLYPT_IT pt_it = out_it->data ()->polypts (); - FCOORD pt; - *min_x = 9999.0f; - *min_y = 9999.0f; - *max_x = 0.0f; - *max_y = 0.0f; - - for (pt_it.mark_cycle_pt (); !pt_it.cycled_list (); pt_it.forward ()) { - pt = pt_it.data ()->pos; - maintain_limits (min_x, max_x, pt.x ()); - maintain_limits (min_y, max_y, pt.y ()); - } -} - - -static void maintain_limits(float *min_x, float *max_x, float x) { - if (x > *max_x) - *max_x = x; - if (x < *min_x) - *min_x = x; 
-} diff --git a/textord/blkocc.h b/textord/blkocc.h index 55325dcaec..9df07a9a4b 100644 --- a/textord/blkocc.h +++ b/textord/blkocc.h @@ -25,7 +25,7 @@ #ifndef BLKOCC_H #define BLKOCC_H -#include "varable.h" +#include "params.h" #include "polyblob.h" #include "elst.h" #include "notdll.h" diff --git a/textord/colfind.cpp b/textord/colfind.cpp index 3149e4fbe8..451d2c28fa 100644 --- a/textord/colfind.cpp +++ b/textord/colfind.cpp @@ -29,8 +29,8 @@ #include "strokewidth.h" #include "blobbox.h" #include "scrollview.h" -#include "tessvars.h" -#include "varable.h" +#include "tablefind.h" +#include "params.h" #include "workingpartset.h" // Include automatically generated configuration file if running autoconf. @@ -40,12 +40,12 @@ namespace tesseract { -// Minimum width to be considered when making columns. +// Minimum width (in pixels) to be considered when making columns. +// TODO(rays) convert to inches, dependent on resolution. const int kMinColumnWidth = 100; -// When assigning columns, the max number of misfits that can be ignored. +// When assigning columns, the max number of misfit grid rows/ColPartitionSets +// that can be ignored. const int kMaxIncompatibleColumnCount = 2; -// Max vertical distance of neighbouring ColPartition for it to be a partner. -const double kMaxPartitionSpacing = 1.75; // Min fraction of ColPartition height to be overlapping for margin purposes. const double kMarginOverlapFraction = 0.25; // Max fraction of mean_column_gap_ for the gap between two partitions within a @@ -53,16 +53,18 @@ const double kMarginOverlapFraction = 0.25; const double kHorizontalGapMergeFraction = 0.5; // Min fraction of grid size to not be considered likely noise. const double kMinNonNoiseFraction = 0.5; +// Minimum gutter width as a fraction of gridsize +const double kMinGutterWidthGrid = 0.5; // Search radius to use for finding large neighbours of smaller blobs. 
const int kSmallBlobSearchRadius = 2; -BOOL_VAR(textord_tabfind_show_strokewidths, false, "Show stroke widths"); BOOL_VAR(textord_tabfind_show_initial_partitions, false, "Show partition bounds"); INT_VAR(textord_tabfind_show_partitions, 0, "Show partition bounds, waiting if >1"); BOOL_VAR(textord_tabfind_show_columns, false, "Show column bounds"); BOOL_VAR(textord_tabfind_show_blocks, false, "Show final block bounds"); +BOOL_VAR(textord_tabfind_find_tables, false, "run table detection"); ScrollView* ColumnFinder::blocks_win_ = NULL; @@ -74,32 +76,26 @@ ScrollView* ColumnFinder::blocks_win_ = NULL; // the sum logical vertical vector produced by LineFinder::FindVerticalLines. ColumnFinder::ColumnFinder(int gridsize, const ICOORD& bleft, const ICOORD& tright, + int resolution, TabVector_LIST* vlines, TabVector_LIST* hlines, int vertical_x, int vertical_y) - : TabFind(gridsize, bleft, tright, vlines, vertical_x, vertical_y), + : TabFind(gridsize, bleft, tright, vlines, vertical_x, vertical_y, + resolution), + min_gutter_width_(static_cast(kMinGutterWidthGrid * gridsize)), mean_column_gap_(tright.x() - bleft.x()), - global_median_xheight_(0), global_median_ledding_(0), - reskew_(1.0f, 0.0f), rerotate_(1.0f, 0.0f), - best_columns_(NULL) { + reskew_(1.0f, 0.0f), rotation_(1.0f, 0.0f), rerotate_(1.0f, 0.0f), + best_columns_(NULL), stroke_width_(NULL) { TabVector_IT h_it(&horizontal_lines_); h_it.add_list_after(hlines); } -// Templated helper function used to create destructor callbacks for the -// BBGrid::ClearGridData() method. -template void DeleteObject(T *object) { - delete object; -} - ColumnFinder::~ColumnFinder() { column_sets_.delete_data_pointers(); if (best_columns_ != NULL) { delete [] best_columns_; } - // ColPartitions and ColSegments created by this class for storage in grids - // need to be deleted explicitly. 
- clean_part_grid_.ClearGridData(&DeleteObject); - col_seg_grid_.ClearGridData(&DeleteObject); + if (stroke_width_ != NULL) + delete stroke_width_; // The ColPartitions are destroyed automatically, but any boxes in // the noise_parts_ list are owned and need to be deleted explicitly. @@ -127,23 +123,108 @@ ColumnFinder::~ColumnFinder() { } } +// Tests for vertical alignment of text (returning true if so), and generates a +// list of blobs for orientation and script detection. +bool ColumnFinder::IsVerticallyAlignedText(TO_BLOCK* block, + BLOBNBOX_CLIST* osd_blobs) { + // Test page-wide writing direction. + if (stroke_width_ != NULL) + delete stroke_width_; + stroke_width_ = new StrokeWidth(gridsize(), bleft(), tright()); + min_gutter_width_ = static_cast(kMinGutterWidthGrid * gridsize()); + // TODO(rays) experiment with making broken CJK fixing dependent on the + // language, and keeping the merged blobs on output instead of exploding at + // ColPartition::MakeBlock. + bool result = stroke_width_->TestVerticalTextDirection(true, block, this, + osd_blobs); + return result; +} + +// Rotates the blobs and the TabVectors so that the gross writing direction +// (text lines) are horizontal and lines are read down the page. +// Applied rotation stored in rotation_. +// A second rotation is calculated for application during recognition to +// make the rotated blobs upright for recognition. +// Subsequent rotation stored in text_rotation_. +// +// Arguments: +// vertical_text_lines true if the text lines are vertical. +// recognition_rotation [0..3] is the number of anti-clockwise 90 degree +// rotations from osd required for the text to be upright and readable. 
+void ColumnFinder::CorrectOrientation(TO_BLOCK* block, + bool vertical_text_lines, + int recognition_rotation) { + const FCOORD anticlockwise90(0.0f, 1.0f); + const FCOORD clockwise90(0.0f, -1.0f); + const FCOORD rotation180(-1.0f, 0.0f); + const FCOORD norotation(1.0f, 0.0f); + + text_rotation_ = norotation; + // Rotate the page to make the text upright, as implied by + // recognition_rotation. + rotation_ = norotation; + if (recognition_rotation == 1) { + rotation_ = anticlockwise90; + } else if (recognition_rotation == 2) { + rotation_ = rotation180; + } else if (recognition_rotation == 3) { + rotation_ = clockwise90; + } + // We infer text writing direction to be vertical if there are several + // vertical text lines detected, and horizontal if not. But if the page + // orientation was determined to be 90 or 270 degrees, the true writing + // direction is the opposite of what we inferred. + if (recognition_rotation & 1) { + vertical_text_lines = !vertical_text_lines; + } + // If we still believe the writing direction is vertical, we use the + // convention of rotating the page ccw 90 degrees to make the text lines + // horizontal, and mark the blobs for rotation cw 90 degrees for + // classification so that the text order is correct after recognition. + if (vertical_text_lines) { + rotation_.rotate(anticlockwise90); + text_rotation_.rotate(clockwise90); + } + // Set rerotate_ to the inverse of rotation_. + rerotate_ = FCOORD(rotation_.x(), -rotation_.y()); + if (rotation_.x() != 1.0f || rotation_.y() != 0.0f) { + // Rotate all the blobs and tab vectors. + RotateBlobList(rotation_, &block->large_blobs); + RotateBlobList(rotation_, &block->blobs); + RotateBlobList(rotation_, &block->small_blobs); + RotateBlobList(rotation_, &block->noise_blobs); + TabFind::ResetForVerticalText(rotation_, rerotate_, &horizontal_lines_, + &min_gutter_width_); + // Re-mark all the blobs with the correct orientation. 
+ stroke_width_->CorrectForRotation(rotation_, block, this); + } + if (textord_debug_tabfind) { + tprintf("Vertical=%d, orientation=%d, final rotation=(%f, %f)+(%f,%f)\n", + vertical_text_lines, recognition_rotation, + rotation_.x(), rotation_.y(), + text_rotation_.x(), text_rotation_.y()); + } +} + // Finds the text and image blocks, returning them in the blocks and to_blocks // lists. (Each TO_BLOCK points to the basic BLOCK and adds more information.) // If boxa and pixa are not NULL, they are assumed to be the output of // ImageFinder::FindImages, and are used to generate image blocks. // The input boxa and pixa are destroyed. -// Imageheight and resolution should be the pixel height and resolution in -// pixels per inch of the original image. +// Imageheight should be the pixel height of the original image. // The input block is the result of a call to find_components, and contains // the blobs found in the image. These blobs will be removed and placed // in the output blocks, while unused ones will be deleted. // If single_column is true, the input is treated as single column, but // it is still divided into blocks of equal line spacing/text size. // Returns -1 if the user requested retry with more debug info. -int ColumnFinder::FindBlocks(int imageheight, int resolution, - bool single_column, TO_BLOCK* block, +int ColumnFinder::FindBlocks(bool single_column, int imageheight, + TO_BLOCK* block, Boxa* boxa, Pixa* pixa, BLOCK_LIST* blocks, TO_BLOCK_LIST* to_blocks) { + stroke_width_->FindLeaderPartitions(block, this); + delete stroke_width_; + stroke_width_ = NULL; #ifdef HAVE_LIBLEPT if (boxa != NULL) { // Convert the boxa/pixa to fake blobs aligned on the grid. @@ -155,21 +236,20 @@ int ColumnFinder::FindBlocks(int imageheight, int resolution, // Decide which large blobs should be included in the grid as potential // characters. // A subsidiary grid used to decide which large blobs to use. 
+ // NOTE: This seemingly anomalous use of StrokeWidth is the old API, and + // will go away entirely with the upcoming change to ImageFinder. StrokeWidth* stroke_width = new StrokeWidth(gridsize(), bleft(), tright()); - stroke_width->InsertBlobs(block, this); - if (textord_tabfind_show_strokewidths) { - stroke_width->DisplayGoodBlobs("GoodStrokewidths", NULL); - } - stroke_width->MoveGoodLargeBlobs(resolution, block); + stroke_width->InsertBlobsOld(block, this); + stroke_width->MoveGoodLargeBlobs(resolution_, block); delete stroke_width; if (single_column) { // No tab stops needed. Just the grid that FindTabVectors makes. - DontFindTabVectors(resolution, &image_bblobs_, block, &reskew_); + DontFindTabVectors(&image_bblobs_, block, &deskew_, &reskew_); } else { // Find the tab stops. - FindTabVectors(resolution, &horizontal_lines_, &image_bblobs_, block, - &reskew_, &rerotate_); + FindTabVectors(&horizontal_lines_, &image_bblobs_, block, + min_gutter_width_, &deskew_, &reskew_); } // Find the columns. @@ -214,14 +294,15 @@ int ColumnFinder::FindBlocks(int imageheight, int resolution, GridSplitPartitions(); // Resolve unknown partitions by adding to an existing partition, fixing // the type, or declaring them noise. - GridFindMargins(); - ListFindMargins(&unknown_parts_); + part_grid_.GridFindMargins(best_columns_); + part_grid_.ListFindMargins(best_columns_, &unknown_parts_); GridInsertUnknowns(); GridMergePartitions(); // Add horizontal line separators as partitions. GridInsertHLinePartitions(); + GridInsertVLinePartitions(); // Recompute margins based on a local neighbourhood search. 
- GridFindMargins(); + part_grid_.GridFindMargins(best_columns_); SetPartitionTypes(); if (textord_tabfind_show_initial_partitions) { ScrollView* part_win = MakeWindow(100, 300, "InitialPartitions"); @@ -229,17 +310,23 @@ int ColumnFinder::FindBlocks(int imageheight, int resolution, DisplayTabVectors(part_win); } - // Copy cleaned partitions from part_grid_ to clean_part_grid_ and - // insert dot-like noise into period_grid_ - GetCleanPartitions(block); - - // Get Table Regions - LocateTables(); + if (textord_tabfind_find_tables) { + TableFinder table_finder; + table_finder.Init(gridsize(), bleft(), tright()); + table_finder.set_resolution(resolution_); + table_finder.set_left_to_right_language(!block->block->right_to_left()); + // Copy cleaned partitions from part_grid_ to clean_part_grid_ and + // insert dot-like noise into period_grid_ + table_finder.InsertCleanPartitions(&part_grid_, block); + // Get Table Regions + table_finder.LocateTables(&part_grid_, best_columns_, WidthCB(), reskew_); + } // Build the partitions into chains that belong in the same block and // refine into one-to-one links, then smooth the types within each chain. - FindPartitionPartners(); - RefinePartitionPartners(); + part_grid_.FindPartitionPartners(); + part_grid_.FindFigureCaptions(); + part_grid_.RefinePartitionPartners(true); SmoothPartnerRuns(); if (textord_tabfind_show_partitions) { ScrollView* window = MakeWindow(400, 300, "Partitions"); @@ -263,6 +350,13 @@ int ColumnFinder::FindBlocks(int imageheight, int resolution, tprintf("Found %d blocks, %d to_blocks\n", blocks->length(), to_blocks->length()); } + // Copy the right_to_left flag from the source block to the created blocks. + // TODO(rays) fix block ordering if the input block is right_to_left. 
+ BLOCK_IT blk_it(blocks); + for (blk_it.mark_cycle_pt(); !blk_it.cycled_list(); blk_it.forward()) { + BLOCK* new_block = blk_it.data(); + new_block->set_right_to_left(block->block->right_to_left()); + } DisplayBlocks(blocks); // MoveSmallBlobs(&block->small_blobs, to_blocks); @@ -291,6 +385,13 @@ int ColumnFinder::FindBlocks(int imageheight, int resolution, return result; } +// Get the rotation required to deskew, and its inverse rotation. +void ColumnFinder::GetDeskewVectors(FCOORD* deskew, FCOORD* reskew) { + *reskew = reskew_; + *deskew = reskew_; + deskew->set_y(-deskew->y()); +} + //////////////// PRIVATE CODE ///////////////////////// // Displays the blob and block bounding boxes in a window called Blocks. @@ -343,7 +444,6 @@ void ColumnFinder::DisplayColumnBounds(PartSetVector* sets) { // The output is a list of C_BLOBs for the images, but the C_OUTLINEs // contain no data. void ColumnFinder::ExtractImageBlobs(int image_height, Boxa* boxa, Pixa* pixa) { -#ifdef HAVE_LIBLEPT BLOBNBOX_IT bb_it(&image_bblobs_); // Iterate the connected components in the image regions mask. int nboxes = boxaGetCount(boxa); @@ -393,7 +493,6 @@ void ColumnFinder::ExtractImageBlobs(int image_height, Boxa* boxa, Pixa* pixa) { } pixDestroy(&pix); } -#endif // HAVE_LIBLEPT } ////// Functions involved in making the initial ColPartitions. 
///// @@ -564,7 +663,7 @@ void ColumnFinder::InsertSmallBlobsAsUnknowns(bool filter, for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) { BLOBNBOX* blob = blob_it.data(); TBOX box = blob->bounding_box(); - bool good_blob = !filter || + bool good_blob = !filter || blob->flow() == BTFT_LEADER || box.width() > noise_blob_size || box.height() > noise_blob_size; if (!good_blob) { @@ -587,7 +686,6 @@ void ColumnFinder::InsertSmallBlobsAsUnknowns(bool filter, } if (good_blob) { blob_it.extract(); - blob->set_noise_flag(true); InsertBlob(true, true, false, blob, this); if (WithinTestRegion(2, box.left(), box.bottom())) tprintf("Starting small partition with box (%d,%d)->(%d,%d)\n", @@ -596,7 +694,11 @@ void ColumnFinder::InsertSmallBlobsAsUnknowns(bool filter, int unk_right_margin = tright().x(); TabVector* unk_right_line = NULL; bool unk_edge_is_left = false; - ColPartition* unk_partition = StartPartition(BRT_TEXT, bleft().x(), blob, + BlobRegionType start_type = blob->region_type(); + if (!BLOBNBOX::IsLineType(start_type)) + start_type = BRT_TEXT; + ColPartition* unk_partition = StartPartition(start_type, bleft().x(), + blob, &unk_right_line, &unk_right_margin, &unk_edge_is_left); @@ -824,6 +926,8 @@ void ColumnFinder::PrintColumnCandidates(const char* title) { // greedy algorithm is used instead. The algorithm repeatedly finds the modal // compatible column in an unassigned region and uses that with the extra // tweak of extending the modal region over small breaks in compatibility. +// Where modal regions overlap, the boundary is chosen so as to minimize +// the cost in terms of ColPartitions not fitting an approved column. 
void ColumnFinder::AssignColumns() { int set_count = part_sets_.size(); ASSERT_HOST(set_count == gridheight()); @@ -832,17 +936,18 @@ void ColumnFinder::AssignColumns() { for (int y = 0; y < set_count; ++y) best_columns_[y] = NULL; int column_count = column_sets_.size(); - // possible_column_sets[part_sets_ index][column_sets_ index] is - // true if the partition set is compatible with the column set. - // assigned_column_sets[part_sets_ index][column_sets_ index] is true - // if the partition set has been assigned the column. (Multiple bits - // true is possible.) + // column_set_costs[part_sets_ index][column_sets_ index] is + // < MAX_INT32 if the partition set is compatible with the column set, + // in which case its value is the cost for that set used in deciding + // which competing set to assign. // any_columns_possible[part_sets_ index] is true if any of - // possible_column_sets[part_sets_ index][*] is true. + column_set_costs[part_sets_ index][*] is < MAX_INT32. + // assigned_costs[part_sets_ index] is set to the column_set_costs + // of the assigned column_sets_ index or MAX_INT32 if none is set. // On return the best_columns_ member is set. bool* any_columns_possible = new bool[set_count]; - bool** possible_column_sets = new bool*[set_count]; - bool** assigned_column_sets = new bool*[set_count]; + int* assigned_costs = new int[set_count]; + int** column_set_costs = new int*[set_count]; // Set possible column_sets to indicate whether each set is compatible // with each column.
for (int part_i = 0; part_i < set_count; ++part_i) { @@ -850,18 +955,21 @@ void ColumnFinder::AssignColumns() { bool debug = line_set != NULL && WithinTestRegion(2, line_set->bounding_box().left(), line_set->bounding_box().bottom()); - possible_column_sets[part_i] = new bool[column_count]; - assigned_column_sets[part_i] = new bool[column_count]; + column_set_costs[part_i] = new int[column_count]; any_columns_possible[part_i] = false; + assigned_costs[part_i] = MAX_INT32; for (int col_i = 0; col_i < column_count; ++col_i) { - assigned_column_sets[part_i][col_i] = false; if (line_set != NULL && column_sets_.get(col_i)->CompatibleColumns(debug, line_set, WidthCB())) { - possible_column_sets[part_i][col_i] = true; + column_set_costs[part_i][col_i] = + column_sets_.get(col_i)->UnmatchedWidth(line_set); any_columns_possible[part_i] = true; } else { - possible_column_sets[part_i][col_i] = false; + column_set_costs[part_i][col_i] = MAX_INT32; + if (debug) + tprintf("Set id %d did not match at y=%d, lineset =%p\n", + col_i, part_i, line_set); } } } @@ -872,46 +980,48 @@ void ColumnFinder::AssignColumns() { if (textord_debug_tabfind >= 2) tprintf("Biggest unassigned range = %d- %d\n", start, end); // Find the modal column_set_id in the range. - int column_set_id = RangeModalColumnSet(possible_column_sets, start, end); + int column_set_id = RangeModalColumnSet(column_set_costs, + assigned_costs, start, end); if (textord_debug_tabfind >= 2) { tprintf("Range modal column id = %d\n", column_set_id); column_sets_.get(column_set_id)->Print(); } // Now find the longest run of the column_set_id in the range. 
- ShrinkRangeToLongestRun(possible_column_sets, any_columns_possible, + ShrinkRangeToLongestRun(column_set_costs, assigned_costs, + any_columns_possible, column_set_id, &start, &end); if (textord_debug_tabfind >= 2) tprintf("Shrunk range = %d- %d\n", start, end); // Extend the start and end past the longest run, while there are // only small gaps in compatibility that can be overcome by larger // regions of compatibility beyond. - ExtendRangePastSmallGaps(possible_column_sets, any_columns_possible, + ExtendRangePastSmallGaps(column_set_costs, assigned_costs, + any_columns_possible, column_set_id, -1, -1, &start); --end; - ExtendRangePastSmallGaps(possible_column_sets, any_columns_possible, + ExtendRangePastSmallGaps(column_set_costs, assigned_costs, + any_columns_possible, column_set_id, 1, set_count, &end); ++end; if (textord_debug_tabfind) tprintf("Column id %d applies to range = %d - %d\n", column_set_id, start, end); // Assign the column to the range, which now may overlap with other ranges. - AssignColumnToRange(column_set_id, start, end, - assigned_column_sets); + AssignColumnToRange(column_set_id, start, end, column_set_costs, + assigned_costs); } // If anything remains unassigned, the whole lot is unassigned, so // arbitrarily assign id 0. if (best_columns_[0] == NULL) { - AssignColumnToRange(0, 0, gridheight_, assigned_column_sets); + AssignColumnToRange(0, 0, gridheight_, column_set_costs, assigned_costs); } // Free memory. for (int i = 0; i < set_count; ++i) { - delete [] possible_column_sets[i]; - delete [] assigned_column_sets[i]; + delete [] column_set_costs[i]; } + delete [] assigned_costs; delete [] any_columns_possible; - delete [] possible_column_sets; - delete [] assigned_column_sets; - // TODO(rays) Now resolve overlapping assignments. 
+ delete [] column_set_costs; } // Finds the biggest range in part_sets_ that has no assigned column, but @@ -950,13 +1060,14 @@ bool ColumnFinder::BiggestUnassignedRange(const bool* any_columns_possible, } // Finds the modal compatible column_set_ index within the given range. -int ColumnFinder::RangeModalColumnSet(bool** possible_column_sets, +int ColumnFinder::RangeModalColumnSet(int** column_set_costs, + const int* assigned_costs, int start, int end) { int column_count = column_sets_.size(); STATS column_stats(0, column_count); for (int part_i = start; part_i < end; ++part_i) { for (int col_j = 0; col_j < column_count; ++col_j) { - if (possible_column_sets[part_i][col_j]) + if (column_set_costs[part_i][col_j] < assigned_costs[part_i]) column_stats.add(col_j, 1); } } @@ -968,7 +1079,8 @@ int ColumnFinder::RangeModalColumnSet(bool** possible_column_sets, // shrinks the range to the longest contiguous run of compatibility, allowing // gaps where no columns are possible, but not where competing columns are // possible. -void ColumnFinder::ShrinkRangeToLongestRun(bool** possible_column_sets, +void ColumnFinder::ShrinkRangeToLongestRun(int** column_set_costs, + const int* assigned_costs, const bool* any_columns_possible, int column_set_id, int* best_start, int* best_end) { @@ -982,7 +1094,7 @@ void ColumnFinder::ShrinkRangeToLongestRun(bool** possible_column_sets, for (int start = orig_start; start < orig_end; start = end) { // Find the first possible while (start < orig_end) { - if (possible_column_sets[start][column_set_id] || + if (column_set_costs[start][column_set_id] < assigned_costs[start] || !any_columns_possible[start]) break; ++start; @@ -990,7 +1102,7 @@ void ColumnFinder::ShrinkRangeToLongestRun(bool** possible_column_sets, // Find the first past the end. 
end = start + 1; while (end < orig_end) { - if (!possible_column_sets[end][column_set_id] && + if (column_set_costs[end][column_set_id] >= assigned_costs[start] && any_columns_possible[end]) break; ++end; @@ -1006,7 +1118,8 @@ void ColumnFinder::ShrinkRangeToLongestRun(bool** possible_column_sets, // Moves start in the direction of step, upto, but not including end while // the only incompatible regions are no more than kMaxIncompatibleColumnCount // in size, and the compatible regions beyond are bigger. -void ColumnFinder::ExtendRangePastSmallGaps(bool** possible_column_sets, +void ColumnFinder::ExtendRangePastSmallGaps(int** column_set_costs, + const int* assigned_costs, const bool* any_columns_possible, int column_set_id, int step, int end, int* start) { @@ -1023,7 +1136,7 @@ void ColumnFinder::ExtendRangePastSmallGaps(bool** possible_column_sets, barrier_size = 0; int i; for (i = *start + step; i != end; i += step) { - if (possible_column_sets[i][column_set_id]) + if (column_set_costs[i][column_set_id] < assigned_costs[i]) break; // We are back on. // Locations where none are possible don't count. if (any_columns_possible[i]) @@ -1041,7 +1154,7 @@ void ColumnFinder::ExtendRangePastSmallGaps(bool** possible_column_sets, // Now find the size of the good region on the other side. good_size = 1; for (i += step; i != end; i += step) { - if (possible_column_sets[i][column_set_id]) + if (column_set_costs[i][column_set_id] < assigned_costs[i]) ++good_size; else if (any_columns_possible[i]) break; @@ -1049,17 +1162,18 @@ void ColumnFinder::ExtendRangePastSmallGaps(bool** possible_column_sets, if (textord_debug_tabfind > 2) tprintf("At %d, good size = %d\n", i, good_size); // If we had enough good ones we can extend the start and keep looking. - if (good_size > barrier_size) + if (good_size >= barrier_size) *start = i - step; - } while (good_size > barrier_size); + } while (good_size >= barrier_size); } // Assigns the given column_set_id to the given range. 
void ColumnFinder::AssignColumnToRange(int column_set_id, int start, int end, - bool** assigned_column_sets) { + int** column_set_costs, + int* assigned_costs) { ColPartitionSet* column_set = column_sets_.get(column_set_id); for (int i = start; i < end; ++i) { - assigned_column_sets[i][column_set_id] = true; + assigned_costs[i] = column_set_costs[i][column_set_id]; best_columns_[i] = column_set; } } @@ -1136,7 +1250,7 @@ void ColumnFinder::GridSplitPartitions() { int first_col = -1; int last_col = -1; // Find which columns the partition spans. - part->ColumnRange(column_set, &first_col, &last_col); + part->ColumnRange(resolution_, column_set, &first_col, &last_col); if (first_col > 0) --first_col; // Convert output column indices to physical column indices. @@ -1342,7 +1456,7 @@ void ColumnFinder::GridInsertUnknowns() { int first_col = -1; int last_col = -1; // Find which columns the partition spans. - part->ColumnRange(columns, &first_col, &last_col); + part->ColumnRange(resolution_, columns, &first_col, &last_col); // Convert output column indices to physical column indices. // Twiddle with first and last_col to get the desired effect with // in-between columns: @@ -1468,107 +1582,71 @@ void ColumnFinder::GridInsertHLinePartitions() { TabVector* hline = hline_it.data(); int top = MAX(hline->startpt().y(), hline->endpt().y()); int bottom = MIN(hline->startpt().y(), hline->endpt().y()); + top += hline->mean_width(); if (top == bottom) { if (bottom > 0) --bottom; else ++top; } - ColPartition* part = new ColPartition(vertical_skew_, - hline->startpt().x(), bottom, - hline->endpt().x(), top); - part_grid_.InsertBBox(true, true, part); - } -} - -// Improves the margins of the ColPartitions in the grid by calling -// FindPartitionMargins on each. -void ColumnFinder::GridFindMargins() { - // Iterate the ColPartitions in the grid. 
- GridSearch - gsearch(&part_grid_); - gsearch.StartFullSearch(); - ColPartition* part; - while ((part = gsearch.NextFullSearch()) != NULL) { - // Set up a rectangle search x-bounded by the column and y by the part. - ColPartitionSet* columns = best_columns_[gsearch.GridY()]; - FindPartitionMargins(columns, part); - } -} - -// Improves the margins of the ColPartitions in the list by calling -// FindPartitionMargins on each. -void ColumnFinder::ListFindMargins(ColPartition_LIST* parts) { - ColPartition_IT part_it(parts); - for (part_it.mark_cycle_pt(); !part_it.cycled_list(); part_it.forward()) { - ColPartition* part = part_it.data(); - TBOX part_box = part->bounding_box(); - // Get the columns from the y grid coord. - int grid_x, grid_y; - GridCoords(part_box.left(), part_box.bottom(), &grid_x, &grid_y); - ColPartitionSet* columns = best_columns_[grid_y]; - FindPartitionMargins(columns, part); + ColPartition* part = ColPartition::MakeLinePartition( + BRT_HLINE, vertical_skew_, + hline->startpt().x(), bottom, hline->endpt().x(), top); + part->set_type(PT_HORZ_LINE); + bool any_image = false; + ColPartitionGridSearch part_search(&part_grid_); + part_search.SetUniqueMode(true); + part_search.StartRectSearch(part->bounding_box()); + ColPartition* covered; + while ((covered = part_search.NextRectSearch()) != NULL) { + if (covered->IsImageType()) { + any_image = true; + break; + } + } + if (!any_image) + part_grid_.InsertBBox(true, true, part); + else + delete part; } } -// Improves the margins of the ColPartition by searching for -// neighbours that vertically overlap significantly. -void ColumnFinder::FindPartitionMargins(ColPartitionSet* columns, - ColPartition* part) { - // Set up a rectangle search x-bounded by the column and y by the part. - ASSERT_HOST(columns != NULL); - TBOX box = part->bounding_box(); - int y = part->MidY(); - // Initial left margin is based on the column, if there is one. 
- ColPartition* column = columns->ColumnContaining(box.left(), y); - int left_margin = column != NULL ? column->LeftAtY(y) : bleft_.x(); - left_margin -= kColumnWidthFactor; - // Search for ColPartitions that reduce the margin. - left_margin = FindMargin(box.left()+ box.height(), true, left_margin, - box.bottom(), box.top(), part); - part->set_left_margin(left_margin); - column = columns->ColumnContaining(box.right(), y); - int right_margin = column != NULL ? column->RightAtY(y) : tright_.x(); - right_margin += kColumnWidthFactor; - // Search for ColPartitions that reduce the margin. - right_margin = FindMargin(box.right() - box.height(), false, right_margin, - box.bottom(), box.top(), part); - part->set_right_margin(right_margin); -} - -// Starting at x, and going in the specified direction, upto x_limit, finds -// the margin for the given y range by searching sideways, -// and ignoring not_this. -int ColumnFinder::FindMargin(int x, bool right_to_left, int x_limit, - int y_bottom, int y_top, - const ColPartition* not_this) { - int height = y_top - y_bottom; - int target_overlap = static_cast(height * kMarginOverlapFraction); - // Iterate the ColPartitions in the grid. - GridSearch - side_search(&part_grid_); - side_search.StartSideSearch(x, y_bottom, y_top); - ColPartition* part; - while ((part = side_search.NextSideSearch(right_to_left)) != NULL) { - // Ignore itself. - if (part == not_this) - continue; - // Must overlap by enough. - TBOX box = part->bounding_box(); - int y_overlap = MIN(y_top, box.top()) - MAX(y_bottom, box.bottom()); - if (y_overlap < target_overlap) - continue; - // Must be going the right way. - int x_edge = right_to_left ? box.right() : box.left(); - if ((x_edge < x) != right_to_left) +// Add vertical line separators as partitions.
+void ColumnFinder::GridInsertVLinePartitions() { + TabVector_IT vline_it(dead_vectors()); + for (vline_it.mark_cycle_pt(); !vline_it.cycled_list(); vline_it.forward()) { + TabVector* vline = vline_it.data(); + if (!vline->IsSeparator()) continue; - // If we have gone past x_limit, then x_limit will do. - if ((x_edge < x_limit) == right_to_left) - break; - // It reduces x limit, so save the new one. - x_limit = x_edge; + int left = MIN(vline->startpt().x(), vline->endpt().x()); + int right = MAX(vline->startpt().x(), vline->endpt().x()); + right += vline->mean_width(); + if (left == right) { + if (left > 0) + --left; + else + ++right; + } + ColPartition* part = ColPartition::MakeLinePartition( + BRT_VLINE, vertical_skew_, + left, vline->startpt().y(), right, vline->endpt().y()); + part->set_type(PT_VERT_LINE); + bool any_image = false; + ColPartitionGridSearch part_search(&part_grid_); + part_search.SetUniqueMode(true); + part_search.StartRectSearch(part->bounding_box()); + ColPartition* covered; + while ((covered = part_search.NextRectSearch()) != NULL) { + if (covered->IsImageType()) { + any_image = true; + break; + } + } + if (!any_image) + part_grid_.InsertBBox(true, true, part); + else + delete part; } - return x_limit; } // For every ColPartition in the grid, sets its type based on position @@ -1579,85 +1657,7 @@ void ColumnFinder::SetPartitionTypes() { gsearch.StartFullSearch(); ColPartition* part; while ((part = gsearch.NextFullSearch()) != NULL) { - part->SetPartitionType(best_columns_[gsearch.GridY()]); - } -} - -//////// Functions that manipulate ColPartitions in the part_grid_ ///// -//////// to find chains of partner partitions of the same type. /////// - -// For every ColPartition in the grid, finds its upper and lower neighbours. 
-void ColumnFinder::FindPartitionPartners() { - GridSearch - gsearch(&part_grid_); - gsearch.StartFullSearch(); - ColPartition* part; - while ((part = gsearch.NextFullSearch()) != NULL) { - FindPartitionPartners(true, part); - FindPartitionPartners(false, part); - } -} - -// Finds the best partner in the given direction for the given partition. -// Stores the result with AddPartner. -void ColumnFinder::FindPartitionPartners(bool upper, ColPartition* part) { - if (part->type() == PT_NOISE) - return; // Noise is not allowed to partner anything. - const TBOX& box = part->bounding_box(); - int top = part->median_top(); - int bottom = part->median_bottom(); - int height = top - bottom; - int mid_y = (bottom + top) / 2; - GridSearch - vsearch(&part_grid_); - // Search down for neighbour below - vsearch.StartVerticalSearch(box.left(), box.right(), part->MidY()); - ColPartition* neighbour; - ColPartition* best_neighbour = NULL; - int best_dist = MAX_INT32; - while ((neighbour = vsearch.NextVerticalSearch(!upper)) != NULL) { - if (neighbour == part || neighbour->type() == PT_NOISE) - continue; // Noise is not allowed to partner anything. - int neighbour_bottom = neighbour->median_bottom(); - int neighbour_top = neighbour->median_top(); - int neighbour_y = (neighbour_bottom + neighbour_top) / 2; - if (upper != (neighbour_y > mid_y)) - continue; - if (!part->HOverlaps(*neighbour) && !part->HCompatible(*neighbour)) - continue; - if (!part->TypesMatch(*neighbour)) { - if (best_neighbour == NULL) - best_neighbour = neighbour; - continue; - } - int dist = upper ? neighbour_bottom - top : bottom - neighbour_top; - if (dist <= kMaxPartitionSpacing * height) { - if (dist < best_dist) { - best_dist = dist; - best_neighbour = neighbour; - } - } else { - break; - } - } - if (best_neighbour != NULL) - part->AddPartner(upper, best_neighbour); -} - -// For every ColPartition with multiple partners in the grid, reduces the -// number of partners to 0 or 1. 
-void ColumnFinder::RefinePartitionPartners() { - // Refine in type order so that chasing multple partners can be done - // before eliminating type mis-matching partners. - for (int type = PT_UNKNOWN + 1; type <= PT_COUNT; type++) { - // Iterate the ColPartitions in the grid. - GridSearch - gsearch(&part_grid_); - gsearch.StartFullSearch(); - ColPartition* part; - while ((part = gsearch.NextFullSearch()) != NULL) { - part->RefinePartners(static_cast(type)); - } + part->SetPartitionType(resolution_, best_columns_[gsearch.GridY()]); } } @@ -1768,46 +1768,72 @@ void ColumnFinder::TransformToBlocks(BLOCK_LIST* blocks, } } -// Reskew the completed blocks to put them back to the original coords. -// (Blob outlines are not corrected for skew.) -// Rotate blobs and blocks individually so text line direction is -// horizontal. Record appropriate inverse transformations and required +// Undo the deskew that was done in FindTabVectors, as recognition is done +// without correcting blobs or blob outlines for skew. +// Reskew the completed blocks to put them back to the original rotated coords +// that were created by CorrectOrientation. +// Blocks that were identified as vertical text (relative to the rotated +// coordinates) are further rotated so the text lines are horizontal. +// blob polygonal outlines are rotated to match the position of the blocks +// that they are in, and their bounding boxes are recalculated to be accurate. +// Record appropriate inverse transformations and required // classifier transformation in the blocks. 
void ColumnFinder::RotateAndReskewBlocks(TO_BLOCK_LIST* blocks) { + int text_blocks = 0; + int image_blocks = 0; + int other_blocks = 0; TO_BLOCK_IT it(blocks); int block_index = 1; for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { TO_BLOCK* to_block = it.data(); BLOCK* block = to_block->block; - block->set_index(block_index++); + if (block->poly_block()->IsText()) + ++text_blocks; + else if (PTIsImageType(block->poly_block()->isA())) + ++image_blocks; + else + ++other_blocks; BLOBNBOX_IT blob_it(&to_block->blobs); - // ColPartition::MakeBlock stored the inverse rotation that must be - // applied to small vertical blocks to go back to the original image - // coords at the end of recognition, but did not actually do any rotations, - // so now blocks must actually be rotated to make them horizontal by the - // inverse of that stored inverse rotation. This is of course a no-op - // for normal blocks. - FCOORD block_rotation = block->re_rotation(); + // The text_rotation_ tells us the gross page text rotation that needs + // to be applied for classification + // TODO(rays) find block-level classify rotation by orientation detection. + // In the mean time, assume that "up" for text printed in the minority + // direction (PT_VERTICAL_TEXT) is perpendicular to the line of reading. + // Accomplish this by zero-ing out the text rotation. This covers the + // common cases of image credits in documents written in Latin scripts + // and page headings for predominantly vertically written CJK books. + FCOORD classify_rotation(text_rotation_); + FCOORD block_rotation(1.0f, 0.0f); + if (block->poly_block()->isA() == PT_VERTICAL_TEXT) { + // Vertical text needs to be 90 degrees rotated relative to the rest. + // If the rest has a 90 degree rotation already, use the inverse, making + // the vertical text the original way up. Otherwise use 90 degrees + // clockwise. 
+ if (rerotate_.x() == 0.0f) + block_rotation = rerotate_; + else + block_rotation = FCOORD(0.0f, -1.0f); + block->rotate(block_rotation); + classify_rotation = FCOORD(1.0f, 0.0f); + } + block_rotation.rotate(rotation_); + // block_rotation is now what we have done to the blocks. Now do the same + // thing to the blobs, but save the inverse rotation in the block. + FCOORD blob_rotation(block_rotation); block_rotation.set_y(-block_rotation.y()); - block->poly_block()->rotate(block_rotation); - // The final stored inverse coordinate rotation (block->re_rotation_) - // is the sum of rerotate_ (for gross vertical pages) and the current - // block->re_rotation_ (for small vertical text regions). - // We will execute the inverse of that on all the blobs. - FCOORD blob_rotation = block->re_rotation(); - blob_rotation.rotate(rerotate_); - block->set_re_rotation(blob_rotation); - blob_rotation.set_y(-blob_rotation.y()); - // TODO(rays) determine classify rotation by orientation detection. - // In the mean time, it works for Chinese and English photo credits - // to set a classify rotation to the stored block rerotation only if - // the block rotation to do (before skew) is 0. - if (block_rotation.y() == 0.0f) { - block->set_classify_rotation(block->re_rotation()); + block->set_re_rotation(block_rotation); + block->set_classify_rotation(classify_rotation); + if (textord_debug_tabfind) { + tprintf("Blk %d, type %d rerotation(%.2f, %.2f), char(%.2f,%.2f), box:", + block_index, block->poly_block()->isA(), + block->re_rotation().x(), block->re_rotation().y(), + classify_rotation.x(), classify_rotation.y()); } - // Blocks must also be rotated back by the skew angle. + block->set_index(block_index++); + // Blocks are created on the deskewed blob outlines in TransformToBlocks() + // so we need to reskew them back to page coordinates. block->rotate(reskew_); - // Save the skew in the block. + // Save the skew angle in the block for baseline computations. 
block->set_skew(reskew_); // Rotate all the blobs if needed and recompute the bounding boxes. // Compute the block median blob width and height as we go. @@ -1815,7 +1841,7 @@ void ColumnFinder::RotateAndReskewBlocks(TO_BLOCK_LIST* blocks) { STATS heights(0, block->bounding_box().height()); for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) { BLOBNBOX* blob = blob_it.data(); - if (blob_rotation.y() != 0.0f) { + if (blob_rotation.x() != 1.0f || blob_rotation.y() != 0.0f) { blob->cblob()->rotate(blob_rotation); } blob->compute_bounding_box(); diff --git a/textord/colfind.h b/textord/colfind.h index 1129cc7a39..16d7d60d13 100644 --- a/textord/colfind.h +++ b/textord/colfind.h @@ -21,9 +21,8 @@ #define TESSERACT_TEXTORD_COLFIND_H__ #include "tabfind.h" -#include "tablefind.h" #include "imagefind.h" -#include "colpartition.h" +#include "colpartitiongrid.h" #include "colpartitionset.h" #include "ocrblock.h" @@ -36,6 +35,8 @@ struct Pixa; namespace tesseract { +extern BOOL_VAR_H(textord_tabfind_find_tables, false, "run table detection"); + class StrokeWidth; class LineSpacing; class TempColumn_LIST; @@ -54,27 +55,66 @@ class ColumnFinder : public TabFind { // vlines is a (possibly empty) list of TabVector and vertical_x and y are // the sum logical vertical vector produced by LineFinder::FindVerticalLines. ColumnFinder(int gridsize, const ICOORD& bleft, const ICOORD& tright, - TabVector_LIST* vlines, TabVector_LIST* hlines, + int resolution, TabVector_LIST* vlines, TabVector_LIST* hlines, int vertical_x, int vertical_y); virtual ~ColumnFinder(); + // ====================================================================== + // The main function of ColumnFinder is broken into pieces to facilitate + // optional insertion of orientation and script detection in an efficient + // way. The calling sequence IS MANDATORY however, whether or not + // OSD is being used: + // 1. Construction. + // 2. IsVerticallyAlignedText. + // 3. CorrectOrientation. + // 4. 
FindBlocks. + // 5. Destruction. Use of a single column finder for multiple images does not + // make sense. + // TODO(rays) break up column finder further into smaller classes, as + // there is a lot more to it than column finding now. + // ====================================================================== + + // Tests for vertical alignment of text (returning true if so), and + // generates a list of blobs for orientation and script detection. Note that + // the vertical alignment may be due to text whose writing direction is + // vertical, like say Japanese, or due to text whose writing direction is + // horizontal but whose text appears vertically aligned because the image is + // not the right way up. + bool IsVerticallyAlignedText(TO_BLOCK* block, BLOBNBOX_CLIST* osd_blobs); + + // Rotates the blobs and the TabVectors so that the gross writing direction + // (text lines) are horizontal and lines are read down the page. + // Applied rotation stored in rotation_. + // A second rotation is calculated for application during recognition to + // make the rotated blobs upright for recognition. + // Subsequent rotation stored in text_rotation_. + // + // Arguments: + // vertical_text_lines is true if the text lines are vertical. + // recognition_rotation [0..3] is the number of anti-clockwise 90 degree + // rotations from osd required for the text to be upright and readable. + void CorrectOrientation(TO_BLOCK* block, bool vertical_text_lines, + int recognition_rotation); + // Finds the text and image blocks, returning them in the blocks and to_blocks // lists. (Each TO_BLOCK points to the basic BLOCK and adds more information.) // If boxa and pixa are not NULL, they are assumed to be the output of // ImageFinder::FindImages, and are used to generate image blocks. // The input boxa and pixa are destroyed. - // Imageheight and resolution should be the pixel height and resolution in - // pixels per inch of the original image. 
+ // Imageheight should be the pixel height of the original image. // The input block is the result of a call to find_components, and contains // the blobs found in the image. These blobs will be removed and placed // in the output blocks, while unused ones will be deleted. // If single_column is true, the input is treated as single column, but // it is still divided into blocks of equal line spacing/text size. // Returns -1 if the user requested retry with more debug info. - int FindBlocks(int imageheight, int resolution, bool single_column, + int FindBlocks(bool single_column, int imageheight, TO_BLOCK* block, Boxa* boxa, Pixa* pixa, BLOCK_LIST* blocks, TO_BLOCK_LIST* to_blocks); + // Get the rotation required to deskew, and its inverse rotation. + void GetDeskewVectors(FCOORD* deskew, FCOORD* reskew); + private: // Displays the blob and block bounding boxes in a window called Blocks. void DisplayBlocks(BLOCK_LIST* blocks); @@ -167,26 +207,28 @@ class ColumnFinder : public TabFind { bool BiggestUnassignedRange(const bool* any_columns_possible, int* start, int* end); // Finds the modal compatible column_set_ index within the given range. - int RangeModalColumnSet(bool** possible_column_sets, + int RangeModalColumnSet(int** column_set_costs, const int* assigned_costs, int start, int end); // Given that there are many column_set_id compatible columns in the range, // shrinks the range to the longest contiguous run of compatibility, allowing // gaps where no columns are possible, but not where competing columns are // possible. 
- void ShrinkRangeToLongestRun(bool** possible_column_sets, - const bool* any_columns_possible, - int column_set_id, - int* best_start, int* best_end); + void ShrinkRangeToLongestRun(int** column_set_costs, + const int* assigned_costs, + const bool* any_columns_possible, + int column_set_id, + int* best_start, int* best_end); // Moves start in the direction of step, upto, but not including end while // the only incompatible regions are no more than kMaxIncompatibleColumnCount // in size, and the compatible regions beyond are bigger. - void ExtendRangePastSmallGaps(bool** possible_column_sets, + void ExtendRangePastSmallGaps(int** column_set_costs, + const int* assigned_costs, const bool* any_columns_possible, int column_set_id, int step, int end, int* start); // Assigns the given column_set_id to the part_sets_ in the given range. void AssignColumnToRange(int column_set_id, int start, int end, - bool** assigned_column_sets); + int** column_set_costs, int* assigned_costs); // Computes the mean_column_gap_. void ComputeMeanColumnGap(); @@ -209,151 +251,15 @@ class ColumnFinder : public TabFind { void GridInsertUnknowns(); // Add horizontal line separators as partitions. void GridInsertHLinePartitions(); - // Improves the margins of the ColPartitions in the grid by calling - // FindPartitionMargins on each. - void GridFindMargins(); - // Improves the margins of the ColPartitions in the list by calling - // FindPartitionMargins on each. - void ListFindMargins(ColPartition_LIST* parts); - // Improves the margins of the ColPartition by searching for - // neighbours that vertically overlap significantly. - void FindPartitionMargins(ColPartitionSet* columns, ColPartition* part); - // Starting at x, and going in the specified direction, upto x_limit, finds - // the margin for the given y range by searching sideways, - // and ignoring not_this. 
- int FindMargin(int x, bool right_to_left, int x_limit, - int y_bottom, int y_top, const ColPartition* not_this); + // Add vertical line separators as partitions. + void GridInsertVLinePartitions(); // For every ColPartition in the grid, sets its type based on position // in the columns. void SetPartitionTypes(); - - //////// Functions that manipulate ColPartitions in the part_grid_ ///// - //////// to find chains of partner partitions of the same type. /////// - - // For every ColPartition in the grid, finds its upper and lower neighbours. - void FindPartitionPartners(); - // Finds the best partner in the given direction for the given partition. - // Stores the result with AddPartner. - void FindPartitionPartners(bool upper, ColPartition* part); - // For every ColPartition with multiple partners in the grid, reduces the - // number of partners to 0 or 1. - void RefinePartitionPartners(); // Only images remain with multiple types in a run of partners. // Sets the type of all in the group to the maximum of the group. void SmoothPartnerRuns(); - //////// Functions that manipulate ColPartitions in the part_grid_ ///// - //////// to find tables. /////// - - // Copy cleaned partitions from part_grid_ to clean_part_grid_ and - // insert dot-like noise into period_grid_ - void GetCleanPartitions(TO_BLOCK* block); - - // High level function to perform table detection - void LocateTables(); - - // Get Column segments from best_columns_ - void GetColumnBlocks(ColSegment_LIST *col_segments); - - // Group Column segments into consecutive single column regions. - void GroupColumnBlocks(ColSegment_LIST *current_segments, - ColSegment_LIST *col_segments); - - // Check if two boxes are consecutive within the same column - bool ConsecutiveBoxes(const TBOX &b1, const TBOX &b2); - - // Set left, right and top, bottom spacings of each colpartition. - // Left/right spacings are w.r.t the column boundaries - // Top/bottom spacings are w.r.t. 
previous and next colpartitions - void SetPartitionSpacings(); - - // Set spacing and closest neighbors above and below a given colpartition. - void SetVerticalSpacing(ColPartition* part); - - // Set global spacing estimates - void SetGlobalSpacings(); - - // Mark partitions as table rows/cells. - void GridMarkTablePartitions(); - - // Check if the partition has at lease one large gap between words or no - // significant gap at all - bool HasWideOrNoInterWordGap(ColPartition* part); - - // Check if a period lies in the inter-wrod gap in the parition boxes - bool LiesInGap(BLOBNBOX* period, BLOBNBOX_CLIST* boxes); - - // Filter individual text partitions marked as table partitions - // consisting of paragraph endings, small section headings, and - // headers and footers. - void FilterFalseAlarms(); - - // Mark all ColPartitions as table cells that have a table cell above - // and below them - void SmoothTablePartitionRuns(); - - // Set the ratio of candidate table partitions in each column - void SetColumnsType(ColSegment_LIST* col_segments); - - // Move Column Blocks to col_seg_grid_ - void MoveColSegmentsToGrid(ColSegment_LIST *segments, - ColSegmentGrid *col_seg_grid); - - // Merge Column Blocks that were split due to the presence of a table - void GridMergeColumnBlocks(); - - // Merge table cells into table columns - void GetTableColumns(ColSegment_LIST *table_columns); - - // Get Column segments from best_columns_ - void GetTableRegions(ColSegment_LIST *table_columns, - ColSegment_LIST *table_regions); - - // Merge table regions corresponding to tables spanning multiple columns - void GridMergeTableRegions(); - bool BelongToOneTable(const TBOX &box1, const TBOX &box2); - - // Adjust table boundaries by building a tight bounding box around all - // ColPartitions contained in it. 
- void AdjustTableBoundaries(); - - // Checks whether the horizontal line belong to the table by looking at the - // side spacing of extra ColParitions that will be included in the table - // due to expansion - bool HLineBelongsToTable(ColPartition* part, const TBOX& table_box); - - // Look for isolated column headers above the given table box and - // include them in the table - void IncludeLeftOutColumnHeaders(TBOX& table_box); - - // Remove false alarms consiting of a single column - void DeleteSingleColumnTables(); - - // Return true if at least one gap larger than the global x-height - // exists in the horizontal projection - bool GapInXProjection(int* xprojection, int length); - - // Displays Colpartitions marked as table row. Overlays them on top of - // part_grid_. - void DisplayColSegments(ColSegment_LIST *cols, ScrollView* win, - ScrollView::Color color); - - // Displays the colpartitions using a new coloring on an existing window. - // Note: This method is only for debug purpose during development and - // would not be part of checked in code - void DisplayColPartitions(ScrollView* win, - ScrollView::Color color); - - // Write ColParitions and Tables to a PIX image - // Note: This method is only for debug purpose during development and - // would not be part of checked in code - void WriteToPix(); - - // Merge all colpartitions in table regions to make them a single - // colpartition and revert types of isolated table cells not - // assigned to any table to their original types. - void MakeTableBlocks(); - //////// Functions that make the final output blocks /////// // Helper functions for TransformToBlocks. @@ -366,10 +272,15 @@ class ColumnFinder : public TabFind { // Transform the grid of partitions to the output blocks. void TransformToBlocks(BLOCK_LIST* blocks, TO_BLOCK_LIST* to_blocks); - // Reskew the completed blocks to put them back to the original coords. - // (Blob outlines are not corrected for skew.) 
- // Rotate blobs and blocks individually so text line direction is - // horizontal. Record appropriate inverse transformations and required + // Undo the deskew that was done in FindTabVectors, as recognition is done + // without correcting blobs or blob outlines for skew. + // Reskew the completed blocks to put them back to the original rotated coords + // that were created by CorrectOrientation. + // Blocks that were identified as vertical text (relative to the rotated + // coordinates) are further rotated so the text lines are horizontal. + // blob polygonal outlines are rotated to match the position of the blocks + // that they are in, and their bounding boxes are recalculated to be accurate. + // Record appropriate inverse transformations and required // classifier transformation in the blocks. void RotateAndReskewBlocks(TO_BLOCK_LIST* to_blocks); @@ -378,16 +289,22 @@ class ColumnFinder : public TabFind { // the block from the to_blocks list that contains them. void MoveSmallBlobs(BLOBNBOX_LIST* bblobs, TO_BLOCK_LIST* to_blocks); + // The minimum gutter width to apply for finding columns. + // Modified when vertical text is detected to prevent detection of + // vertical text lines as columns. + int min_gutter_width_; // The mean gap between columns over the page. int mean_column_gap_; - // Estimate of median x-height over the page - int global_median_xheight_; - // Estimate of median ledding on the page - int global_median_ledding_; + // The rotation vector needed to convert original coords to deskewed. + FCOORD deskew_; // The rotation vector needed to convert deskewed back to original coords. FCOORD reskew_; + // The rotation vector used to rotate vertically oriented pages. + FCOORD rotation_; // The rotation vector needed to convert the rotated back to original coords. FCOORD rerotate_; + // The additional rotation vector needed to rotate text for recognition. 
+ FCOORD text_rotation_; // The part_sets_ are the initial text-line-like partition of the grid, // and is a vector of ColPartitionSets. PartSetVector part_sets_; @@ -397,20 +314,10 @@ class ColumnFinder : public TabFind { // A simple array of pointers to the best assigned column division at // each grid y coordinate. ColPartitionSet** best_columns_; + // The grid used for creating initial partitions with strokewidth. + StrokeWidth* stroke_width_; // The grid used to hold ColPartitions after the columns have been determined. ColPartitionGrid part_grid_; - // Grid to hold cleaned colpartitions after removing all - // colpartitions that consist of only noise blobs, and removing - // noise blobs from remaining colpartitions. - ColPartitionGrid clean_part_grid_; - // Grid to hold periods, commas, i-dots etc. - BBGrid period_grid_; - // List of period blobs extracted from small and noise blobs - BLOBNBOX_LIST period_blobs_; - // Grid of page column blocks - ColSegmentGrid col_seg_grid_; - // Grid of detected tables - ColSegmentGrid table_grid_; // List of ColPartitions that are no longer needed after they have been // turned into regions, but are kept around because they are referenced // by the part_grid_. diff --git a/textord/colpartition.cpp b/textord/colpartition.cpp index 6d921c5103..469f857883 100644 --- a/textord/colpartition.cpp +++ b/textord/colpartition.cpp @@ -19,7 +19,9 @@ /////////////////////////////////////////////////////////////////////// #include "colpartition.h" +#include "colpartitiongrid.h" #include "colpartitionset.h" +#include "dppoint.h" #include "workingpartset.h" #ifdef _MSC_VER @@ -43,22 +45,33 @@ const double kMaxSpacingDrift = 1.0 / 72; // 1/72 is one point. const double kMaxTopSpacingFraction = 0.25; // Maximum ratio of sizes for lines to be considered the same size. const double kMaxSizeRatio = 1.5; +// Fraction of max of leader width and gap for max IQR of gaps. 
+const double kMaxLeaderGapFractionOfMax = 0.25; +// Fraction of min of leader width and gap for max IQR of gaps. +const double kMaxLeaderGapFractionOfMin = 0.5; +// Minimum number of blobs to be considered a leader. +const int kMinLeaderCount = 5; +// Cost of a cut through a leader. +const int kLeaderCutCost = 8; // blob_type is the blob_region_type_ of the blobs in this partition. // Vertical is the direction of logical vertical on the possibly skewed image. ColPartition::ColPartition(BlobRegionType blob_type, const ICOORD& vertical) : left_margin_(MIN_INT32), right_margin_(MAX_INT32), median_bottom_(MAX_INT32), median_top_(MIN_INT32), median_size_(0), - blob_type_(blob_type), + median_left_(MAX_INT32), median_right_(MIN_INT32), median_width_(0), + blob_type_(blob_type), flow_(BTFT_NONE), good_blob_score_(0), good_width_(false), good_column_(false), left_key_tab_(false), right_key_tab_(false), left_key_(0), right_key_(0), type_(PT_UNKNOWN), vertical_(vertical), working_set_(NULL), block_owned_(false), + desperately_merged_(false), first_column_(-1), last_column_(-1), column_set_(NULL), side_step_(0), top_spacing_(0), bottom_spacing_(0), type_before_table_(PT_UNKNOWN), inside_table_column_(false), nearest_neighbor_above_(NULL), nearest_neighbor_below_(NULL), - space_above_(0), space_below_(0), space_to_left_(0), space_to_right_(0) { + space_above_(0), space_below_(0), space_to_left_(0), space_to_right_(0), + owns_blobs_(true) { } // Constructs a fake ColPartition with a single fake BLOBNBOX, all made @@ -88,25 +101,21 @@ ColPartition::~ColPartition() { } } -// Constructs a fake ColPartition with no BLOBNBOXes. -// Used for making horizontal line ColPartitions and types it accordingly. 
-ColPartition::ColPartition(const ICOORD& vertical, - int left, int bottom, int right, int top) - : left_margin_(MIN_INT32), right_margin_(MAX_INT32), - bounding_box_(left, bottom, right, top), - median_bottom_(bottom), median_top_(top), median_size_(top - bottom), - blob_type_(BRT_HLINE), - good_width_(false), good_column_(false), - left_key_tab_(false), right_key_tab_(false), - type_(PT_UNKNOWN), vertical_(vertical), - working_set_(NULL), block_owned_(false), - first_column_(-1), last_column_(-1), column_set_(NULL), - side_step_(0), top_spacing_(0), bottom_spacing_(0), - type_before_table_(PT_UNKNOWN), inside_table_column_(false), - nearest_neighbor_above_(NULL), nearest_neighbor_below_(NULL), - space_above_(0), space_below_(0), space_to_left_(0), space_to_right_(0) { - left_key_ = BoxLeftKey(); - right_key_ = BoxRightKey(); +// Constructs a fake ColPartition with no BLOBNBOXes to represent a +// horizontal or vertical line, given a type and a bounding box. +ColPartition* ColPartition::MakeLinePartition(BlobRegionType blob_type, + const ICOORD& vertical, + int left, int bottom, + int right, int top) { + ColPartition* part = new ColPartition(blob_type, vertical); + part->bounding_box_ = TBOX(left, bottom, right, top); + part->median_bottom_ = bottom; + part->median_top_ = top; + part->median_size_ = top - bottom; + part->median_width_ = right - left; + part->left_key_ = part->BoxLeftKey(); + part->right_key_ = part->BoxRightKey(); + return part; } @@ -114,7 +123,10 @@ ColPartition::ColPartition(const ICOORD& vertical, // The list of boxes in the partition is updated, ensuring that no box is // recorded twice, and the boxes are kept in increasing left position. void ColPartition::AddBox(BLOBNBOX* bbox) { - boxes_.add_sorted(SortByBoxLeft, true, bbox); + if (IsVerticalType()) + boxes_.add_sorted(SortByBoxBottom, true, bbox); + else + boxes_.add_sorted(SortByBoxLeft, true, bbox); TBOX box = bbox->bounding_box(); // Update the partition limits. 
bounding_box_ += box; @@ -128,6 +140,50 @@ void ColPartition::AddBox(BLOBNBOX* bbox) { bounding_box_.left(), bounding_box_.right()); } +// Removes the given box from the partition, updating the bounds. +void ColPartition::RemoveBox(BLOBNBOX* box) { + BLOBNBOX_C_IT bb_it(&boxes_); + for (bb_it.mark_cycle_pt(); !bb_it.cycled_list(); bb_it.forward()) { + if (box == bb_it.data()) { + bb_it.extract(); + ComputeLimits(); + return; + } + } +} + +// Returns the tallest box in the partition, as measured perpendicular to the +// presumed flow of text. +BLOBNBOX* ColPartition::BiggestBox() { + BLOBNBOX* biggest = NULL; + BLOBNBOX_C_IT bb_it(&boxes_); + for (bb_it.mark_cycle_pt(); !bb_it.cycled_list(); bb_it.forward()) { + BLOBNBOX* bbox = bb_it.data(); + if (IsVerticalType()) { + if (biggest == NULL || + bbox->bounding_box().width() > biggest->bounding_box().width()) + biggest = bbox; + } else { + if (biggest == NULL || + bbox->bounding_box().height() > biggest->bounding_box().height()) + biggest = bbox; + } + } + return biggest; +} + +// Returns the bounding box excluding the given box. +TBOX ColPartition::BoundsWithoutBox(BLOBNBOX* box) { + TBOX result; + BLOBNBOX_C_IT bb_it(&boxes_); + for (bb_it.mark_cycle_pt(); !bb_it.cycled_list(); bb_it.forward()) { + if (box != bb_it.data()) { + result += bb_it.data()->bounding_box(); + } + } + return result; +} + // Claims the boxes in the boxes_list by marking them with a this owner // pointer. If a box is already owned, then run Unique on it. 
void ColPartition::ClaimBoxes(WidthCallback* cb) { @@ -154,7 +210,8 @@ void ColPartition::ClaimBoxes(WidthCallback* cb) { BLOBNBOX_C_IT other_it(&other->boxes_); for (other_it.mark_cycle_pt(); !other_it.cycled_list(); other_it.forward()) { - ASSERT_HOST(other_it.data()->owner() == other); + ASSERT_HOST(other_it.data()->owner() == other || + other_it.data()->owner() == NULL); other_it.data()->set_owner(NULL); } Unique(other, cb); @@ -176,6 +233,17 @@ void ColPartition::ClaimBoxes(WidthCallback* cb) { } while (!completed); } +// NULL the owner of the blobs in this partition, so they can be deleted +// independently of the ColPartition. +void ColPartition::DisownBoxes() { + BLOBNBOX_C_IT bb_it(&boxes_); + for (bb_it.mark_cycle_pt(); !bb_it.cycled_list(); bb_it.forward()) { + BLOBNBOX* bblob = bb_it.data(); + ASSERT_HOST(bblob->owner() == this || bblob->owner() == NULL); + bblob->set_owner(NULL); + } +} + // Delete the boxes that this partition owns. void ColPartition::DeleteBoxes() { // Although the boxes_ list is a C_LIST, in some cases it owns the @@ -233,6 +301,94 @@ bool ColPartition::MatchingColumns(const ColPartition& other) const { return true; } +// Returns true if the sizes match for two text partitions, +// taking orientation into account. See also SizesSimilar. +bool ColPartition::MatchingSizes(const ColPartition& other) const { + if (blob_type_ == BRT_VERT_TEXT || other.blob_type_ == BRT_VERT_TEXT) + return !TabFind::DifferentSizes(median_width_, other.median_width_); + else + return !TabFind::DifferentSizes(median_size_, other.median_size_); +} + +// Returns true if there is no tabstop violation in merging this and other. 
+bool ColPartition::ConfirmNoTabViolation(const ColPartition& other) const { + if (bounding_box_.right() < other.bounding_box_.left() && + bounding_box_.right() < other.LeftBlobRule()) + return false; + if (other.bounding_box_.right() < bounding_box_.left() && + other.bounding_box_.right() < LeftBlobRule()) + return false; + if (bounding_box_.left() > other.bounding_box_.right() && + bounding_box_.left() > other.RightBlobRule()) + return false; + if (other.bounding_box_.left() > bounding_box_.right() && + other.bounding_box_.left() > RightBlobRule()) + return false; + return true; +} + +// Returns true if other has a similar stroke width to this. +bool ColPartition::MatchingStrokeWidth(const ColPartition& other, + double fractional_tolerance, + double constant_tolerance) const { + int match_count = 0; + int nonmatch_count = 0; + BLOBNBOX_C_IT box_it(const_cast(&boxes_)); + BLOBNBOX_C_IT other_it(const_cast(&other.boxes_)); + box_it.mark_cycle_pt(); + other_it.mark_cycle_pt(); + while (!box_it.cycled_list() && !other_it.cycled_list()) { + if (box_it.data()->MatchingStrokeWidth(*other_it.data(), + fractional_tolerance, + constant_tolerance)) + ++match_count; + else + ++nonmatch_count; + box_it.forward(); + other_it.forward(); + } + return match_count > nonmatch_count; +} + +// Returns true if base is an acceptable diacritic base char merge +// with this as the diacritic. +// Returns true if: +// (1) this is a ColPartition containing only diacritics, and +// (2) the base characters indicated on the diacritics all believably lie +// within the text line of the candidate ColPartition. 
+bool ColPartition::OKDiacriticMerge(const ColPartition& candidate, + bool debug) const { + BLOBNBOX_C_IT it(const_cast(&boxes_)); + int min_top = MAX_INT32; + int max_bottom = MIN_INT32; + for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { + BLOBNBOX* blob = it.data(); + if (!blob->IsDiacritic()) { + if (debug) { + tprintf("Blob is not a diacritic:"); + blob->bounding_box().print(); + } + return false; // All blobs must have diacritic bases. + } + if (blob->base_char_top() < min_top) + min_top = blob->base_char_top(); + if (blob->base_char_bottom() > max_bottom) + max_bottom = blob->base_char_bottom(); + } + // If the intersection of all vertical ranges of all base characters + // overlaps the median range of this, then it is OK. + bool result = min_top > candidate.median_bottom_ && + max_bottom < candidate.median_top_; + if (debug) { + if (result) + tprintf("OKDiacritic!\n"); + else + tprintf("y ranges don\'t overlap: %d-%d / %d-%d\n", + max_bottom, min_top, median_bottom_, median_top_); + } + return result; +} + // Sets the sort key using either the tab vector, or the bounding box if // the tab vector is NULL. If the tab_vector lies inside the bounding_box, // use the edge of the box as a key any way. @@ -286,6 +442,18 @@ void ColPartition::CopyRightTab(const ColPartition& src, bool take_box) { right_margin_ = src.right_margin_; } +// Returns the left rule line x coord of the leftmost blob. +int ColPartition::LeftBlobRule() const { + BLOBNBOX_C_IT it(const_cast(&boxes_)); + return it.data()->left_rule(); +} +// Returns the right rule line x coord of the rightmost blob. +int ColPartition::RightBlobRule() const { + BLOBNBOX_C_IT it(const_cast(&boxes_)); + it.move_to_last(); + return it.data()->right_rule(); +} + // Add a partner above if upper, otherwise below. // Add them uniquely and keep the list sorted by box left. // Partnerships are added symmetrically to partner and this. 
@@ -325,6 +493,11 @@ ColPartition* ColPartition::SingletonPartner(bool upper) { // Merge with the other partition and delete it. void ColPartition::Absorb(ColPartition* other, WidthCallback* cb) { + // The result has to either own all of the blobs or none of them. + // Verify the flag is consisent. + ASSERT_HOST(owns_blobs() == other->owns_blobs()); + // TODO(nbeato): check owns_blobs better. Right now owns_blobs + // should always be true when this is called. So there is no issues. if (TabFind::WithinTestRegion(2, bounding_box_.left(), bounding_box_.bottom()) || TabFind::WithinTestRegion(2, other->bounding_box_.left(), @@ -339,6 +512,10 @@ void ColPartition::Absorb(ColPartition* other, WidthCallback* cb) { for (; !it2.empty(); it2.forward()) { BLOBNBOX* bbox2 = it2.extract(); ColPartition* prev_owner = bbox2->owner(); + if (prev_owner != other && prev_owner != NULL) { + // A blob on other's list is owned by someone else; let them have it. + continue; + } ASSERT_HOST(prev_owner == other || prev_owner == NULL); if (prev_owner == other) bbox2->set_owner(this); @@ -369,6 +546,30 @@ void ColPartition::Absorb(ColPartition* other, WidthCallback* cb) { right_key_ = other->right_key_; right_key_tab_ = other->right_key_tab_; } + // Combine the flow and blob_type in a sensible way. + // Dominant flows stay. + // TODO(rays) fix the asymmetry and factor out the dominance function. + // if DominatesInMerge(other->flow_, flow_) { + if (other->flow_ == BTFT_NONTEXT || + (other->flow_ > flow_ && other->flow_ != BTFT_LEADER)) { + flow_ = other->flow_; + blob_type_ = other->blob_type_; + } + SetBlobTypes(); + // Fix partner lists. other is going away, so remove it as a + // partner of all its partners and add this in its place. + for (int upper = 0; upper < 2; ++upper) { + ColPartition_CLIST* partners = upper + ? 
&other->upper_partners_ + : &other->lower_partners_; + for (ColPartition_C_IT part_it(partners); !part_it.empty(); + part_it.forward()) { + ColPartition* partner = part_it.extract(); + partner->RemovePartner(!upper, other); + partner->RemovePartner(!upper, this); + partner->AddPartner(!upper, this); + } + } delete other; ComputeLimits(); if (cb != NULL) { @@ -486,21 +687,116 @@ bool ColPartition::Unique(ColPartition* other, WidthCallback* cb) { return any_moved; } +// Merge1 and merge2 are candidates to be merged, yet their combined box +// overlaps this. Is that allowed? +// Returns true if the overlap between this and the merged pair of +// merge candidates is sufficiently trivial to be allowed. +// The merged box can graze the edge of this by the ok_box_overlap +// if that exceeds the margin to the median top and bottom. +// ok_box_overlap should be set by the caller appropriate to the sizes of +// the text involved, and is usually a fraction of the median size of merge1 +// and/or merge2, or this. +// TODO(rays) Determine whether vertical text needs to be considered. +bool ColPartition::OKMergeOverlap(const ColPartition& merge1, + const ColPartition& merge2, + int ok_box_overlap, bool debug) { + // Vertical partitions are not allowed to be involved. + if (IsVerticalType() || merge1.IsVerticalType() || merge2.IsVerticalType()) { + if (debug) + tprintf("Vertical partition\n"); + return false; + } + // The merging partitions must strongly overlap each other. + if (!merge1.VOverlaps(merge2)) { + if (debug) + tprintf("Voverlap %d (%d)\n", + merge1.VOverlap(merge2), merge1.VOverlaps(merge2)); + return false; + } + // The merged box must not overlap the median bounds of this. 
+ TBOX merged_box(merge1.bounding_box()); + merged_box += merge2.bounding_box(); + if (merged_box.bottom() < median_top_ && merged_box.top() > median_bottom_ && + merged_box.bottom() < bounding_box_.top() - ok_box_overlap && + merged_box.top() > bounding_box_.bottom() + ok_box_overlap) { + if (debug) + tprintf("Excessive box overlap\n"); + return false; + } + // Looks OK! + return true; +} + +// Find the blob at which to split this to minimize the overlap with the +// given box. Returns the first blob to go in the second partition. +BLOBNBOX* ColPartition::OverlapSplitBlob(const TBOX& box) { + if (boxes_.empty() || boxes_.singleton()) + return NULL; + BLOBNBOX_C_IT it(&boxes_); + TBOX left_box(it.data()->bounding_box()); + for (it.forward(); !it.at_first(); it.forward()) { + BLOBNBOX* bbox = it.data(); + left_box += bbox->bounding_box(); + if (left_box.overlap(box)) + return bbox; + } + return NULL; +} + +// Split this partition keeping the first half in this and returning +// the second half. +// Splits by putting the split_blob and the blobs that follow +// in the second half, and the rest in the first half. +ColPartition* ColPartition::SplitAtBlob(BLOBNBOX* split_blob) { + ColPartition* split_part = ShallowCopy(); + split_part->set_owns_blobs(owns_blobs()); + BLOBNBOX_C_IT it(&boxes_); + for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { + BLOBNBOX* bbox = it.data(); + ColPartition* prev_owner = bbox->owner(); + ASSERT_HOST(!owns_blobs() || prev_owner == this || prev_owner == NULL); + if (bbox == split_blob || !split_part->boxes_.empty()) { + split_part->AddBox(it.extract()); + if (owns_blobs() && prev_owner != NULL) + bbox->set_owner(split_part); + } + } + ASSERT_HOST(!it.empty()); + if (split_part->IsEmpty()) { + // Split part ended up with nothing. Possible if split_blob is not + // in the list of blobs. 
+ delete split_part; + return NULL; + } + right_key_tab_ = false; + split_part->left_key_tab_ = false; + ComputeLimits(); + // TODO(nbeato) Merge Ray's CL like this: + // if (owns_blobs()) + // SetBlobTextlineGoodness(); + split_part->ComputeLimits(); + // TODO(nbeato) Merge Ray's CL like this: + // if (split_part->owns_blobs()) + // split_part->SetBlobTextlineGoodness(); + return split_part; +} + // Split this partition at the given x coordinate, returning the right // half and keeping the left half in this. ColPartition* ColPartition::SplitAt(int split_x) { if (split_x <= bounding_box_.left() || split_x >= bounding_box_.right()) return NULL; // There will be no change. ColPartition* split_part = ShallowCopy(); + split_part->set_owns_blobs(owns_blobs()); BLOBNBOX_C_IT it(&boxes_); for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { BLOBNBOX* bbox = it.data(); ColPartition* prev_owner = bbox->owner(); - ASSERT_HOST(prev_owner == this || prev_owner == NULL); + ASSERT_HOST(!owns_blobs() || prev_owner == this || prev_owner == NULL); const TBOX& box = bbox->bounding_box(); if (box.left() >= split_x) { split_part->AddBox(it.extract()); - if (prev_owner != NULL) + if (owns_blobs() && prev_owner != NULL) bbox->set_owner(split_part); } } @@ -525,6 +821,7 @@ void ColPartition::ComputeLimits() { bounding_box_ = TBOX(); // Clear it BLOBNBOX_C_IT it(&boxes_); BLOBNBOX* bbox = NULL; + int non_leader_count = 0; if (it.empty()) { bounding_box_.set_left(left_margin_); bounding_box_.set_right(right_margin_); @@ -534,6 +831,8 @@ void ColPartition::ComputeLimits() { for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { bbox = it.data(); bounding_box_ += bbox->bounding_box(); + if (bbox->flow() != BTFT_LEADER) + ++non_leader_count; } } if (!left_key_tab_) @@ -552,19 +851,41 @@ void ColPartition::ComputeLimits() { } if (it.empty()) return; - STATS top_stats(bounding_box_.bottom(), bounding_box_.top() + 1); - STATS bottom_stats(bounding_box_.bottom(), bounding_box_.top() + 
1); - STATS size_stats(0, bounding_box_.height() + 1); - for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { - bbox = it.data(); - TBOX box = bbox->bounding_box(); - top_stats.add(box.top(), 1); - bottom_stats.add(box.bottom(), 1); - size_stats.add(box.height(), 1); + if (IsImageType() || blob_type() == BRT_RECTIMAGE || + blob_type() == BRT_POLYIMAGE) { + median_top_ = bounding_box_.top(); + median_bottom_ = bounding_box_.bottom(); + median_size_ = bounding_box_.height(); + median_left_ = bounding_box_.left(); + median_right_ = bounding_box_.right(); + median_width_ = bounding_box_.width(); + } else { + STATS top_stats(bounding_box_.bottom(), bounding_box_.top() + 1); + STATS bottom_stats(bounding_box_.bottom(), bounding_box_.top() + 1); + STATS size_stats(0, bounding_box_.height() + 1); + STATS left_stats(bounding_box_.left(), bounding_box_.right() + 1); + STATS right_stats(bounding_box_.left(), bounding_box_.right() + 1); + STATS width_stats(0, bounding_box_.width() + 1); + for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { + bbox = it.data(); + if (non_leader_count == 0 || bbox->flow() != BTFT_LEADER) { + TBOX box = bbox->bounding_box(); + int area = box.area(); + top_stats.add(box.top(), area); + bottom_stats.add(box.bottom(), area); + size_stats.add(box.height(), area); + left_stats.add(box.left(), area); + right_stats.add(box.right(), area); + width_stats.add(box.width(), area); + } + } + median_top_ = static_cast(top_stats.median() + 0.5); + median_bottom_ = static_cast(bottom_stats.median() + 0.5); + median_size_ = static_cast(size_stats.median() + 0.5); + median_left_ = static_cast(left_stats.median() + 0.5); + median_right_ = static_cast(right_stats.median() + 0.5); + median_width_ = static_cast(width_stats.median() + 0.5); } - median_top_ = static_cast(top_stats.median() + 0.5); - median_bottom_ = static_cast(bottom_stats.median() + 0.5); - median_size_ = static_cast(size_stats.median() + 0.5); if (right_margin_ < bounding_box_.right() 
&& textord_debug_bugs) { tprintf("Made partition with bad right coords"); @@ -574,6 +895,19 @@ void ColPartition::ComputeLimits() { tprintf("Made partition with bad left coords"); Print(); } + // Fix partner lists. The bounding box has changed and partners are stored + // in bounding box order, so remove and reinsert this as a partner + // of all its partners. + for (int upper = 0; upper < 2; ++upper) { + ColPartition_CLIST partners; + ColPartition_C_IT part_it(&partners); + part_it.add_list_after(upper ? &upper_partners_ : &lower_partners_); + for (part_it.move_to_first(); !part_it.empty(); part_it.forward()) { + ColPartition* partner = part_it.extract(); + partner->RemovePartner(!upper, this); + partner->AddPartner(!upper, this); + } + } if (TabFind::WithinTestRegion(2, bounding_box_.left(), bounding_box_.bottom())) { tprintf("Recomputed box for partition %p\n", this); @@ -581,19 +915,31 @@ void ColPartition::ComputeLimits() { } } +// Returns the number of boxes that overlap the given box. +int ColPartition::CountOverlappingBoxes(const TBOX& box) { + BLOBNBOX_C_IT it(&boxes_); + int overlap_count = 0; + for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { + BLOBNBOX* bbox = it.data(); + if (box.overlap(bbox->bounding_box())) + ++overlap_count; + } + return overlap_count; +} + // Computes and sets the type_ and first_colum_, last_column_ and column_set_. -void ColPartition::SetPartitionType(ColPartitionSet* columns) { +// resolution refers to the ppi resolution of the image. 
+void ColPartition::SetPartitionType(int resolution, ColPartitionSet* columns) { int first_spanned_col = -1; - int last_spanned_col = -1; - type_ = columns->SpanningType(blob_type_, - bounding_box_.left(), bounding_box_.right(), - MidY(), left_margin_, right_margin_, - &first_column_, &last_column_, - &first_spanned_col, &last_spanned_col); + ColumnSpanningType span_type = + columns->SpanningType(resolution, + bounding_box_.left(), bounding_box_.right(), + MidY(), left_margin_, right_margin_, + &first_column_, &last_column_, + &first_spanned_col); column_set_ = columns; - if (first_column_ != last_column_ && - (type_ == PT_PULLOUT_TEXT || type_ == PT_PULLOUT_IMAGE || - type_ == PT_PULLOUT_LINE)) { + if (first_column_ < last_column_ && span_type == CST_PULLOUT && + !IsLineType()) { // Unequal columns may indicate that the pullout spans one of the columns // it lies in, so force it to be allocated to just that column. if (first_spanned_col >= 0) { @@ -608,18 +954,73 @@ void ColPartition::SetPartitionType(ColPartitionSet* columns) { first_column_ = last_column_ = (first_column_ + last_column_) / 2; } } + type_ = PartitionType(span_type); +} + +// Returns the PartitionType from the current BlobRegionType and a column +// flow spanning type ColumnSpanningType, generated by +// ColPartitionSet::SpanningType, that indicates how the partition sits +// in the columns. 
+PolyBlockType ColPartition::PartitionType(ColumnSpanningType flow) const { + if (flow == CST_NOISE) { + if (blob_type_ != BRT_HLINE && blob_type_ != BRT_VLINE && + blob_type_ != BRT_RECTIMAGE && blob_type_ != BRT_VERT_TEXT) + return PT_NOISE; + flow = CST_FLOWING; + } + + switch (blob_type_) { + case BRT_NOISE: + return PT_NOISE; + case BRT_HLINE: + return PT_HORZ_LINE; + case BRT_VLINE: + return PT_VERT_LINE; + case BRT_RECTIMAGE: + case BRT_POLYIMAGE: + case BRT_UNKNOWN: + switch (flow) { + case CST_FLOWING: + return PT_FLOWING_IMAGE; + case CST_HEADING: + return PT_HEADING_IMAGE; + case CST_PULLOUT: + return PT_PULLOUT_IMAGE; + default: + ASSERT_HOST(!"Undefined flow type for image!"); + } + break; + case BRT_VERT_TEXT: + return PT_VERTICAL_TEXT; + case BRT_TEXT: + default: + switch (flow) { + case CST_FLOWING: + return PT_FLOWING_TEXT; + case CST_HEADING: + return PT_HEADING_TEXT; + case CST_PULLOUT: + return PT_PULLOUT_TEXT; + default: + ASSERT_HOST(!"Undefined flow type for text!"); + } + } + ASSERT_HOST(!"Should never get here!"); + return PT_NOISE; } // Returns the first and last column touched by this partition. -void ColPartition::ColumnRange(ColPartitionSet* columns, +// resolution refers to the ppi resolution of the image. +void ColPartition::ColumnRange(int resolution, ColPartitionSet* columns, int* first_col, int* last_col) { int first_spanned_col = -1; - int last_spanned_col = -1; - type_ = columns->SpanningType(blob_type_, - bounding_box_.left(), bounding_box_.right(), - MidY(), left_margin_, right_margin_, - first_col, last_col, - &first_spanned_col, &last_spanned_col); + ColumnSpanningType span_type = + columns->SpanningType(resolution, + bounding_box_.left(), bounding_box_.right(), + MidY(), left_margin_, right_margin_, + first_col, last_col, + &first_spanned_col); + type_ = PartitionType(span_type); } // Sets the internal flags good_width_ and good_column_. 
@@ -630,6 +1031,125 @@ void ColPartition::SetColumnGoodness(WidthCallback* cb) { good_column_ = blob_type_ == BRT_TEXT && left_key_tab_ && right_key_tab_; } +// Determines whether the blobs in this partition mostly represent +// a leader (fixed pitch sequence) and sets the member blobs accordingly. +// Note that height is assumed to have been tested elsewhere, and that this +// function will find most fixed-pitch text as leader without a height filter. +// Leader detection is limited to sequences of identical width objects, +// such as .... or ----, so patterns, such as .-.-.-.-. will not be found. +bool ColPartition::MarkAsLeaderIfMonospaced() { + bool result = false; + // Gather statistics on the gaps between blobs and the widths of the blobs. + int part_width = bounding_box_.width(); + STATS gap_stats(0, part_width); + STATS width_stats(0, part_width); + BLOBNBOX_C_IT it(&boxes_); + BLOBNBOX* prev_blob = it.data(); + prev_blob->set_flow(BTFT_NEIGHBOURS); + width_stats.add(prev_blob->bounding_box().width(), 1); + int blob_count = 1; + for (it.forward(); !it.at_first(); it.forward()) { + BLOBNBOX* blob = it.data(); + int left = blob->bounding_box().left(); + int right = blob->bounding_box().right(); + gap_stats.add(left - prev_blob->bounding_box().right(), 1); + width_stats.add(right - left, 1); + blob->set_flow(BTFT_NEIGHBOURS); + prev_blob = blob; + ++blob_count; + } + double median_gap = gap_stats.median(); + double median_width = width_stats.median(); + double max_width = MAX(median_gap, median_width); + double min_width = MIN(median_gap, median_width); + double gap_iqr = gap_stats.ile(0.75f) - gap_stats.ile(0.25f); + if (textord_debug_tabfind >= 4) { + tprintf("gap iqr = %g, blob_count=%d, limits=%g,%g\n", + gap_iqr, blob_count, max_width * kMaxLeaderGapFractionOfMax, + min_width * kMaxLeaderGapFractionOfMin); + } + if (gap_iqr < max_width * kMaxLeaderGapFractionOfMax && + gap_iqr < min_width * kMaxLeaderGapFractionOfMin && + blob_count >= kMinLeaderCount) { 
+ // This is stable enough to be called a leader, so check the widths. + // Since leader dashes can join, run a dp cutting algorithm and go + // on the cost. + int offset = static_cast<int>(ceil(gap_iqr * 2)); + int min_step = static_cast<int>(median_gap + median_width + 0.5); + int max_step = min_step + offset; + min_step -= offset; + // Pad the buffer with min_step/2 on each end. + int part_left = bounding_box_.left() - min_step / 2; + part_width += min_step; + DPPoint* projection = new DPPoint[part_width]; + for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { + BLOBNBOX* blob = it.data(); + int left = blob->bounding_box().left(); + int right = blob->bounding_box().right(); + int height = blob->bounding_box().height(); + for (int x = left; x < right; ++x) { + projection[x - part_left].AddLocalCost(height); + } + } + DPPoint* best_end = DPPoint::Solve(min_step, max_step, false, + &DPPoint::CostWithVariance, + part_width, projection); + if (best_end != NULL && best_end->total_cost() < blob_count) { + // Good enough. Call it a leader. + result = true; + for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { + BLOBNBOX* blob = it.data(); + TBOX box = blob->bounding_box(); + // If the first or last blob is spaced too much, don't mark it.
+ if (it.at_first()) { + int gap = it.data_relative(1)->bounding_box().left() - + blob->bounding_box().right(); + if (blob->bounding_box().width() + gap > max_step) { + it.extract(); + continue; + } + } + if (it.at_last()) { + int gap = blob->bounding_box().left() - + it.data_relative(-1)->bounding_box().right(); + if (blob->bounding_box().width() + gap > max_step) { + it.extract(); + break; + } + } + blob->set_region_type(BRT_TEXT); + blob->set_flow(BTFT_LEADER); + } + blob_type_ = BRT_TEXT; + flow_ = BTFT_LEADER; + } else if (textord_debug_tabfind) { + if (best_end == NULL) { + tprintf("No path\n"); + } else { + tprintf("Total cost = %d vs allowed %d\n", + best_end->total_cost(), blob_count); + } + } + delete [] projection; + } + return result; +} + +// Sets all blobs with the partition blob type and flow, but never overwrite +// leader blobs, as we need to be able to identify them later. +void ColPartition::SetBlobTypes() { + if (!owns_blobs()) + return; + BLOBNBOX_C_IT it(&boxes_); + for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { + BLOBNBOX* blob = it.data(); + if (blob->flow() != BTFT_LEADER) + blob->set_flow(flow_); + blob->set_region_type(blob_type_); + ASSERT_HOST(blob->owner() == NULL || blob->owner() == this); + } +} + // Adds this ColPartition to a matching WorkingPartSet if one can be found, // otherwise starts a new one in the appropriate column, ending the previous. void ColPartition::AddToWorkingSet(const ICOORD& bleft, const ICOORD& tright, @@ -869,11 +1389,28 @@ TO_BLOCK* ColPartition::MakeBlock(const ICOORD& bleft, const ICOORD& tright, bb_it.forward()) { BLOBNBOX* bblob = bb_it.extract(); ASSERT_HOST(bblob->owner() == part); + // Assert failure here is caused by arbitrarily changing the partition + // type without also changing the blob type, such as in + // InsertSmallBlobsAsUnknowns.
ASSERT_HOST(bblob->region_type() >= BRT_UNKNOWN); - C_OUTLINE_IT ol_it(bblob->cblob()->out_list()); - ASSERT_HOST(ol_it.data()->pathlength() > 0); - heights.add(bblob->bounding_box().height(), 1); - blob_it.add_after_then_move(bblob); + C_OUTLINE_LIST* outlines = bblob->cblob()->out_list(); + C_OUTLINE_IT ol_it(outlines); + if (outlines->singleton()) { + ASSERT_HOST(ol_it.data()->pathlength() > 0); + heights.add(bblob->bounding_box().height(), 1); + blob_it.add_after_then_move(bblob); + } else { + // This blob has multiple outlines from CJK repair. + // Explode the blob back into individual outlines. + for (;!ol_it.empty(); ol_it.forward()) { + C_OUTLINE* outline = ol_it.extract(); + BLOBNBOX* blob = BLOBNBOX::RealBlob(outline); + heights.add(blob->bounding_box().height(), 1); + blob_it.add_after_then_move(blob); + } + delete bblob->cblob(); + delete bblob; + } } } used_it.add_to_end(part); @@ -887,13 +1424,8 @@ TO_BLOCK* ColPartition::MakeBlock(const ICOORD& bleft, const ICOORD& tright, int block_height = block->bounding_box().height(); if (block_height < line_spacing) line_spacing = block_height; - to_block->line_spacing = line_spacing; - to_block->max_blob_size = block_height + 1; - if (type == PT_VERTICAL_TEXT) { - // This block will get rotated 90 deg clockwise so record the inverse. 
- FCOORD rotation(0.0f, 1.0f); - block->set_re_rotation(rotation); - } + to_block->line_spacing = static_cast<float>(line_spacing); + to_block->max_blob_size = static_cast<float>(block_height + 1); return to_block; } @@ -907,42 +1439,70 @@ ColPartition* ColPartition::ShallowCopy() const { part->median_bottom_ = median_bottom_; part->median_top_ = median_top_; part->median_size_ = median_size_; + part->median_left_ = median_left_; + part->median_right_ = median_right_; + part->median_width_ = median_width_; part->good_width_ = good_width_; part->good_column_ = good_column_; part->left_key_tab_ = left_key_tab_; part->right_key_tab_ = right_key_tab_; part->type_ = type_; + part->flow_ = flow_; part->left_key_ = left_key_; part->right_key_ = right_key_; + part->first_column_ = first_column_; + part->last_column_ = last_column_; + part->owns_blobs_ = false; return part; } +ColPartition* ColPartition::CopyButDontOwnBlobs() { + ColPartition* copy = ShallowCopy(); + copy->set_owns_blobs(false); + BLOBNBOX_C_IT inserter(copy->boxes()); + BLOBNBOX_C_IT traverser(boxes()); + for (traverser.mark_cycle_pt(); !traverser.cycled_list(); traverser.forward()) + inserter.add_after_then_move(traverser.data()); + return copy; +} + // Provides a color for BBGrid to draw the rectangle. // Must be kept in sync with PolyBlockType. ScrollView::Color ColPartition::BoxColor() const { + if (type_ == PT_UNKNOWN) + return BLOBNBOX::TextlineColor(blob_type_, flow_); return POLY_BLOCK::ColorForPolyBlockType(type_); } // Keep in sync with BlobRegionType. -static char kBlobTypes[BRT_COUNT + 1] = "NHRIUVT"; +static char kBlobTypes[BRT_COUNT + 1] = "NHSRIUVT"; // Prints debug information on this.
-void ColPartition::Print() { +void ColPartition::Print() const { int y = MidY(); - tprintf("ColPart:%c(M%d-%c%d-B%d,%d/%d)->(%dB-%d%c-%dM,%d/%d)" - " w-ok=%d, v-ok=%d, type=%d%c, fc=%d, lc=%d, boxes=%d" + tprintf("ColPart:%c(M%d-%c%d-B%d/%d,%d/%d)->(%dB-%d%c-%dM/%d,%d/%d)" + " w-ok=%d, v-ok=%d, type=%d%c%d, fc=%d, lc=%d, boxes=%d" " ts=%d bs=%d ls=%d rs=%d\n", boxes_.empty() ? 'E' : ' ', left_margin_, left_key_tab_ ? 'T' : 'B', LeftAtY(y), - bounding_box_.left(), median_bottom_, bounding_box_.bottom(), + bounding_box_.left(), median_left_, + bounding_box_.bottom(), median_bottom_, bounding_box_.right(), RightAtY(y), right_key_tab_ ? 'T' : 'B', - right_margin_, median_top_, bounding_box_.top(), + right_margin_, median_right_, bounding_box_.top(), median_top_, good_width_, good_column_, type_, - kBlobTypes[blob_type_], + kBlobTypes[blob_type_], flow_, first_column_, last_column_, boxes_.length(), space_above_, space_below_, space_to_left_, space_to_right_); } +// Prints debug information on the colors. +void ColPartition::PrintColors() { + tprintf("Colors:(%d, %d, %d)%d -> (%d, %d, %d)\n", + color1_[COLOR_RED], color1_[COLOR_GREEN], color1_[COLOR_BLUE], + color1_[L_ALPHA_CHANNEL], + color2_[COLOR_RED], color2_[COLOR_GREEN], color2_[COLOR_BLUE]); +} + // Sets the types of all partitions in the run to be the max of the types. void ColPartition::SmoothPartnerRun(int working_set_count) { STATS left_stats(0, working_set_count); @@ -959,39 +1519,87 @@ void ColPartition::SmoothPartnerRun(int working_set_count) { } } type_ = max_type; + // TODO(rays) Either establish that it isn't necessary to set the columns, + // or find a way to do it that does not cause an assert failure in + // AddToWorkingSet. 
+#if 0 first_column_ = left_stats.mode(); last_column_ = right_stats.mode(); if (last_column_ < first_column_) last_column_ = first_column_; +#endif for (partner = SingletonPartner(false); partner != NULL; partner = partner->SingletonPartner(false)) { partner->type_ = max_type; +#if 0 // See TODO above if (column_set_ == partner->column_set_) { partner->first_column_ = first_column_; partner->last_column_ = last_column_; } +#endif } } +// ======= Scenario common to all Refine*Partners* functions ======= +// ColPartitions are aiming to represent textlines, or horizontal slices +// of images, and we are trying to form bi-directional (upper/lower) chains +// of UNIQUE partner ColPartitions that can be made into blocks. +// The ColPartitions have previously been typed (see SetPartitionType) +// according to a combination of the content type and +// how they lie on the columns. We want to chain text into +// groups of a single type, but image ColPartitions may have been typed +// differently in different parts of the image, due to being non-rectangular. +// +// We previously ran a search for upper and lower partners, but there may +// be more than one, and they may be of mixed types, so now we wish to +// refine the partners down to at most one. +// A heading may have multiple partners: +// =============================== +// ======== ========== ========= +// ======== ========== ========= +// but it should be a different type. +// A regular flowing text line may have multiple partners: +// ================== =================== +// ======= ================= =========== +// This could be the start of a pull-out, or it might all be in a single +// column and might be caused by tightly spaced text, bold words, bullets, +// funny punctuation etc, all of which can cause textlines to be split into +// multiple ColPartitions. Pullouts and figure captions should now be different +// types so we can more aggressively merge groups of partners that all sit +// in a single column. 
+// // Cleans up the partners of the given type so that there is at most // one partner. This makes block creation simpler. -void ColPartition::RefinePartners(PolyBlockType type) { +// If get_desperate is true, goes to more desperate merge methods +// to merge flowing text before breaking partnerships. +void ColPartition::RefinePartners(PolyBlockType type, bool get_desperate, + ColPartitionGrid* grid) { if (type_ == type) { - RefinePartnersInternal(true); - RefinePartnersInternal(false); + RefinePartnersInternal(true, get_desperate, grid); + RefinePartnersInternal(false, get_desperate, grid); } else if (type == PT_COUNT) { // This is the final pass. Make sure only the correctly typed // partners surivive, however many there are. RefinePartnersByType(true, &upper_partners_); RefinePartnersByType(false, &lower_partners_); + // It is possible for a merge to have given a partition multiple + // partners again, so the last resort is to use overlap which is + // guaranteed to leave at most one partner left. + if (!upper_partners_.empty() && !upper_partners_.singleton()) + RefinePartnersByOverlap(true, &upper_partners_); + if (!lower_partners_.empty() && !lower_partners_.singleton()) + RefinePartnersByOverlap(false, &lower_partners_); } } ////////////////// PRIVATE CODE ///////////////////////////// // Cleans up the partners above if upper is true, else below. -void ColPartition::RefinePartnersInternal(bool upper) { +// If get_desperate is true, goes to more desperate merge methods +// to merge flowing text before breaking partnerships. +void ColPartition::RefinePartnersInternal(bool upper, bool get_desperate, + ColPartitionGrid* grid) { ColPartition_CLIST* partners = upper ? &upper_partners_ : &lower_partners_; if (!partners->empty() && !partners->singleton()) { RefinePartnersByType(upper, partners); @@ -1001,40 +1609,47 @@ void ColPartition::RefinePartnersInternal(bool upper) { if (!partners->empty() && !partners->singleton()) { // Types didn't fix it. 
Flowing text keeps the one with the longest // sequence of singleton matching partners. All others max overlap. - if (type_ == PT_FLOWING_TEXT) - RefineFlowingTextPartners(upper, partners); - else + if (type_ == PT_FLOWING_TEXT && get_desperate) { + RefineTextPartnersByMerge(upper, false, partners, grid); + if (!partners->empty() && !partners->singleton()) + RefineTextPartnersByMerge(upper, true, partners, grid); + } + // The last resort is to use overlap. + if (!partners->empty() && !partners->singleton()) RefinePartnersByOverlap(upper, partners); } } } } +// Cleans up the partners above if upper is true, else below. // Restricts the partners to only desirable types. For text and BRT_HLINE this // means the same type_ , and for image types it means any image type. void ColPartition::RefinePartnersByType(bool upper, ColPartition_CLIST* partners) { - if (TabFind::WithinTestRegion(2, bounding_box_.left(), - bounding_box_.bottom())) { - tprintf("Refining %s partners by type for:\n", upper ? "Upper" : "Lower"); + bool debug = TabFind::WithinTestRegion(2, bounding_box_.left(), + bounding_box_.bottom()); + if (debug) { + tprintf("Refining %d %s partners by type for:\n", + partners->length(), upper ? "Upper" : "Lower"); Print(); } ColPartition_C_IT it(partners); // Purify text by type. - if (blob_type_ > BRT_UNKNOWN || blob_type_ == BRT_HLINE) { + if (!IsImageType()) { // Keep only partners matching type_. // Exception: PT_VERTICAL_TEXT is allowed to stay with the other // text types if it is the only partner. 
for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { ColPartition* partner = it.data(); - if (partner->type_ != type_ && - (!partners->singleton() || - (type_ != PT_VERTICAL_TEXT && partner->type_ != PT_VERTICAL_TEXT) || - !IsTextType() || !partner->IsTextType())) { + if (partner->type_ != type_) { + if (debug) { + tprintf("Removing partner:"); + partner->Print(); + } partner->RemovePartner(!upper, this); it.extract(); - } else if (TabFind::WithinTestRegion(2, bounding_box_.left(), - bounding_box_.bottom())) { + } else if (debug) { tprintf("Keeping partner:"); partner->Print(); } @@ -1043,12 +1658,14 @@ void ColPartition::RefinePartnersByType(bool upper, // Keep only images with images, but not being fussy about type. for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { ColPartition* partner = it.data(); - if (partner->blob_type_ > BRT_UNKNOWN || - partner->blob_type_ == BRT_HLINE) { + if (!partner->IsImageType()) { + if (debug) { + tprintf("Removing partner:"); + partner->Print(); + } partner->RemovePartner(!upper, this); it.extract(); - } else if (TabFind::WithinTestRegion(2, bounding_box_.left(), - bounding_box_.bottom())) { + } else if (debug) { tprintf("Keeping partner:"); partner->Print(); } @@ -1056,6 +1673,7 @@ void ColPartition::RefinePartnersByType(bool upper, } } +// Cleans up the partners above if upper is true, else below. // Remove transitive partnerships: this<->a, and a<->b and this<->b. // Gets rid of this<->b, leaving a clean chain. // Also if we have this<->a and a<->this, then gets rid of this<->a, as @@ -1101,50 +1719,73 @@ void ColPartition::RefinePartnerShortcuts(bool upper, } while (done_any && !partners->empty() && !partners->singleton()); } -// Keeps the partner with the longest sequence of singleton matching partners. -// Converts all others to pullout. 
-void ColPartition::RefineFlowingTextPartners(bool upper, - ColPartition_CLIST* partners) { - ColPartition_C_IT it(partners); - ColPartition* best_partner = it.data(); - // Nasty iterative algorithm. - int depth = 1; - int survivors = 0; - do { - survivors = 0; - for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { - ColPartition* partner = it.data(); - // See if it survives a chase to depth levels. - for (int i = 0; i < depth && partner != NULL; ++i) { - partner = partner->SingletonPartner(upper); - if (partner != NULL && partner->type_ != PT_FLOWING_TEXT) - partner = NULL; - } - if (partner != NULL) { - ++survivors; - best_partner = it.data(); - } +// Cleans up the partners above if upper is true, else below. +// If multiple text partners can be merged, (with each other, NOT with this), +// then do so. +// If desperate is true, then an increase in overlap with the merge is +// allowed. If the overlap increases, then the desperately_merged_ flag +// is set, indicating that the textlines probably need to be regenerated +// by aggressive line fitting/splitting, as there are probably vertically +// joined blobs that cross textlines. +void ColPartition::RefineTextPartnersByMerge(bool upper, bool desperate, + ColPartition_CLIST* partners, + ColPartitionGrid* grid) { + bool debug = TabFind::WithinTestRegion(2, bounding_box_.left(), + bounding_box_.bottom()); + if (debug) { + tprintf("Refining %d %s partners by merge for:\n", + partners->length(), upper ? "Upper" : "Lower"); + Print(); + } + while (!partners->empty() && !partners->singleton()) { + // Absorb will mess up the iterators, so we have to merge one partition + // at a time and rebuild the iterators each time. + ColPartition_C_IT it(partners); + ColPartition* part = it.data(); + // Gather a list of merge candidates, from the list of partners, that + // are all in the same single column. See general scenario comment above. 
+ ColPartition_CLIST candidates; + ColPartition_C_IT cand_it(&candidates); + for (it.forward(); !it.at_first(); it.forward()) { + ColPartition* candidate = it.data(); + if (part->first_column_ == candidate->last_column_ && + part->last_column_ == candidate->first_column_) + cand_it.add_after_then_move(it.data()); } - ++depth; - } while (survivors > 1 && depth <= kMaxPartnerDepth); - // Keep only the best partner. - for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { - ColPartition* partner = it.data(); - if (partner != best_partner) { - partner->RemovePartner(!upper, this); - it.extract(); - // Change the types of partner to be PT_PULLOUT_TEXT. - while (partner != NULL && partner->type_ == PT_FLOWING_TEXT) { - partner->type_ = PT_PULLOUT_TEXT; - partner = partner->SingletonPartner(upper); + int overlap_increase; + ColPartition* candidate = grid->BestMergeCandidate(part, &candidates, debug, + NULL, &overlap_increase); + if (candidate != NULL && (overlap_increase <= 0 || desperate)) { + if (debug) { + tprintf("Merging:hoverlap=%d, voverlap=%d, OLI=%d\n", + part->HOverlap(*candidate), part->VOverlap(*candidate), + overlap_increase); } + // Remove before merge and re-insert to keep the integrity of the grid. + grid->RemoveBBox(candidate); + grid->RemoveBBox(part); + part->Absorb(candidate, NULL); + // We modified the box of part, so re-insert it into the grid. + grid->InsertBBox(true, true, part); + if (overlap_increase > 0) + part->desperately_merged_ = true; + } else { + break; // Can't merge. } } } +// Cleans up the partners above if upper is true, else below. // Keep the partner with the biggest overlap. void ColPartition::RefinePartnersByOverlap(bool upper, ColPartition_CLIST* partners) { + bool debug = TabFind::WithinTestRegion(2, bounding_box_.left(), + bounding_box_.bottom()); + if (debug) { + tprintf("Refining %d %s partners by overlap for:\n", + partners->length(), upper ? 
"Upper" : "Lower"); + Print(); + } ColPartition_C_IT it(partners); ColPartition* best_partner = it.data(); // Find the partner with the best overlap. @@ -1162,6 +1803,10 @@ void ColPartition::RefinePartnersByOverlap(bool upper, for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { ColPartition* partner = it.data(); if (partner != best_partner) { + if (debug) { + tprintf("Removing partner:"); + partner->Print(); + } partner->RemovePartner(!upper, this); it.extract(); } @@ -1218,6 +1863,21 @@ static int MedianSpacing(int page_height, ColPartition_IT it) { return static_cast(stats.median() + 0.5); } +// Returns true if this column partition is in the same column as +// part. This function will only work after the SetPartitionType function +// has been called on both column partitions. This is useful for +// doing a SideSearch when you want things in the same page column. +// +// Currently called by the table detection code to identify if potential table +// partitions exist in the same column. +bool ColPartition::IsInSameColumnAs(const ColPartition& part) const { + // Overlap does not occur when last < part.first or first > part.last. + // In other words, one is completely to the side of the other. + // This is just DeMorgan's law applied to that so the function returns true. + return (last_column_ >= part.first_column_) && + (first_column_ <= part.last_column_); +} + // Smoothes the spacings in the list into groups of equal linespacing. // resolution is the resolution of the original image, used as a basis // for thresholds in change of spacing. page_height is in pixels. @@ -1532,4 +2192,3 @@ void ColPartition::RightEdgeRun(ColPartition_IT* part_it, } } // namespace tesseract. - diff --git a/textord/colpartition.h b/textord/colpartition.h index 84c7841a4a..91f8b59ec7 100644 --- a/textord/colpartition.h +++ b/textord/colpartition.h @@ -32,12 +32,26 @@ namespace tesseract { +// Number of colors in the color1, color2 arrays. 
+const int kRGBRMSColors = 4; class ColPartition; class ColPartitionSet; +class ColPartitionGrid; class WorkingPartSet; class WorkingPartSet_LIST; +// An enum to indicate how a partition sits on the columns. +// The order of flowing/heading/pullout must be kept consistent with +// PolyBlockType. +enum ColumnSpanningType { + CST_NOISE, // Strictly between columns. + CST_FLOWING, // Strictly within a single column. + CST_HEADING, // Spans multiple columns. + CST_PULLOUT, // Touches multiple columns, but doesn't span them. + CST_COUNT // Number of entries. +}; + ELIST2IZEH(ColPartition) CLISTIZEH(ColPartition) @@ -63,11 +77,13 @@ class ColPartition : public ELIST2_LINK { */ ColPartition(BlobRegionType blob_type, const ICOORD& vertical); /** - * Constructs a fake ColPartition with no BLOBNBOXes. - * Used for making horizontal line ColPartitions and types it accordingly. + * Constructs a fake ColPartition with no BLOBNBOXes to represent a + * horizontal or vertical line, given a type and a bounding box. */ - ColPartition(const ICOORD& vertical, - int left, int bottom, int right, int top); + static ColPartition* MakeLinePartition(BlobRegionType blob_type, + const ICOORD& vertical, + int left, int bottom, + int right, int top); // Constructs and returns a fake ColPartition with a single fake BLOBNBOX, // all made from a single TBOX. 
@@ -103,12 +119,30 @@ class ColPartition : public ELIST2_LINK { int median_size() const { return median_size_; } + void set_median_size(int size) { + median_size_ = size; + } + int median_width() const { + return median_width_; + } + void set_median_width(int width) { + median_width_ = width; + } BlobRegionType blob_type() const { return blob_type_; } void set_blob_type(BlobRegionType t) { blob_type_ = t; } + BlobTextFlowType flow() const { + return flow_; + } + void set_flow(BlobTextFlowType f) { + flow_ = f; + } + int good_blob_score() const { + return good_blob_score_; + } bool good_width() const { return good_width_; } @@ -136,6 +170,12 @@ class ColPartition : public ELIST2_LINK { BLOBNBOX_CLIST* boxes() { return &boxes_; } + int boxes_count() const { + return boxes_.length(); + } + void set_vertical(const ICOORD& v) { + vertical_ = v; + } ColPartition_CLIST* upper_partners() { return &upper_partners_; } @@ -145,6 +185,9 @@ class ColPartition : public ELIST2_LINK { void set_working_set(WorkingPartSet* working_set) { working_set_ = working_set; } + bool desperately_merged() const { + return desperately_merged_; + } ColPartitionSet* column_set() const { return column_set_; } @@ -216,6 +259,21 @@ class ColPartition : public ELIST2_LINK { void set_space_to_right(int space) { space_to_right_ = space; } + uinT8* color1() { + return color1_; + } + uinT8* color2() { + return color2_; + } + bool owns_blobs() const { + return owns_blobs_; + } + void set_owns_blobs(bool owns_blobs) { + // Do NOT change ownership flag when there are blobs in the list. + // Immediately set the ownership flag when creating copies. + ASSERT_HOST(boxes_.empty()); + owns_blobs_ = owns_blobs; + } // Inline quasi-accessors that require some computation. @@ -227,6 +285,10 @@ class ColPartition : public ELIST2_LINK { int MedianY() const { return (median_top_ + median_bottom_) / 2; } + // Returns the middle x-coord of the bounding box. 
+ int MidX() const { + return (bounding_box_.left() + bounding_box_.right()) / 2; + } // Returns the sort key at any given x,y. int SortKey(int x, int y) const { return TabVector::SortKey(vertical_, x, y); @@ -269,9 +331,13 @@ class ColPartition : public ELIST2_LINK { return LeftAtY(y) - 1 <= x && x <= RightAtY(y) + 1; } // Returns true if there are no blobs in the list. - bool IsEmpty() { + bool IsEmpty() const { return boxes_.empty(); } + // Returns true if there is a single blob in the list. + bool IsSingleton() const { + return boxes_.singleton(); + } // Returns true if this and other overlap horizontally by bounding box. bool HOverlaps(const ColPartition& other) const { return bounding_box_.x_overlap(other.bounding_box_); @@ -285,10 +351,17 @@ class ColPartition : public ELIST2_LINK { right_margin_ >= other.bounding_box_.right(); } // Returns the vertical overlap (by median) of this and other. + // WARNING! Only makes sense on horizontal partitions! int VOverlap(const ColPartition& other) const { return MIN(median_top_, other.median_top_) - MAX(median_bottom_, other.median_bottom_); } + // Returns the horizontal overlap (by median) of this and other. + // WARNING! Only makes sense on vertical partitions! + int HOverlap(const ColPartition& other) const { + return MIN(median_right_, other.median_right_) - + MAX(median_left_, other.median_left_); + } // Returns true if this and other overlap significantly vertically. bool VOverlaps(const ColPartition& other) const { int overlap = VOverlap(other); @@ -297,25 +370,48 @@ class ColPartition : public ELIST2_LINK { return overlap * 3 > height; } // Returns true if the region types (aligned_text_) match. + // Lines never match anything, as they should never be merged or chained. 
bool TypesMatch(const ColPartition& other) const { return TypesMatch(blob_type_, other.blob_type_); } static bool TypesMatch(BlobRegionType type1, BlobRegionType type2) { - return type1 == type2 || - (type1 < BRT_UNKNOWN && type2 < BRT_UNKNOWN); + return (type1 == type2 || type1 == BRT_UNKNOWN || type2 == BRT_UNKNOWN) && + !BLOBNBOX::IsLineType(type1) && !BLOBNBOX::IsLineType(type2); } // Returns true if partitions is of horizontal line type - bool IsLineType() { - return POLY_BLOCK::IsLineType(type_); + bool IsLineType() const { + return PTIsLineType(type_); } // Returns true if partitions is of image type - bool IsImageType() { - return POLY_BLOCK::IsImageType(type_); + bool IsImageType() const { + return PTIsImageType(type_); } // Returns true if partitions is of text type - bool IsTextType() { - return POLY_BLOCK::IsTextType(type_); + bool IsTextType() const { + return PTIsTextType(type_); + } + // Returns true if the partition is of an exclusively vertical type. + bool IsVerticalType() const { + return blob_type_ == BRT_VERT_TEXT || blob_type_ == BRT_VLINE; + } + // Returns true if the partition is of a definite horizontal type. + bool IsHorizontalType() const { + return blob_type_ == BRT_TEXT || blob_type_ == BRT_HLINE; + } + // Returns true is the partition is of a type that cannot be merged. + bool IsUnMergeableType() const { + return BLOBNBOX::UnMergeableType(blob_type_) || type_ == PT_NOISE; + } + // Returns true if this partition is a vertical line + // TODO(nbeato): Use PartitionType enum when Ray's code is submitted. + bool IsVerticalLine() const { + return IsVerticalType() && IsLineType(); + } + // Returns true if this partition is a horizontal line + // TODO(nbeato): Use PartitionType enum when Ray's code is submitted. + bool IsHorizontalLine() const { + return IsHorizontalType() && IsLineType(); } // Adds the given box to the partition, updating the partition bounds. 
@@ -323,10 +419,24 @@ class ColPartition : public ELIST2_LINK { // recorded twice, and the boxes are kept in increasing left position. void AddBox(BLOBNBOX* box); + // Removes the given box from the partition, updating the bounds. + void RemoveBox(BLOBNBOX* box); + + // Returns the tallest box in the partition, as measured perpendicular to the + // presumed flow of text. + BLOBNBOX* BiggestBox(); + + // Returns the bounding box excluding the given box. + TBOX BoundsWithoutBox(BLOBNBOX* box); + // Claims the boxes in the boxes_list by marking them with a this owner // pointer. If a box is already owned, then run Unique on it. void ClaimBoxes(WidthCallback* cb); + // NULL the owner of the blobs in this partition, so they can be deleted + // independently of the ColPartition. + void DisownBoxes(); + // Delete the boxes that this partition owns. void DeleteBoxes(); @@ -341,6 +451,21 @@ class ColPartition : public ELIST2_LINK { // Returns true if the left and right edges are approximately equal. bool MatchingColumns(const ColPartition& other) const; + // Returns true if the sizes match for two text partitions, + // taking orientation into account + bool MatchingSizes(const ColPartition& other) const; + + // Returns true if there is no tabstop violation in merging this and other. + bool ConfirmNoTabViolation(const ColPartition& other) const; + + // Returns true if other has a similar stroke width to this. + bool MatchingStrokeWidth(const ColPartition& other, + double fractional_tolerance, + double constant_tolerance) const; + // Returns true if candidate is an acceptable diacritic base char merge + // with this as the diacritic. + bool OKDiacriticMerge(const ColPartition& candidate, bool debug) const; + // Sets the sort key using either the tab vector, or the bounding box if // the tab vector is NULL. If the tab_vector lies inside the bounding_box, // use the edge of the box as a key any way. 
@@ -352,6 +477,11 @@ class ColPartition : public ELIST2_LINK { void CopyLeftTab(const ColPartition& src, bool take_box); void CopyRightTab(const ColPartition& src, bool take_box); + // Returns the left rule line x coord of the leftmost blob. + int LeftBlobRule() const; + // Returns the right rule line x coord of the rightmost blob. + int RightBlobRule() const; + // Add a partner above if upper, otherwise below. // Add them uniquely and keep the list sorted by box left. // Partnerships are added symmetrically to partner and this. @@ -369,6 +499,22 @@ class ColPartition : public ELIST2_LINK { // Shares out any common boxes amongst the partitions, ensuring that no // box stays in both. Returns true if anything was done. bool Unique(ColPartition* other, WidthCallback* cb); + // Returns true if the overlap between this and the merged pair of + // merge candidates is sufficiently trivial to be allowed. + // The merged box can graze the edge of this by the ok_box_overlap + // if that exceeds the margin to the median top and bottom. + bool OKMergeOverlap(const ColPartition& merge1, const ColPartition& merge2, + int ok_box_overlap, bool debug); + + // Find the blob at which to split this to minimize the overlap with the + // given box. Returns the first blob to go in the second partition. + BLOBNBOX* OverlapSplitBlob(const TBOX& box); + + // Split this partition keeping the first half in this and returning + // the second half. + // Splits by putting the split_blob and the blobs that follow + // in the second half, and the rest in the first half. + ColPartition* SplitAtBlob(BLOBNBOX* split_blob); // Splits this partition at the given x coordinate, returning the right // half and keeping the left half in this. @@ -377,15 +523,38 @@ class ColPartition : public ELIST2_LINK { // Recalculates all the coordinate limits of the partition. void ComputeLimits(); + // Returns the number of boxes that overlap the given box. 
+ int CountOverlappingBoxes(const TBOX& box); + // Computes and sets the type_, first_column_, last_column_ and column_set_. - void SetPartitionType(ColPartitionSet* columns); + // resolution refers to the ppi resolution of the image. + void SetPartitionType(int resolution, ColPartitionSet* columns); + + // Returns the PartitionType from the current BlobRegionType and a column + // flow spanning type ColumnSpanningType, generated by + // ColPartitionSet::SpanningType, that indicates how the partition sits + // in the columns. + PolyBlockType PartitionType(ColumnSpanningType flow) const; // Returns the first and last column touched by this partition. - void ColumnRange(ColPartitionSet* columns, int* first_col, int* last_col); + // resolution refers to the ppi resolution of the image. + void ColumnRange(int resolution, ColPartitionSet* columns, + int* first_col, int* last_col); // Sets the internal flags good_width_ and good_column_. void SetColumnGoodness(WidthCallback* cb); + // Determines whether the blobs in this partition mostly represent + // a leader (fixed pitch sequence) and sets the member blobs accordingly. + // Note that height is assumed to have been tested elsewhere, and that this + // function will find most fixed-pitch text as leader without a height filter. + // Leader detection is limited to sequences of identical width objects, + // such as .... or ----, so patterns, such as .-.-.-.-. will not be found. + bool MarkAsLeaderIfMonospaced(); + + // Sets all blobs with the partition blob type and flow. + void SetBlobTypes(); + // Adds this ColPartition to a matching WorkingPartSet if one can be found, // otherwise starts a new one in the appropriate column, ending the previous. void AddToWorkingSet(const ICOORD& bleft, const ICOORD& tright, @@ -414,19 +583,42 @@ class ColPartition : public ELIST2_LINK { // Returns a copy of everything except the list of boxes. The resulting // ColPartition is only suitable for keeping in a column candidate list. 
ColPartition* ShallowCopy() const; + // Returns a copy of everything with a shallow copy of the blobs. + // The blobs are still owned by their original parent, so they are + // treated as read-only. + ColPartition* CopyButDontOwnBlobs(); // Provides a color for BBGrid to draw the rectangle. ScrollView::Color BoxColor() const; // Prints debug information on this. - void Print(); + void Print() const; + // Prints debug information on the colors. + void PrintColors(); // Sets the types of all partitions in the run to be the max of the types. void SmoothPartnerRun(int working_set_count); // Cleans up the partners of the given type so that there is at most // one partner. This makes block creation simpler. - void RefinePartners(PolyBlockType type); + // If get_desperate is true, goes to more desperate merge methods + // to merge flowing text before breaking partnerships. + void RefinePartners(PolyBlockType type, bool get_desparate, + ColPartitionGrid* grid); + + // Returns true if this column partition is in the same column as + // part. This function will only work after the SetPartitionType function + // has been called on both column partitions. This is useful for + // doing a SideSearch when you want things in the same page column. + bool IsInSameColumnAs(const ColPartition& part) const; + + // Sets the column bounds. Primarily used in testing. + void set_first_column(int column) { + first_column_ = column; + } + void set_last_column(int column) { + last_column_ = column; + } private: // enum to refer to the entries in a neigbourhood of lines. @@ -442,7 +634,10 @@ class ColPartition : public ELIST2_LINK { }; // Cleans up the partners above if upper is true, else below. - void RefinePartnersInternal(bool upper); + // If get_desperate is true, goes to more desperate merge methods + // to merge flowing text before breaking partnerships. 
+ void RefinePartnersInternal(bool upper, bool get_desperate, + ColPartitionGrid* grid); // Restricts the partners to only desirable types. For text and BRT_HLINE this // means the same type_ , and for image types it means any image type. void RefinePartnersByType(bool upper, ColPartition_CLIST* partners); @@ -451,9 +646,15 @@ class ColPartition : public ELIST2_LINK { // Also if we have this<->a and a<->this, then gets rid of this<->a, as // this has multiple partners. void RefinePartnerShortcuts(bool upper, ColPartition_CLIST* partners); - // Keeps the partner with the longest sequence of singleton matching partners. - // Converts all others to pullout. - void RefineFlowingTextPartners(bool upper, ColPartition_CLIST* partners); + // If multiple text partners can be merged, then do so. + // If desperate is true, then an increase in overlap with the merge is + // allowed. If the overlap increases, then the desperately_merged_ flag + // is set, indicating that the textlines probably need to be regenerated + // by aggressive line fitting/splitting, as there are probably vertically + // joined blobs that cross textlines. + void RefineTextPartnersByMerge(bool upper, bool desperate, + ColPartition_CLIST* partners, + ColPartitionGrid* grid); // Keep the partner with the biggest overlap. void RefinePartnersByOverlap(bool upper, ColPartition_CLIST* partners); @@ -528,9 +729,18 @@ class ColPartition : public ELIST2_LINK { int median_bottom_; int median_top_; // Median height of blobs in this partition. + // TODO(rays) rename median_height_. int median_size_; + // Median left and right of blobs in this partition. + int median_left_; + int median_right_; + // Median width of blobs in this partition. + int median_width_; // blob_region_type_ for the blobs in this partition. BlobRegionType blob_type_; + BlobTextFlowType flow_; // Quality of text flow. + // Total of GoodTextBlob results for all blobs in the partition. 
+ int good_blob_score_; // True if this partition has a common width. bool good_width_; // True if this is a good column candidate. @@ -562,6 +772,9 @@ class ColPartition : public ELIST2_LINK { // True when the partition's ownership has been taken from the grid and // placed in a working set, or, after that, in the good_parts_ list. bool block_owned_; + // Flag to indicate that this partition was subjected to a desperate merge, + // and therefore the textlines need rebuilding. + bool desperately_merged_; // The first and last column that this partition applies to. // Flowing partitions (see type_) will have an equal first and last value // of the form 2n + 1, where n is the zero-based index into the partitions @@ -592,15 +805,16 @@ class ColPartition : public ELIST2_LINK { int space_below_; // Distance from nearest_neighbor_below int space_to_left_; // Distance from the left edge of the column int space_to_right_; // Distance from the right edge of the column + // Color foreground/background data. + uinT8 color1_[kRGBRMSColors]; + uinT8 color2_[kRGBRMSColors]; + bool owns_blobs_; // Does the partition own its blobs? }; // Typedef it now in case it becomes a class later. -typedef BBGrid ColPartitionGrid; typedef GridSearch ColPartitionGridSearch; + ColPartition_CLIST, + ColPartition_C_IT> ColPartitionGridSearch; } // namespace tesseract. diff --git a/textord/colpartitiongrid.cpp b/textord/colpartitiongrid.cpp new file mode 100644 index 0000000000..b8a03b0bc6 --- /dev/null +++ b/textord/colpartitiongrid.cpp @@ -0,0 +1,680 @@ +/////////////////////////////////////////////////////////////////////// +// File: colpartitionrid.h +// Description: Class collecting code that acts on a BBGrid of ColPartitions. +// Author: Ray Smith +// Created: Mon Oct 05 08:42:01 PDT 2009 +// +// (C) Copyright 2009, Google Inc. +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +/////////////////////////////////////////////////////////////////////// + +#include "colpartitiongrid.h" +#include "colpartitionset.h" + +namespace tesseract { + +BOOL_VAR(textord_tabfind_show_color_fit, false, "Show stroke widths"); + +// Maximum number of lines in a credible figure caption. +const int kMaxCaptionLines = 7; +// Min ratio between biggest and smallest gap to bound a caption. +const double kMinCaptionGapRatio = 2.0; +// Min ratio between biggest gap and mean line height to bound a caption. +const double kMinCaptionGapHeightRatio = 0.5; +// Min fraction of ColPartition height to be overlapping for margin purposes. +const double kMarginOverlapFraction = 0.25; +// Fraction of gridsize to allow arbitrary overlap between partitions. +const double kTinyEnoughTextlineOverlapFraction = 0.25; +// Max vertical distance of neighbouring ColPartition as a multiple of +// partition height for it to be a partner. +// TODO(rays) determine and write here why a larger number doesn't work well. +const double kMaxPartitionSpacing = 1.75; + +ColPartitionGrid::ColPartitionGrid() { +} +ColPartitionGrid::ColPartitionGrid(int gridsize, + const ICOORD& bleft, const ICOORD& tright) + : BBGrid(gridsize, + bleft, tright) { +} + +ColPartitionGrid::~ColPartitionGrid() { +} + +// Handles a click event in a display window. +void ColPartitionGrid::HandleClick(int x, int y) { + BBGrid::HandleClick(x, y); + // Run a radial search for partitions that overlap. 
+ ColPartitionGridSearch radsearch(this); + radsearch.SetUniqueMode(true); + radsearch.StartRadSearch(x, y, 1); + ColPartition* neighbour; + FCOORD click(x, y); + while ((neighbour = radsearch.NextRadSearch()) != NULL) { + TBOX nbox = neighbour->bounding_box(); + if (nbox.contains(click)) { + tprintf("Block box:"); + neighbour->bounding_box().print(); + neighbour->Print(); + } + } +} + +// Returns true if the given part and merge candidate might believably +// be part of a single text line according to the default rules. +// In general we only want to merge partitions that look like they +// are on the same text line, ie their median limits overlap, but we have +// to make exceptions for diacritics and stray punctuation. +static bool OKMergeCandidate(const ColPartition* part, + const ColPartition* candidate, + bool debug) { + const TBOX& part_box = part->bounding_box(); + if (candidate == part) + return false; // Ignore itself. + if (!part->TypesMatch(*candidate) || candidate->IsUnMergeableType()) + return false; // Don't mix inappropriate types. + + const TBOX& c_box = candidate->bounding_box(); + if (debug) { + tprintf("Examining merge candidate:"); + c_box.print(); + } + // Candidates must be within a reasonable distance. + if (candidate->IsVerticalType() || part->IsVerticalType()) { + int h_dist = -part->HOverlap(*candidate); + if (h_dist >= MAX(part_box.width(), c_box.width()) / 2) { + if (debug) + tprintf("Too far away: h_dist = %d\n", h_dist); + return false; + } + } else { + // Coarse filter by vertical distance between partitions. + int v_dist = -part->VOverlap(*candidate); + if (v_dist >= MAX(part_box.height(), c_box.height()) / 2) { + if (debug) + tprintf("Too far away: v_dist = %d\n", v_dist); + return false; + } + // Candidates must either overlap in median y, + // or part or candidate must be an acceptable diacritic. 
+ if (!part->VOverlaps(*candidate) && + !part->OKDiacriticMerge(*candidate, debug) && + !candidate->OKDiacriticMerge(*part, debug)) { + if (debug) + tprintf("Candidate fails overlap and diacritic tests!\n"); + return false; + } + } + return true; +} + +// Helper function to compute the increase in overlap of the parts list of +// Colpartitions with the combination of merge1 and merge2, compared to +// the overlap with them uncombined. +// An overlap is not counted if passes the OKMergeOverlap test with ok_overlap +// as the pixel overlap limit. merge1 and merge2 must both be non-NULL. +static int IncreaseInOverlap(const ColPartition* merge1, + const ColPartition* merge2, + int ok_overlap, + ColPartition_CLIST* parts) { + ASSERT_HOST(merge1 != NULL && merge2 != NULL); + int total_area = 0; + ColPartition_C_IT it(parts); + TBOX merged_box(merge1->bounding_box()); + merged_box += merge2->bounding_box(); + for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { + ColPartition* part = it.data(); + if (part == merge1 || part == merge2) + continue; + TBOX part_box = part->bounding_box(); + // Compute the overlap of the merged box with part. + int overlap_area = part_box.intersection(merged_box).area(); + if (overlap_area > 0 && !part->OKMergeOverlap(*merge1, *merge2, + ok_overlap, false)) { + total_area += overlap_area; + // Subtract the overlap of merge1 and merge2 individually. + overlap_area = part_box.intersection(merge1->bounding_box()).area(); + if (overlap_area > 0) + total_area -= overlap_area; + TBOX intersection_box = part_box.intersection(merge2->bounding_box()); + overlap_area = intersection_box.area(); + if (overlap_area > 0) { + total_area -= overlap_area; + // Add back the 3-way area. 
+ intersection_box -= merge1->bounding_box(); + overlap_area = intersection_box.area(); + if (overlap_area > 0) + total_area += overlap_area; + } + } + } + return total_area; +} + +// Helper function to test that each partition in candidates is either a +// good diacritic merge with part or an OK merge candidate with all others +// in the candidates list. +// ASCII Art Scenario: +// We sometimes get text such as "join-this" where the - is actually a long +// dash culled from a standard set of extra characters that don't match the +// font of the text. This makes its strokewidth not match and forms a broken +// set of 3 partitions for "join", "-" and "this" and the dash may slightly +// overlap BOTH words. +// ------- ------- +// | ==== | +// ------- ------- +// The standard merge rule: "you can merge 2 partitions as long as there is +// no increase in overlap elsewhere" fails miserably here. Merge any pair +// of partitions and the combined box overlaps more with the third than +// before. To allow the merge, we need to consider whether it is safe to +// merge everything, without merging separate text lines. For that we need +// everything to be an OKMergeCandidate (which is supposed to prevent +// separate text lines merging), but this is hard for diacritics to satisfy, +// so an alternative to being OKMergeCandidate with everything is to be an +// OKDiacriticMerge with part as the base character. 
+static bool TestCompatibleCandidates(const ColPartition& part, bool debug, + ColPartition_CLIST* candidates) { + ColPartition_C_IT it(candidates); + for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { + ColPartition* candidate = it.data(); + if (!candidate->OKDiacriticMerge(part, false)) { + ColPartition_C_IT it2(it); + for (it2.mark_cycle_pt(); !it2.cycled_list(); it2.forward()) { + ColPartition* candidate2 = it2.data(); + if (candidate2 != candidate && + !OKMergeCandidate(candidate, candidate2, false)) { + if (debug) { + tprintf("NC overlap failed:Candidate:"); + candidate2->bounding_box().print(); + tprintf("fails to be a good merge with:"); + candidate->bounding_box().print(); + } + return false; + } + } + } + } + return true; +} + +// Finds all the ColPartitions in the grid that overlap with the given +// box and returns them SortByBoxLeft(ed) and uniqued in the given list. +// Any partition equal to not_this (may be NULL) is excluded. +void ColPartitionGrid::FindOverlappingPartitions(const TBOX& box, + const ColPartition* not_this, + ColPartition_CLIST* parts) { + ColPartitionGridSearch rsearch(this); + rsearch.StartRectSearch(box); + ColPartition* part; + while ((part = rsearch.NextRectSearch()) != NULL) { + if (part != not_this) + parts->add_sorted(SortByBoxLeft, true, part); + } +} + +// Finds and returns the best candidate ColPartition to merge with part, +// selected from the candidates list, based on the minimum increase in +// pairwise overlap among all the partitions overlapped by the combined box. +// If overlap_increase is not NULL then it returns the increase in overlap +// that would result from the merge. +// confirm_cb is a permanent callback that (if non-null) will be used to +// confirm the validity of a proposed merge candidate before selecting it. +// +// ======HOW MERGING WORKS====== +// The problem: +// We want to merge all the parts of a textline together, but avoid merging +// separate textlines. 
Diacritics, i dots, punctuation, and broken characters +// are examples of small bits that need merging with the main textline. +// Drop-caps and descenders in one line that touch ascenders in the one below +// are examples of cases where we don't want to merge. +// +// The solution: +// Merges that increase overlap among other partitions are generally bad. +// Those that don't increase overlap (much) and minimize the total area +// seem to be good. +// +// Ascii art example: +// The text: +// groggy descenders +// minimum ascenders +// The boxes: The === represents a small box near or overlapping the lower box. +// ----------------- +// | | +// ----------------- +// -===------------- +// | | +// ----------------- +// In considering what to do with the small === box, we find the 2 larger +// boxes as neighbours and possible merge candidates, but merging with the +// upper box increases overlap with the lower box, whereas merging with the +// lower box does not increase overlap. +// If the small === box didn't overlap either to start with, total area +// would be minimized by merging with the nearer (lower) box. +// +// This is a simple example. In reality, we have to allow some increase +// in overlap, or tightly spaced text would end up in bits. +ColPartition* ColPartitionGrid::BestMergeCandidate( + const ColPartition* part, ColPartition_CLIST* candidates, bool debug, + TessResultCallback2* confirm_cb, + int* overlap_increase) { + if (overlap_increase != NULL) + *overlap_increase = 0; + if (candidates->empty()) + return NULL; + int ok_overlap = + static_cast(kTinyEnoughTextlineOverlapFraction * gridsize() + 0.5); + // The best neighbour to merge with is the one that causes least + // total pairwise overlap among all the neighbours. + // If more than one offers the same total overlap, choose the one + // with the least total area. 
+ const TBOX& part_box = part->bounding_box(); + ColPartition_C_IT it(candidates); + ColPartition* best_candidate = NULL; + // Find the total combined box of all candidates and the original. + TBOX full_box(part_box); + for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { + ColPartition* candidate = it.data(); + full_box += candidate->bounding_box(); + } + // Keep valid neighbours in a list. + ColPartition_CLIST neighbours; + // Now run a rect search of the merged box for overlapping neighbours, as + // we need anything that might be overlapped by the merged box. + FindOverlappingPartitions(full_box, part, &neighbours); + if (debug) { + tprintf("Finding best merge candidate from %d, %d neighbours for box:", + candidates->length(), neighbours.length()); + part_box.print(); + } + // If the best increase in overlap is positive, then we also check the + // worst non-candidate overlap. This catches the case of multiple good + // candidates that overlap each other when merged. If the worst + // non-candidate overlap is better than the best overlap, then return + // the worst non-candidate overlap instead. 
+ ColPartition_CLIST non_candidate_neighbours; + non_candidate_neighbours.set_subtract(SortByBoxLeft, true, + &neighbours, candidates); + int worst_nc_increase = 0; + int best_increase = MAX_INT32; + int best_area = 0; + for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { + ColPartition* candidate = it.data(); + if (confirm_cb != NULL && !confirm_cb->Run(part, candidate)) { + if (debug) { + tprintf("Candidate not confirmed:"); + candidate->bounding_box().print(); + } + continue; + } + int increase = IncreaseInOverlap(part, candidate, ok_overlap, &neighbours); + const TBOX& cand_box = candidate->bounding_box(); + if (best_candidate == NULL || increase < best_increase) { + best_candidate = candidate; + best_increase = increase; + best_area = cand_box.bounding_union(part_box).area() - cand_box.area(); + if (debug) { + tprintf("New best merge candidate has increase %d, area %d, over box:", + increase, best_area); + full_box.print(); + candidate->Print(); + } + } else if (increase == best_increase) { + int area = cand_box.bounding_union(part_box).area() - cand_box.area(); + if (area < best_area) { + best_area = area; + best_candidate = candidate; + } + } + increase = IncreaseInOverlap(part, candidate, ok_overlap, + &non_candidate_neighbours); + if (increase > worst_nc_increase) + worst_nc_increase = increase; + } + if (best_increase > 0) { + // If the worst non-candidate increase is less than the best increase + // including the candidates, then all the candidates can merge together + // and the increase in outside overlap would be less, so use that result, + // but only if each candidate is either a good diacritic merge with part, + // or an ok merge candidate with all the others. + // See TestCompatibleCandidates for more explanation and a picture. 
+ if (worst_nc_increase < best_increase && + TestCompatibleCandidates(*part, debug, candidates)) { + best_increase = worst_nc_increase; + } + } + if (overlap_increase != NULL) + *overlap_increase = best_increase; + return best_candidate; +} + +// Improves the margins of the ColPartitions in the grid by calling +// FindPartitionMargins on each. +// best_columns, which may be NULL, is an array of pointers indicating the +// column set at each y-coordinate in the grid. +// best_columns is usually the best_columns_ member of ColumnFinder. +void ColPartitionGrid::GridFindMargins(ColPartitionSet** best_columns) { + // Iterate the ColPartitions in the grid. + ColPartitionGridSearch gsearch(this); + gsearch.StartFullSearch(); + ColPartition* part; + while ((part = gsearch.NextFullSearch()) != NULL) { + // Set up a rectangle search x-bounded by the column and y by the part. + ColPartitionSet* columns = best_columns != NULL + ? best_columns[gsearch.GridY()] + : NULL; + FindPartitionMargins(columns, part); + const TBOX& box = part->bounding_box(); + if (AlignedBlob::WithinTestRegion(2, box.left(), box.bottom())) { + tprintf("Computed margins for part:"); + part->Print(); + } + } +} + +// Improves the margins of the ColPartitions in the list by calling +// FindPartitionMargins on each. +// best_columns, which may be NULL, is an array of pointers indicating the +// column set at each y-coordinate in the grid. +// best_columns is usually the best_columns_ member of ColumnFinder. +void ColPartitionGrid::ListFindMargins(ColPartitionSet** best_columns, + ColPartition_LIST* parts) { + ColPartition_IT part_it(parts); + for (part_it.mark_cycle_pt(); !part_it.cycled_list(); part_it.forward()) { + ColPartition* part = part_it.data(); + ColPartitionSet* columns = NULL; + if (best_columns != NULL) { + TBOX part_box = part->bounding_box(); + // Get the columns from the y grid coord. 
+ int grid_x, grid_y; + GridCoords(part_box.left(), part_box.bottom(), &grid_x, &grid_y); + columns = best_columns[grid_y]; + } + FindPartitionMargins(columns, part); + } +} + +// Finds and marks text partitions that represent figure captions. +void ColPartitionGrid::FindFigureCaptions() { + // For each image region find its best candidate text caption region, + // if any and mark it as such. + ColPartitionGridSearch gsearch(this); + gsearch.StartFullSearch(); + ColPartition* part; + while ((part = gsearch.NextFullSearch()) != NULL) { + if (part->IsImageType()) { + const TBOX& part_box = part->bounding_box(); + bool debug = AlignedBlob::WithinTestRegion(2, part_box.left(), + part_box.bottom()); + ColPartition* best_caption = NULL; + int best_dist = 0; // Distance to best_caption. + int best_upper = 0; // Direction of best_caption. + // Handle both lower and upper directions. + for (int upper = 0; upper < 2; ++upper) { + ColPartition_C_IT partner_it(upper ? part->upper_partners() + : part->lower_partners()); + // If there are no image partners, then this direction is ok. + for (partner_it.mark_cycle_pt(); !partner_it.cycled_list(); + partner_it.forward()) { + ColPartition* partner = partner_it.data(); + if (partner->IsImageType()) { + break; + } + } + if (!partner_it.cycled_list()) continue; + // Find the nearest totally overlapping text partner. 
+ for (partner_it.mark_cycle_pt(); !partner_it.cycled_list(); + partner_it.forward()) { + ColPartition* partner = partner_it.data(); + if (!partner->IsTextType()) continue; + const TBOX& partner_box = partner->bounding_box(); + if (debug) { + tprintf("Finding figure captions for image part:"); + part_box.print(); + tprintf("Considering partner:"); + partner_box.print(); + } + if (partner_box.left() >= part_box.left() && + partner_box.right() <= part_box.right()) { + int dist = partner_box.y_gap(part_box); + if (best_caption == NULL || dist < best_dist) { + best_dist = dist; + best_caption = partner; + best_upper = upper; + } + } + } + } + if (best_caption != NULL) { + if (debug) { + tprintf("Best caption candidate:"); + best_caption->bounding_box().print(); + } + // We have a candidate caption. Qualify it as being separable from + // any body text. We are looking for either a small number of lines + // or a big gap that indicates a separation from the body text. + int line_count = 0; + int biggest_gap = 0; + int smallest_gap = MAX_INT16; + int total_height = 0; + int mean_height = 0; + ColPartition* end_partner = NULL; + ColPartition* next_partner = NULL; + for (ColPartition* partner = best_caption; partner != NULL && + line_count <= kMaxCaptionLines; + partner = next_partner) { + if (!partner->IsTextType()) { + end_partner = partner; + break; + } + ++line_count; + total_height += partner->bounding_box().height(); + next_partner = partner->SingletonPartner(best_upper); + if (next_partner != NULL) { + int gap = partner->bounding_box().y_gap( + next_partner->bounding_box()); + if (gap > biggest_gap) { + biggest_gap = gap; + end_partner = next_partner; + mean_height = total_height / line_count; + } else if (gap < smallest_gap) { + smallest_gap = gap; + } + // If the gap looks big compared to the text size and the smallest + // gap seen so far, then we can stop. 
+ if (biggest_gap > mean_height * kMinCaptionGapHeightRatio && + biggest_gap > smallest_gap * kMinCaptionGapRatio) + break; + } + } + if (debug) { + tprintf("Line count=%d, biggest gap %d, smallest%d, mean height %d\n", + line_count, biggest_gap, smallest_gap, mean_height); + if (end_partner != NULL) { + tprintf("End partner:"); + end_partner->bounding_box().print(); + } + } + if (next_partner == NULL && line_count <= kMaxCaptionLines) + end_partner = NULL; // No gap, but line count is small. + if (line_count <= kMaxCaptionLines) { + // This is a qualified caption. Mark the text as caption. + for (ColPartition* partner = best_caption; partner != NULL && + partner != end_partner; + partner = next_partner) { + partner->set_type(PT_CAPTION_TEXT); + partner->SetBlobTypes(); + if (debug) { + tprintf("Set caption type for partition:"); + partner->bounding_box().print(); + } + next_partner = partner->SingletonPartner(best_upper); + } + } + } + } + } +} + +//////// Functions that manipulate ColPartitions in the part_grid_ ///// +//////// to find chains of partner partitions of the same type. /////// + +// For every ColPartition in the grid, finds its upper and lower neighbours. +void ColPartitionGrid::FindPartitionPartners() { + ColPartitionGridSearch gsearch(this); + gsearch.StartFullSearch(); + ColPartition* part; + while ((part = gsearch.NextFullSearch()) != NULL) { + FindPartitionPartners(true, part); + FindPartitionPartners(false, part); + } +} + +// Finds the best partner in the given direction for the given partition. +// Stores the result with AddPartner. +void ColPartitionGrid::FindPartitionPartners(bool upper, ColPartition* part) { + if (part->type() == PT_NOISE) + return; // Noise is not allowed to partner anything. 
+ const TBOX& box = part->bounding_box(); + int top = part->median_top(); + int bottom = part->median_bottom(); + int height = top - bottom; + int mid_y = (bottom + top) / 2; + ColPartitionGridSearch vsearch(this); + // Search down for neighbour below + vsearch.StartVerticalSearch(box.left(), box.right(), part->MidY()); + ColPartition* neighbour; + ColPartition* best_neighbour = NULL; + int best_dist = MAX_INT32; + while ((neighbour = vsearch.NextVerticalSearch(!upper)) != NULL) { + if (neighbour == part || neighbour->type() == PT_NOISE) + continue; // Noise is not allowed to partner anything. + int neighbour_bottom = neighbour->median_bottom(); + int neighbour_top = neighbour->median_top(); + int neighbour_y = (neighbour_bottom + neighbour_top) / 2; + if (upper != (neighbour_y > mid_y)) + continue; + if (!part->HOverlaps(*neighbour) && !part->HCompatible(*neighbour)) + continue; + if (!part->TypesMatch(*neighbour)) { + if (best_neighbour == NULL) + best_neighbour = neighbour; + continue; + } + int dist = upper ? neighbour_bottom - top : bottom - neighbour_top; + if (dist <= kMaxPartitionSpacing * height) { + if (dist < best_dist) { + best_dist = dist; + best_neighbour = neighbour; + } + } else { + break; + } + } + if (best_neighbour != NULL) + part->AddPartner(upper, best_neighbour); +} + +// For every ColPartition with multiple partners in the grid, reduces the +// number of partners to 0 or 1. If get_desperate is true, goes to more +// desperate merge methods to merge flowing text before breaking partnerships. +void ColPartitionGrid::RefinePartitionPartners(bool get_desperate) { + ColPartitionGridSearch gsearch(this); + // Refine in type order so that chasing multiple partners can be done + // before eliminating type mis-matching partners. + for (int type = PT_UNKNOWN + 1; type <= PT_COUNT; type++) { + // Iterate the ColPartitions in the grid. 
+    gsearch.StartFullSearch();
+    ColPartition* part;
+    while ((part = gsearch.NextFullSearch()) != NULL) {
+      part->RefinePartners(static_cast<PolyBlockType>(type),
+                           get_desperate, this);
+      // Iterator may have been messed up by a merge.
+      gsearch.RepositionIterator();
+    }
+  }
+}
+
+
+// ========================== PRIVATE CODE ========================
+
+// Improves the margins of the part ColPartition by searching for
+// neighbours that vertically overlap significantly.
+// columns may be NULL, and indicates the assigned column structure this
+// is applicable to part.
+void ColPartitionGrid::FindPartitionMargins(ColPartitionSet* columns,
+                                            ColPartition* part) {
+  // Set up a rectangle search x-bounded by the column and y by the part.
+  TBOX box = part->bounding_box();
+  int y = part->MidY();
+  // Initial left margin is based on the column, if there is one.
+  int left_margin = bleft().x();
+  int right_margin = tright().x();
+  if (columns != NULL) {
+    ColPartition* column = columns->ColumnContaining(box.left(), y);
+    if (column != NULL)
+      left_margin = column->LeftAtY(y);
+    column = columns->ColumnContaining(box.right(), y);
+    if (column != NULL)
+      right_margin = column->RightAtY(y);
+  }
+  left_margin -= kColumnWidthFactor;
+  right_margin += kColumnWidthFactor;
+  // Search for ColPartitions that reduce the margin.
+  left_margin = FindMargin(box.left() + box.height(), true, left_margin,
+                           box.bottom(), box.top(), part);
+  part->set_left_margin(left_margin);
+  // Search for ColPartitions that reduce the margin.
+  right_margin = FindMargin(box.right() - box.height(), false, right_margin,
+                            box.bottom(), box.top(), part);
+  part->set_right_margin(right_margin);
+}
+
+// Starting at x, and going in the specified direction, upto x_limit, finds
+// the margin for the given y range by searching sideways,
+// and ignoring not_this.
+int ColPartitionGrid::FindMargin(int x, bool right_to_left, int x_limit,
+                                 int y_bottom, int y_top,
+                                 const ColPartition* not_this) {
+  int height = y_top - y_bottom;
+  // Iterate the ColPartitions in the grid.
+  ColPartitionGridSearch side_search(this);
+  side_search.SetUniqueMode(true);
+  side_search.StartSideSearch(x, y_bottom, y_top);
+  ColPartition* part;
+  while ((part = side_search.NextSideSearch(right_to_left)) != NULL) {
+    // Ignore itself.
+    if (part == not_this)  // || part->IsLineType())
+      continue;
+    // Must overlap by enough, based on the min of the heights, so
+    // large partitions can't smash through small ones.
+    TBOX box = part->bounding_box();
+    int min_overlap = MIN(height, box.height());
+    min_overlap = static_cast<int>(min_overlap * kMarginOverlapFraction + 0.5);
+    int y_overlap = MIN(y_top, box.top()) - MAX(y_bottom, box.bottom());
+    if (y_overlap < min_overlap)
+      continue;
+    // Must be going the right way.
+    int x_edge = right_to_left ? box.right() : box.left();
+    if ((x_edge < x) != right_to_left)
+      continue;
+    // If we have gone past x_limit, then x_limit will do.
+    if ((x_edge < x_limit) == right_to_left)
+      break;
+    // It reduces x limit, so save the new one.
+    x_limit = x_edge;
+  }
+  return x_limit;
+}
+
+
+}  // namespace tesseract.
diff --git a/textord/colpartitiongrid.h b/textord/colpartitiongrid.h
new file mode 100644
index 0000000000..f4d9581a96
--- /dev/null
+++ b/textord/colpartitiongrid.h
@@ -0,0 +1,100 @@
+///////////////////////////////////////////////////////////////////////
+// File: colpartitiongrid.h
+// Description: Class collecting code that acts on a BBGrid of ColPartitions.
+// Author: Ray Smith
+// Created: Mon Oct 05 08:42:01 PDT 2009
+//
+// (C) Copyright 2009, Google Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+///////////////////////////////////////////////////////////////////////
+
+#ifndef TESSERACT_TEXTORD_COLPARTITIONGRID_H__
+#define TESSERACT_TEXTORD_COLPARTITIONGRID_H__
+
+#include "bbgrid.h"
+#include "colpartition.h"
+
+namespace tesseract {
+
+class TabFind;
+
+// ColPartitionGrid is a BBGrid of ColPartition.
+// It collects functions that work on the grid.
+class ColPartitionGrid : public BBGrid<ColPartition, ColPartition_CLIST, ColPartition_C_IT> {
+ public:
+  ColPartitionGrid();
+  ColPartitionGrid(int gridsize, const ICOORD& bleft, const ICOORD& tright);
+
+  ~ColPartitionGrid();
+
+  // Handles a click event in a display window.
+  void HandleClick(int x, int y);
+
+  // Finds all the ColPartitions in the grid that overlap with the given
+  // box and returns them SortByBoxLeft(ed) and uniqued in the given list.
+  // Any partition equal to not_this (may be NULL) is excluded.
+  void FindOverlappingPartitions(const TBOX& box, const ColPartition* not_this,
+                                 ColPartition_CLIST* parts);
+
+  // Finds and returns the best candidate ColPartition to merge with part,
+  // selected from the candidates list, based on the minimum increase in
+  // pairwise overlap among all the partitions overlapped by the combined box.
+  // If overlap_increase is not NULL then it returns the increase in overlap
+  // that would result from the merge.
+  // See colpartitiongrid.cpp for a diagram.
+  ColPartition* BestMergeCandidate(
+      const ColPartition* part, ColPartition_CLIST* candidates, bool debug,
+      TessResultCallback2<bool, const ColPartition*, const ColPartition*>* confirm_cb,
+      int* overlap_increase);
+
+  // Improves the margins of the ColPartitions in the grid by calling
+  // FindPartitionMargins on each.
+  void GridFindMargins(ColPartitionSet** best_columns);
+
+  // Improves the margins of the ColPartitions in the list by calling
+  // FindPartitionMargins on each.
+  void ListFindMargins(ColPartitionSet** best_columns,
+                       ColPartition_LIST* parts);
+
+  // Finds and marks text partitions that represent figure captions.
+  void FindFigureCaptions();
+
+  //////// Functions that manipulate ColPartitions in the grid ///////
+  //////// to find chains of partner partitions of the same type. ///////
+  // For every ColPartition in the grid, finds its upper and lower neighbours.
+  void FindPartitionPartners();
+  // Finds the best partner in the given direction for the given partition.
+  // Stores the result with AddPartner.
+  void FindPartitionPartners(bool upper, ColPartition* part);
+  // For every ColPartition with multiple partners in the grid, reduces the
+  // number of partners to 0 or 1. If get_desperate is true, goes to more
+  // desperate merge methods to merge flowing text before breaking partnerships.
+  void RefinePartitionPartners(bool get_desperate);
+
+ private:
+  // Improves the margins of the ColPartition by searching for
+  // neighbours that vertically overlap significantly.
+  void FindPartitionMargins(ColPartitionSet* columns, ColPartition* part);
+
+  // Starting at x, and going in the specified direction, upto x_limit, finds
+  // the margin for the given y range by searching sideways,
+  // and ignoring not_this.
+  int FindMargin(int x, bool right_to_left, int x_limit,
+                 int y_bottom, int y_top, const ColPartition* not_this);
+};
+
+}  // namespace tesseract.
+ +#endif // TESSERACT_TEXTORD_COLPARTITIONGRID_H__ diff --git a/textord/colpartitionset.cpp b/textord/colpartitionset.cpp index b4cd2f6bb6..b3f5c017c9 100644 --- a/textord/colpartitionset.cpp +++ b/textord/colpartitionset.cpp @@ -25,6 +25,9 @@ namespace tesseract { +// Minimum width of a column to be interesting as a multiple of resolution. +const double kMinColumnWidth = 2.0 / 3; + ELISTIZE(ColPartitionSet) ColPartitionSet::ColPartitionSet(ColPartition_LIST* partitions) { @@ -318,8 +321,8 @@ bool ColPartitionSet::CompatibleColumns(bool debug, ColPartitionSet* other, while (!it2.at_last()) { it2.forward(); ColPartition* next_part = it2.data(); - if (next_part->blob_type() <= BRT_UNKNOWN) - continue; // Image partitions are irrelevant. + if (!BLOBNBOX::IsTextType(next_part->blob_type())) + continue; // Non-text partitions are irrelevant. int next_left = next_part->bounding_box().left(); if (next_left == right) { break; // They share the same edge, so one must be a pull-out. @@ -367,16 +370,42 @@ bool ColPartitionSet::CompatibleColumns(bool debug, ColPartitionSet* other, return true; } +// Returns the total width of all blobs in the part_set that do not lie +// within an approved column. Used as a cost measure for using this +// column set over another that might be compatible. +int ColPartitionSet::UnmatchedWidth(ColPartitionSet* part_set) { + int total_width = 0; + ColPartition_IT it(&part_set->parts_); + for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { + ColPartition* part = it.data(); + if (!BLOBNBOX::IsTextType(part->blob_type())) { + continue; // Non-text partitions are irrelevant to column compatibility. + } + int y = part->MidY(); + BLOBNBOX_C_IT box_it(part->boxes()); + for (box_it.mark_cycle_pt(); !box_it.cycled_list(); box_it.forward()) { + const TBOX& box = it.data()->bounding_box(); + // Assume that the whole blob is outside any column iff its x-middle + // is outside. 
+ int x = (box.left() + box.right()) / 2; + ColPartition* col = ColumnContaining(x, y); + if (col == NULL) + total_width += box.width(); + } + } + return total_width; +} + // Return true if this ColPartitionSet makes a legal column candidate by // having legal individual partitions and non-overlapping adjacent pairs. bool ColPartitionSet::LegalColumnCandidate() { ColPartition_IT it(&parts_); if (it.empty()) return false; - int any_text_parts = false; + bool any_text_parts = false; for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { ColPartition* part = it.data(); - if (part->blob_type() > BRT_UNKNOWN) { + if (BLOBNBOX::IsTextType(part->blob_type())) { if (!part->IsLegal()) return false; // Individual partition is illegal. any_text_parts = true; @@ -398,7 +427,7 @@ ColPartitionSet* ColPartitionSet::Copy(bool good_only) { ColPartition_IT dest_it(©_parts); for (src_it.mark_cycle_pt(); !src_it.cycled_list(); src_it.forward()) { ColPartition* part = src_it.data(); - if (part->blob_type() > BRT_UNKNOWN && + if (BLOBNBOX::IsTextType(part->blob_type()) && (!good_only || part->good_width() || part->good_column())) dest_it.add_after_then_move(part->ShallowCopy()); } @@ -434,23 +463,24 @@ void ColPartitionSet::DisplayColumnEdges(int y_bottom, int y_top, } } -// Return the PolyBlockType that best explains the columns overlapped +// Return the ColumnSpanningType that best explains the columns overlapped // by the given coords(left,right,y), with the given margins. // Also return the first and last column index touched by the coords and -// the leftmost and rightmost spanned columns. -// Column indices are 2n + 1 for real colums (0 based) and even values +// the leftmost spanned column. +// Column indices are 2n + 1 for real columns (0 based) and even values // represent the gaps in between columns, with 0 being left of the leftmost. 
-PolyBlockType ColPartitionSet::SpanningType(BlobRegionType type, - int left, int right, int y, - int left_margin, int right_margin, - int* first_col, int* last_col, - int* first_spanned_col, - int* last_spanned_col) { +// resolution refers to the ppi resolution of the image. +ColumnSpanningType ColPartitionSet::SpanningType(int resolution, + int left, int right, int y, + int left_margin, + int right_margin, + int* first_col, + int* last_col, + int* first_spanned_col) { *first_col = -1; *last_col = -1; *first_spanned_col = -1; - *last_spanned_col = -1; - int columns_spanned = 0; + int margin_columns = 0; ColPartition_IT it(&parts_); int col_index = 1; for (it.mark_cycle_pt(); !it.cycled_list(); it.forward(), col_index += 2) { @@ -464,19 +494,12 @@ PolyBlockType ColPartitionSet::SpanningType(BlobRegionType type, if (part->ColumnContains(right, y)) { // Both within a single column. *last_col = col_index; - if (type == BRT_HLINE) - return PT_FLOWING_LINE; - else if (type > BRT_UNKNOWN) - return type == BRT_VERT_TEXT ? PT_VERTICAL_TEXT : PT_FLOWING_TEXT; - else - return PT_FLOWING_IMAGE; + return CST_FLOWING; } if (left_margin <= part->LeftAtY(y)) { // It completely spans this column. - *last_col = col_index; *first_spanned_col = col_index; - *last_spanned_col = col_index; - columns_spanned = 1; + margin_columns = 1; } } else if (part->ColumnContains(right, y)) { if (*first_col < 0) { @@ -485,23 +508,22 @@ PolyBlockType ColPartitionSet::SpanningType(BlobRegionType type, } if (right_margin >= part->RightAtY(y)) { // It completely spans this column. - if (columns_spanned == 0) + if (margin_columns == 0) *first_spanned_col = col_index; - *last_spanned_col = col_index; - ++columns_spanned; + ++margin_columns; } *last_col = col_index; break; } else if (left < part->LeftAtY(y) && right > part->RightAtY(y)) { // Neither left nor right are contained within, so it spans this // column. 
- if (columns_spanned == 0) { - *first_col = col_index; - *first_spanned_col = col_index; + if (*first_col < 0) { + // It started in between the previous column and the current column. + *first_col = col_index - 1; } + if (margin_columns == 0) + *first_spanned_col = col_index; *last_col = col_index; - *last_spanned_col = col_index; - ++columns_spanned; } else if (right < part->LeftAtY(y)) { // We have gone past the end. *last_col = col_index - 1; @@ -518,26 +540,21 @@ PolyBlockType ColPartitionSet::SpanningType(BlobRegionType type, *last_col = col_index - 1; // The last in-between. ASSERT_HOST(*first_col >= 0 && *last_col >= 0); ASSERT_HOST(*first_col <= *last_col); - if (columns_spanned == 0 && *first_col == *last_col) { + if (*first_col == *last_col && right - left < kMinColumnWidth * resolution) { // Neither end was in a column, and it didn't span any, so it lies // entirely between columns, therefore noise. - return PT_NOISE; - } else if (columns_spanned <= 1) { - // It is a pullout, as left and right were not in the same column. - if (type == BRT_HLINE) - return PT_PULLOUT_LINE; - else if (type > BRT_UNKNOWN) - return type == BRT_VERT_TEXT ? PT_VERTICAL_TEXT : PT_PULLOUT_TEXT; - else - return PT_PULLOUT_IMAGE; + return CST_NOISE; + } else if (margin_columns <= 1) { + // An exception for headings that stick outside of single-column text. + if (margin_columns == 1 && parts_.singleton()) { + return CST_HEADING; + } + // It is a pullout, as left and right were not in the same column, but + // it doesn't go to the edge of its start and end. + return CST_PULLOUT; } - // It completely spanned more than one column. Always a heading. - if (type == BRT_HLINE) - return PT_HEADING_LINE; - else if (type > BRT_UNKNOWN) - return type == BRT_VERT_TEXT ? PT_VERTICAL_TEXT : PT_HEADING_TEXT; - else - return PT_HEADING_IMAGE; + // Its margins went to the edges of first and last columns => heading. + return CST_HEADING; } // The column_set has changed. 
Close down all in-progress WorkingPartSets in diff --git a/textord/colpartitionset.h b/textord/colpartitionset.h index 668a58f76a..2f9b356fe3 100644 --- a/textord/colpartitionset.h +++ b/textord/colpartitionset.h @@ -85,6 +85,11 @@ class ColPartitionSet : public ELIST_LINK { // in this. bool CompatibleColumns(bool debug, ColPartitionSet* other, WidthCallback* cb); + // Returns the total width of all blobs in the part_set that do not lie + // within an approved column. Used as a cost measure for using this + // column set over another that might be compatible. + int UnmatchedWidth(ColPartitionSet* part_set); + // Return true if this ColPartitionSet makes a legal column candidate by // having legal individual partitions and non-overlapping adjacent pairs. bool LegalColumnCandidate(); @@ -95,16 +100,19 @@ class ColPartitionSet : public ELIST_LINK { // Display the edges of the columns at the given y coords. void DisplayColumnEdges(int y_bottom, int y_top, ScrollView* win); - // Return the PolyBlockType that best explains the columns overlapped + // Return the ColumnSpanningType that best explains the columns overlapped // by the given coords(left,right,y), with the given margins. // Also return the first and last column index touched by the coords and - // the leftmost and rightmost spanned columns. + // the leftmost spanned column. // Column indices are 2n + 1 for real colums (0 based) and even values // represent the gaps in between columns, with 0 being left of the leftmost. - PolyBlockType SpanningType(BlobRegionType type, int left, int right, int y, - int left_margin, int right_margin, - int* first_col, int* last_col, - int* first_spanned_col, int* last_spanned_col); + // resolution refers to the ppi resolution of the image. It may be 0 if only + // the first_col and last_col are required. 
+ ColumnSpanningType SpanningType(int resolution, + int left, int right, int y, + int left_margin, int right_margin, + int* first_col, int* last_col, + int* first_spanned_col); // The column_set has changed. Close down all in-progress WorkingPartSets in // columns that do not match and start new ones for the new columns in this. diff --git a/textord/drawedg.cpp b/textord/drawedg.cpp index 15f533c553..06b6557564 100644 --- a/textord/drawedg.cpp +++ b/textord/drawedg.cpp @@ -32,8 +32,6 @@ #define IMAGE_XPOS 250 /** default position */ #define IMAGE_YPOS 0 -/** control D */ -#define CTRLD '\004' #define EXTERN diff --git a/textord/drawtord.cpp b/textord/drawtord.cpp index 2b2ccc5a47..9a82a0ce1a 100644 --- a/textord/drawtord.cpp +++ b/textord/drawtord.cpp @@ -32,22 +32,13 @@ #define TO_WIN_YPOS 0 #define TO_WIN_NAME "Textord" //title of window -#define DEBUG_WIN_NAME "TODebug" -#define DEBUG_XPOS 0 -#define DEBUG_YPOS 120 -#define DEBUG_XSIZE 80 -#define DEBUG_YSIZE 32 -#define YMAX 3508 -#define XMAX 2550 #define EXTERN EXTERN BOOL_VAR (textord_show_fixed_cuts, FALSE, "Draw fixed pitch cell boundaries"); -EXTERN STRING_VAR (to_debugfile, DEBUG_WIN_NAME, "Name of debugfile"); EXTERN ScrollView* to_win = NULL; -EXTERN FILE *to_debug = NULL; /********************************************************************** * create_to_win @@ -71,21 +62,6 @@ void close_to_win() { } -/********************************************************************** - * create_todebug_win - * - * Create the to window used to show the fit. 
- **********************************************************************/ - -void create_todebug_win() { //make gradients win - if (strcmp (to_debugfile.string (), DEBUG_WIN_NAME) != 0) - // create_debug_window(); - // else - to_debug = fopen (to_debugfile.string (), "w"); -} - - - /********************************************************************** * plot_box_list * diff --git a/textord/drawtord.h b/textord/drawtord.h index 1759e6c6b0..e46428d680 100644 --- a/textord/drawtord.h +++ b/textord/drawtord.h @@ -20,7 +20,7 @@ #ifndef DRAWTORD_H #define DRAWTORD_H -#include "varable.h" +#include "params.h" #include "scrollview.h" #include "pitsync1.h" #include "blobbox.h" diff --git a/textord/edgblob.cpp b/textord/edgblob.cpp index 9e29968a92..27cb11e591 100644 --- a/textord/edgblob.cpp +++ b/textord/edgblob.cpp @@ -333,27 +333,18 @@ void OL_BUCKETS::extract_children( // recursive count * Run the edge detector over the block and return a list of blobs. */ -void extract_edges( // find blobs -#ifndef GRAPHICS_DISABLED - ScrollView* window, // window for output -#endif - IMAGE *image, // image to scan - IMAGE *t_image, // thresholded image - ICOORD page_tr, // corner of page - BLOCK *block // block to scan - ) { - ICOORD bleft; // block box - ICOORD tright; +void extract_edges(Pix* pix, // thresholded image + BLOCK *block) { // block to scan C_OUTLINE_LIST outlines; // outlines in block - // iterator C_OUTLINE_IT out_it = &outlines; -#ifndef GRAPHICS_DISABLED - get_outlines(window, image, t_image, page_tr, (PDBLK *) block, &out_it); -#else - get_outlines(image, t_image, page_tr, (PDBLK *) block, &out_it); -#endif - // block box + // TODO(rays) move the pix all the way down to the bottom. 
+ IMAGE image; + image.FromPix(pix); + + block_edges(&image, block, &out_it); + ICOORD bleft; // block box + ICOORD tright; block->bounding_box(bleft, tright); // make blobs outlines_to_blobs(block, bleft, tright, &outlines); diff --git a/textord/edgblob.h b/textord/edgblob.h index 9a1534608e..92f8fcf402 100644 --- a/textord/edgblob.h +++ b/textord/edgblob.h @@ -21,7 +21,7 @@ #define EDGBLOB_H #include "scrollview.h" -#include "varable.h" +#include "params.h" #include "img.h" #include "ocrblock.h" #include "coutln.h" @@ -74,15 +74,8 @@ class OL_BUCKETS inT32 index; //for extraction scan }; -void extract_edges( //find blobs -#ifndef GRAPHICS_DISABLED - ScrollView* window, //window for output -#endif - IMAGE *image, //image to scan - IMAGE *t_image, //thresholded image - ICOORD page_tr, //corner of page - BLOCK *block //block to scan - ); +void extract_edges(Pix* pix, // thresholded image + BLOCK* block); // block to scan void outlines_to_blobs( //find blobs BLOCK *block, //block to scan ICOORD bleft, //block box //outlines in block diff --git a/textord/edgloop.cpp b/textord/edgloop.cpp index 9eea3cf529..476b187133 100644 --- a/textord/edgloop.cpp +++ b/textord/edgloop.cpp @@ -27,62 +27,18 @@ #include "config_auto.h" #endif -#define MINEDGELENGTH 8 //min decent length - -#define EXTERN - -EXTERN double_VAR (edges_threshold_greyfraction, 0.07, -"Min edge diff for grad vector"); -EXTERN BOOL_VAR (edges_show_paths, FALSE, "Draw raw outlines"); -EXTERN BOOL_VAR (edges_show_needles, FALSE, "Draw edge needles"); -EXTERN INT_VAR (edges_maxedgelength, 16000, "Max steps in any outline"); - -#ifndef GRAPHICS_DISABLED -static ScrollView* edge_win; //window -#endif -static C_OUTLINE_IT *outline_it; //iterator -static int short_edges; //no of short ones -static int long_edges; //no of long ones - -/********************************************************************** - * get_outlines - * - * Run the edge detector over the block and return a list of outlines. 
- **********************************************************************/ - -DLLSYM void get_outlines( //edge detect -#ifndef GRAPHICS_DISABLED - ScrollView* window, //window for output -#endif - IMAGE *image, //image to scan - IMAGE *t_image, //thresholded image - ICOORD page_tr, //corner of page - PDBLK *block, //block to scan - C_OUTLINE_IT *out_it //output iterator - ) { -#ifndef GRAPHICS_DISABLED - edge_win = window; //set statics -#endif - outline_it = out_it; - block_edges(t_image, block, page_tr); - out_it->move_to_first (); -#ifndef GRAPHICS_DISABLED - if (window != NULL) -// overlap_picture_ops(TRUE); //update window - ScrollView::Update(); -#endif -} +#define MINEDGELENGTH 8 // min decent length +INT_VAR(edges_maxedgelength, 16000, "Max steps in any outline"); /********************************************************************** * complete_edge * - * Complete the edge by cleaning it up andapproximating it. + * Complete the edge by cleaning it up. **********************************************************************/ -void complete_edge( //clean and approximate - CRACKEDGE *start //start of loop - ) { +void complete_edge(CRACKEDGE *start, //start of loop + C_OUTLINE_IT* outline_it) { ScrollView::Color colour; //colour to draw in inT16 looplength; //steps in loop ICOORD botleft; //bounding box @@ -91,12 +47,6 @@ void complete_edge( //clean and approximate //check length etc. 
colour = check_path_legal (start); -#ifndef GRAPHICS_DISABLED - if (edges_show_paths) { - //in red - draw_raw_edge(edge_win, start, colour); - } -#endif if (colour == ScrollView::RED || colour == ScrollView::BLUE) { looplength = loop_bounding_box (start, botleft, topright); @@ -150,14 +100,10 @@ ScrollView::Color check_path_legal( //certify outline if ((chainsum != 4 && chainsum != -4) || edgept != start || length < MINEDGELENGTH) { if (edgept != start) { - long_edges++; - return ScrollView::YELLOW; - } - else if (length < MINEDGELENGTH) { - short_edges++; - return ScrollView::MAGENTA; - } - else { + return ScrollView::YELLOW; + } else if (length < MINEDGELENGTH) { + return ScrollView::MAGENTA; + } else { ED_ILLEGAL_SUM.error ("check_path_legal", TESSLOG, "chainsum=%d", chainsum); return ScrollView::GREEN; diff --git a/textord/edgloop.h b/textord/edgloop.h index bd58b8effb..0cceef1f51 100644 --- a/textord/edgloop.h +++ b/textord/edgloop.h @@ -21,7 +21,7 @@ #define EDGLOOP_H #include "scrollview.h" -#include "varable.h" +#include "params.h" #include "img.h" #include "pdblock.h" #include "coutln.h" @@ -43,19 +43,8 @@ extern double_VAR_H (edges_childarea, 0.5, "Max area fraction of child outline"); extern double_VAR_H (edges_boxarea, 0.8, "Min area fraction of grandchild for box"); -DLLSYM void get_outlines( //edge detect -#ifndef GRAPHICS_DISABLED - ScrollView* window, //window for output -#endif - IMAGE *image, //image to scan - IMAGE *t_image, //thresholded image - ICOORD page_tr, //corner of page - PDBLK *block, //block to scan - C_OUTLINE_IT *out_it //output iterator - ); -void complete_edge( //clean and approximate - CRACKEDGE *start //start of loop - ); +void complete_edge(CRACKEDGE *start, //start of loop + C_OUTLINE_IT* outline_it); ScrollView::Color check_path_legal( //certify outline CRACKEDGE *start //start of loop ); diff --git a/textord/fpchop.cpp b/textord/fpchop.cpp index a47ebe28d5..e96a313c7a 100644 --- a/textord/fpchop.cpp +++ b/textord/fpchop.cpp 
@@ -23,7 +23,6 @@ #endif #include "stderr.h" #include "blobbox.h" -#include "lmedsq.h" #include "statistc.h" #include "drawtord.h" #include "tovars.h" diff --git a/textord/fpchop.h b/textord/fpchop.h index d7647b0c52..f8be623430 100644 --- a/textord/fpchop.h +++ b/textord/fpchop.h @@ -20,7 +20,7 @@ #ifndef FPCHOP_H #define FPCHOP_H -#include "varable.h" +#include "params.h" #include "blobbox.h" #include "notdll.h" #include "notdll.h" diff --git a/textord/imagefind.cpp b/textord/imagefind.cpp index 2faaae8f79..15d9cb5bea 100644 --- a/textord/imagefind.cpp +++ b/textord/imagefind.cpp @@ -23,16 +23,14 @@ #endif #include "imagefind.h" -#include "varable.h" +#include "params.h" // This entire file is dependent upon leptonica. If you don't have it, // you don't get this functionality. #ifdef HAVE_CONFIG_H #include "config_auto.h" #endif -#ifdef HAVE_LIBLEPT #include "allheaders.h" -#endif BOOL_VAR(textord_tabfind_show_images, false, "Show image blobs"); @@ -57,7 +55,6 @@ void ImageFinder::FindImages(Pix* pix, Boxa** boxa, Pixa** pixa) { *boxa = NULL; *pixa = NULL; -#ifdef HAVE_LIBLEPT if (pixGetWidth(pix) < kMinImageFindSize || pixGetHeight(pix) < kMinImageFindSize) return; // Not worth looking at small images. @@ -139,10 +136,8 @@ void ImageFinder::FindImages(Pix* pix, Boxa** boxa, Pixa** pixa) { img_pix, 0, 0); pixDestroy(&img_pix); } -#endif } -#ifdef HAVE_LIBLEPT // Scans horizontally on x=[x_start,x_end), starting with y=*y_start, // stepping y+=y_step, until y=y_end. *ystart is input/output. // If the number of black pixels in a row, pix_count fits this pattern: @@ -205,7 +200,6 @@ static bool VScanForEdge(uinT32* data, int wpl, int y_start, int y_end, } return false; // Never found max_count. 
} -#endif // Returns true if there is a rectangle in the source pix, such that all // pixel rows and column slices outside of it have less than @@ -221,7 +215,6 @@ bool ImageFinder::pixNearlyRectangular(Pix* pix, double max_skew_gradient, int* x_start, int* y_start, int* x_end, int* y_end) { -#ifdef HAVE_LIBLEPT *x_start = 0; *x_end = pixGetWidth(pix); *y_start = 0; @@ -280,12 +273,8 @@ bool ImageFinder::pixNearlyRectangular(Pix* pix, // All edges must satisfy the condition of sharp gradient in pixel density // in order for the full rectangle to be present. return left_done && right_done && top_done && bottom_done; -#else - return false; -#endif } -#ifdef HAVE_LIBLEPT // Scanning rows horizontally on x=[x_start, x_end), returns the first y row // starting at y_start, stepping by y_step to y_end in which there is // any black pixel. @@ -315,14 +304,12 @@ static int VScanForBlack(uinT32* data, int wpl, int x_start, int x_end, } return x_end; } -#endif // Given an input pix, and a bounding rectangle, the sides of the rectangle // are shrunk inwards until they bound any black pixels found within the // original rectangle. void ImageFinder::BoundsWithinRect(Pix* pix, int* x_start, int* y_start, int* x_end, int* y_end) { -#ifdef HAVE_LIBLEPT // This can probably be done with a lot less code using pixClipRect and // pixConnComp, but this code is probably a lot faster, given that most // uses will be applied to a solid black region. @@ -332,6 +319,8 @@ void ImageFinder::BoundsWithinRect(Pix* pix, int* x_start, int* y_start, if (*x_end > width) *x_end = width; if (*y_start < 0) *y_start = 0; if (*y_end > height) *y_end = height; + if (*y_end <= *y_start || *x_end <= *x_start) + return; // Nothing to do. 
uinT32* data = pixGetData(pix); int wpl = pixGetWpl(pix); @@ -343,7 +332,6 @@ void ImageFinder::BoundsWithinRect(Pix* pix, int* x_start, int* y_start, *x_start = VScanForBlack(data, wpl, *x_start, *x_end, *y_start, *y_end, 1); *x_end = VScanForBlack(data, wpl, *x_end - 1, *x_start - 1, *y_start, *y_end, -1) + 1; -#endif } } // namespace tesseract. diff --git a/textord/linefind.cpp b/textord/linefind.cpp index 265ae21d4b..c9f2ae3456 100644 --- a/textord/linefind.cpp +++ b/textord/linefind.cpp @@ -32,9 +32,7 @@ #ifdef HAVE_CONFIG_H #include "config_auto.h" #endif -#ifdef HAVE_LIBLEPT #include "allheaders.h" -#endif BOOL_VAR(textord_tabfind_show_vlines, false, "Show vertical rule lines"); @@ -59,7 +57,6 @@ const int kLineFindGridSize = 50; void LineFinder::FindVerticalLines(int resolution, Pix* pix, int* vertical_x, int* vertical_y, TabVector_LIST* vectors) { -#ifdef HAVE_LIBLEPT Pix* line_pix; Boxa* boxes = GetVLineBoxes(resolution, pix, &line_pix); C_BLOB_LIST line_cblobs; @@ -101,7 +98,6 @@ void LineFinder::FindVerticalLines(int resolution, Pix* pix, TabVector::MergeSimilarTabVectors(vertical, vectors, NULL); } pixDestroy(&line_pix); -#endif } // Finds horizontal line objects in the given pix. @@ -111,7 +107,6 @@ void LineFinder::FindVerticalLines(int resolution, Pix* pix, // having no boxes, as there is no need to refit or merge separator lines. void LineFinder::FindHorizontalLines(int resolution, Pix* pix, TabVector_LIST* vectors) { -#ifdef HAVE_LIBLEPT Pix* line_pix; Boxa* boxes = GetHLineBoxes(resolution, pix, &line_pix); C_BLOB_LIST line_cblobs; @@ -162,7 +157,6 @@ void LineFinder::FindHorizontalLines(int resolution, Pix* pix, } } pixDestroy(&line_pix); -#endif } // Converts the Boxa array to a list of C_BLOB, getting rid of severely @@ -172,7 +166,6 @@ void LineFinder::FindHorizontalLines(int resolution, Pix* pix, // bounding boxes. The Boxa is consumed and destroyed. 
void LineFinder::ConvertBoxaToBlobs(int image_width, int image_height, Boxa** boxes, C_BLOB_LIST* blobs) { -#ifdef HAVE_LIBLEPT C_OUTLINE_LIST outlines; C_OUTLINE_IT ol_it = &outlines; // Iterate the boxes to convert to outlines. @@ -202,7 +195,6 @@ void LineFinder::ConvertBoxaToBlobs(int image_width, int image_height, blob_it.add_list_after(block.blob_list()); // The boxes aren't needed any more. boxaDestroy(boxes); -#endif } // Finds vertical lines in the given list of BLOBNBOXes. bleft and tright @@ -269,7 +261,6 @@ void LineFinder::FindLineVectors(const ICOORD& bleft, const ICOORD& tright, // The input resolution overrides any resolution set in src_pix. // The output line_pix contains just all the detected lines. Boxa* LineFinder::GetVLineBoxes(int resolution, Pix* src_pix, Pix** line_pix) { -#ifdef HAVE_LIBLEPT // Remove any parts of 1 inch/kThinLineFraction wide or more, by opening // away the thin lines and subtracting what's left. // This is very generous and will leave in even quite wide lines. @@ -294,9 +285,6 @@ Boxa* LineFinder::GetVLineBoxes(int resolution, Pix* src_pix, Pix** line_pix) { Boxa* boxa = pixConnComp(pixt1, NULL, 8); *line_pix = pixt1; return boxa; -#else - return NULL; -#endif } // Get a set of bounding boxes of possible horizontal lines in the image. @@ -307,7 +295,6 @@ Boxa* LineFinder::GetVLineBoxes(int resolution, Pix* src_pix, Pix** line_pix) { // This transformation allows a simple x/y flip to reverse it in tesseract // coordinates and it is faster to flip the lines than rotate the image. Boxa* LineFinder::GetHLineBoxes(int resolution, Pix* src_pix, Pix** line_pix) { -#ifdef HAVE_LIBLEPT // Remove any parts of 1 inch/kThinLineFraction high or more, by opening // away the thin lines and subtracting what's left. // This is very generous and will leave in even quite wide lines. 
@@ -345,9 +332,6 @@ Boxa* LineFinder::GetHLineBoxes(int resolution, Pix* src_pix, Pix** line_pix) { boxaReplaceBox(boxa, i, box); } return boxa; -#else - return NULL; -#endif } } // namespace tesseract. diff --git a/textord/makerow.cpp b/textord/makerow.cpp index ecb7e9f2ae..fdf9f0d44d 100644 --- a/textord/makerow.cpp +++ b/textord/makerow.cpp @@ -23,17 +23,18 @@ #endif #include "stderr.h" #include "blobbox.h" -#include "lmedsq.h" +#include "ccstruct.h" +#include "detlinefit.h" #include "statistc.h" #include "drawtord.h" #include "blkocc.h" #include "sortflts.h" #include "oldbasel.h" +#include "textord.h" #include "tordmain.h" #include "underlin.h" #include "makerow.h" #include "tprintf.h" -#include "tesseractclass.h" #include "tovars.h" // Include automatically generated configuration file if running autoconf. @@ -50,12 +51,10 @@ BOOL_VAR(textord_show_final_blobs, FALSE, "Display blob bounds after pre-ass"); BOOL_VAR(textord_test_landscape, FALSE, "Tests refer to land/port"); BOOL_VAR(textord_parallel_baselines, TRUE, "Force parallel baselines"); BOOL_VAR(textord_straight_baselines, FALSE, "Force straight baselines"); -BOOL_VAR(textord_quadratic_baselines, FALSE, "Use quadratic splines"); BOOL_VAR(textord_old_baselines, TRUE, "Use old baseline algorithm"); BOOL_VAR(textord_old_xheight, FALSE, "Use old xheight algorithm"); BOOL_VAR(textord_fix_xheight_bug, TRUE, "Use spline baseline"); BOOL_VAR(textord_fix_makerow_bug, TRUE, "Prevent multiple baselines"); -BOOL_VAR(textord_cblob_blockocc, TRUE, "Use new projection for underlines"); BOOL_VAR(textord_debug_xheights, FALSE, "Test xheight algorithms"); BOOL_VAR(textord_biased_skewcalc, TRUE, "Bias skew estimates with line length"); BOOL_VAR(textord_interpolating_skew, TRUE, "Interpolate across gaps"); @@ -81,9 +80,6 @@ double_VAR(textord_chop_width, 1.5, "Max width before chopping"); double_VAR(textord_expansion_factor, 1.0, "Factor to expand rows by in expand_rows"); double_VAR(textord_overlap_x, 0.5, "Fraction of 
linespace for good overlap"); -double_VAR(textord_merge_desc, 0.25, "Fraction of linespace for desc drop"); -double_VAR(textord_merge_x, 0.5, "Fraction of linespace for x height"); -double_VAR(textord_merge_asc, 0.25, "Fraction of linespace for asc height"); double_VAR(textord_minxh, 0.25, "fraction of linesize for min xheight"); double_VAR(textord_min_linesize, 1.25, "* blob height for initial linesize"); double_VAR(textord_excess_blobsize, 1.3, @@ -98,21 +94,77 @@ double_VAR(textord_ascheight_mode_fraction, 0.08, "Min pile height to make ascheight"); double_VAR(textord_descheight_mode_fraction, 0.08, "Min pile height to make descheight"); -double_VAR(textord_ascx_ratio_min, 1.3, "Min cap/xheight"); +double_VAR(textord_ascx_ratio_min, 1.25, "Min cap/xheight"); double_VAR(textord_ascx_ratio_max, 1.8, "Max cap/xheight"); double_VAR(textord_descx_ratio_min, 0.25, "Min desc/xheight"); double_VAR(textord_descx_ratio_max, 0.6, "Max desc/xheight"); double_VAR(textord_xheight_error_margin, 0.1, "Accepted variation"); +INT_VAR(textord_lms_line_trials, 12, "Number of linew fits to do"); +BOOL_VAR(textord_new_initial_xheight, TRUE, "Use test xheight mechanism"); #define MAX_HEIGHT_MODES 12 +const int kMinLeaderCount = 5; + +// Factored-out helper to build a single row from a list of blobs. +// Returns the mean blob size. +static float MakeRowFromBlobs(float line_size, + BLOBNBOX_IT* blob_it, TO_ROW_IT* row_it) { + blob_it->sort(blob_x_order); + blob_it->move_to_first(); + TO_ROW* row = NULL; + float total_size = 0.0f; + int blob_count = 0; + // Add all the blobs to a single TO_ROW. 
+ for (; !blob_it->empty(); blob_it->forward()) { + BLOBNBOX* blob = blob_it->extract(); + int top = blob->bounding_box().top(); + int bottom = blob->bounding_box().bottom(); + if (row == NULL) { + row = new TO_ROW(blob, top, bottom, line_size); + row_it->add_before_then_move(row); + } else { + row->add_blob(blob, top, bottom, line_size); + } + total_size += top - bottom; + ++blob_count; + } + return blob_count > 0 ? total_size / blob_count : total_size; +} + +// Helper to make a row using the children of a single blob. +// Returns the mean size of the blobs created. +float MakeRowFromSubBlobs(TO_BLOCK* block, C_BLOB* blob, TO_ROW_IT* row_it) { + // The blobs made from the children will go in the small_blobs list. + BLOBNBOX_IT bb_it(&block->small_blobs); + C_OUTLINE_IT ol_it(blob->out_list()); + // Get the children. + ol_it.set_to_list(ol_it.data()->child()); + if (ol_it.empty()) + return 0.0f; + for (ol_it.mark_cycle_pt(); !ol_it.cycled_list(); ol_it.forward()) { + // Deep copy the child outline and use that to make a blob. + C_OUTLINE* outline = C_OUTLINE::deep_copy(ol_it.data()); + // The constructor from a list of outlines corrects the direction. + C_OUTLINE_LIST outlines; + C_OUTLINE_IT ol_it(&outlines); + ol_it.add_after_then_move(outline); + C_BLOB* blob = new C_BLOB(&outlines); + BLOBNBOX* bbox = new BLOBNBOX(blob); + bb_it.add_after_then_move(bbox); + } + // Now we can make a row from the blobs. + return MakeRowFromBlobs(block->line_size, &bb_it, row_it); +} + /** * @name make_single_row * - * Arrange the blobs into a single row. + * Arrange the blobs into a single row... well actually, if there is + * only a single blob, it makes 2 rows, in case the top-level blob + * is a container of the real blobs to recognize. 
*/ -float make_single_row(ICOORD page_tr, TO_BLOCK* block, TO_BLOCK_LIST* blocks, - tesseract::Tesseract* tess) { +float make_single_row(ICOORD page_tr, TO_BLOCK* block, TO_BLOCK_LIST* blocks) { BLOBNBOX_IT blob_it = &block->blobs; TO_ROW_IT row_it = block->get_rows(); @@ -120,35 +172,20 @@ float make_single_row(ICOORD page_tr, TO_BLOCK* block, TO_BLOCK_LIST* blocks, blob_it.add_list_after(&block->small_blobs); blob_it.add_list_after(&block->noise_blobs); blob_it.add_list_after(&block->large_blobs); - blob_it.sort(blob_x_order); - blob_it.move_to_first(); - TO_ROW* row = NULL; - // Add all the blobs to a single TO_ROW. - for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) { - BLOBNBOX* blob = blob_it.extract(); - int top = blob->bounding_box().top(); - int bottom = blob->bounding_box().bottom(); - if (row == NULL) { - row = new TO_ROW(blob, top, bottom, block->line_size); - row_it.add_before_then_move(row); - } else { - row->add_blob(blob, top, bottom, block->line_size); - } - } - // Fit an LMS line to the row. + if (block->blobs.singleton()) { + blob_it.move_to_first(); + float size = MakeRowFromSubBlobs(block, blob_it.data()->cblob(), &row_it); + if (size > block->line_size) + block->line_size = size; + } + MakeRowFromBlobs(block->line_size, &blob_it, &row_it); + // Fit an LMS line to the rows. for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) fit_lms_line(row_it.data()); float gradient; float fit_error; // Compute the skew based on the fitted line. compute_page_skew(blocks, gradient, fit_error); - FCOORD rotation(1.0f, 0.0f); - // Associate i dots and other diacriticals with the appropriate blobs. - pre_associate_blobs(page_tr, block, rotation, false); - int block_edge = block->block->bounding_box().left(); - fit_parallel_rows(block, gradient, rotation, block_edge, false); - // Make the curved baselines and setup some key block members. 
- make_spline_rows(block, gradient, rotation, block_edge, false, tess); return gradient; } @@ -157,52 +194,39 @@ float make_single_row(ICOORD page_tr, TO_BLOCK* block, TO_BLOCK_LIST* blocks, * * Arrange the blobs into rows. */ -float make_rows( //make rows - ICOORD page_tr, //top right - BLOCK_LIST *blocks, //block list - TO_BLOCK_LIST *land_blocks, //rotated for landscape - TO_BLOCK_LIST *port_blocks, //output list - tesseract::Tesseract* tess - ) { - float port_m; //global skew - float port_err; //global noise - // float land_m; //global skew - // float land_err; //global noise - TO_BLOCK_IT block_it; //iterator - - //don't do landscape for now - // block_it.set_to_list(land_blocks); - // for (block_it.mark_cycle_pt();!block_it.cycled_list();block_it.forward()) - // make_initial_textrows(page_tr,block_it.data(),FCOORD(0,-1), - // (BOOL8)textord_test_landscape); - block_it.set_to_list (port_blocks); - for (block_it.mark_cycle_pt (); !block_it.cycled_list (); - block_it.forward ()) - make_initial_textrows (page_tr, block_it.data (), FCOORD (1.0f, 0.0f), +float make_rows(ICOORD page_tr, TO_BLOCK_LIST *port_blocks) { + float port_m; // global skew + float port_err; // global noise + TO_BLOCK_IT block_it; // iterator + + block_it.set_to_list(port_blocks); + for (block_it.mark_cycle_pt(); !block_it.cycled_list(); + block_it.forward()) + make_initial_textrows(page_tr, block_it.data(), FCOORD(1.0f, 0.0f), !(BOOL8) textord_test_landscape); - //compute globally + // compute globally compute_page_skew(port_blocks, port_m, port_err); - // compute_page_skew(land_blocks,land_m,land_err); // compute globally - // tprintf("Portrait skew gradient=%g, error=%g.\n", - // port_m,port_err); - // tprintf("Landscape skew gradient=%g, error=%g.\n", - // land_m,land_err); - block_it.set_to_list (port_blocks); + block_it.set_to_list(port_blocks); + for (block_it.mark_cycle_pt(); !block_it.cycled_list(); block_it.forward()) { + cleanup_rows_making(page_tr, block_it.data(), port_m, 
FCOORD(1.0f, 0.0f), + block_it.data()->block->bounding_box().left(), + !(BOOL8)textord_test_landscape); + } + return port_m; // global skew +} + +namespace tesseract { + +void Textord::fit_rows(float gradient, ICOORD page_tr, TO_BLOCK_LIST *blocks) { + TO_BLOCK_IT block_it(blocks); // iterator for (block_it.mark_cycle_pt(); !block_it.cycled_list(); block_it.forward()) { - cleanup_rows(page_tr, block_it.data(), port_m, FCOORD(1.0f, 0.0f), + cleanup_rows_fitting(page_tr, block_it.data(), gradient, FCOORD(1.0f, 0.0f), block_it.data()->block->bounding_box().left(), - !(BOOL8)textord_test_landscape, tess); - } - block_it.set_to_list (land_blocks); - // for (block_it.mark_cycle_pt();!block_it.cycled_list();block_it.forward()) - // { - // cleanup_rows(page_tr,block_it.data(),land_m,FCOORD(0,-1), - // -block_it.data()->block->bounding_box().top(), - // (BOOL8)textord_test_landscape); - // } - return port_m; //global skew + !(BOOL8)textord_test_landscape); + } } +} // namespace tesseract. /** * @name make_initial_textrows @@ -249,21 +273,17 @@ void make_initial_textrows( //find lines * * Fit an LMS line to a row. 
*/ -void fit_lms_line( //sort function - TO_ROW *row //row to fit - ) { - float m, c; //fitted line - TBOX box; //blob box - LMS lms (row->blob_list ()->length ()); - //blobs - BLOBNBOX_IT blob_it = row->blob_list (); +void fit_lms_line(TO_ROW *row) { + float m, c; // fitted line + tesseract::DetLineFit lms; + BLOBNBOX_IT blob_it = row->blob_list(); - for (blob_it.mark_cycle_pt (); !blob_it.cycled_list (); blob_it.forward ()) { - box = blob_it.data ()->bounding_box (); - lms.add (FCOORD ((box.left () + box.right ()) / 2.0, box.bottom ())); + for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) { + const TBOX& box = blob_it.data()->bounding_box(); + lms.Add(ICOORD((box.left() + box.right()) / 2, box.bottom())); } - lms.fit (m, c); - row->set_line (m, c, lms.error ()); + double error = lms.Fit(&m, &c); + row->set_line(m, c, error); } @@ -500,18 +520,17 @@ static void vigorous_noise_removal(TO_BLOCK* block) { } /** - * cleanup_rows + * cleanup_rows_making * * Remove overlapping rows and fit all the blobs to what's left. 
*/ -void cleanup_rows( //find lines +void cleanup_rows_making( //find lines ICOORD page_tr, //top right TO_BLOCK *block, //block to do float gradient, //gradient to fit FCOORD rotation, //for drawing inT32 block_edge, //edge of block - BOOL8 testing_on, //correct orientation - tesseract::Tesseract* tess + BOOL8 testing_on //correct orientation ) { //iterators BLOBNBOX_IT blob_it = &block->blobs; @@ -551,10 +570,27 @@ void cleanup_rows( //find lines blob_it.add_list_after (&block->noise_blobs); blob_it.add_list_after (&block->small_blobs); assign_blobs_to_rows (block, &gradient, 3, FALSE, FALSE, FALSE); - //no rows for noise - row_it.set_to_list (block->get_rows ()); - for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) - row_it.data ()->blob_list ()->sort (blob_x_order); +} + +namespace tesseract { + +void Textord::cleanup_rows_fitting(ICOORD page_tr, // top right + TO_BLOCK *block, // block to do + float gradient, // gradient to fit + FCOORD rotation, // for drawing + inT32 block_edge, // edge of block + BOOL8 testing_on) { // correct orientation + BLOBNBOX_IT blob_it = &block->blobs; + TO_ROW_IT row_it = block->get_rows(); + +#ifndef GRAPHICS_DISABLED + if (textord_show_parallel_rows && testing_on) { + if (to_win == NULL) + create_to_win(page_tr); + } +#endif + for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) + row_it.data()->blob_list()->sort(blob_x_order); fit_parallel_rows(block, gradient, rotation, block_edge, FALSE); if (textord_heavy_nr) { vigorous_noise_removal(block); @@ -575,8 +611,7 @@ void cleanup_rows( //find lines gradient, rotation, block_edge, - textord_show_final_rows && testing_on, - tess); + textord_show_final_rows && testing_on); // We only want to call compute_block_xheight() if // both textord_old_xheight and textord_old_baselines are false. 
// No need to call compute_block_xheight() if textord_old_baselines @@ -585,16 +620,24 @@ void cleanup_rows( //find lines // Note: it can not be the case that textord_old_baselines is // false, and textord_old_xheight is true. if (!textord_old_xheight && !textord_old_baselines) - compute_block_xheight(block, gradient, tess); + compute_block_xheight(block, gradient); if (textord_restore_underlines) // fix underlines restore_underlined_blobs(block); #ifndef GRAPHICS_DISABLED if (textord_show_final_rows && testing_on) { - plot_blob_list (to_win, &block->blobs, - ScrollView::MAGENTA, ScrollView::WHITE); + ScrollView::Color colour = ScrollView::RED; + for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) { + plot_parallel_row(row_it.data(), gradient, + block_edge, colour, rotation); + colour = (ScrollView::Color) (colour + 1); + if (colour > ScrollView::MAGENTA) + colour = ScrollView::RED; + } + plot_blob_list(to_win, &block->blobs, + ScrollView::MAGENTA, ScrollView::WHITE); //show discarded blobs - plot_blob_list (to_win, &block->underlines, - ScrollView::YELLOW, ScrollView::CORAL); + plot_blob_list(to_win, &block->underlines, + ScrollView::YELLOW, ScrollView::CORAL); } if (textord_show_final_rows && testing_on && block->blobs.length () > 0) tprintf ("%d blobs discarded as noise\n", block->blobs.length ()); @@ -604,6 +647,7 @@ void cleanup_rows( //find lines #endif } +} // namespace tesseract. 
/** * delete_non_dropout_rows @@ -656,11 +700,11 @@ void delete_non_dropout_rows( //find lines compute_line_occupation(block, gradient, min_y, max_y, occupation, deltas); compute_occupation_threshold ((inT32) ceil (block->line_spacing * - (textord_merge_desc + - textord_merge_asc)), + (tesseract::CCStruct::kDescenderFraction + + tesseract::CCStruct::kAscenderFraction)), (inT32) ceil (block->line_spacing * - (textord_merge_x + - textord_merge_asc)), + (tesseract::CCStruct::kXHeightFraction + + tesseract::CCStruct::kAscenderFraction)), max_y - min_y + 1, occupation, deltas); #ifndef GRAPHICS_DISABLED if (testing_on) { @@ -1055,9 +1099,10 @@ void expand_rows( //find lines y_max = row->max_y (); //get current limits y_min = row->min_y (); y_bottom = row->intercept () - block->line_size * textord_expansion_factor * - textord_merge_desc; + tesseract::CCStruct::kDescenderFraction; y_top = row->intercept () + block->line_size * textord_expansion_factor * - (textord_merge_x + textord_merge_asc); + (tesseract::CCStruct::kXHeightFraction + + tesseract::CCStruct::kAscenderFraction); if (y_min > y_bottom) { //expansion allowed if (textord_show_expanded_rows && testing_on) tprintf("Expanding bottom of row at %f from %f to %f\n", @@ -1184,9 +1229,12 @@ void adjust_row_limits( //tidy limits if (textord_show_expanded_rows) tprintf("Row at %f has min %f, max %f, size %f\n", row->intercept(), row->min_y(), row->max_y(), size); - size /= textord_merge_x + textord_merge_asc + textord_merge_desc; - ymax = size * (textord_merge_x + textord_merge_asc); - ymin = -size * textord_merge_desc; + size /= tesseract::CCStruct::kXHeightFraction + + tesseract::CCStruct::kAscenderFraction + + tesseract::CCStruct::kDescenderFraction; + ymax = size * (tesseract::CCStruct::kXHeightFraction + + tesseract::CCStruct::kAscenderFraction); + ymin = -size * tesseract::CCStruct::kDescenderFraction; row->set_limits (row->intercept () + ymin, row->intercept () + ymax); row->merged = FALSE; } @@ -1312,11 +1360,13 
@@ void compute_row_stats( //find lines * ROW_UNKNOWN and ROW_INVALID categories. * */ -void compute_block_xheight(TO_BLOCK *block, float gradient, - tesseract::Tesseract *tess) { +namespace tesseract { +void Textord::compute_block_xheight(TO_BLOCK *block, float gradient) { TO_ROW *row; // current row - float asc_frac_xheight = textord_merge_asc / textord_merge_x; - float desc_frac_xheight = textord_merge_desc / textord_merge_x; + float asc_frac_xheight = CCStruct::kAscenderFraction / + CCStruct::kXHeightFraction; + float desc_frac_xheight = CCStruct::kDescenderFraction / + CCStruct::kXHeightFraction; inT32 min_height, max_height; // limits on xheight TO_ROW_IT row_it = block->get_rows(); if (row_it.empty()) return; // no rows @@ -1338,7 +1388,8 @@ void compute_block_xheight(TO_BLOCK *block, float gradient, row = row_it.data(); // Compute the xheight of this row if it has not been computed before. if (row->xheight <= 0.0) { - compute_row_xheight(row, gradient, block->line_size, tess); + compute_row_xheight(row, block->block->classify_rotation(), + gradient, block->line_size); } ROW_CATEGORY row_category = get_row_category(row); if (row_category == ROW_ASCENDERS_FOUND) { @@ -1381,13 +1432,14 @@ void compute_block_xheight(TO_BLOCK *block, float gradient, // likely all caps (or all small caps, in which case the code // still works as intended). compute_xheight_from_modes(&row_cap_xheights, &row_cap_floating_xheights, + textord_single_height_mode && + block->block->classify_rotation().y() == 0.0, min_height, max_height, &(xheight), &(ascrise)); if (ascrise == 0) { // assume only caps in the whole block - xheight = row_cap_xheights.median() * textord_merge_x / - (textord_merge_x + textord_merge_asc); + xheight = row_cap_xheights.median() * CCStruct::kXHeightCapRatio; } } else { // default block sizes - xheight = block->line_size * textord_merge_x; + xheight = block->line_size * CCStruct::kXHeightFraction; } // Correct xheight, ascrise and descdrop if necessary. 
bool corrected_xheight = false; @@ -1421,15 +1473,15 @@ void compute_block_xheight(TO_BLOCK *block, float gradient, * Set xheigh_evidence to the number of blobs with the chosen xheight * that appear in this row. */ -void compute_row_xheight(TO_ROW *row, // row to do - float gradient, // global skew - int block_line_size, - tesseract::Tesseract *tess) { +void Textord::compute_row_xheight(TO_ROW *row, // row to do + const FCOORD& rotation, + float gradient, // global skew + int block_line_size) { // Find blobs representing repeated characters in rows and mark them. // This information is used for computing row xheight and at a later // stage when words are formed by make_words. if (!row->rep_chars_marked()) { - mark_repeated_chars(row, block_line_size * textord_merge_x, tess); + mark_repeated_chars(row); } int min_height, max_height; @@ -1441,19 +1493,20 @@ void compute_row_xheight(TO_ROW *row, // row to do row->ascrise = 0.0f; row->xheight = 0.0f; row->xheight_evidence = - compute_xheight_from_modes(&heights, &floating_heights, min_height, - max_height, &(row->xheight), &(row->ascrise)); + compute_xheight_from_modes(&heights, &floating_heights, + textord_single_height_mode && + rotation.y() == 0.0, + min_height, max_height, + &(row->xheight), &(row->ascrise)); row->descdrop = 0.0f; if (row->xheight > 0.0) { row->descdrop = static_cast( compute_row_descdrop(row, gradient, row->xheight_evidence, &heights)); - } else { - // Since we could not find a meaningful xheight, the results - // of mark_repeated_chars() should be recomputed at a later stage. - row->clear_rep_chars_marked(); } } +} // namespace tesseract. + /** * @name fill_heights * @@ -1519,25 +1572,29 @@ void fill_heights(TO_ROW *row, float gradient, int min_height, * that sit far above the baseline could represent valid ascenders, but * it is highly unlikely that such a character's height will be an xheight * (e.g. -, ', =, ^, `, ", ', etc) + * If cap_only, then force finding of only the top mode. 
*/ int compute_xheight_from_modes( - STATS *heights, STATS *floating_heights, int min_height, + STATS *heights, STATS *floating_heights, bool cap_only, int min_height, int max_height, float *xheight, float *ascrise) { int blob_index = heights->mode(); // find mode int blob_count = heights->pile_count(blob_index); // get count of mode if (textord_debug_xheights) { - tprintf ("min_height=%d, max_height=%d, mode=%d, count=%d, total=%d\n", - min_height, max_height, blob_index, blob_count, heights->get_total()); + tprintf("min_height=%d, max_height=%d, mode=%d, count=%d, total=%d\n", + min_height, max_height, blob_index, blob_count, + heights->get_total()); heights->print(NULL, true); floating_heights->print(NULL, true); } if (blob_count == 0) return 0; - int modes[MAX_HEIGHT_MODES]; // biggest piles + int modes[MAX_HEIGHT_MODES]; // biggest piles bool in_best_pile = FALSE; int prev_size = -MAX_INT32; int best_count = 0; int mode_count = compute_height_modes(heights, min_height, max_height, modes, MAX_HEIGHT_MODES); + if (cap_only && mode_count > 1) + mode_count = 1; int x; if (textord_debug_xheights) { tprintf("found %d modes: ", mode_count); @@ -1635,7 +1692,7 @@ inT32 compute_row_descdrop(TO_ROW *row, float gradient, static_cast(floor(row->xheight * textord_descx_ratio_max)); float xcentre; // centre of blob float height; // height of blob - BLOBNBOX_IT blob_it = row->blob_list (); + BLOBNBOX_IT blob_it = row->blob_list(); BLOBNBOX *blob; // current blob STATS heights (min_height, max_height + 1); for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) { @@ -1673,54 +1730,50 @@ inT32 compute_row_descdrop(TO_ROW *row, float gradient, * Find the top maxmodes values in the input array and put their * indices in the output in the order in which they occurred. 
*/ -inT32 compute_height_modes( //find lines - STATS *heights, //stats to search - inT32 min_height, //bottom of range - inT32 max_height, //top of range - inT32 *modes, //output array - inT32 maxmodes //size of modes - ) { - inT32 pile_count; //no in source pile - inT32 src_count; //no of source entries - inT32 src_index; //current entry - inT32 least_count; //height of smalllest - inT32 least_index; //index of least - inT32 dest_count; //index in modes +inT32 compute_height_modes(STATS *heights, // stats to search + inT32 min_height, // bottom of range + inT32 max_height, // top of range + inT32 *modes, // output array + inT32 maxmodes) { // size of modes + inT32 pile_count; // no in source pile + inT32 src_count; // no of source entries + inT32 src_index; // current entry + inT32 least_count; // height of smalllest + inT32 least_index; // index of least + inT32 dest_count; // index in modes src_count = max_height + 1 - min_height; dest_count = 0; least_count = MAX_INT32; least_index = -1; for (src_index = 0; src_index < src_count; src_index++) { - pile_count = heights->pile_count (min_height + src_index); + pile_count = heights->pile_count(min_height + src_index); if (pile_count > 0) { if (dest_count < maxmodes) { if (pile_count < least_count) { - //find smallest in array + // find smallest in array least_count = pile_count; least_index = dest_count; } modes[dest_count++] = min_height + src_index; - } - else if (pile_count >= least_count) { + } else if (pile_count >= least_count) { while (least_index < maxmodes - 1) { modes[least_index] = modes[least_index + 1]; - //shuffle up + // shuffle up least_index++; } - //new one on end + // new one on end modes[maxmodes - 1] = min_height + src_index; if (pile_count == least_count) { - //new smallest + // new smallest least_index = maxmodes - 1; - } - else { - least_count = heights->pile_count (modes[0]); + } else { + least_count = heights->pile_count(modes[0]); least_index = 0; for (dest_count = 1; dest_count < 
maxmodes; dest_count++) { - pile_count = heights->pile_count (modes[dest_count]); + pile_count = heights->pile_count(modes[dest_count]); if (pile_count < least_count) { - //find smallest + // find smallest least_count = pile_count; least_index = dest_count; } @@ -1759,29 +1812,28 @@ void correct_row_xheight(TO_ROW *row, float xheight, // -- the row does not have ascenders or descenders, but its xheight // is close to the average block xheight (e.g. row with "www.mmm.com") if (row_category == ROW_ASCENDERS_FOUND) { - if (row->descdrop >= 0.0) { + if (row->descdrop >= 0.0) { row->descdrop = row->xheight * (descdrop / xheight); } - } else if (row_category == ROW_INVALID || - (row_category == ROW_DESCENDERS_FOUND && - (normal_xheight || cap_xheight)) || + } else if (row_category == ROW_INVALID || + (row_category == ROW_DESCENDERS_FOUND && + (normal_xheight || cap_xheight)) || (row_category == ROW_UNKNOWN && normal_xheight)) { if (textord_debug_xheights) tprintf("using average xheight\n"); row->xheight = xheight; row->ascrise = ascrise; row->descdrop = descdrop; - } - // Assume this is a row with mostly lowercase letters and it's xheight - // is computed correctly (unfortunately there is no way to distinguish - // this from the case when descenders are found, but the most common - // height is capheight). - else if (row_category == ROW_DESCENDERS_FOUND) { + } else if (row_category == ROW_DESCENDERS_FOUND) { + // Assume this is a row with mostly lowercase letters and it's xheight + // is computed correctly (unfortunately there is no way to distinguish + // this from the case when descenders are found, but the most common + // height is capheight). if (textord_debug_xheights) tprintf("lowercase, corrected ascrise\n"); row->ascrise = row->xheight * (ascrise / xheight); - } + } else if (row_category == ROW_UNKNOWN) { // Otherwise assume this row is an all-caps or small-caps row // and adjust xheight and ascrise of the row. 
- else if (row_category == ROW_UNKNOWN) { + row->all_caps = true; if (cap_xheight) { // regular all caps if (textord_debug_xheights) tprintf("all caps\n"); @@ -1827,101 +1879,68 @@ static int CountOverlaps(const TBOX& box, int min_height, * Test wide objects for being potential underlines. If they are then * put them in a separate list in the block. */ -void separate_underlines( //make rough chars - TO_BLOCK *block, //block to do - float gradient, //skew angle - FCOORD rotation, //inverse landscape - BOOL8 testing_on //correct orientation - ) { - BLOBNBOX *blob; //current blob - PBLOB *poly_blob; //rotated blob - C_BLOB *rotated_blob; //rotated blob - TO_ROW *row; //current row - float length; //of g_vec +void separate_underlines(TO_BLOCK *block, // block to do + float gradient, // skew angle + FCOORD rotation, // inverse landscape + BOOL8 testing_on) { // correct orientation + BLOBNBOX *blob; // current blob + C_BLOB *rotated_blob; // rotated blob + TO_ROW *row; // current row + float length; // of g_vec TBOX blob_box; - FCOORD blob_rotation; //inverse of rotation - FCOORD g_vec; //skew rotation - BLOBNBOX_IT blob_it; //iterator - //iterator + FCOORD blob_rotation; // inverse of rotation + FCOORD g_vec; // skew rotation + BLOBNBOX_IT blob_it; // iterator + // iterator BLOBNBOX_IT under_it = &block->underlines; BLOBNBOX_IT large_it = &block->large_blobs; - TO_ROW_IT row_it = block->get_rows (); + TO_ROW_IT row_it = block->get_rows(); int min_blob_height = static_cast(textord_min_blob_height_fraction * block->line_size + 0.5); - //length of vector - length = sqrt (1 + gradient * gradient); - g_vec = FCOORD (1 / length, -gradient / length); - blob_rotation = FCOORD (rotation.x (), -rotation.y ()); - blob_rotation.rotate (g_vec); //unoding everything - for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) { - row = row_it.data (); - //get blobs - blob_it.set_to_list (row->blob_list ()); - for (blob_it.mark_cycle_pt (); !blob_it.cycled_list (); - 
blob_it.forward ()) { - blob = blob_it.data (); - blob_box = blob->bounding_box (); - if (blob_box.width () > block->line_size * textord_underline_width) { - if (textord_cblob_blockocc && blob->cblob () != NULL) { - rotated_blob = crotate_cblob (blob->cblob (), - blob_rotation); - if (test_underline(testing_on && textord_show_final_rows, - rotated_blob, static_cast(row->intercept()), - static_cast(block->line_size * - (textord_merge_x + - textord_merge_asc / 2.0f)))) { - under_it.add_after_then_move(blob_it.extract()); - if (testing_on && textord_show_final_rows) { - tprintf("Underlined blob at:"); - rotated_blob->bounding_box().print(); - tprintf("Was:"); - blob_box.print(); - } - } else if (CountOverlaps(blob->bounding_box(), min_blob_height, - row->blob_list()) > - textord_max_blob_overlaps) { - large_it.add_after_then_move(blob_it.extract()); - if (testing_on && textord_show_final_rows) { - tprintf("Large blob overlaps %d blobs at:", - CountOverlaps(blob_box, min_blob_height, - row->blob_list())); + // length of vector + length = sqrt(1 + gradient * gradient); + g_vec = FCOORD(1 / length, -gradient / length); + blob_rotation = FCOORD(rotation.x(), -rotation.y()); + blob_rotation.rotate(g_vec); // undoing everything + for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) { + row = row_it.data(); + // get blobs + blob_it.set_to_list(row->blob_list()); + for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); + blob_it.forward()) { + blob = blob_it.data(); + blob_box = blob->bounding_box(); + if (blob_box.width() > block->line_size * textord_underline_width) { + ASSERT_HOST(blob->cblob() != NULL); + rotated_blob = crotate_cblob (blob->cblob(), + blob_rotation); + if (test_underline( + testing_on && textord_show_final_rows, + rotated_blob, static_cast(row->intercept()), + static_cast( + block->line_size * + (tesseract::CCStruct::kXHeightFraction + + tesseract::CCStruct::kAscenderFraction / 2.0f)))) { + under_it.add_after_then_move(blob_it.extract()); 
+ if (testing_on && textord_show_final_rows) { + tprintf("Underlined blob at:"); + rotated_blob->bounding_box().print(); + tprintf("Was:"); blob_box.print(); - } - } - delete rotated_blob; - } - else { - if (blob->blob () != NULL) { - // if (testing_on && textord_show_final_rows) - // tprintf("Rotating by (%g,%g)\n", - // blob_rotation.x(),blob_rotation.y()); - poly_blob = rotate_blob (blob->blob (), blob_rotation); } - else - poly_blob = rotate_cblob (blob->cblob (), - block->line_size, - blob_rotation); - if (test_underline - (testing_on - && textord_show_final_rows, poly_blob, - row->intercept (), - block->line_size * (textord_merge_x + - textord_merge_asc / 2))) { - if (testing_on && textord_show_final_rows) { - tprintf ("Underlined blob at (%d,%d)->(%d,%d) ", - poly_blob->bounding_box ().left (), - poly_blob->bounding_box ().bottom (), - poly_blob->bounding_box ().right (), - poly_blob->bounding_box ().top ()); - tprintf ("(Was (%d,%d)->(%d,%d))\n", - blob_box.left (), blob_box.bottom (), - blob_box.right (), blob_box.top ()); - } - under_it.add_after_then_move (blob_it.extract ()); + } else if (CountOverlaps(blob->bounding_box(), min_blob_height, + row->blob_list()) > + textord_max_blob_overlaps) { + large_it.add_after_then_move(blob_it.extract()); + if (testing_on && textord_show_final_rows) { + tprintf("Large blob overlaps %d blobs at:", + CountOverlaps(blob_box, min_blob_height, + row->blob_list())); + blob_box.print(); } - delete poly_blob; } + delete rotated_blob; } } } @@ -1986,7 +2005,7 @@ void pre_associate_blobs( //make rough chars while (overlap); blob->chop (&start_it, &blob_it, blob_rotation, - block->line_size * textord_merge_x * + block->line_size * tesseract::CCStruct::kXHeightFraction * textord_chop_width); //attempt chop } @@ -2061,33 +2080,27 @@ void fit_parallel_rows( //find lines * Make the fit parallel to the given gradient and set the * row accordingly. 
*/ -void fit_parallel_lms( //sort function - float gradient, //forced gradient - TO_ROW *row //row to fit - ) { - float c; //fitted line - int blobcount; //no of blobs - TBOX box; //blob box - LMS lms (row->blob_list ()->length ()); - //blobs - BLOBNBOX_IT blob_it = row->blob_list (); +void fit_parallel_lms(float gradient, TO_ROW *row) { + float c; // fitted line + int blobcount; // no of blobs + tesseract::DetLineFit lms; + BLOBNBOX_IT blob_it = row->blob_list(); blobcount = 0; - for (blob_it.mark_cycle_pt (); !blob_it.cycled_list (); blob_it.forward ()) { - if (!blob_it.data ()->joined_to_prev ()) { - box = blob_it.data ()->bounding_box (); - lms. - add (FCOORD ((box.left () + box.right ()) / 2.0, box.bottom ())); + for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) { + if (!blob_it.data()->joined_to_prev()) { + const TBOX& box = blob_it.data()->bounding_box(); + lms.Add(ICOORD((box.left() + box.right()) / 2, box.bottom())); blobcount++; } } - lms.constrained_fit (gradient, c); - row->set_parallel_line (gradient, c, lms.error ()); - if (textord_straight_baselines && blobcount > lms_line_trials) { - lms.fit (gradient, c); + double error = lms.ConstrainedFit(gradient, &c); + row->set_parallel_line(gradient, c, error); + if (textord_straight_baselines && blobcount > textord_lms_line_trials) { + error = lms.Fit(&gradient, &c); } //set the other too - row->set_line (gradient, c, lms.error ()); + row->set_line(gradient, c, error); } @@ -2096,14 +2109,12 @@ void fit_parallel_lms( //sort function * * Re-fit the rows in the block to the given gradient. 
*/ -void make_spline_rows( //find lines - TO_BLOCK *block, //block to do - float gradient, //gradient to fit - FCOORD rotation, //for drawing - inT32 block_edge, //edge of block - BOOL8 testing_on, //correct orientation - tesseract::Tesseract* tess - ) { +namespace tesseract { +void Textord::make_spline_rows(TO_BLOCK *block, // block to do + float gradient, // gradient to fit + FCOORD rotation, // for drawing + inT32 block_edge, // edge of block + BOOL8 testing_on) { #ifndef GRAPHICS_DISABLED ScrollView::Color colour; //of row #endif @@ -2129,7 +2140,7 @@ void make_spline_rows( //find lines } } #endif - make_old_baselines(block, testing_on, gradient, tess); + make_old_baselines(block, testing_on, gradient); } #ifndef GRAPHICS_DISABLED if (testing_on) { @@ -2144,6 +2155,8 @@ void make_spline_rows( //find lines #endif } +} // namespace tesseract. + /** * @name make_baseline_spline @@ -2152,56 +2165,19 @@ void make_spline_rows( //find lines * Make the fit parallel to the given gradient and set the * row accordingly. 
*/ -void make_baseline_spline( //sort function - TO_ROW *row, //row to fit - TO_BLOCK *block //block it came from - ) { - float b, c; //fitted curve - float middle; //x middle of blob - TBOX box; //blob box - LMS lms (row->blob_list ()->length ()); - //blobs +void make_baseline_spline(TO_ROW *row, //row to fit + TO_BLOCK *block) { BLOBNBOX_IT blob_it = row->blob_list (); - inT32 *xstarts; //spline boundaries - double *coeffs; //quadratic coeffs - inT32 segments; //no of segments - inT32 segment; //current segment + inT32 *xstarts; // spline boundaries + double *coeffs; // quadratic coeffs + inT32 segments; // no of segments xstarts = - (inT32 *) alloc_mem ((row->blob_list ()->length () + 1) * sizeof (inT32)); - if (segment_baseline (row, block, segments, xstarts) + (inT32 *) alloc_mem((row->blob_list()->length() + 1) * sizeof(inT32)); + if (segment_baseline(row, block, segments, xstarts) && !textord_straight_baselines && !textord_parallel_baselines) { - if (textord_quadratic_baselines) { - coeffs = (double *) alloc_mem (segments * 3 * sizeof (double)); - for (segment = 0; segment < segments; segment++) { - lms.clear (); - for (blob_it.mark_cycle_pt (); !blob_it.cycled_list (); - blob_it.forward ()) { - if (!blob_it.data ()->joined_to_prev ()) { - box = blob_it.data ()->bounding_box (); - middle = (box.left () + box.right ()) / 2.0; - if (middle >= xstarts[segment] - && middle < xstarts[segment + 1]) { - lms.add (FCOORD (middle, box.bottom ())); - } - } - } - if (textord_quadratic_baselines) - lms.fit_quadratic (block->line_size * - textord_spline_outlier_fraction, - coeffs[segment * 3], b, c); - else { - lms.fit (b, c); - coeffs[segment * 3] = 0; - } - coeffs[segment * 3 + 1] = b; - coeffs[segment * 3 + 2] = c; - } - } - else - coeffs = linear_spline_baseline (row, block, segments, xstarts); - } - else { + coeffs = linear_spline_baseline(row, block, segments, xstarts); + } else { xstarts[1] = xstarts[segments]; segments = 1; coeffs = (double *) alloc_mem (3 * 
sizeof (double)); @@ -2334,12 +2310,11 @@ inT32 xstarts[] //coords of segments int blobs_per_segment; //blobs in each TBOX box; //blob box TBOX new_box; //new_it box - float middle; //xcentre of blob //blobs BLOBNBOX_IT blob_it = row->blob_list (); BLOBNBOX_IT new_it = blob_it; //front end float b, c; //fitted curve - LMS lms (row->blob_list ()->length ()); + tesseract::DetLineFit lms; double *coeffs; //quadratic coeffs inT32 segment; //current segment @@ -2366,18 +2341,18 @@ inT32 xstarts[] //coords of segments blobindex = index2; do { blobindex += blobs_per_segment; - lms.clear (); + lms.Clear(); while (index1 < blobindex || (segment == segments && index1 < blobcount)) { box = box_next_pre_chopped (&blob_it); - middle = (box.left () + box.right ()) / 2.0; - lms.add (FCOORD (middle, box.bottom ())); + int middle = (box.left() + box.right()) / 2; + lms.Add(ICOORD(middle, box.bottom())); index1++; if (index1 == blobindex - blobs_per_segment / 2 || index1 == blobcount - 1) { xstarts[segment] = box.left (); } } - lms.fit (b, c); + lms.Fit(&b, &c); coeffs[segment * 3 - 3] = 0; coeffs[segment * 3 - 2] = b; coeffs[segment * 3 - 1] = c; @@ -2386,18 +2361,18 @@ inT32 xstarts[] //coords of segments break; blobindex += blobs_per_segment; - lms.clear (); + lms.Clear(); while (index2 < blobindex || (segment == segments && index2 < blobcount)) { new_box = box_next_pre_chopped (&new_it); - middle = (new_box.left () + new_box.right ()) / 2.0; - lms.add (FCOORD (middle, new_box.bottom ())); + int middle = (new_box.left() + new_box.right()) / 2; + lms.Add(ICOORD (middle, new_box.bottom())); index2++; if (index2 == blobindex - blobs_per_segment / 2 || index2 == blobcount - 1) { xstarts[segment] = new_box.left (); } } - lms.fit (b, c); + lms.Fit(&b, &c); coeffs[segment * 3 - 3] = 0; coeffs[segment * 3 - 2] = b; coeffs[segment * 3 - 1] = c; @@ -2435,7 +2410,7 @@ void assign_blobs_to_rows( //find lines ICOORD testpt; //testing only BLOBNBOX *blob; //current blob TO_ROW *row; //current 
row - TO_ROW *dest_row; //row to put blob in + TO_ROW *dest_row = NULL; //row to put blob in //iterators BLOBNBOX_IT blob_it = &block->blobs; TO_ROW_IT row_it = block->get_rows (); @@ -2506,7 +2481,7 @@ void assign_blobs_to_rows( //find lines if (bottom < row->min_y ()) { if (row->min_y () - bottom <= (block->line_spacing - - block->line_size) * textord_merge_desc) { + block->line_size) * tesseract::CCStruct::kDescenderFraction) { //done it overlap_result = ASSIGN; dest_row = row; @@ -2518,7 +2493,7 @@ void assign_blobs_to_rows( //find lines dest_row = row_it.data (); if (dest_row->min_y () - bottom <= (block->line_spacing - - block->line_size) * textord_merge_desc) { + block->line_size) * tesseract::CCStruct::kDescenderFraction) { //done it overlap_result = ASSIGN; } @@ -2527,7 +2502,7 @@ void assign_blobs_to_rows( //find lines if (top - row->max_y () <= (block->line_spacing - block->line_size) * (textord_overlap_x + - textord_merge_asc)) { + tesseract::CCStruct::kAscenderFraction)) { //done it overlap_result = ASSIGN; dest_row = row; @@ -2767,128 +2742,48 @@ int row_spacing_order( //sort function } /** - * @name make_repeated_chars + * @name mark_repeated_chars * - * Mark textord_repeat_threshold or more adjacent chars which are the - * same as repeated chars. + * Mark blobs marked with BTFT_LEADER in repeated sets using the + * repeated_set member of BLOBNBOX. 
*/ -void mark_repeated_chars(TO_ROW *row, float block_xheight, - tesseract::Tesseract *tess) { - ROW *real_row = NULL; //output row - BLOBNBOX *bblob; //current blob - BLOBNBOX *nextblob; //neighbour to compare - BLOBNBOX_IT box_it; //iterator - BLOBNBOX_IT search_it; //forward search - inT32 blobcount; //no of neighbours - inT32 matched_blobcount; //no of matches - inT32 blobindex; //in row - inT32 row_length; //blobs in row - inT32 width_change; //max width change - inT32 blob_width; //required blob width - inT32 space_width; //required gap width - inT32 prev_right; //right edge of last blob - float rating; //match rating - PBLOB *pblob1; //polygonal blob - PBLOB *pblob2; //second blob - - // kern_size and space_size are computed in the same way as in - // compute_block_pitch(). - float kern_size = ceil(block_xheight * textord_words_default_nonspace); - float space_size = floor(block_xheight * textord_words_default_minspace); +void mark_repeated_chars(TO_ROW *row) { + BLOBNBOX_IT box_it(row->blob_list()); // Iterator. int num_repeated_sets = 0; - box_it.set_to_list(row->blob_list()); - row_length = row->blob_list()->length(); - blobindex = 0; if (!box_it.empty()) { - if (textord_debug_xheights) - tprintf("Running mark_repeated_chars(), row length %d\n", row_length); - real_row = new ROW(row, static_cast(kern_size), - static_cast(space_size)); - // Use block_xheight, since xheight of the row (used in the ROW() - // constructor) might not have been computed yet. - real_row->set_x_height(block_xheight); do { - bblob = box_it.data(); - blobcount = 1; - search_it = box_it; - search_it.forward(); - matched_blobcount = 1; - width_change = MAX_INT16; - blob_width = 0; - space_width = 0; - prev_right = bblob->bounding_box().right(); - if (bblob->bounding_box().height() * 2 < block_xheight && - !bblob->joined_to_prev() && - (bblob->blob() != NULL || bblob->cblob() != NULL)) { - pblob1 = (bblob->cblob() != NULL) ? 
- new PBLOB(bblob->cblob(), block_xheight) : bblob->blob(); - rating = 0.0f; - while (rating < textord_repeat_rating && - blobindex + blobcount < row_length && - ((nextblob = search_it.data())->blob() != NULL || - nextblob->cblob() != NULL) && - nextblob->bounding_box().height() * 2 < block_xheight) { - if (blobcount == 1) { - space_width = nextblob->bounding_box().left() - - bblob->bounding_box().right(); - blob_width = bblob->bounding_box().width(); - width_change = blob_width > space_width ? blob_width : space_width; - width_change = - static_cast(width_change * textord_repch_width_variance); - if (width_change < 3) width_change = 3; + BLOBNBOX* bblob = box_it.data(); + int repeat_length = 0; + if (bblob->flow() == BTFT_LEADER && + !bblob->joined_to_prev() && bblob->cblob() != NULL) { + BLOBNBOX_IT test_it(box_it); + for (test_it.forward(); !test_it.at_first(); test_it.forward()) { + bblob = test_it.data(); + if (bblob->flow() != BTFT_LEADER) + break; + if (bblob->joined_to_prev() || bblob->cblob() == NULL) { + tprintf("Cancelled repeat of length %d due to %s\n", + repeat_length, + bblob->joined_to_prev() ? 
"Joined" : "Null"); + repeat_length = 0; + break; } - if (nextblob->bounding_box().width() > blob_width + width_change || - nextblob->bounding_box().width() < blob_width - width_change || - nextblob->bounding_box().left() - prev_right > - space_width + width_change || - nextblob->bounding_box().left() - prev_right < - space_width - width_change) { - break; // not good enough - } - if (nextblob->blob() != NULL) - rating = tess->compare_blobs(pblob1, real_row, - nextblob->blob(), real_row); - else { - pblob2 = new PBLOB(nextblob->cblob(), block_xheight); - rating = tess->compare_blobs(pblob1, real_row, pblob2, real_row); - delete pblob2; - } - if (rating < textord_repeat_rating) { - blobcount++; - search_it.forward(); - matched_blobcount++; - while (blobindex + blobcount < row_length && - (search_it.data()->joined_to_prev() || - (search_it.data()->blob() == NULL && - search_it.data()->cblob() == NULL))) { - search_it.forward(); - blobcount++; //suck in joined bits - } - } - prev_right = nextblob->bounding_box().right(); + ++repeat_length; } - if (bblob->cblob() != NULL) delete pblob1; } - - // Record position and length of this run of repeated chars. 
- if (matched_blobcount >= textord_repeat_threshold) { - if (textord_debug_xheights) { - tprintf("Found %d repeated chars starting at blob index %d\n", - blobcount, blobindex); - } - blobindex += blobcount; + if (repeat_length >= kMinLeaderCount) { num_repeated_sets++; - while (blobcount-- > 0 && !box_it.at_first()) { - box_it.data()->set_repeated_set(num_repeated_sets); - box_it.forward(); + for (; repeat_length > 0; box_it.forward(), --repeat_length) { + bblob = box_it.data(); + bblob->set_repeated_set(num_repeated_sets); } - } else { // just forward box_it to the next blob - blobindex += blobcount; + if (!box_it.at_first()) + bblob->set_repeated_set(0); + } else { box_it.forward(); + bblob->set_repeated_set(0); } } while (!box_it.at_first()); // until all done - - if (real_row != NULL) delete real_row; } row->set_num_repeated_sets(num_repeated_sets); } diff --git a/textord/makerow.h b/textord/makerow.h index 0243450e02..c9bd8a6a40 100644 --- a/textord/makerow.h +++ b/textord/makerow.h @@ -20,13 +20,12 @@ #ifndef MAKEROW_H #define MAKEROW_H -#include "varable.h" +#include "params.h" #include "ocrblock.h" -#include "tessclas.h" +#include "blobs.h" #include "blobbox.h" #include "statistc.h" #include "notdll.h" -#include "tesseractclass.h" enum OVERLAP_STATE { @@ -88,12 +87,6 @@ extern double_VAR_H (textord_linespace_iqrlimit, 0.2, extern double_VAR_H (textord_width_limit, 8, "Max width of blobs to make rows"); extern double_VAR_H (textord_chop_width, 1.5, "Max width before chopping"); -extern double_VAR_H (textord_merge_desc, 0.25, -"Fraction of linespace for desc drop"); -extern double_VAR_H (textord_merge_x, 0.5, -"Fraction of linespace for x height"); -extern double_VAR_H (textord_merge_asc, 0.25, -"Fraction of linespace for asc height"); extern double_VAR_H (textord_minxh, 0.25, "fraction of linesize for min xheight"); extern double_VAR_H (textord_min_linesize, 1.25, @@ -115,12 +108,15 @@ extern double_VAR_H (textord_ascx_ratio_max, 1.7, "Max cap/xheight"); 
extern double_VAR_H (textord_descx_ratio_min, 0.15, "Min desc/xheight"); extern double_VAR_H (textord_descx_ratio_max, 0.6, "Max desc/xheight"); extern double_VAR_H (textord_xheight_error_margin, 0.1, "Accepted variation"); +extern INT_VAR_H (textord_lms_line_trials, 12, "Number of linew fits to do"); +extern BOOL_VAR_H (textord_new_initial_xheight, TRUE, +"Use test xheight mechanism"); -inline void get_min_max_xheight(double block_linesize, +inline void get_min_max_xheight(int block_linesize, int *min_height, int *max_height) { *min_height = static_cast(floor(block_linesize * textord_minxh)); if (*min_height < textord_min_xheight) *min_height = textord_min_xheight; - *max_height = static_cast(ceil(block_linesize * 3)); + *max_height = static_cast(ceil(block_linesize * 3.0)); } inline ROW_CATEGORY get_row_category(const TO_ROW *row) { @@ -137,37 +133,23 @@ void fill_heights(TO_ROW *row, float gradient, int min_height, int max_height, STATS *heights, STATS *floating_heights); float make_single_row(ICOORD page_tr, TO_BLOCK* block, - TO_BLOCK_LIST* blocks, tesseract::Tesseract* tess); -float make_rows( //make rows - ICOORD page_tr, //top right - BLOCK_LIST *blocks, //block list - TO_BLOCK_LIST *land_blocks, //rotated for landscape - TO_BLOCK_LIST *port_blocks, //output list - tesseract::Tesseract* tess - ); -void make_initial_textrows( //find lines - ICOORD page_tr, - TO_BLOCK *block, //block to do - FCOORD rotation, //for drawing - BOOL8 testing_on //correct orientation - ); -void fit_lms_line( //sort function - TO_ROW *row //row to fit - ); -void compute_page_skew( //get average gradient - TO_BLOCK_LIST *blocks, //list of blocks - float &page_m, //average gradient - float &page_err //average error - ); -void cleanup_rows( //find lines - ICOORD page_tr, //top right - TO_BLOCK *block, //block to do - float gradient, //gradient to fit - FCOORD rotation, //for drawing - inT32 block_edge, //edge of block - BOOL8 testing_on, //correct orientation - tesseract::Tesseract* 
tess - ); + TO_BLOCK_LIST* blocks); +float make_rows(ICOORD page_tr, // top right + TO_BLOCK_LIST *port_blocks); +void make_initial_textrows(ICOORD page_tr, + TO_BLOCK *block, // block to do + FCOORD rotation, // for drawing + BOOL8 testing_on); // correct orientation +void fit_lms_line(TO_ROW *row); +void compute_page_skew(TO_BLOCK_LIST *blocks, // list of blocks + float &page_m, // average gradient + float &page_err); // average error +void cleanup_rows_making(ICOORD page_tr, // top right + TO_BLOCK *block, // block to do + float gradient, // gradient to fit + FCOORD rotation, // for drawing + inT32 block_edge, // edge of block + BOOL8 testing_on); // correct orientation void delete_non_dropout_rows( //find lines TO_BLOCK *block, //block to do float gradient, //global skew @@ -222,79 +204,45 @@ void compute_row_stats( //find lines TO_BLOCK *block, //block to do BOOL8 testing_on //correct orientation ); -void compute_block_xheight( //find lines - TO_BLOCK *block, //block to do - float gradient, //global skew - tesseract::Tesseract* tess - ); float median_block_xheight( //find lines TO_BLOCK *block, //block to do float gradient //global skew ); -void compute_row_xheight( //find lines - TO_ROW *row, //row to do - float gradient, //global skew - int block_height, //a guess of block xheight - tesseract::Tesseract* tess - ); int compute_xheight_from_modes( - STATS *heights, STATS *floating_heights, int min_height, + STATS *heights, STATS *floating_heights, bool cap_only, int min_height, int max_height, float *xheight, float *ascrise); -inT32 compute_row_descdrop( // find lines - TO_ROW *row, // row to do +inT32 compute_row_descdrop(TO_ROW *row, // row to do float gradient, // global skew int xheight_blob_count, - STATS *heights - ); -inT32 compute_height_modes( //find lines - STATS *heights, //stats to search - inT32 min_height, //bottom of range - inT32 max_height, //top of range - inT32 *modes, //output array - inT32 maxmodes //size of modes - ); -void 
correct_row_xheight( //fix bad values - TO_ROW *row, //row to fix - float xheight, //average values + STATS *heights); +inT32 compute_height_modes(STATS *heights, // stats to search + inT32 min_height, // bottom of range + inT32 max_height, // top of range + inT32 *modes, // output array + inT32 maxmodes); // size of modes +void correct_row_xheight(TO_ROW *row, // row to fix + float xheight, // average values float ascrise, float descdrop); -void separate_underlines( //make rough chars - TO_BLOCK *block, //block to do - float gradient, //skew angle - FCOORD rotation, //inverse landscape - BOOL8 testing_on //correct orientation - ); -void pre_associate_blobs( //make rough chars - ICOORD page_tr, //top right - TO_BLOCK *block, //block to do - FCOORD rotation, //inverse landscape - BOOL8 testing_on //correct orientation - ); -void fit_parallel_rows( //find lines - TO_BLOCK *block, //block to do - float gradient, //gradient to fit - FCOORD rotation, //for drawing - inT32 block_edge, //edge of block - BOOL8 testing_on //correct orientation - ); -void fit_parallel_lms( //sort function - float gradient, //forced gradient - TO_ROW *row //row to fit - ); -void make_spline_rows( //find lines - TO_BLOCK *block, //block to do - float gradient, //gradient to fit - FCOORD rotation, //for drawing - inT32 block_edge, //edge of block - BOOL8 testing_on, //correct orientation - tesseract::Tesseract* tess - ); -void make_baseline_spline( //sort function - TO_ROW *row, //row to fit - TO_BLOCK *block //block it came from - ); +void separate_underlines(TO_BLOCK *block, // block to do + float gradient, // skew angle + FCOORD rotation, // inverse landscape + BOOL8 testing_on); // correct orientation +void pre_associate_blobs( ICOORD page_tr, // top right + TO_BLOCK *block, // block to do + FCOORD rotation, // inverse landscape + BOOL8 testing_on); // correct orientation +void fit_parallel_rows(TO_BLOCK *block, // block to do + float gradient, // gradient to fit + FCOORD rotation, // for 
drawing + inT32 block_edge, // edge of block + BOOL8 testing_on); // correct orientation +void fit_parallel_lms(float gradient, // forced gradient + TO_ROW *row); // row to fit +void make_baseline_spline(TO_ROW *row, // row to fit + TO_BLOCK *block); // block it came from BOOL8 segment_baseline ( //split baseline TO_ROW * row, //row to fit TO_BLOCK * block, //block it came from @@ -333,6 +281,5 @@ int row_spacing_order( //sort function const void *item1, //items to compare const void *item2); -void mark_repeated_chars(TO_ROW *row, float block_xheight, - tesseract::Tesseract *tess); +void mark_repeated_chars(TO_ROW *row); #endif diff --git a/textord/oldbasel.cpp b/textord/oldbasel.cpp index 9e8eda9b8e..e6cbab204b 100644 --- a/textord/oldbasel.cpp +++ b/textord/oldbasel.cpp @@ -18,14 +18,15 @@ **********************************************************************/ #include "mfcpch.h" +#include "ccstruct.h" #include "statistc.h" #include "quadlsq.h" -#include "lmedsq.h" +#include "detlinefit.h" #include "makerow.h" #include "drawtord.h" #include "oldbasel.h" +#include "textord.h" #include "tprintf.h" -#include "tesseractclass.h" // Include automatically generated configuration file if running autoconf. #ifdef HAVE_CONFIG_H @@ -72,41 +73,40 @@ EXTERN double_VAR (textord_oldbl_jumplimit, 0.15, #define ABS(x) ((x)<0 ? (-(x)) : (x)) +namespace tesseract { + /********************************************************************** * make_old_baselines * * Top level function to make baselines the old way. 
**********************************************************************/ -void make_old_baselines( //make splines - TO_BLOCK *block, //block to do - BOOL8 testing_on, //correct orientation - float gradient, - tesseract::Tesseract* tess - ) { - QSPLINE *prev_baseline; //baseline of previous row - TO_ROW *row; //current row - TO_ROW_IT row_it = block->get_rows (); +void Textord::make_old_baselines(TO_BLOCK *block, // block to do + BOOL8 testing_on, // correct orientation + float gradient) { + QSPLINE *prev_baseline; // baseline of previous row + TO_ROW *row; // current row + TO_ROW_IT row_it = block->get_rows(); BLOBNBOX_IT blob_it; - prev_baseline = NULL; //nothing yet - for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) { - row = row_it.data (); - find_textlines(block, row, 2, NULL, tess); + prev_baseline = NULL; // nothing yet + for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) { + row = row_it.data(); + find_textlines(block, row, 2, NULL); if (row->xheight <= 0 && prev_baseline != NULL) - find_textlines(block, row, 2, prev_baseline, tess); - if (row->xheight > 0) // was a good one + find_textlines(block, row, 2, prev_baseline); + if (row->xheight > 0) { // was a good one prev_baseline = &row->baseline; - else { + } else { prev_baseline = NULL; - blob_it.set_to_list (row->blob_list ()); + blob_it.set_to_list(row->blob_list()); if (textord_debug_baselines) - tprintf ("Row baseline generation failed on row at (%d,%d)\n", - blob_it.data ()->bounding_box ().left (), - blob_it.data ()->bounding_box ().bottom ()); + tprintf("Row baseline generation failed on row at (%d,%d)\n", + blob_it.data()->bounding_box().left(), + blob_it.data()->bounding_box().bottom()); } } - correlate_lines(block, gradient, tess); + correlate_lines(block, gradient); } @@ -118,11 +118,7 @@ void make_old_baselines( //make splines * Also fix baselines of rows without a decent fit. 
**********************************************************************/ -void correlate_lines( //cleanup lines - TO_BLOCK *block, //block to do - float gradient, - tesseract::Tesseract* tess - ) { +void Textord::correlate_lines(TO_BLOCK *block, float gradient) { TO_ROW **rows; //array of ptrs int rowcount; /*no of rows to do */ register int rowindex; /*no of row */ @@ -142,16 +138,16 @@ void correlate_lines( //cleanup lines rows[rowindex++] = row_it.data (); /*try to fix bad lines */ - correlate_neighbours(block, rows, rowcount, tess); + correlate_neighbours(block, rows, rowcount); if (textord_really_old_xheight || textord_old_xheight) { block->xheight = (float) correlate_with_stats(rows, rowcount, block); if (block->xheight <= 0) - block->xheight = block->line_size * textord_merge_x; + block->xheight = block->line_size * tesseract::CCStruct::kXHeightFraction; if (block->xheight < textord_min_xheight) block->xheight = (float) textord_min_xheight; } else { - compute_block_xheight(block, gradient, tess); + compute_block_xheight(block, gradient); } free_mem(rows); @@ -164,12 +160,9 @@ void correlate_lines( //cleanup lines * Try to fix rows that had a bad spline fit by using neighbours. **********************************************************************/ -void correlate_neighbours( //fix bad rows - TO_BLOCK *block, /*block rows are in */ - TO_ROW **rows, /*rows of block */ - int rowcount, /*no of rows to do */ - tesseract::Tesseract* tess - ) { +void Textord::correlate_neighbours(TO_BLOCK *block, // block rows are in. + TO_ROW **rows, // rows of block. + int rowcount) { // no of rows to do. 
TO_ROW *row; /*current row */ register int rowindex; /*no of row */ register int otherrow; /*second row */ @@ -194,14 +187,14 @@ void correlate_neighbours( //fix bad rows MAXOVERLAP)); otherrow++); lowerrow = otherrow; /*decent row below */ if (upperrow >= 0) - find_textlines(block, row, 2, &rows[upperrow]->baseline, tess); + find_textlines(block, row, 2, &rows[upperrow]->baseline); if (row->xheight < 0 && lowerrow < rowcount) - find_textlines(block, row, 2, &rows[lowerrow]->baseline, tess); + find_textlines(block, row, 2, &rows[lowerrow]->baseline); if (row->xheight < 0) { if (upperrow >= 0) - find_textlines(block, row, 1, &rows[upperrow]->baseline, tess); + find_textlines(block, row, 1, &rows[upperrow]->baseline); else if (lowerrow < rowcount) - find_textlines(block, row, 1, &rows[lowerrow]->baseline, tess); + find_textlines(block, row, 1, &rows[lowerrow]->baseline); } } } @@ -223,11 +216,9 @@ void correlate_neighbours( //fix bad rows * the ascender height and descender height for rows without one. **********************************************************************/ -int correlate_with_stats( //fix xheights - TO_ROW **rows, /*rows of block */ - int rowcount, /*no of rows to do */ - TO_BLOCK* block - ) { +int Textord::correlate_with_stats(TO_ROW **rows, // rows of block. + int rowcount, // no of rows to do. + TO_BLOCK* block) { TO_ROW *row; /*current row */ register int rowindex; /*no of row */ float lineheight; /*mean x-height */ @@ -339,15 +330,12 @@ int correlate_with_stats( //fix xheights * Compute the baseline for the given row. 
**********************************************************************/ -void find_textlines( //get baseline - TO_BLOCK *block, //block row is in - TO_ROW *row, //row to do - int degree, //required approximation - QSPLINE *spline, //starting spline - tesseract::Tesseract* tess - ) { +void Textord::find_textlines(TO_BLOCK *block, // block row is in + TO_ROW *row, // row to do + int degree, // required approximation + QSPLINE *spline) { // starting spline int partcount; /*no of partitions of */ - BOOL8 holed_line; //lost too many blobs + BOOL8 holed_line = FALSE; //lost too many blobs int bestpart; /*biggest partition */ char *partids; /*partition no of each blob */ int partsizes[MAXPARTS]; /*no in each partition */ @@ -435,7 +423,8 @@ void find_textlines( //get baseline make_first_xheight (row, blobcoords, lineheight, (int) block->line_size, blobcount, &row->baseline, jumplimit); } else { - compute_row_xheight(row, row->line_m(), block->line_size, tess); + compute_row_xheight(row, block->block->classify_rotation(), + row->line_m(), block->line_size); } free_mem(partids); free_mem(xcoords); @@ -444,6 +433,8 @@ void find_textlines( //get baseline free_mem(ydiffs); } +} // namespace tesseract. 
+ /********************************************************************** * get_blob_coords @@ -678,7 +669,7 @@ float gradient //of line float x; //centre of row ICOORD shift; //shift of spline - LMS lms(blobcount); //straight baseline + tesseract::DetLineFit lms; // straight baseline inT32 xstarts[2]; //straight line double coeffs[3]; float c; //line parameter @@ -686,13 +677,13 @@ float gradient //of line /*left edge of row */ leftedge = blobcoords[0].left (); /*right edge of line */ - rightedge = blobcoords[blobcount - 1].right (); + rightedge = blobcoords[blobcount - 1].right(); for (blobindex = 0; blobindex < blobcount; blobindex++) { - lms.add (FCOORD ((blobcoords[blobindex].left () + - blobcoords[blobindex].right ()) / 2.0, - blobcoords[blobindex].bottom ())); + lms.Add(ICOORD((blobcoords[blobindex].left() + + blobcoords[blobindex].right()) / 2, + blobcoords[blobindex].bottom())); } - lms.constrained_fit (gradient, c); + lms.ConstrainedFit(gradient, &c); xstarts[0] = leftedge; xstarts[1] = rightedge; coeffs[0] = 0; @@ -746,6 +737,8 @@ float ydiffs[] /*diff from spline */ startx = get_ydiffs (blobcoords, blobcount, spline, ydiffs); *numparts = 1; /*1 partition */ bestpart = -1; /*first point */ + float drift = 0.0f; + float last_delta = 0.0f; for (blobindex = startx; blobindex < blobcount; blobindex++) { /*do each blob in row */ diff = ydiffs[blobindex]; /*diff from line */ @@ -754,14 +747,16 @@ float ydiffs[] /*diff from spline */ blobcoords[blobindex].left (), blobcoords[blobindex].bottom ()); } - bestpart = - choose_partition(diff, partdiffs, bestpart, jumplimit, numparts); + bestpart = choose_partition(diff, partdiffs, bestpart, jumplimit, + &drift, &last_delta, numparts); /*record partition */ partids[blobindex] = bestpart; partsizes[bestpart]++; /*another in it */ } bestpart = -1; /*first point */ + drift = 0.0f; + last_delta = 0.0f; partsizes[0]--; /*doing 1st pt again */ /*do each blob in row */ for (blobindex = startx; blobindex >= 0; blobindex--) { 
@@ -771,8 +766,8 @@ float ydiffs[] /*diff from spline */ blobcoords[blobindex].left (), blobcoords[blobindex].bottom ()); } - bestpart = - choose_partition(diff, partdiffs, bestpart, jumplimit, numparts); + bestpart = choose_partition(diff, partdiffs, bestpart, jumplimit, + &drift, &last_delta, numparts); /*record partition */ partids[blobindex] = bestpart; partsizes[bestpart]++; /*another in it */ @@ -969,32 +964,32 @@ register float diff, /*diff from spline */ float partdiffs[], /*diff on all parts */ int lastpart, /*last assigned partition */ float jumplimit, /*new part threshold */ +float* drift, +float* lastdelta, int *partcount /*no of partitions */ ) { register int partition; /*partition no */ int bestpart; /*best new partition */ float bestdelta; /*best gap from a part */ - static float drift; /*drift from spline */ float delta; /*diff from part */ - static float lastdelta; /*previous delta */ if (lastpart < 0) { partdiffs[0] = diff; lastpart = 0; /*first point */ - drift = 0.0f; - lastdelta = 0.0f; + *drift = 0.0f; + *lastdelta = 0.0f; } /*adjusted diff from part */ - delta = diff - partdiffs[lastpart] - drift; + delta = diff - partdiffs[lastpart] - *drift; if (textord_oldbl_debug) { - tprintf ("Diff=%.2f, Delta=%.3f, Drift=%.3f, ", diff, delta, drift); + tprintf ("Diff=%.2f, Delta=%.3f, Drift=%.3f, ", diff, delta, *drift); } if (ABS (delta) > jumplimit / 2) { /*delta on part 0 */ - bestdelta = diff - partdiffs[0] - drift; + bestdelta = diff - partdiffs[0] - *drift; bestpart = 0; /*0 best so far */ for (partition = 1; partition < *partcount; partition++) { - delta = diff - partdiffs[partition] - drift; + delta = diff - partdiffs[partition] - *drift; if (ABS (delta) < ABS (bestdelta)) { bestdelta = delta; bestpart = partition; /*part with nearest jump */ @@ -1006,7 +1001,7 @@ int *partcount /*no of partitions */ && *partcount < MAXPARTS) { /*and spare part left */ bestpart = (*partcount)++; /*best was new one */ /*start new one */ - partdiffs[bestpart] = 
diff - drift; + partdiffs[bestpart] = diff - *drift; delta = 0.0f; } } @@ -1015,11 +1010,11 @@ int *partcount /*no of partitions */ } if (bestpart == lastpart - && (ABS (delta - lastdelta) < jumplimit / 2 + && (ABS (delta - *lastdelta) < jumplimit / 2 || ABS (delta) < jumplimit / 2)) /*smooth the drift */ - drift = (3 * drift + delta) / 3; - lastdelta = delta; + *drift = (3 * *drift + delta) / 3; + *lastdelta = delta; if (textord_oldbl_debug) { tprintf ("P=%d\n", bestpart); diff --git a/textord/oldbasel.h b/textord/oldbasel.h index 671c2fa9a1..b574a56448 100644 --- a/textord/oldbasel.h +++ b/textord/oldbasel.h @@ -20,10 +20,9 @@ #ifndef OLDBASEL_H #define OLDBASEL_H -#include "varable.h" +#include "params.h" #include "blobbox.h" #include "notdll.h" -#include "tesseractclass.h" extern BOOL_VAR_H (textord_really_old_xheight, FALSE, "Use original wiseowl xheight"); @@ -43,35 +42,6 @@ extern INT_VAR_H (oldbl_holed_losscount, 10, extern double_VAR_H (oldbl_dot_error_size, 1.26, "Max aspect ratio of a dot"); extern double_VAR_H (textord_oldbl_jumplimit, 0.15, "X fraction for new partition"); -void make_old_baselines( //make splines - TO_BLOCK *block, //block to do - BOOL8 testing_on, //correct orientation - float gradient, - tesseract::Tesseract *tess - ); -void correlate_lines( //cleanup lines - TO_BLOCK *block, //block to do - float gradient, - tesseract::Tesseract* tess - ); -void correlate_neighbours( //fix bad rows - TO_BLOCK *block, /*block rows are in */ - TO_ROW **rows, /*rows of block */ - int rowcount, /*no of rows to do */ - tesseract::Tesseract* tess - ); -int correlate_with_stats( //fix xheights - TO_ROW **rows, /*rows of block */ - int rowcount, /*no of rows to do */ - TO_BLOCK *block /*block rows are in */ - ); -void find_textlines( //get baseline - TO_BLOCK *block, //block row is in - TO_ROW *row, //row to do - int degree, //required approximation - QSPLINE *spline, //starting spline - tesseract::Tesseract *tess - ); int get_blob_coords( //get boxes 
TO_ROW *row, //row to use inT32 lineheight, //block level @@ -124,6 +94,8 @@ register float diff, /*diff from spline */ float partdiffs[], /*diff on all parts */ int lastpart, /*last assigned partition */ float jumplimit, /*new part threshold */ +float* drift, +float* last_delta, int *partcount /*no of partitions */ ); int partition_coords ( //find relevant coords diff --git a/textord/pithsync.h b/textord/pithsync.h index 2c9dde5863..87173cfd1f 100644 --- a/textord/pithsync.h +++ b/textord/pithsync.h @@ -21,7 +21,7 @@ #define PITHSYNC_H #include "blobbox.h" -#include "varable.h" +#include "params.h" #include "statistc.h" #include "notdll.h" diff --git a/textord/pitsync1.h b/textord/pitsync1.h index 901ec3814c..3c715393bc 100644 --- a/textord/pitsync1.h +++ b/textord/pitsync1.h @@ -23,7 +23,7 @@ #include "elst.h" #include "clst.h" #include "blobbox.h" -#include "varable.h" +#include "params.h" #include "statistc.h" #include "pithsync.h" #include "notdll.h" @@ -83,9 +83,6 @@ class FPSEGPT:public ELIST_LINK ELISTIZEH (FPSEGPT) CLISTIZEH (FPSEGPT_LIST) extern -BOOL_VAR_H (pitsync_projection_fix, FALSE, -"Fix bug in projection profile"); -extern INT_VAR_H (pitsync_linear_version, 0, "Use new fast algorithm"); extern double_VAR_H (pitsync_joined_edge, 0.75, diff --git a/textord/scanedg.cpp b/textord/scanedg.cpp index 5405e24ba9..de5f4bc21d 100644 --- a/textord/scanedg.cpp +++ b/textord/scanedg.cpp @@ -19,7 +19,6 @@ #include "mfcpch.h" #include "edgloop.h" -//#include "dirtab.h" #include "scanedg.h" #define WHITE_PIX 1 /*thresholded colours */ @@ -27,65 +26,54 @@ /*W->B->W */ #define FLIP_COLOUR(pix) (1-(pix)) -#define EWSIZE 4 /*edge operator size */ - -#define XMARGIN 2 //margin needed -#define YMARGIN 3 //by edge detector - - /*local freelist */ -static CRACKEDGE *free_cracks = NULL; - /********************************************************************** * block_edges * * Extract edges from a PDBLK. 
**********************************************************************/ -DLLSYM void block_edges( //get edges in a block - IMAGE *t_image, //threshold image - PDBLK *block, //block in image - ICOORD page_tr //corner of page - ) { - uinT8 margin; //margin colour - inT16 x; //line coords - inT16 y; //current line - ICOORD bleft; //bounding box +void block_edges(IMAGE *t_image, // thresholded image + PDBLK *block, // block in image + C_OUTLINE_IT* outline_it) { + uinT8 margin; // margin colour + inT16 x; // line coords + inT16 y; // current line + ICOORD bleft; // bounding box ICOORD tright; - ICOORD block_bleft; //bounding box + ICOORD block_bleft; // bounding box ICOORD block_tright; - int xindex; //index to pixel - BLOCK_LINE_IT line_it = block; //line iterator - IMAGELINE bwline; //thresholded line - //lines in progress + int xindex; // index to pixel + BLOCK_LINE_IT line_it = block; // line iterator + IMAGELINE bwline; // thresholded line + // lines in progress CRACKEDGE **ptrline = new CRACKEDGE*[t_image->get_xsize()+1]; - block->bounding_box (bleft, tright); // block box + CRACKEDGE *free_cracks = NULL; + + block->bounding_box(bleft, tright); // block box block_bleft = bleft; block_tright = tright; - for (x = tright.x () - bleft.x (); x >= 0; x--) + for (x = tright.x() - bleft.x(); x >= 0; x--) ptrline[x] = NULL; //no lines in progress - bwline.init (t_image->get_xsize()); + bwline.init(t_image->get_xsize()); margin = WHITE_PIX; - for (y = tright.y () - 1; y >= bleft.y () - 1; y--) { - if (y >= block_bleft.y () && y < block_tright.y ()) { - t_image->get_line (bleft.x (), y, tright.x () - bleft.x (), &bwline, - 0); - make_margins (block, &line_it, bwline.pixels, margin, bleft.x (), - tright.x (), y); - } - else { - x = tright.x () - bleft.x (); + for (y = tright.y() - 1; y >= bleft.y() - 1; y--) { + if (y >= block_bleft.y() && y < block_tright.y()) { + t_image->get_line(bleft.x(), y, tright.x() - bleft.x(), &bwline, 0); + make_margins(block, &line_it, 
bwline.pixels, margin, bleft.x(), + tright.x(), y); + } else { + x = tright.x() - bleft.x(); for (xindex = 0; xindex < x; xindex++) bwline.pixels[xindex] = margin; } - line_edges (bleft.x (), y, tright.x () - bleft.x (), - margin, bwline.pixels, ptrline); + line_edges(bleft.x(), y, tright.x() - bleft.x(), + margin, bwline.pixels, ptrline, &free_cracks, outline_it); } - free_crackedges(free_cracks); //really free them - free_cracks = NULL; + free_crackedges(free_cracks); // really free them delete[] ptrline; } @@ -187,87 +175,81 @@ void whiteout_block( //clean it * When edges close into loops, send them for approximation. **********************************************************************/ -void -line_edges ( //scan for edges -inT16 x, //coord of line start -inT16 y, //coord of line -inT16 xext, //width of line -uinT8 uppercolour, //start of prev line -uinT8 * bwpos, //thresholded line -CRACKEDGE ** prevline //edges in progress -) { - int xpos; //current x coord - int xmax; //max x coord - int colour; //of current pixel - int prevcolour; //of previous pixel - CRACKEDGE *current; //current h edge - CRACKEDGE *newcurrent; //new h edge - - xmax = x + xext; //max allowable coord - prevcolour = uppercolour; //forced plain margin - current = NULL; //nothing yet - - //do each pixel - for (xpos = x; xpos < xmax; xpos++, prevline++) { - colour = *bwpos++; //current pixel +void line_edges(inT16 x, // coord of line start + inT16 y, // coord of line + inT16 xext, // width of line + uinT8 uppercolour, // start of prev line + uinT8 * bwpos, // thresholded line + CRACKEDGE ** prevline, // edges in progress + CRACKEDGE **free_cracks, + C_OUTLINE_IT* outline_it) { + CrackPos pos = {free_cracks, x, y }; + int xmax; // max x coord + int colour; // of current pixel + int prevcolour; // of previous pixel + CRACKEDGE *current; // current h edge + CRACKEDGE *newcurrent; // new h edge + + xmax = x + xext; // max allowable coord + prevcolour = uppercolour; // forced plain margin + 
current = NULL; // nothing yet + + // do each pixel + for (; pos.x < xmax; pos.x++, prevline++) { + colour = *bwpos++; // current pixel if (*prevline != NULL) { - //changed above - //change colour - uppercolour = FLIP_COLOUR (uppercolour); + // changed above + // change colour + uppercolour = FLIP_COLOUR(uppercolour); if (colour == prevcolour) { if (colour == uppercolour) { - //finish a line - join_edges(current, *prevline); - current = NULL; //no edge now + // finish a line + join_edges(current, *prevline, free_cracks, outline_it); + current = NULL; // no edge now + } else { + // new horiz edge + current = h_edge(uppercolour - colour, *prevline, &pos); } - else - //new horiz edge - current = h_edge (xpos, y, uppercolour - colour, *prevline); - *prevline = NULL; //no change this time - } - else { + *prevline = NULL; // no change this time + } else { if (colour == uppercolour) - *prevline = v_edge (xpos, y, colour - prevcolour, *prevline); - //8 vs 4 connection + *prevline = v_edge(colour - prevcolour, *prevline, &pos); + // 8 vs 4 connection else if (colour == WHITE_PIX) { - join_edges(current, *prevline); - current = h_edge (xpos, y, uppercolour - colour, NULL); - *prevline = v_edge (xpos, y, colour - prevcolour, current); - } - else { - newcurrent = h_edge (xpos, y, uppercolour - colour, *prevline); - *prevline = v_edge (xpos, y, colour - prevcolour, current); - current = newcurrent; //right going h edge + join_edges(current, *prevline, free_cracks, outline_it); + current = h_edge(uppercolour - colour, NULL, &pos); + *prevline = v_edge(colour - prevcolour, current, &pos); + } else { + newcurrent = h_edge(uppercolour - colour, *prevline, &pos); + *prevline = v_edge(colour - prevcolour, current, &pos); + current = newcurrent; // right going h edge } - prevcolour = colour; //remember new colour + prevcolour = colour; // remember new colour } - } - else { + } else { if (colour != prevcolour) { - *prevline = current = - v_edge (xpos, y, colour - prevcolour, current); 
+ *prevline = current = v_edge(colour - prevcolour, current, &pos); prevcolour = colour; } if (colour != uppercolour) - current = h_edge (xpos, y, uppercolour - colour, current); + current = h_edge(uppercolour - colour, current, &pos); else - current = NULL; //no edge now + current = NULL; // no edge now } } if (current != NULL) { - //out of block - if (*prevline != NULL) { //got one to join to? - join_edges(current, *prevline); - *prevline = NULL; //tidy now + // out of block + if (*prevline != NULL) { // got one to join to? + join_edges(current, *prevline, free_cracks, outline_it); + *prevline = NULL; // tidy now + } else { + // fake vertical + *prevline = v_edge(FLIP_COLOUR(prevcolour)-prevcolour, current, &pos); } - else { - //fake vertical - *prevline = v_edge (xpos, y, FLIP_COLOUR(prevcolour)-prevcolour, current); - } - } - else if (*prevline != NULL) + } else if (*prevline != NULL) { //continue fake - *prevline = v_edge (xpos, y, FLIP_COLOUR(prevcolour)-prevcolour, *prevline); + *prevline = v_edge(FLIP_COLOUR(prevcolour)-prevcolour, *prevline, &pos); + } } @@ -277,53 +259,44 @@ CRACKEDGE ** prevline //edges in progress * Create a new horizontal CRACKEDGE and join it to the given edge. 
**********************************************************************/ -CRACKEDGE * -h_edge ( //horizontal edge -inT16 x, //xposition -inT16 y, //y position -inT8 sign, //sign of edge -CRACKEDGE * join //edge to join to -) { - CRACKEDGE *newpt; //return value - - // check_mem("h_edge",JUSTCHECKS); - if (free_cracks != NULL) { - newpt = free_cracks; - free_cracks = newpt->next; //get one fast - } - else { +CRACKEDGE *h_edge(int sign, // sign of edge + CRACKEDGE* join, // edge to join to + CrackPos* pos) { + CRACKEDGE *newpt; // return value + + if (*pos->free_cracks != NULL) { + newpt = *pos->free_cracks; + *pos->free_cracks = newpt->next; // get one fast + } else { newpt = new CRACKEDGE; } - newpt->pos.set_y (y + 1); //coords of pt - newpt->stepy = 0; //edge is horizontal + newpt->pos.set_y(pos->y + 1); // coords of pt + newpt->stepy = 0; // edge is horizontal if (sign > 0) { - newpt->pos.set_x (x + 1); //start location + newpt->pos.set_x(pos->x + 1); // start location newpt->stepx = -1; newpt->stepdir = 0; - } - else { - newpt->pos.set_x (x); //start location + } else { + newpt->pos.set_x(pos->x); // start location newpt->stepx = 1; newpt->stepdir = 2; } if (join == NULL) { - newpt->next = newpt; //ptrs to other ends + newpt->next = newpt; // ptrs to other ends newpt->prev = newpt; - } - else { - if (newpt->pos.x () + newpt->stepx == join->pos.x () - && newpt->pos.y () == join->pos.y ()) { - newpt->prev = join->prev; //update other ends + } else { + if (newpt->pos.x() + newpt->stepx == join->pos.x() + && newpt->pos.y() == join->pos.y()) { + newpt->prev = join->prev; // update other ends newpt->prev->next = newpt; - newpt->next = join; //join up + newpt->next = join; // join up join->prev = newpt; - } - else { - newpt->next = join->next; //update other ends + } else { + newpt->next = join->next; // update other ends newpt->next->prev = newpt; - newpt->prev = join; //join up + newpt->prev = join; // join up join->next = newpt; } } @@ -337,32 +310,26 @@ CRACKEDGE * 
join //edge to join to * Create a new vertical CRACKEDGE and join it to the given edge. **********************************************************************/ -CRACKEDGE * -v_edge ( //vertical edge -inT16 x, //xposition -inT16 y, //y position -inT8 sign, //sign of edge -CRACKEDGE * join //edge to join to -) { - CRACKEDGE *newpt; //return value - - if (free_cracks != NULL) { - newpt = free_cracks; - free_cracks = newpt->next; //get one fast - } - else { +CRACKEDGE *v_edge(int sign, // sign of edge + CRACKEDGE* join, + CrackPos* pos) { + CRACKEDGE *newpt; // return value + + if (*pos->free_cracks != NULL) { + newpt = *pos->free_cracks; + *pos->free_cracks = newpt->next; // get one fast + } else { newpt = new CRACKEDGE; } - newpt->pos.set_x (x); //coords of pt - newpt->stepx = 0; //edge is vertical + newpt->pos.set_x(pos->x); // coords of pt + newpt->stepx = 0; // edge is vertical if (sign > 0) { - newpt->pos.set_y (y); //start location + newpt->pos.set_y(pos->y); // start location newpt->stepy = 1; newpt->stepdir = 3; - } - else { - newpt->pos.set_y (y + 1); //start location + } else { + newpt->pos.set_y(pos->y + 1); // start location newpt->stepy = -1; newpt->stepdir = 1; } @@ -370,19 +337,17 @@ CRACKEDGE * join //edge to join to if (join == NULL) { newpt->next = newpt; //ptrs to other ends newpt->prev = newpt; - } - else { - if (newpt->pos.x () == join->pos.x () - && newpt->pos.y () + newpt->stepy == join->pos.y ()) { - newpt->prev = join->prev; //update other ends + } else { + if (newpt->pos.x() == join->pos.x() + && newpt->pos.y() + newpt->stepy == join->pos.y()) { + newpt->prev = join->prev; // update other ends newpt->prev->next = newpt; - newpt->next = join; //join up + newpt->next = join; // join up join->prev = newpt; - } - else { - newpt->next = join->next; //update other ends + } else { + newpt->next = join->next; // update other ends newpt->next->prev = newpt; - newpt->prev = join; //join up + newpt->prev = join; // join up join->next = newpt; } } @@ 
-397,37 +362,28 @@ CRACKEDGE * join //edge to join to * closed loop is formed. **********************************************************************/ -void join_edges( //join edge fragments - CRACKEDGE *edge1, //edges to join - CRACKEDGE *edge2 //no specific order - ) { - CRACKEDGE *tempedge; //for exchanging - - if (edge1->pos.x () + edge1->stepx != edge2->pos.x () - || edge1->pos.y () + edge1->stepy != edge2->pos.y ()) { - tempedge = edge1; - edge1 = edge2; //swap araound +void join_edges(CRACKEDGE *edge1, // edges to join + CRACKEDGE *edge2, // no specific order + CRACKEDGE **free_cracks, + C_OUTLINE_IT* outline_it) { + if (edge1->pos.x() + edge1->stepx != edge2->pos.x() + || edge1->pos.y() + edge1->stepy != edge2->pos.y()) { + CRACKEDGE *tempedge = edge1; + edge1 = edge2; // swap araound edge2 = tempedge; } - // tprintf("Joining %x=(%d,%d)+(%d,%d)->%x<-%x ", - // edge1,edge1->pos.x(),edge1->pos.y(),edge1->stepx,edge1->stepy, - // edge1->next,edge1->prev); - // tprintf("to %x=(%d,%d)+(%d,%d)->%x<-%x\n", - // edge2,edge2->pos.x(),edge2->pos.y(),edge2->stepx,edge2->stepy, - // edge2->next,edge2->prev); if (edge1->next == edge2) { - //already closed - complete_edge(edge1); //approximate it - //attach freelist to end - edge1->prev->next = free_cracks; - free_cracks = edge1; //and free list - } - else { - //update opposite ends + // already closed + complete_edge(edge1, outline_it); + // attach freelist to end + edge1->prev->next = *free_cracks; + *free_cracks = edge1; // and free list + } else { + // update opposite ends edge2->prev->next = edge1->next; edge1->next->prev = edge2->prev; - edge1->next = edge2; //make joins + edge1->next = edge2; // make joins edge2->prev = edge1; } } @@ -439,14 +395,12 @@ void join_edges( //join edge fragments * Really free the CRACKEDGEs by giving them back to delete. 
**********************************************************************/ -void free_crackedges( //really free them - CRACKEDGE *start //start of loop - ) { - CRACKEDGE *current; //current edge to free - CRACKEDGE *next; //next one to free +void free_crackedges(CRACKEDGE *start) { + CRACKEDGE *current; // current edge to free + CRACKEDGE *next; // next one to free for (current = start; current != NULL; current = next) { next = current->next; - delete current; //delete them all + delete current; // delete them all } } diff --git a/textord/scanedg.h b/textord/scanedg.h index fce7fe1daf..9c219e4f81 100644 --- a/textord/scanedg.h +++ b/textord/scanedg.h @@ -20,55 +20,50 @@ #ifndef SCANEDG_H #define SCANEDG_H -#include "varable.h" +#include "params.h" #include "scrollview.h" #include "img.h" #include "pdblock.h" #include "crakedge.h" -DLLSYM void block_edges( //get edges in a block - IMAGE *t_image, //threshold image - PDBLK *block, //block in image - ICOORD page_tr //corner of page - ); -void make_margins( //get a line - PDBLK *block, //block in image - BLOCK_LINE_IT *line_it, //for old style - uinT8 *pixels, //pixels to strip - uinT8 margin, //white-out pixel - inT16 left, //block edges +class C_OUTLINE_IT; + +struct CrackPos { + CRACKEDGE** free_cracks; // Freelist for fast allocation. + int x; // Position of new edge. 
+ int y; +}; + +void block_edges(IMAGE *t_image, // thresholded image + PDBLK *block, // block in image + C_OUTLINE_IT* outline_it); +void make_margins(PDBLK *block, // block in image + BLOCK_LINE_IT *line_it, // for old style + uinT8 *pixels, // pixels to strip + uinT8 margin, // white-out pixel + inT16 left, // block edges inT16 right, - inT16 y //line coord - ); -void whiteout_block( //clean it - IMAGE *t_image, //threshold image - PDBLK *block //block in image - ); -void line_edges ( //scan for edges -inT16 x, //coord of line start -inT16 y, //coord of line -inT16 xext, //width of line -uinT8 uppercolour, //start of prev line -uinT8 * bwpos, //thresholded line -CRACKEDGE ** prevline //edges in progress -); -CRACKEDGE *h_edge ( //horizontal edge -inT16 x, //xposition -inT16 y, //y position -inT8 sign, //sign of edge -CRACKEDGE * join //edge to join to -); -CRACKEDGE *v_edge ( //vertical edge -inT16 x, //xposition -inT16 y, //y position -inT8 sign, //sign of edge -CRACKEDGE * join //edge to join to -); -void join_edges( //join edge fragments - CRACKEDGE *edge1, //edges to join - CRACKEDGE *edge2 //no specific order - ); -void free_crackedges( //really free them - CRACKEDGE *start //start of loop - ); + inT16 y); // line coord ); +void whiteout_block(IMAGE *t_image, // thresholded image + PDBLK *block); // block in image +void line_edges(inT16 x, // coord of line start + inT16 y, // coord of line + inT16 xext, // width of line + uinT8 uppercolour, // start of prev line + uinT8 * bwpos, // thresholded line + CRACKEDGE ** prevline, // edges in progress + CRACKEDGE **free_cracks, + C_OUTLINE_IT* outline_it); +CRACKEDGE *h_edge(int sign, // sign of edge + CRACKEDGE * join, // edge to join to + CrackPos* pos); +CRACKEDGE *v_edge(int sign, // sign of edge + CRACKEDGE * join, // edge to join to + CrackPos* pos); +void join_edges(CRACKEDGE *edge1, // edges to join + CRACKEDGE *edge2, // no specific order + CRACKEDGE **free_cracks, + C_OUTLINE_IT* outline_it); +void 
free_crackedges(CRACKEDGE *start); + #endif diff --git a/textord/strokewidth.cpp b/textord/strokewidth.cpp index fba692b871..67f2f155d3 100644 --- a/textord/strokewidth.cpp +++ b/textord/strokewidth.cpp @@ -23,6 +23,9 @@ #include "strokewidth.h" #include "blobbox.h" +#include "colpartition.h" +#include "colpartitiongrid.h" +#include "statistc.h" #include "tabfind.h" #include "tordmain.h" // For SetBlobStrokeWidth. @@ -33,6 +36,20 @@ namespace tesseract { +INT_VAR(textord_tabfind_show_strokewidths, 0, "Show stroke widths"); +BOOL_VAR(textord_tabfind_only_strokewidths, false, "Only run stroke widths"); +double_VAR(textord_strokewidth_minsize, 0.25, + "Min multiple of linesize for medium-sized blobs"); +double_VAR(textord_strokewidth_maxsize, 4.0, + "Max multiple of linesize for medium-sized blobs"); +BOOL_VAR(textord_tabfind_vertical_text, true, "Enable vertical detection"); +BOOL_VAR(textord_tabfind_force_vertical_text, false, + "Force using vertical text page mode"); +BOOL_VAR(textord_tabfind_vertical_horizontal_mix, true, + "find horizontal lines such as headers in vertical page mode"); +double_VAR(textord_tabfind_vertical_text_ratio, 0.5, + "Fraction of textlines deemed vertical to use vertical page mode"); + /** Allowed proportional change in stroke width to be the same font. */ const double kStrokeWidthFractionTolerance = 0.125; /** @@ -40,19 +57,1051 @@ const double kStrokeWidthFractionTolerance = 0.125; * Really 1.5 pixels. */ const double kStrokeWidthTolerance = 1.5; +// Same but for CJK we are a bit more generous. +const double kStrokeWidthFractionCJK = 0.25; +const double kStrokeWidthCJK = 2.0; +// Radius in grid cells of search for broken CJK. Doesn't need to be very +// large as the grid size should be about the size of a character anyway. +const int kCJKRadius = 2; +// Max distance fraction of size to join close but broken CJK characters. +const double kCJKBrokenDistanceFraction = 0.25; +// Max number of components in a broken CJK character. 
+const int kCJKMaxComponents = 8; +// Max aspect ratio of CJK broken characters when put back together. +const double kCJKAspectRatio = 1.25; +// Max increase in aspect ratio of CJK broken characters when merged. +const double kCJKAspectRatioIncrease = 1.0625; +// Max multiple of the grid size that will be used in computing median CJKsize. +const int kMaxCJKSizeRatio = 5; +// Min multiple of diacritic height that a neighbour must be to be a +// convincing base character. +const int kMinDiacriticSizeRatio = 2; +// Radius of a search for diacritics in grid units. +const int kSearchRadius = 2; +// Ratio between longest side of a line and longest side of a character. +// (neighbor_min > blob_min * kLineTrapShortest && +// neighbor_max < blob_max / kLineTrapLongest) +// => neighbor is a grapheme and blob is a line. +const int kLineTrapLongest = 4; +// Ratio between shortest side of a line and shortest side of a character. +const int kLineTrapShortest = 2; +// Max aspect ratio of the total box before CountNeighbourGaps +// decides immediately based on the aspect ratio. +const int kMostlyOneDirRatio = 3; +// Max number of neighbour small objects per squared gridsize before a grid +// cell becomes image. +const double kMaxSmallNeighboursPerPix = 3.0 / 128; +// Aspect ratio filter for OSD. +const float kSizeRatioToReject = 2.0; /** Maximum height in inches of the largest possible text. 
*/ const double kMaxTextSize = 2.0; StrokeWidth::StrokeWidth(int gridsize, const ICOORD& bleft, const ICOORD& tright) - : BBGrid(gridsize, bleft, tright) { + : BBGrid(gridsize, bleft, tright), + noise_density_(NULL) { + leaders_win_ = NULL; + widths_win_ = NULL; + initial_widths_win_ = NULL; } StrokeWidth::~StrokeWidth() { + delete noise_density_; + if (widths_win_ != NULL) { + delete widths_win_->AwaitEvent(SVET_DESTROY); + if (textord_tabfind_only_strokewidths) + exit(0); + delete widths_win_; + } + delete leaders_win_; + delete initial_widths_win_; +} + +// Types all the blobs as vertical or horizontal text or unknown and +// returns true if the majority are vertical. +// If the blobs are rotated, it is necessary to call CorrectForRotation +// after rotating everything, otherwise the work done here will be enough. +// If cjk_merge is true, it will attempt to merge broken cjk characters. +// If osd_blobs is not null, a list of blobs from the dominant textline +// direction are returned for use in orientation and script detection. +bool StrokeWidth::TestVerticalTextDirection(bool cjk_merge, TO_BLOCK* block, + TabFind* line_grid, + BLOBNBOX_CLIST* osd_blobs) { + // Reset all blobs to initial state and filter by size. + ReFilterBlobs(block); + // Compute the noise density in the grid. + ComputeNoiseDensity(block, line_grid); + // Setup the grid with the remaining blobs + InsertBlobs(block, line_grid); + // Repair broken CJK characters if needed. + if (cjk_merge) + FixBrokenCJK(&block->blobs, line_grid); + if (textord_tabfind_force_vertical_text) return true; + // Grade blobs by inspection of neighbours. + FindTextlineFlowDirection(false); + if (!textord_tabfind_vertical_text) return false; + + int vertical_boxes = 0; + int horizontal_boxes = 0; + // Count vertical bboxes in the grid. 
+ BlobGridSearch gsearch(this); + gsearch.StartFullSearch(); + BLOBNBOX* blob; + BLOBNBOX_CLIST vertical_blobs; + BLOBNBOX_CLIST horizontal_blobs; + BLOBNBOX_CLIST nondescript_blobs; + BLOBNBOX_C_IT v_it(&vertical_blobs); + BLOBNBOX_C_IT h_it(&horizontal_blobs); + BLOBNBOX_C_IT n_it(&nondescript_blobs); + while ((blob = gsearch.NextFullSearch()) != NULL) { + const TBOX& box = blob->bounding_box(); + float y_x = static_cast(box.height()) / box.width(); + float x_y = 1.0f / y_x; + // Select a >= 1.0 ratio + float ratio = x_y > y_x ? x_y : y_x; + // If the aspect ratio is small and we want them for osd, save the blob. + bool ok_blob = ratio <= kSizeRatioToReject && osd_blobs != NULL; + if (blob->UniquelyVertical()) { + ++vertical_boxes; + if (ok_blob) v_it.add_after_then_move(blob); + } else if (blob->UniquelyHorizontal()) { + ++horizontal_boxes; + if (ok_blob) h_it.add_after_then_move(blob); + } else if (ok_blob) { + n_it.add_after_then_move(blob); + } + } + if (textord_debug_tabfind) + tprintf("TextDir hbox=%d vs vbox=%d, %dH, %dV, %dN osd blobs\n", + horizontal_boxes, vertical_boxes, + horizontal_blobs.length(), vertical_blobs.length(), + nondescript_blobs.length()); + if (osd_blobs != NULL && vertical_boxes == 0 && horizontal_boxes == 0) { + // Only nondescript blobs available, so return those. + BLOBNBOX_C_IT osd_it(osd_blobs); + osd_it.add_list_after(&nondescript_blobs); + return false; + } + int min_vert_boxes = static_cast((vertical_boxes + horizontal_boxes) * + textord_tabfind_vertical_text_ratio); + if (vertical_boxes >= min_vert_boxes) { + if (osd_blobs != NULL) { + BLOBNBOX_C_IT osd_it(osd_blobs); + osd_it.add_list_after(&vertical_blobs); + } + return true; + } else { + if (osd_blobs != NULL) { + BLOBNBOX_C_IT osd_it(osd_blobs); + osd_it.add_list_after(&horizontal_blobs); + } + return false; + } +} + +// Corrects the data structures for the given rotation. 
+void StrokeWidth::CorrectForRotation(const FCOORD& rotation, TO_BLOCK* block, + TabFind* line_grid) { + noise_density_->Rotate(rotation); + Init(noise_density_->gridsize(), noise_density_->bleft(), + noise_density_->tright()); + // Reset all blobs to initial state and filter by size. + // Since they have rotated, the list they belong on could have changed. + ReFilterBlobs(block); +} + +// Finds leader partitions and inserts them into the give grid. +void StrokeWidth::FindLeaderPartitions(TO_BLOCK* block, TabFind* line_grid) { + Clear(); + // Find and isolate leaders in the noise list. + ColPartition_LIST leader_parts; + FindLeadersAndMarkNoise(true, block, line_grid, &leader_parts); + // Setup the grid with the remaining blobs + InsertBlobs(block, line_grid); + // Mark blobs that have leader neighbours. + for (ColPartition_IT it(&leader_parts); !it.empty(); it.forward()) { + ColPartition* part = it.extract(); + MarkLeaderNeighbours(part, true); + MarkLeaderNeighbours(part, false); + delete part; + } +} + +static void PrintBoxWidths(BLOBNBOX* neighbour) { + TBOX nbox = neighbour->bounding_box(); + tprintf("Box (%d,%d)->(%d,%d): h-width=%.1f, v-width=%.1f p-width=%1.f\n", + nbox.left(), nbox.bottom(), nbox.right(), nbox.top(), + neighbour->horz_stroke_width(), neighbour->vert_stroke_width(), + 2.0 * neighbour->cblob()->area()/neighbour->cblob()->perimeter()); +} + +/** Handles a click event in a display window. */ +void StrokeWidth::HandleClick(int x, int y) { + BBGrid::HandleClick(x, y); + // Run a radial search for blobs that overlap. 
+ BlobGridSearch radsearch(this); + radsearch.StartRadSearch(x, y, 1); + BLOBNBOX* neighbour; + FCOORD click(static_cast(x), static_cast(y)); + while ((neighbour = radsearch.NextRadSearch()) != NULL) { + TBOX nbox = neighbour->bounding_box(); + if (nbox.contains(click) && neighbour->cblob() != NULL) { + PrintBoxWidths(neighbour); + if (neighbour->neighbour(BND_LEFT) != NULL) + PrintBoxWidths(neighbour->neighbour(BND_LEFT)); + if (neighbour->neighbour(BND_RIGHT) != NULL) + PrintBoxWidths(neighbour->neighbour(BND_RIGHT)); + if (neighbour->neighbour(BND_ABOVE) != NULL) + PrintBoxWidths(neighbour->neighbour(BND_ABOVE)); + if (neighbour->neighbour(BND_BELOW) != NULL) + PrintBoxWidths(neighbour->neighbour(BND_BELOW)); + int gaps[BND_COUNT]; + neighbour->NeighbourGaps(gaps); + tprintf("Left gap=%d, right=%d, above=%d, below=%d, horz=%d, vert=%d\n" + "Good= %d %d %d %d\n", + gaps[BND_LEFT], gaps[BND_RIGHT], + gaps[BND_ABOVE], gaps[BND_BELOW], + neighbour->horz_possible(), + neighbour->vert_possible(), + neighbour->good_stroke_neighbour(BND_LEFT), + neighbour->good_stroke_neighbour(BND_RIGHT), + neighbour->good_stroke_neighbour(BND_ABOVE), + neighbour->good_stroke_neighbour(BND_BELOW)); + break; + } + } +} + +// Helper function to divide the input blobs over noise, small, medium +// and large lists. Blobs small in height and (small in width or large in width) +// go in the noise list. Dash (-) candidates go in the small list, and +// medium and large are by height. +// SIDE-EFFECT: reset all blobs to initial state by calling Init(). 
+static void SizeFilterBlobs(int min_height, int max_height, + BLOBNBOX_LIST* src_list, + BLOBNBOX_LIST* noise_list, + BLOBNBOX_LIST* small_list, + BLOBNBOX_LIST* medium_list, + BLOBNBOX_LIST* large_list) { + BLOBNBOX_IT noise_it(noise_list); + BLOBNBOX_IT small_it(small_list); + BLOBNBOX_IT medium_it(medium_list); + BLOBNBOX_IT large_it(large_list); + for (BLOBNBOX_IT src_it(src_list); !src_it.empty(); src_it.forward()) { + BLOBNBOX* blob = src_it.extract(); + blob->ReInit(); + int width = blob->bounding_box().width(); + int height = blob->bounding_box().height(); + if (height < min_height && + (width < min_height || width > max_height)) + noise_it.add_after_then_move(blob); + else if (height > max_height) + large_it.add_after_then_move(blob); + else if (height < min_height) + small_it.add_after_then_move(blob); + else + medium_it.add_after_then_move(blob); + } +} + +// Reorganize the blob lists with a different definition of small, medium +// and large, compared to the original definition. +// Height is still the primary filter key, but medium width blobs of small +// height become small, and very wide blobs of small height stay noise, along +// with small dot-shaped blobs. 
+void StrokeWidth::ReFilterBlobs(TO_BLOCK* block) {
+  int min_height =
+    static_cast<int>(textord_strokewidth_minsize * block->line_size + 0.5);
+  int max_height =
+    static_cast<int>(textord_strokewidth_maxsize * block->line_size + 0.5);
+  BLOBNBOX_LIST noise_list;
+  BLOBNBOX_LIST small_list;
+  BLOBNBOX_LIST medium_list;
+  BLOBNBOX_LIST large_list;
+  SizeFilterBlobs(min_height, max_height, &block->blobs,
+                  &noise_list, &small_list, &medium_list, &large_list);
+  SizeFilterBlobs(min_height, max_height, &block->large_blobs,
+                  &noise_list, &small_list, &medium_list, &large_list);
+  SizeFilterBlobs(min_height, max_height, &block->small_blobs,
+                  &noise_list, &small_list, &medium_list, &large_list);
+  SizeFilterBlobs(min_height, max_height, &block->noise_blobs,
+                  &noise_list, &small_list, &medium_list, &large_list);
+  BLOBNBOX_IT blob_it(&block->blobs);
+  blob_it.add_list_after(&medium_list);
+  blob_it.set_to_list(&block->large_blobs);
+  blob_it.add_list_after(&large_list);
+  blob_it.set_to_list(&block->small_blobs);
+  blob_it.add_list_after(&small_list);
+  blob_it.set_to_list(&block->noise_blobs);
+  blob_it.add_list_after(&noise_list);
+}
+
+// Computes the noise_density_ by summing the number of elements in a
+// neighbourhood of each grid cell.
+void StrokeWidth::ComputeNoiseDensity(TO_BLOCK* block, TabFind* line_grid) {
+  // Run a preliminary strokewidth neighbour detection on the medium blobs.
+  line_grid->InsertBlobList(true, true, false, &block->blobs, false, this);
+  BLOBNBOX_IT blob_it(&block->blobs);
+  for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
+    SetNeighbours(false, blob_it.data());
+  }
+  // Remove blobs with a good strokewidth neighbour from the grid.
+  for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
+    BLOBNBOX* blob = blob_it.data();
+    if (blob->GoodTextBlob() > 0)
+      RemoveBBox(blob);
+    blob->ClearNeighbours();
+  }
+  // Insert the smaller blobs into the grid.
+  line_grid->InsertBlobList(true, true, false, &block->small_blobs,
+                            false, this);
+  line_grid->InsertBlobList(true, true, false, &block->noise_blobs,
+                            false, this);
+  if (noise_density_ != NULL)
+    delete noise_density_;
+  IntGrid* cell_counts = CountCellElements();
+  noise_density_ = cell_counts->NeighbourhoodSum();
+  delete cell_counts;
+  // Clear the grid as we don't want the small stuff hanging around in it.
+  Clear();
+}
+
+// Detects and marks leader dots/dashes.
+// Leaders are horizontal chains of small or noise blobs that look
+// monospace according to ColPartition::MarkAsLeaderIfMonospaced().
+// Detected leaders become the only occupants of small_blobs list.
+// Non-leader small blobs get moved to the blobs list.
+// Non-leader noise blobs remain singletons in the noise list.
+// All small and noise blobs in high density regions are marked BTFT_NONTEXT.
+void StrokeWidth::FindLeadersAndMarkNoise(bool final, TO_BLOCK* block,
+                                          TabFind* line_grid,
+                                          ColPartition_LIST* leader_parts) {
+  line_grid->InsertBlobList(true, true, false, &block->small_blobs,
+                            false, this);
+  line_grid->InsertBlobList(true, true, false, &block->noise_blobs,
+                            false, this);
+  int max_noise_count =
+    static_cast<int>(kMaxSmallNeighboursPerPix * gridsize() * gridsize());
+  BlobGridSearch gsearch(this);
+  BLOBNBOX* bbox;
+  // For every bbox in the grid, set its neighbours.
+  gsearch.StartFullSearch();
+  while ((bbox = gsearch.NextFullSearch()) != NULL) {
+    int noise_count = noise_density_->GridCellValue(gsearch.GridX(),
+                                                    gsearch.GridY());
+    if (noise_count <= max_noise_count) {
+      SetNeighbours(true, bbox);
+    } else {
+      bbox->set_flow(BTFT_NONTEXT);
+    }
+  }
+  ColPartition_IT part_it(leader_parts);
+  gsearch.StartFullSearch();
+  while ((bbox = gsearch.NextFullSearch()) != NULL) {
+    if (bbox->flow() == BTFT_NONE) {
+      if (bbox->neighbour(BND_RIGHT) == NULL &&
+          bbox->neighbour(BND_LEFT) == NULL)
+        continue;
+      // Put all the linked blobs into a ColPartition.
+ ColPartition* part = new ColPartition(BRT_UNKNOWN, ICOORD(0, 1)); + BLOBNBOX* blob; + for (blob = bbox; blob != NULL && blob->flow() == BTFT_NONE; + blob = blob->neighbour(BND_RIGHT)) + part->AddBox(blob); + for (blob = bbox->neighbour(BND_LEFT); blob != NULL && + blob->flow() == BTFT_NONE; + blob = blob->neighbour(BND_LEFT)) + part->AddBox(blob); + if (part->MarkAsLeaderIfMonospaced()) + part_it.add_after_then_move(part); + else + delete part; + } + } + if (textord_tabfind_show_strokewidths && final) { + leaders_win_ = DisplayGoodBlobs("LeaderNeighbours", 0, 0); + } + // Move any non-leaders from the small to the blobs list, as they are + // most likely dashes or broken characters. + BLOBNBOX_IT blob_it(&block->blobs); + BLOBNBOX_IT small_it(&block->small_blobs); + for (small_it.mark_cycle_pt(); !small_it.cycled_list(); small_it.forward()) { + BLOBNBOX* blob = small_it.data(); + if (blob->flow() != BTFT_LEADER) { + if (blob->flow() == BTFT_NEIGHBOURS) + blob->set_flow(BTFT_NONE); + blob->ClearNeighbours(); + blob_it.add_to_end(small_it.extract()); + } + } + // Move leaders from the noise list to the small list, leaving the small + // list exclusively leaders, so they don't get processed further, + // and the remaining small blobs all in the noise list. + BLOBNBOX_IT noise_it(&block->noise_blobs); + for (noise_it.mark_cycle_pt(); !noise_it.cycled_list(); noise_it.forward()) { + BLOBNBOX* blob = noise_it.data(); + if (blob->flow() == BTFT_LEADER || blob->joined_to_prev()) { + small_it.add_to_end(noise_it.extract()); + } else if (blob->flow() == BTFT_NEIGHBOURS) { + blob->set_flow(BTFT_NONE); + blob->ClearNeighbours(); + } + } + // Clear the grid as we don't want the small stuff hanging around in it. + Clear(); } /** Puts the block blobs (normal and large) into the grid. */ void StrokeWidth::InsertBlobs(TO_BLOCK* block, TabFind* line_grid) { + // Insert the blobs into this grid using the separator lines in line_grid. 
+ line_grid->InsertBlobList(true, true, false, &block->blobs, false, this); + line_grid->InsertBlobList(true, true, true, &block->large_blobs, + false, this); +} + +// Sets the leader_on_left or leader_on_right flags for blobs +// that are next to one end of the given leader partition. +// If left_of_part is true, then look at the left side of the partition for +// blobs on which to set the leader_on_right flag. +void StrokeWidth::MarkLeaderNeighbours(const ColPartition* part, + bool left_of_part) { + const TBOX& part_box = part->bounding_box(); + BlobGridSearch blobsearch(this); + // Search to the side of the leader for the nearest neighbour. + BLOBNBOX* best_blob = NULL; + int best_gap = 0; + blobsearch.StartSideSearch(left_of_part ? part_box.left() : part_box.right(), + part_box.bottom(), part_box.top()); + BLOBNBOX* blob; + while ((blob = blobsearch.NextSideSearch(left_of_part)) != NULL) { + const TBOX& blob_box = blob->bounding_box(); + if (!blob_box.y_overlap(part_box)) + continue; + int x_gap = blob_box.x_gap(part_box); + if (x_gap > 2 * gridsize()) { + break; + } else if (best_blob == NULL || x_gap < best_gap) { + best_blob = blob; + best_gap = x_gap; + } + } + if (best_blob != NULL) { + if (left_of_part) + best_blob->set_leader_on_right(true); + else + best_blob->set_leader_on_left(true); + if (leaders_win_ != NULL) { + leaders_win_->Pen(left_of_part ? ScrollView::RED : ScrollView::GREEN); + const TBOX& blob_box = best_blob->bounding_box(); + leaders_win_->Rectangle(blob_box.left(), blob_box.bottom(), + blob_box.right(), blob_box.top()); + } + } +} + +// Helper to compute the UQ of the square-ish CJK charcters. 
+static int UpperQuartileCJKSize(int gridsize, BLOBNBOX_LIST* blobs) {
+  STATS sizes(0, gridsize * kMaxCJKSizeRatio);
+  BLOBNBOX_IT it(blobs);
+  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
+    BLOBNBOX* blob = it.data();
+    int width = blob->bounding_box().width();
+    int height = blob->bounding_box().height();
+    if (width <= height * kCJKAspectRatio && height < width * kCJKAspectRatio)
+      sizes.add(height, 1);
+  }
+  return static_cast<int>(sizes.ile(0.75f) + 0.5);
+}
+
+// Fix broken CJK characters, using the fake joined blobs mechanism.
+// Blobs are really merged, ie the master takes all the outlines and the
+// others are deleted.
+void StrokeWidth::FixBrokenCJK(BLOBNBOX_LIST* blobs, TabFind* line_grid) {
+  int median_height = UpperQuartileCJKSize(gridsize(), blobs);
+  int max_dist = static_cast<int>(median_height * kCJKBrokenDistanceFraction);
+  int max_size = static_cast<int>(median_height * kCJKAspectRatio);
+  BLOBNBOX_IT blob_it(blobs);
+  for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
+    BLOBNBOX* blob = blob_it.data();
+    if (blob->cblob() == NULL || blob->cblob()->out_list()->empty())
+      continue;
+    TBOX bbox = blob->bounding_box();
+    bool debug = AlignedBlob::WithinTestRegion(3, bbox.left(),
+                                               bbox.bottom());
+    if (debug) {
+      tprintf("Checking for Broken CJK (max size=%d):", max_size);
+      bbox.print();
+    }
+    // Generate a list of blobs that overlap or are near enough to merge.
+    BLOBNBOX_CLIST overlapped_blobs;
+    AccumulateOverlaps(blob, debug, max_size, max_dist,
+                       &bbox, &overlapped_blobs);
+    if (!overlapped_blobs.empty()) {
+      // There are overlapping blobs, so qualify them as being satisfactory
+      // before removing them from the grid and replacing them with the union.
+      // The final box must be roughly square.
+ if (bbox.width() > bbox.height() * kCJKAspectRatio || + bbox.height() > bbox.width() * kCJKAspectRatio) { + if (debug) { + tprintf("Bad final aspectratio:"); + bbox.print(); + } + continue; + } + // There can't be too many blobs to merge. + if (overlapped_blobs.length() >= kCJKMaxComponents) { + if (debug) + tprintf("Too many neighbours: %d\n", overlapped_blobs.length()); + continue; + } + // The strokewidths must match amongst the join candidates. + BLOBNBOX_C_IT n_it(&overlapped_blobs); + for (n_it.mark_cycle_pt(); !n_it.cycled_list(); n_it.forward()) { + BLOBNBOX* neighbour = NULL; + neighbour = n_it.data(); + if (!blob->MatchingStrokeWidth(*neighbour, kStrokeWidthFractionCJK, + kStrokeWidthCJK)) + break; + } + if (!n_it.cycled_list()) { + if (debug) { + tprintf("Bad stroke widths:"); + PrintBoxWidths(blob); + } + continue; // Not good enough. + } + + // Merge all the candidates into blob. + // We must remove blob from the grid and reinsert it after merging + // to maintain the integrity of the grid. + RemoveBBox(blob); + // Everything else will be calculated later. + for (n_it.mark_cycle_pt(); !n_it.cycled_list(); n_it.forward()) { + BLOBNBOX* neighbour = n_it.data(); + RemoveBBox(neighbour); + blob->really_merge(neighbour); + } + line_grid->InsertBlob(true, true, false, blob, this); + if (debug) { + tprintf("Done! Final box:"); + bbox.print(); + } + } + } + // Permanently delete all the empty shell blobs that contain no outlines. + for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) { + BLOBNBOX* blob = blob_it.data(); + if (blob->cblob() == NULL || blob->cblob()->out_list()->empty()) { + if (blob->cblob() != NULL) + delete blob->cblob(); + delete blob_it.extract(); + } + } +} + +// Helper function to determine whether it is reasonable to merge the +// bbox and the nbox for repairing broken CJK. 
+// The distance apart must not exceed max_dist, the combined size must
+// not exceed max_size, and the aspect ratio must either improve or at
+// least not get worse by much.
+static bool AcceptableCJKMerge(const TBOX& bbox, const TBOX& nbox,
+                               bool debug, int max_size, int max_dist,
+                               int* x_gap, int* y_gap) {
+  *x_gap = bbox.x_gap(nbox);
+  *y_gap = bbox.y_gap(nbox);
+  TBOX merged(nbox);
+  merged += bbox;
+  if (debug) {
+    tprintf("gaps = %d, %d, merged_box:", *x_gap, *y_gap);
+    merged.print();
+  }
+  if (*x_gap <= max_dist && *y_gap <= max_dist &&
+      merged.width() <= max_size && merged.height() <= max_size) {
+    // Close enough to call overlapping. Check aspect ratios.
+    double old_ratio = static_cast<double>(bbox.width()) / bbox.height();
+    if (old_ratio < 1.0) old_ratio = 1.0 / old_ratio;
+    double new_ratio = static_cast<double>(merged.width()) / merged.height();
+    if (new_ratio < 1.0) new_ratio = 1.0 / new_ratio;
+    if (new_ratio <= old_ratio * kCJKAspectRatioIncrease)
+      return true;
+  }
+  return false;
+}
+
+// Collect blobs that overlap or are within max_dist of the input bbox.
+// Return them in the list of blobs and expand the bbox to be the union
+// of all the boxes. not_this is excluded from the search, as are blobs
+// that cause the merged box to exceed max_size in either dimension.
+void StrokeWidth::AccumulateOverlaps(const BLOBNBOX* not_this, bool debug,
+                                     int max_size, int max_dist,
+                                     TBOX* bbox, BLOBNBOX_CLIST* blobs) {
+  // While searching, nearests holds the nearest failed blob in each
+  // direction. When we have a nearest in each of the 4 directions, then
+  // the search is over, and at this point the final bbox must not overlap
+  // any of the nearests.
+  BLOBNBOX* nearests[BND_COUNT];
+  for (int i = 0; i < BND_COUNT; ++i) {
+    nearests[i] = NULL;
+  }
+  int x = (bbox->left() + bbox->right()) / 2;
+  int y = (bbox->bottom() + bbox->top()) / 2;
+  // Run a radial search for blobs that overlap or are sufficiently close.
+  BlobGridSearch radsearch(this);
+  radsearch.StartRadSearch(x, y, kCJKRadius);
+  BLOBNBOX* neighbour;
+  while ((neighbour = radsearch.NextRadSearch()) != NULL) {
+    if (neighbour == not_this) continue;
+    TBOX nbox = neighbour->bounding_box();
+    int x_gap, y_gap;
+    if (AcceptableCJKMerge(*bbox, nbox, debug, max_size, max_dist,
+                           &x_gap, &y_gap)) {
+      // Close enough to call overlapping. Merge boxes.
+      *bbox += nbox;
+      blobs->add_sorted(SortByBoxLeft<BLOBNBOX>, true, neighbour);
+      if (debug) {
+        tprintf("Added:");
+        nbox.print();
+      }
+      // Since we merged, search the nearests, as some might now be mergeable.
+      for (int dir = 0; dir < BND_COUNT; ++dir) {
+        if (nearests[dir] == NULL) continue;
+        nbox = nearests[dir]->bounding_box();
+        if (AcceptableCJKMerge(*bbox, nbox, debug, max_size,
+                               max_dist, &x_gap, &y_gap)) {
+          // Close enough to call overlapping. Merge boxes.
+          *bbox += nbox;
+          blobs->add_sorted(SortByBoxLeft<BLOBNBOX>, true, nearests[dir]);
+          if (debug) {
+            tprintf("Added:");
+            nbox.print();
+          }
+          nearests[dir] = NULL;
+          dir = -1;  // Restart the search.
+        }
+      }
+    } else if (x_gap < 0 && x_gap <= y_gap) {
+      // A vertical neighbour. Record the nearest.
+      BlobNeighbourDir dir = nbox.top() > bbox->top() ? BND_ABOVE : BND_BELOW;
+      if (nearests[dir] == NULL ||
+          y_gap < bbox->y_gap(nearests[dir]->bounding_box())) {
+        nearests[dir] = neighbour;
+      }
+    } else if (y_gap < 0 && y_gap <= x_gap) {
+      // A horizontal neighbour. Record the nearest.
+      BlobNeighbourDir dir = nbox.left() > bbox->left() ? BND_RIGHT : BND_LEFT;
+      if (nearests[dir] == NULL ||
+          x_gap < bbox->x_gap(nearests[dir]->bounding_box())) {
+        nearests[dir] = neighbour;
+      }
+    }
+    // If all nearests are non-null, then we have finished.
+    if (nearests[BND_LEFT] && nearests[BND_RIGHT] &&
+        nearests[BND_ABOVE] && nearests[BND_BELOW])
+      break;
+  }
+  // Final overlap with a nearest is not allowed.
+  for (int dir = 0; dir < BND_COUNT; ++dir) {
+    if (nearests[dir] == NULL) continue;
+    const TBOX& nbox = nearests[dir]->bounding_box();
+    if (debug) {
+      tprintf("Testing for overlap with:");
+      nbox.print();
+    }
+    if (bbox->overlap(nbox)) {
+      blobs->shallow_clear();
+      if (debug)
+        tprintf("Final box overlaps nearest\n");
+      return;
+    }
+  }
+}
+
+// Finds the textline direction to be horizontal or vertical according
+// to distance to neighbours and 1st and 2nd order neighbours.
+// Non-text tends to end up without a definite direction.
+void StrokeWidth::FindTextlineFlowDirection(bool final) {
+  int max_noise_count =
+    static_cast<int>(kMaxSmallNeighboursPerPix * gridsize() * gridsize());
+  BlobGridSearch gsearch(this);
+  BLOBNBOX* bbox;
+  // For every bbox in the grid, set its neighbours, unless in a noisy area.
+  gsearch.StartFullSearch();
+  while ((bbox = gsearch.NextFullSearch()) != NULL) {
+    int noise_count = noise_density_->GridCellValue(gsearch.GridX(),
+                                                    gsearch.GridY());
+    if (noise_count <= max_noise_count) {
+      SetNeighbours(false, bbox);
+    } else {
+      // The noise density is so high, that it must be non-text.
+      bbox->set_flow(BTFT_NONTEXT);
+    }
+  }
+  // Where vertical or horizontal wins by a big margin, clarify it.
+  gsearch.StartFullSearch();
+  while ((bbox = gsearch.NextFullSearch()) != NULL) {
+    SimplifyObviousNeighbours(bbox);
+  }
+  // Now try to make the blobs only vertical or horizontal using neighbours.
+  gsearch.StartFullSearch();
+  while ((bbox = gsearch.NextFullSearch()) != NULL) {
+    SetNeighbourFlows(bbox);
+  }
+  if ((textord_tabfind_show_strokewidths && final) ||
+      textord_tabfind_show_strokewidths > 1) {
+    initial_widths_win_ = DisplayGoodBlobs("InitialStrokewidths", 400, 0);
+  }
+  // Improve flow direction with neighbours.
+  gsearch.StartFullSearch();
+  while ((bbox = gsearch.NextFullSearch()) != NULL) {
+    SmoothNeighbourTypes(bbox, false);
+  }
+  // Now allow reset of firm values to fix renegades.
+  gsearch.StartFullSearch();
+  while ((bbox = gsearch.NextFullSearch()) != NULL) {
+    SmoothNeighbourTypes(bbox, true);
+  }
+  // Repeat.
+  gsearch.StartFullSearch();
+  while ((bbox = gsearch.NextFullSearch()) != NULL) {
+    SmoothNeighbourTypes(bbox, true);
+  }
+  if ((textord_tabfind_show_strokewidths && final) ||
+      textord_tabfind_show_strokewidths > 1) {
+    widths_win_ = DisplayGoodBlobs("ImprovedStrokewidths", 800, 0);
+  }
+}
+
+// Sets the neighbours and good_stroke_neighbours members of the blob by
+// searching close on all 4 sides.
+// When finding leader dots/dashes, there is a slightly different rule for
+// what makes a good neighbour.
+void StrokeWidth::SetNeighbours(bool leaders, BLOBNBOX* blob) {
+  int line_trap_count = 0;
+  for (int dir = 0; dir < BND_COUNT; ++dir) {
+    BlobNeighbourDir bnd = static_cast<BlobNeighbourDir>(dir);
+    line_trap_count += FindGoodNeighbour(bnd, leaders, blob);
+  }
+  if (line_trap_count > 0) {
+    // It looks like a line so isolate it by clearing its neighbours.
+    blob->ClearNeighbours();
+    const TBOX& box = blob->bounding_box();
+    blob->set_region_type(box.width() > box.height() ? BRT_HLINE : BRT_VLINE);
+  }
+}
+
+
+// Sets the good_stroke_neighbours member of the blob if it has a
+// GoodNeighbour on the given side.
+// Also sets the neighbour in the blob, whether or not a good one is found.
+// Returns the number of blobs in the nearby search area that would lead us to
+// believe that this blob is a line separator.
+// Leaders get extra special lenient treatment.
+int StrokeWidth::FindGoodNeighbour(BlobNeighbourDir dir, bool leaders,
+                                   BLOBNBOX* blob) {
+  // Search for neighbours that overlap vertically.
+  TBOX blob_box = blob->bounding_box();
+  int top = blob_box.top();
+  int bottom = blob_box.bottom();
+  int left = blob_box.left();
+  int right = blob_box.right();
+  int width = right - left;
+  int height = top - bottom;
+
+  // A trap to detect lines tests for the min dimension of neighbours
+  // being larger than a multiple of the min dimension of the line
+  // and the larger dimension being smaller than a fraction of the max
+  // dimension of the line.
+  int line_trap_max = MAX(width, height) / kLineTrapLongest;
+  int line_trap_min = MIN(width, height) * kLineTrapShortest;
+  int line_trap_count = 0;
+
+  int min_good_overlap = (dir == BND_LEFT || dir == BND_RIGHT)
+                       ? height / 2 : width / 2;
+  int min_decent_overlap = (dir == BND_LEFT || dir == BND_RIGHT)
+                         ? height / 3 : width / 3;
+  if (leaders)
+    min_good_overlap = min_decent_overlap = 1;
+
+  int search_pad = static_cast<int>(sqrt(static_cast<double>(width * height)));
+  if (gridsize() > search_pad)
+    search_pad = gridsize();
+  TBOX search_box = blob_box;
+  // Pad the search in the appropriate direction.
+  switch (dir) {
+    case BND_LEFT:
+      search_box.set_left(search_box.left() - search_pad);
+      break;
+    case BND_RIGHT:
+      search_box.set_right(search_box.right() + search_pad);
+      break;
+    case BND_BELOW:
+      search_box.set_bottom(search_box.bottom() - search_pad);
+      break;
+    case BND_ABOVE:
+      search_box.set_top(search_box.top() + search_pad);
+      break;
+    case BND_COUNT:
+      return 0;
+  }
+
+  BlobGridSearch rectsearch(this);
+  rectsearch.StartRectSearch(search_box);
+  BLOBNBOX* best_neighbour = NULL;
+  double best_goodness = 0.0;
+  bool best_is_good = false;
+  BLOBNBOX* neighbour;
+  while ((neighbour = rectsearch.NextRectSearch()) != NULL) {
+    TBOX nbox = neighbour->bounding_box();
+    if (neighbour == blob)
+      continue;
+    int mid_x = (nbox.left() + nbox.right()) / 2;
+    if (mid_x < blob->left_rule() || mid_x > blob->right_rule())
+      continue;  // In a different column.
+
+    // Last-minute line detector. There is a small upper limit to the line
+    // width accepted by the morphological line detector.
+    int n_width = nbox.width();
+    int n_height = nbox.height();
+    if (MIN(n_width, n_height) > line_trap_min &&
+        MAX(n_width, n_height) < line_trap_max)
+      ++line_trap_count;
+    if (TabFind::VeryDifferentSizes(MAX(n_width, n_height),
+                                    MAX(width, height)))
+      continue;  // Could be a different font size or non-text.
+    // Amount of vertical overlap between the blobs.
+    int overlap;
+    // If the overlap is along the short side of the neighbour, and it
+    // is fully overlapped, then perp_overlap holds the length of the long
+    // side of the neighbour. A measure to include hyphens and dashes as
+    // legitimate neighbours.
+    int perp_overlap;
+    int gap;
+    if (dir == BND_LEFT || dir == BND_RIGHT) {
+      overlap = MIN(nbox.top(), top) - MAX(nbox.bottom(), bottom);
+      if (overlap == nbox.height() && nbox.width() > nbox.height())
+        perp_overlap = nbox.width();
+      else
+        perp_overlap = overlap;
+      gap = dir == BND_LEFT ? left - nbox.left() : nbox.right() - right;
+      if (gap <= 0)
+        continue;  // On the wrong side.
+      gap -= n_width;
+    } else {
+      overlap = MIN(nbox.right(), right) - MAX(nbox.left(), left);
+      if (overlap == nbox.width() && nbox.height() > nbox.width())
+        perp_overlap = nbox.height();
+      else
+        perp_overlap = overlap;
+      gap = dir == BND_BELOW ? bottom - nbox.bottom() : nbox.top() - top;
+      if (gap <= 0)
+        continue;  // On the wrong side.
+      gap -= n_height;
+    }
+    if (-gap > overlap)
+      continue;  // Overlaps the wrong way.
+    if (perp_overlap < min_decent_overlap)
+      continue;  // Doesn't overlap enough.
+    bool bad_sizes = TabFind::DifferentSizes(height, n_height) &&
+                     TabFind::DifferentSizes(width, n_width);
+    bool is_good = overlap >= min_good_overlap && !bad_sizes &&
+                   blob->MatchingStrokeWidth(*neighbour,
+                                             kStrokeWidthFractionTolerance,
+                                             kStrokeWidthTolerance);
+    // Best is a fuzzy combination of gap, overlap and is good.
+    // Basically if you make one thing twice as good without making
+    // anything else twice as bad, then it is better.
+    if (gap < 1) gap = 1;
+    double goodness = (1.0 + is_good) * overlap / gap;
+    if (goodness > best_goodness) {
+      best_neighbour = neighbour;
+      best_goodness = goodness;
+      best_is_good = is_good;
+    }
+  }
+  blob->set_neighbour(dir, best_neighbour, best_is_good);
+  return line_trap_count;
+}
+
+// Helper to get a list of 1st-order neighbours.
+static void ListNeighbours(const BLOBNBOX* blob,
+                           BLOBNBOX_CLIST* neighbours) {
+  for (int dir = 0; dir < BND_COUNT; ++dir) {
+    BlobNeighbourDir bnd = static_cast<BlobNeighbourDir>(dir);
+    BLOBNBOX* neighbour = blob->neighbour(bnd);
+    if (neighbour != NULL) {
+      neighbours->add_sorted(SortByBoxLeft<BLOBNBOX>, true, neighbour);
+    }
+  }
+}
+
+// Helper to get a list of 1st and 2nd order neighbours.
+static void List2ndNeighbours(const BLOBNBOX* blob,
+                              BLOBNBOX_CLIST* neighbours) {
+  ListNeighbours(blob, neighbours);
+  for (int dir = 0; dir < BND_COUNT; ++dir) {
+    BlobNeighbourDir bnd = static_cast<BlobNeighbourDir>(dir);
+    BLOBNBOX* neighbour = blob->neighbour(bnd);
+    if (neighbour != NULL) {
+      ListNeighbours(neighbour, neighbours);
+    }
+  }
+}
+
+// Helper to get a list of 1st, 2nd and 3rd order neighbours.
+static void List3rdNeighbours(const BLOBNBOX* blob,
+                              BLOBNBOX_CLIST* neighbours) {
+  List2ndNeighbours(blob, neighbours);
+  for (int dir = 0; dir < BND_COUNT; ++dir) {
+    BlobNeighbourDir bnd = static_cast<BlobNeighbourDir>(dir);
+    BLOBNBOX* neighbour = blob->neighbour(bnd);
+    if (neighbour != NULL) {
+      List2ndNeighbours(neighbour, neighbours);
+    }
+  }
+}
+
+// Helper to count the evidence for verticalness or horizontalness
+// in a list of neighbours.
+static void CountNeighbourGaps(bool debug, BLOBNBOX_CLIST* neighbours, + int* pure_h_count, int* pure_v_count) { + if (neighbours->length() <= kMostlyOneDirRatio) + return; + BLOBNBOX_C_IT it(neighbours); + for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { + BLOBNBOX* blob = it.data(); + int h_min, h_max, v_min, v_max; + blob->MinMaxGapsClipped(&h_min, &h_max, &v_min, &v_max); + if (debug) + tprintf("Hgaps [%d,%d], vgaps [%d,%d]:", h_min, h_max, v_min, v_max); + if (h_max < v_min || + blob->leader_on_left() || blob->leader_on_right()) { + // Horizontal gaps are clear winners. Count a pure horizontal. + ++*pure_h_count; + if (debug) tprintf("Horz at:"); + } else if (v_max < h_min) { + // Vertical gaps are clear winners. Clear a pure vertical. + ++*pure_v_count; + if (debug) tprintf("Vert at:"); + } else { + if (debug) tprintf("Neither at:"); + } + if (debug) + blob->bounding_box().print(); + } +} + +// Makes the blob to be only horizontal or vertical where evidence +// is clear based on gaps of 2nd order neighbours, or definite individual +// blobs. +void StrokeWidth::SetNeighbourFlows(BLOBNBOX* blob) { + if (blob->DefiniteIndividualFlow()) + return; + bool debug = AlignedBlob::WithinTestRegion(2, blob->bounding_box().left(), + blob->bounding_box().bottom()); + if (debug) { + tprintf("SetNeighbourFLows on:"); + blob->bounding_box().print(); + } + BLOBNBOX_CLIST neighbours; + List3rdNeighbours(blob, &neighbours); + // The number of pure horizontal and vertical neighbours. + int pure_h_count = 0; + int pure_v_count = 0; + CountNeighbourGaps(debug, &neighbours, &pure_h_count, &pure_v_count); + if (debug) { + HandleClick(blob->bounding_box().left() + 1, + blob->bounding_box().bottom() + 1); + tprintf("SetFlows: h_count=%d, v_count=%d\n", + pure_h_count, pure_v_count); + } + if (!neighbours.empty()) { + blob->set_vert_possible(true); + blob->set_horz_possible(true); + if (pure_h_count > 2 * pure_v_count) { + // Horizontal gaps are clear winners. 
Clear vertical neighbours. + blob->set_vert_possible(false); + } else if (pure_v_count > 2 * pure_h_count) { + // Vertical gaps are clear winners. Clear horizontal neighbours. + blob->set_horz_possible(false); + } + } else { + // Lonely blob. Can't tell its flow direction. + blob->set_vert_possible(false); + blob->set_horz_possible(false); + } +} + + +// Helper to count the number of horizontal and vertical blobs in a list. +static void CountNeighbourTypes(BLOBNBOX_CLIST* neighbours, + int* pure_h_count, int* pure_v_count) { + BLOBNBOX_C_IT it(neighbours); + for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { + BLOBNBOX* blob = it.data(); + if (blob->UniquelyHorizontal()) + ++*pure_h_count; + if (blob->UniquelyVertical()) + ++*pure_v_count; + } +} + +// Nullify the neighbours in the wrong directions where the direction +// is clear-cut based on a distance margin. Good for isolating vertical +// text from neighbouring horizontal text. +void StrokeWidth::SimplifyObviousNeighbours(BLOBNBOX* blob) { + int margin = gridsize() / 2; + int h_min, h_max, v_min, v_max; + blob->MinMaxGapsClipped(&h_min, &h_max, &v_min, &v_max); + if ((h_max + margin < v_min && h_max < margin / 2) || + blob->leader_on_left() || blob->leader_on_right()) { + // Horizontal gaps are clear winners. Clear vertical neighbours. + blob->set_neighbour(BND_ABOVE, NULL, false); + blob->set_neighbour(BND_BELOW, NULL, false); + } else if (v_max + margin < h_min && v_max < margin / 2) { + // Vertical gaps are clear winners. Clear horizontal neighbours. + blob->set_neighbour(BND_LEFT, NULL, false); + blob->set_neighbour(BND_RIGHT, NULL, false); + } +} + +// Smoothes the vertical/horizontal type of the blob based on the +// 2nd-order neighbours. If reset_all is true, then all blobs are +// changed. Otherwise, only ambiguous blobs are processed. 
+void StrokeWidth::SmoothNeighbourTypes(BLOBNBOX* blob, bool reset_all) { + if ((blob->vert_possible() && blob->horz_possible()) || reset_all) { + // There are both horizontal and vertical so try to fix it. + BLOBNBOX_CLIST neighbours; + List2ndNeighbours(blob, &neighbours); + // The number of pure horizontal and vertical neighbours. + int pure_h_count = 0; + int pure_v_count = 0; + CountNeighbourTypes(&neighbours, &pure_h_count, &pure_v_count); + if (AlignedBlob::WithinTestRegion(2, blob->bounding_box().left(), + blob->bounding_box().bottom())) { + HandleClick(blob->bounding_box().left() + 1, + blob->bounding_box().bottom() + 1); + tprintf("pure_h=%d, pure_v=%d\n", + pure_h_count, pure_v_count); + } + if (pure_h_count > pure_v_count) { + // Horizontal gaps are clear winners. Clear vertical neighbours. + blob->set_vert_possible(false); + blob->set_horz_possible(true); + } else if (pure_v_count > pure_h_count) { + // Vertical gaps are clear winners. Clear horizontal neighbours. + blob->set_horz_possible(false); + blob->set_vert_possible(true); + } + } else if (AlignedBlob::WithinTestRegion(2, blob->bounding_box().left(), + blob->bounding_box().bottom())) { + HandleClick(blob->bounding_box().left() + 1, + blob->bounding_box().bottom() + 1); + tprintf("Clean on pass 3!\n"); + } +} + +// Puts the block blobs (normal and large) into the grid. +void StrokeWidth::InsertBlobsOld(TO_BLOCK* block, TabFind* line_grid) { // Insert the blobs into this grid using the separator lines in line_grid. line_grid->InsertBlobList(true, false, false, &block->blobs, false, this); line_grid->InsertBlobList(true, false, true, &block->large_blobs, @@ -82,17 +1131,19 @@ void StrokeWidth::MoveGoodLargeBlobs(int resolution, TO_BLOCK* block) { } } -/** Displays the blobs green or red according to whether they are good or not. */ +/** Displays the blobs colored according to the number of good neighbours + * and the vertical/horizontal flow. 
+ */
 ScrollView* StrokeWidth::DisplayGoodBlobs(const char* window_name,
-                                          ScrollView* window) {
+                                          int x, int y) {
+  ScrollView* window = NULL;
 #ifndef GRAPHICS_DISABLED
-  if (window == NULL)
-    window = MakeWindow(0, 0, window_name);
+  window = MakeWindow(x, y, window_name);
   // For every blob in the grid, display it.
   window->Brush(ScrollView::NONE);
   // For every bbox in the grid, display it.
-  GridSearch<BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT> gsearch(this);
+  BlobGridSearch gsearch(this);
   gsearch.StartFullSearch();
   BLOBNBOX* bbox;
   while ((bbox = gsearch.NextFullSearch()) != NULL) {
@@ -101,10 +1152,22 @@ ScrollView* StrokeWidth::DisplayGoodBlobs(const char* window_name,
     int right_x = box.right();
     int top_y = box.top();
     int bottom_y = box.bottom();
-    if (textord_debug_printable || GoodTextBlob(bbox))
-      window->Pen(ScrollView::GREEN);
-    else
-      window->Pen(ScrollView::RED);
+    int goodness = bbox->GoodTextBlob();
+    BlobRegionType blob_type = bbox->region_type();
+    if (bbox->UniquelyVertical())
+      blob_type = BRT_VERT_TEXT;
+    if (bbox->UniquelyHorizontal())
+      blob_type = BRT_TEXT;
+    BlobTextFlowType flow = bbox->flow();
+    if (flow == BTFT_NONE) {
+      if (goodness == 0)
+        flow = BTFT_NEIGHBOURS;
+      else if (goodness == 1)
+        flow = BTFT_CHAIN;
+      else
+        flow = BTFT_STRONG_CHAIN;
+    }
+    window->Pen(BLOBNBOX::TextlineColor(blob_type, flow));
     window->Rectangle(left_x, bottom_y, right_x, top_y);
   }
   window->Update();
@@ -112,26 +1175,6 @@ ScrollView* StrokeWidth::DisplayGoodBlobs(const char* window_name,
   return window;
 }
 
-/** Handles a click event in a display window. */
-void StrokeWidth::HandleClick(int x, int y) {
-  BBGrid<BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT>::HandleClick(x, y);
-  // Run a radial search for blobs that overlap.
- GridSearch radsearch(this); - radsearch.StartRadSearch(x, y, 1); - BLOBNBOX* neighbour; - FCOORD click(x, y); - while ((neighbour = radsearch.NextRadSearch()) != NULL) { - TBOX nbox = neighbour->bounding_box(); - if (nbox.contains(click) && neighbour->cblob() != NULL) { - SetBlobStrokeWidth(true, neighbour); - tprintf("Box (%d,%d)->(%d,%d): h-width=%.1f, v-width=%.1f p-width=%1.f\n", - nbox.left(), nbox.bottom(), nbox.right(), nbox.top(), - neighbour->horz_stroke_width(), neighbour->vert_stroke_width(), - 2.0 * neighbour->cblob()->area()/neighbour->cblob()->perimeter()); - } - } -} - /** * Returns true if there is at least one side neighbour that has a similar * stroke width and is not on the other side of a rule line. @@ -194,4 +1237,3 @@ bool StrokeWidth::GoodTextBlob(BLOBNBOX* blob) { } } // namespace tesseract. - diff --git a/textord/strokewidth.h b/textord/strokewidth.h index e0dc0a6e26..83724b2f39 100644 --- a/textord/strokewidth.h +++ b/textord/strokewidth.h @@ -21,6 +21,7 @@ #define TESSERACT_TEXTORD_STROKEWIDTH_H__ #include "bbgrid.h" // Base class. +#include "blobbox.h" // BlobNeighourDir. #include "tabvector.h" // For BLOBNBOX_CLIST. class TO_BLOCK; @@ -28,6 +29,7 @@ class ScrollView; namespace tesseract { +class ColPartition_LIST; class TabFind; /** @@ -40,23 +42,136 @@ class StrokeWidth : public BBGrid { StrokeWidth(int gridsize, const ICOORD& bleft, const ICOORD& tright); virtual ~StrokeWidth(); + // To save computation, the process of generating partitions is broken + // into the following 4 steps: + // TestVerticalTextDirection + // CorrectForRotation (used only if a rotation is to be applied) + // FindLeaderPartitions + // TODO(rays) Coming soon: + // GradeBlobsIntoPartitions. + // which will replace entirely the old call sequence of: + // InsertBlobsOld + // MoveGoodLargeBlobs. + // These functions are all required, in sequence, except for + // CorrectForRotation, which is not needed if no rotation is applied. 
+ + // Types all the blobs as vertical or horizontal text or unknown and + // returns true if the majority are vertical. + // If the blobs are rotated, it is necessary to call CorrectForRotation + // after rotating everything, otherwise the work done here will be enough. + // If cjk_merge is true, it will attempt to merge broken cjk characters. + // If osd_blobs is not null, a list of blobs from the dominant textline + // direction are returned for use in orientation and script detection. + bool TestVerticalTextDirection(bool cjk_merge, + TO_BLOCK* block, TabFind* line_grid, + BLOBNBOX_CLIST* osd_blobs); + + // Corrects the data structures for the given rotation. + void CorrectForRotation(const FCOORD& rotation, TO_BLOCK* block, + TabFind* line_grid); + + // Finds leader partitions and inserts them into the give grid. + void FindLeaderPartitions(TO_BLOCK* block, TabFind* line_grid); + + // Handles a click event in a display window. + virtual void HandleClick(int x, int y); + // Puts the block blobs (normal and large) into the grid. - void InsertBlobs(TO_BLOCK* block, TabFind* line_grid); + void InsertBlobsOld(TO_BLOCK* block, TabFind* line_grid); // Moves the large blobs that have good stroke-width neighbours to the normal // blobs list. void MoveGoodLargeBlobs(int resolution, TO_BLOCK* block); - // Displays the blobs green or red according to whether they are good or not. - ScrollView* DisplayGoodBlobs(const char* window_name, ScrollView* tab_win); + private: + // Reorganize the blob lists with a different definition of small, medium + // and large, compared to the original definition. + // Height is still the primary filter key, but medium width blobs of small + // height become medium, and very wide blobs of small height stay small. + void ReFilterBlobs(TO_BLOCK* block); - // Handles a click event in a display window. 
- virtual void HandleClick(int x, int y); + // Computes the noise_density_ by summing the number of elements in a + // neighbourhood of each grid cell. + void ComputeNoiseDensity(TO_BLOCK* block, TabFind* line_grid); + + // Detects and marks leader dots/dashes. + // Leaders are horizontal chains of small or noise blobs that look + // monospace according to ColPartition::MarkAsLeaderIfMonospaced(). + // Detected leaders become the only occupants of small_blobs list. + // Non-leader small blobs get moved to the blobs list. + // Non-leader noise blobs remain singletons in the noise list. + // All small and noise blobs in high density regions are marked BTFT_NONTEXT. + void FindLeadersAndMarkNoise(bool final, TO_BLOCK* block, TabFind* line_grid, + ColPartition_LIST* leader_parts); + + // Puts the block blobs (normal and large) into the grid. + void InsertBlobs(TO_BLOCK* block, TabFind* line_grid); + + // Fix broken CJK characters, using the fake joined blobs mechanism. + // Blobs are really merged, ie the master takes all the outlines and the + // others are deleted. + void FixBrokenCJK(BLOBNBOX_LIST* blobs, TabFind* line_grid); + + // Collect blobs that overlap or are within max_dist of the input bbox. + // Return them in the list of blobs and expand the bbox to be the union + // of all the boxes. not_this is excluded from the search, as are blobs + // that cause the merged box to exceed max_size in either dimension. + void AccumulateOverlaps(const BLOBNBOX* not_this, bool debug, + int max_size, int max_dist, + TBOX* bbox, BLOBNBOX_CLIST* blobs); + + // Finds the textline direction to be horizontal or vertical according + // to distance to neighbours and 1st and 2nd order neighbours. + // Non-text tends to end up without a definite direction. + void FindTextlineFlowDirection(bool final); + + // Sets the neighbours and good_stroke_neighbours members of the blob by + // searching close on all 4 sides. 
+ // When finding leader dots/dashes, there is a slightly different rule for + // what makes a good neighbour. + void SetNeighbours(bool leaders, BLOBNBOX* blob); + + // Sets the good_stroke_neighbours member of the blob if it has a + // GoodNeighbour on the given side. + // Also sets the neighbour in the blob, whether or not a good one is found. + // Return value is the number of neighbours in the line trap size range. + // Leaders get extra special lenient treatment. + int FindGoodNeighbour(BlobNeighbourDir dir, bool leaders, BLOBNBOX* blob); + + // Makes the blob to be only horizontal or vertical where evidence + // is clear based on gaps of 2nd order neighbours. + void SetNeighbourFlows(BLOBNBOX* blob); + + // Nullify the neighbours in the wrong directions where the direction + // is clear-cut based on a distance margin. Good for isolating vertical + // text from neighbouring horizontal text. + void SimplifyObviousNeighbours(BLOBNBOX* blob); + + // Smoothes the vertical/horizontal type of the blob based on the + // 2nd-order neighbours. If reset_all is true, then all blobs are + // changed. Otherwise, only ambiguous blobs are processed. + void SmoothNeighbourTypes(BLOBNBOX* blob, bool desperate); + + // Sets the leader_on_left or leader_on_right flags for blobs + // that are next to one end of the given leader partition. + // If left_of_part is true, then look at the left side of the partition for + // blobs on which to set the leader_on_right flag. + void MarkLeaderNeighbours(const ColPartition* part, bool left_of_part); + + // Displays the blobs colored according to the number of good neighbours + // and the vertical/horizontal flow. + ScrollView* DisplayGoodBlobs(const char* window_name, int x, int y); private: // Returns true if there is at least one side neighbour that has a similar // stroke width. bool GoodTextBlob(BLOBNBOX* blob); + // Grid to indicate the dot noise density at each grid coord. + IntGrid* noise_density_; + // Windows for debug display. 
+ ScrollView* leaders_win_; + ScrollView* initial_widths_win_; + ScrollView* widths_win_; }; } // namespace tesseract. diff --git a/textord/tabfind.cpp b/textord/tabfind.cpp index cb30f57c79..e832a9bfde 100644 --- a/textord/tabfind.cpp +++ b/textord/tabfind.cpp @@ -45,8 +45,10 @@ const int kMinColumnWidth = 200; const double kMinFractionalLinesInColumn = 0.125; // Fraction of height used as alignment tolerance for aligned tabs. const double kAlignedFraction = 0.03125; -// Fraction of height used as a minimum gap for aligned blobs. -const double kAlignedGapFraction = 0.75; +// Minimum gutter width in absolute inch (multiplied by resolution) +const double kMinGutterWidthAbsolute = 0.02; +// Maximum gutter width (in absolute inch) that we care about +const double kMaxGutterWidthAbsolute = 2.00; // Multiplier of new y positions in running average for skew estimation. const double kSmoothFactor = 0.25; // Min coverage for a good baseline between vectors @@ -68,24 +70,30 @@ const int kMaxTextLineBlobRatio = 5; const int kMinTextLineBlobRatio = 3; // Fraction of box area covered by image to make a blob image. const double kMinImageArea = 0.5; +// Upto 30 degrees is allowed for rotations of diacritic blobs. +// Keep this value slightly larger than kCosSmallAngle in blobbox.cpp +// so that the assert there never fails. 
+const double kCosMaxSkewAngle = 0.866025; BOOL_VAR(textord_tabfind_show_initialtabs, false, "Show tab candidates"); BOOL_VAR(textord_tabfind_show_finaltabs, false, "Show tab vectors"); -BOOL_VAR(textord_tabfind_vertical_text, true, "Enable vertical detection"); +double_VAR(textord_tabfind_aligned_gap_fraction, 0.75, + "Fraction of height used as a minimum gap for aligned blobs."); TabFind::TabFind(int gridsize, const ICOORD& bleft, const ICOORD& tright, - TabVector_LIST* vlines, int vertical_x, int vertical_y) + TabVector_LIST* vlines, int vertical_x, int vertical_y, + int resolution) : AlignedBlob(gridsize, bleft, tright), + resolution_(resolution), image_origin_(0, tright.y() - 1), tab_grid_(new BBGrid(gridsize, bleft, tright)) { - resolution_ = 0; width_cb_ = NULL; v_it_.set_to_list(&vectors_); v_it_.add_list_after(vlines); SetVerticalSkewAndParellelize(vertical_x, vertical_y); - width_cb_ = NewPermanentCallback(this, &TabFind::CommonWidth); + width_cb_ = NewPermanentTessCallback(this, &TabFind::CommonWidth); } TabFind::~TabFind() { @@ -141,6 +149,8 @@ bool TabFind::InsertBlob(bool h_spread, bool v_spread, bool large, blob->set_right_rule(RightEdgeForBox(box, false, false)); blob->set_left_crossing_rule(LeftEdgeForBox(box, true, false)); blob->set_right_crossing_rule(RightEdgeForBox(box, true, false)); + if (blob->joined_to_prev()) + return false; if (large) { // Search the grid to see what intersects it. // Setup a Rectangle search for overlapping this blob. @@ -267,68 +277,6 @@ int TabFind::LeftEdgeForBox(const TBOX& box, bool crossing, bool extended) { return v == NULL ? bleft_.x() : v->XAtY((box.top() + box.bottom()) / 2); } -// Return true if the given width is close to one of the common -// widths in column_widths_. 
-bool TabFind::CommonWidth(int width) { - width /= kColumnWidthFactor; - ICOORDELT_IT it(&column_widths_); - for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { - ICOORDELT* w = it.data(); - if (NearlyEqual(width, w->x(), 1)) - return true; - } - return false; -} - -// Return true if the sizes are more than a -// factor of 2 different. -bool TabFind::DifferentSizes(int size1, int size2) { - return size1 > size2 * 2 || size2 > size1 * 2; -} - -///////////////// PROTECTED functions (used by ColumnFinder). ////////////// - -// Top-level function to find TabVectors in an input page block. -void TabFind::FindTabVectors(int resolution, TabVector_LIST* hlines, - BLOBNBOX_LIST* image_blobs, TO_BLOCK* block, - FCOORD* reskew, FCOORD* rerotate) { - resolution_ = resolution; - *rerotate = FCOORD(1.0f, 0.0f); - FindInitialTabVectors(image_blobs, block); - if (textord_tabfind_vertical_text && TextMostlyVertical()) { - ResetForVerticalText(hlines, image_blobs, block, rerotate); - FindInitialTabVectors(image_blobs, block); - } - TabVector::MergeSimilarTabVectors(vertical_skew_, &vectors_, this); - SortVectors(); - CleanupTabs(); - Deskew(hlines, image_blobs, block, reskew); - ApplyTabConstraints(); - if (textord_tabfind_show_finaltabs) { - ScrollView* tab_win = MakeWindow(640, 50, "FinalTabs"); - if (textord_debug_images) { - tab_win->Image(AlignedBlob::textord_debug_pix().string(), - image_origin_.x(), image_origin_.y()); - } else { - DisplayBoxes(tab_win); - DisplayTabs("FinalTabs", tab_win); - } - tab_win = DisplayTabVectors(tab_win); - } -} - -// Top-level function to not find TabVectors in an input page block, -// but setup for single column mode. 
-void TabFind::DontFindTabVectors(int resolution, BLOBNBOX_LIST* image_blobs, - TO_BLOCK* block, FCOORD* reskew) { - resolution_ = resolution; - InsertBlobList(false, false, false, image_blobs, false, this); - InsertBlobList(true, false, false, &block->blobs, false, this); - ComputeBlobGoodness(); - reskew->set_x(1); - reskew->set_y(0); -} - // This comment documents how this function works. // For its purpose and arguments, see the comment in tabfind.h. // TabVectors are stored sorted by perpendicular distance of middle from @@ -430,6 +378,72 @@ TabVector* TabFind::LeftTabForBox(const TBOX& box, bool crossing, return best_v; } +// Return true if the given width is close to one of the common +// widths in column_widths_. +bool TabFind::CommonWidth(int width) { + width /= kColumnWidthFactor; + ICOORDELT_IT it(&column_widths_); + for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { + ICOORDELT* w = it.data(); + if (NearlyEqual(width, w->x(), 1)) + return true; + } + return false; +} + +// Return true if the sizes are more than a +// factor of 2 different. +bool TabFind::DifferentSizes(int size1, int size2) { + return size1 > size2 * 2 || size2 > size1 * 2; +} + +// Return true if the sizes are more than a +// factor of 5 different. +bool TabFind::VeryDifferentSizes(int size1, int size2) { + return size1 > size2 * 5 || size2 > size1 * 5; +} + +///////////////// PROTECTED functions (used by ColumnFinder). ////////////// + +// Top-level function to find TabVectors in an input page block. +// Returns false if the detected skew angle is impossible. 
+bool TabFind::FindTabVectors(TabVector_LIST* hlines, + BLOBNBOX_LIST* image_blobs, TO_BLOCK* block, + int min_gutter_width, + FCOORD* deskew, FCOORD* reskew) { + FindInitialTabVectors(image_blobs, block, min_gutter_width); + TabVector::MergeSimilarTabVectors(vertical_skew_, &vectors_, this); + SortVectors(); + CleanupTabs(); + if (!Deskew(hlines, image_blobs, block, deskew, reskew)) + return false; // Skew angle is too large. + ApplyTabConstraints(); + if (textord_tabfind_show_finaltabs) { + ScrollView* tab_win = MakeWindow(640, 50, "FinalTabs"); + if (textord_debug_images) { + tab_win->Image(AlignedBlob::textord_debug_pix().string(), + image_origin_.x(), image_origin_.y()); + } else { + DisplayBoxes(tab_win); + DisplayTabs("FinalTabs", tab_win); + } + tab_win = DisplayTabVectors(tab_win); + } + return true; +} + +// Top-level function to not find TabVectors in an input page block, +// but setup for single column mode. +void TabFind::DontFindTabVectors(BLOBNBOX_LIST* image_blobs, TO_BLOCK* block, + FCOORD* deskew, FCOORD* reskew) { + InsertBlobList(false, false, false, image_blobs, false, this); + InsertBlobList(true, false, false, &block->blobs, false, this); + deskew->set_x(1.0f); + deskew->set_y(0.0f); + reskew->set_x(1.0f); + reskew->set_y(0.0f); +} + // Helper function to setup search limits for *TabForBox. void TabFind::SetupTabSearch(int x, int y, int* min_key, int* max_key) { int key1 = TabVector::SortKey(vertical_skew_, x, (y + tright_.y()) / 2); @@ -456,7 +470,8 @@ ScrollView* TabFind::DisplayTabVectors(ScrollView* tab_win) { // First part of FindTabVectors, which may be used twice if the text // is mostly of vertical alignment. 
void TabFind::FindInitialTabVectors(BLOBNBOX_LIST* image_blobs, - TO_BLOCK* block) { + TO_BLOCK* block, + int min_gutter_width) { if (textord_tabfind_show_initialtabs) { ScrollView* line_win = MakeWindow(0, 0, "VerticalLines"); line_win = DisplayTabVectors(line_win); @@ -464,7 +479,7 @@ void TabFind::FindInitialTabVectors(BLOBNBOX_LIST* image_blobs, // Prepare the grid. InsertBlobList(false, false, false, image_blobs, false, this); InsertBlobList(true, false, false, &block->blobs, false, this); - ScrollView* initial_win = FindTabBoxes(); + ScrollView* initial_win = FindTabBoxes(min_gutter_width); FindAllTabVectors(); if (textord_tabfind_show_initialtabs) initial_win = DisplayTabVectors(initial_win); @@ -473,19 +488,18 @@ void TabFind::FindInitialTabVectors(BLOBNBOX_LIST* image_blobs, SortVectors(); EvaluateTabs(); ComputeColumnWidths(initial_win); - if (textord_tabfind_vertical_text) - MarkVerticalText(); + MarkVerticalText(); } // For each box in the grid, decide whether it is a candidate tab-stop, // and if so add it to the tab_grid_. -ScrollView* TabFind::FindTabBoxes() { +ScrollView* TabFind::FindTabBoxes(int min_gutter_width) { // For every bbox in the grid, determine whether it uses a tab on an edge. GridSearch gsearch(this); gsearch.StartFullSearch(); BLOBNBOX* bbox; while ((bbox = gsearch.NextFullSearch()) != NULL) { - if (TestBoxForTabs(bbox)) { + if (TestBoxForTabs(bbox, min_gutter_width)) { // If it is any kind of tab, insert it into the tab grid. tab_grid_->InsertBBox(false, false, bbox); } @@ -499,7 +513,7 @@ ScrollView* TabFind::FindTabBoxes() { return tab_win; } -bool TabFind::TestBoxForTabs(BLOBNBOX* bbox) { +bool TabFind::TestBoxForTabs(BLOBNBOX* bbox, int min_gutter_width) { GridSearch radsearch(this); TBOX box = bbox->bounding_box(); // If there are separator lines, get the column edges. @@ -520,8 +534,16 @@ bool TabFind::TestBoxForTabs(BLOBNBOX* bbox) { // Compute a search radius based on a multiple of the height. 
int radius = (height * kTabRadiusFactor + gridsize_ - 1) / gridsize_; radsearch.StartRadSearch((left_x + right_x)/2, (top_y + bottom_y)/2, radius); - int target_right = left_x - height * kAlignedGapFraction; - int target_left = right_x + height * kAlignedGapFraction; + // In Vertical Page mode, once we have an estimate of the vertical line + // spacing, the minimum amount of gutter space before a possible tab is + // increased under the assumption that column partition is always larger + // than line spacing. + int min_spacing = + static_cast(height * textord_tabfind_aligned_gap_fraction); + if (min_gutter_width > min_spacing) + min_spacing = min_gutter_width; + int target_right = left_x - min_spacing; + int target_left = right_x + min_spacing; // We will be evaluating whether the left edge could be a left tab, and // whether the right edge could be a right tab. // A box can be a tab if its bool is_(left/right)_tab remains true, meaning @@ -538,6 +560,16 @@ bool TabFind::TestBoxForTabs(BLOBNBOX* bbox) { int maybe_right_tab_up = 0; int maybe_left_tab_down = 0; int maybe_right_tab_down = 0; + if (bbox->leader_on_left()) { + is_left_tab = false; + maybe_left_tab_up = -MAX_INT32; + maybe_left_tab_down = -MAX_INT32; + } + if (bbox->leader_on_right()) { + is_right_tab = false; + maybe_right_tab_up = -MAX_INT32; + maybe_right_tab_down = -MAX_INT32; + } int alignment_tolerance = static_cast(resolution_ * kAlignedFraction); BLOBNBOX* neighbour = NULL; while ((neighbour = radsearch.NextRadSearch()) != NULL) { @@ -902,53 +934,41 @@ void TabFind::SetBlobRegionType(BLOBNBOX* blob) { // Mark blobs as being in a vertical text line where that is the case. // Returns true if the majority of the image is vertical text lines. 
void TabFind::MarkVerticalText() { - TabVector_IT it(&vectors_); - for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { - TabVector* v = it.data(); - TabVector* partner = v->VerticalTextlinePartner(); - if (partner != NULL) { - TabVector* left = v->IsLeftTab() ? v : partner; - TabVector* right = v->IsLeftTab() ? partner : v; - // Setup a rectangle search to mark the text as vertical. - TBOX box; - box.set_left(MIN(left->startpt().x(), left->endpt().x())); - box.set_right(MAX(right->startpt().x(), right->endpt().x())); - box.set_bottom(MIN(left->startpt().y(), right->startpt().y())); - box.set_top(MAX(left->endpt().y(), right->endpt().y())); - - GridSearch rsearch(this); - rsearch.StartRectSearch(box); - BLOBNBOX* blob = NULL; - while ((blob = rsearch.NextRectSearch()) != NULL) { - if (blob->region_type() < BRT_UNKNOWN) - continue; - const TBOX& blob_box = blob->bounding_box(); - if ((LeftTabForBox(blob_box, false, false) == left || - LeftTabForBox(blob_box, true, false) == left) && - (RightTabForBox(blob_box, false, false) == right || - RightTabForBox(blob_box, true, false) == right)) { - blob->set_region_type(BRT_VERT_TEXT); - } - } + if (textord_debug_tabfind) + tprintf("Checking for vertical lines\n"); + BlobGridSearch gsearch(this); + gsearch.StartFullSearch(); + BLOBNBOX* blob = NULL; + while ((blob = gsearch.NextFullSearch()) != NULL) { + if (blob->region_type() < BRT_UNKNOWN) + continue; + if (blob->UniquelyVertical()) { + blob->set_region_type(BRT_VERT_TEXT); } } } -// Returns true if the majority of the image is vertical text lines. -bool TabFind::TextMostlyVertical() { - int vertical_boxes = 0; - int horizontal_boxes = 0; - // Count vertical bboxes in the grid. 
- GridSearch gsearch(this); - gsearch.StartFullSearch(); - BLOBNBOX* bbox; - while ((bbox = gsearch.NextFullSearch()) != NULL) { - if (bbox->region_type() == BRT_VERT_TEXT) - ++vertical_boxes; - else - ++horizontal_boxes; +int TabFind::FindMedianGutterWidth(TabVector_LIST *lines) { + TabVector_IT it(lines); + int prev_right = -1; + int max_gap = static_cast(kMaxGutterWidthAbsolute * resolution_); + STATS gaps(0, max_gap); + STATS heights(0, max_gap); + for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { + TabVector* v = it.data(); + TabVector* partner = v->GetSinglePartner(); + if (!v->IsLeftTab() || v->IsSeparator() || !partner) continue; + heights.add(partner->startpt().x() - v->startpt().x(), 1); + if (prev_right > 0 && v->startpt().x() > prev_right) { + gaps.add(v->startpt().x() - prev_right, 1); + } + prev_right = partner->startpt().x(); } - return vertical_boxes > horizontal_boxes; + if (textord_debug_tabfind) + tprintf("TabGutter total %d median_gap %.2f median_hgt %.2f\n", + gaps.get_total(), gaps.median(), heights.median()); + if (gaps.get_total() < kMinLinesInColumn) return 0; + return static_cast(gaps.median()); } // If this box looks like it is on a textline in the given direction, @@ -1343,7 +1363,8 @@ void TabFind::CleanupTabs() { } } -static void RotateBlobList(const FCOORD& rotation, BLOBNBOX_LIST* blobs) { +// Apply the given rotation to the given list of blobs. +void TabFind::RotateBlobList(const FCOORD& rotation, BLOBNBOX_LIST* blobs) { BLOBNBOX_IT it(blobs); for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { it.data()->rotate_box(rotation); @@ -1351,22 +1372,23 @@ static void RotateBlobList(const FCOORD& rotation, BLOBNBOX_LIST* blobs) { } // Recreate the grid with deskewed BLOBNBOXes. 
-void TabFind::Deskew(TabVector_LIST* hlines, BLOBNBOX_LIST* image_blobs, - TO_BLOCK* block, FCOORD* reskew) { - FCOORD deskew; - ComputeDeskewVectors(&deskew, reskew); - RotateBlobList(deskew, image_blobs); - RotateBlobList(deskew, &block->blobs); - RotateBlobList(deskew, &block->small_blobs); - RotateBlobList(deskew, &block->noise_blobs); -#ifdef HAVE_LIBLEPT +// Returns false if the detected skew angle is impossible. +bool TabFind::Deskew(TabVector_LIST* hlines, BLOBNBOX_LIST* image_blobs, + TO_BLOCK* block, FCOORD* deskew, FCOORD* reskew) { + ComputeDeskewVectors(deskew, reskew); + if (deskew->x() < kCosMaxSkewAngle) + return false; + RotateBlobList(*deskew, image_blobs); + RotateBlobList(*deskew, &block->blobs); + RotateBlobList(*deskew, &block->small_blobs); + RotateBlobList(*deskew, &block->noise_blobs); if (textord_debug_images) { // Rotate the debug pix and arrange for it to be drawn at the correct // pixel offset. Pix* pix_grey = pixRead(AlignedBlob::textord_debug_pix().string()); int width = pixGetWidth(pix_grey); int height = pixGetHeight(pix_grey); - float angle = atan2(deskew.y(), deskew.x()); + float angle = atan2(deskew->y(), deskew->x()); // Positive angle is clockwise to pixRotate. Pix* pix_rot = pixRotate(pix_grey, -angle, L_ROTATE_AREA_MAP, L_BRING_IN_WHITE, width, height); @@ -1374,7 +1396,7 @@ void TabFind::Deskew(TabVector_LIST* hlines, BLOBNBOX_LIST* image_blobs, // has just been rotated about its center. ICOORD center_offset(width / 2, height / 2); ICOORD new_center_offset(center_offset); - new_center_offset.rotate(deskew); + new_center_offset.rotate(*deskew); image_origin_ += new_center_offset - center_offset; // The image grew as it was rotated, so offset the (top/left) origin // by half the change in size. 
y is opposite to x because it is drawn @@ -1386,77 +1408,75 @@ void TabFind::Deskew(TabVector_LIST* hlines, BLOBNBOX_LIST* image_blobs, pixDestroy(&pix_grey); pixDestroy(&pix_rot); } -#endif // HAVE_LIBLEPT // Rotate the horizontal vectors. The vertical vectors don't need // rotating as they can just be refitted. TabVector_IT h_it(hlines); for (h_it.mark_cycle_pt(); !h_it.cycled_list(); h_it.forward()) { TabVector* h = h_it.data(); - h->Rotate(deskew); + h->Rotate(*deskew); + } + TabVector_IT d_it(&dead_vectors_); + for (d_it.mark_cycle_pt(); !d_it.cycled_list(); d_it.forward()) { + TabVector* d = d_it.data(); + d->Rotate(*deskew); } SetVerticalSkewAndParellelize(0, 1); // Rebuild the grid to the new size. TBOX grid_box(bleft_, tright_); - grid_box.rotate_large(deskew); + grid_box.rotate_large(*deskew); Init(gridsize(), grid_box.botleft(), grid_box.topright()); + tab_grid_->Init(gridsize(), grid_box.botleft(), grid_box.topright()); InsertBlobList(false, false, false, image_blobs, false, this); InsertBlobList(true, false, false, &block->blobs, false, this); + return true; } -static void ResetBlobList(BLOBNBOX_LIST* blobs) { - BLOBNBOX_IT it(blobs); - for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { - BLOBNBOX* blob = it.data(); - blob->set_left_tab_type(TT_NONE); - blob->set_right_tab_type(TT_NONE); - blob->set_region_type(BRT_UNKNOWN); - } -} - -// Restart everything and rotate the input blobs ready for vertical text. -void TabFind::ResetForVerticalText(TabVector_LIST* hlines, - BLOBNBOX_LIST* image_blobs, - TO_BLOCK* block, FCOORD* rerotate) { - // Rotate anti-clockwise, so vertical CJK text is still in reading order. 
- FCOORD derotate(0.0f, 1.0f); - *rerotate = FCOORD(0.0f, -1.0f); - RotateBlobList(derotate, image_blobs); - RotateBlobList(derotate, &block->blobs); - RotateBlobList(derotate, &block->small_blobs); - RotateBlobList(derotate, &block->noise_blobs); - ResetBlobList(&block->blobs); - +// Flip the vertical and horizontal lines and rotate the grid ready +// for working on the rotated image. +// This also makes parameter adjustments for FindInitialTabVectors(). +void TabFind::ResetForVerticalText(const FCOORD& rotate, const FCOORD& rerotate, + TabVector_LIST* horizontal_lines, + int* min_gutter_width) { // Rotate the horizontal and vertical vectors and swap them over. - // Only the separators are kept, and existing tabs are deleted. - // Note that to retain correct relative orientation, vertical and - // horizontal lines must be rotated in opposite directions! + // Only the separators are kept and rotated; other tabs are used + // to estimate the gutter width then thrown away. TabVector_LIST ex_verticals; TabVector_IT ex_v_it(&ex_verticals); + TabVector_LIST vlines; + TabVector_IT v_it(&vlines); while (!v_it_.empty()) { TabVector* v = v_it_.extract(); if (v->IsSeparator()) { - v->Rotate(*rerotate); + v->Rotate(rotate); ex_v_it.add_after_then_move(v); } else { - delete v; + v_it.add_after_then_move(v); } v_it_.forward(); } - TabVector_IT h_it(hlines); + + // Adjust the min gutter width for better tabbox selection + // in 2nd call to FindInitialTabVectors(). 
+ int median_gutter = FindMedianGutterWidth(&vlines); + if (median_gutter > *min_gutter_width) + *min_gutter_width = median_gutter; + + TabVector_IT h_it(horizontal_lines); for (h_it.mark_cycle_pt(); !h_it.cycled_list(); h_it.forward()) { TabVector* h = h_it.data(); - h->Rotate(derotate); + h->Rotate(rotate); } - v_it_.add_list_after(hlines); + v_it_.add_list_after(horizontal_lines); v_it_.move_to_first(); - h_it.set_to_list(hlines); + h_it.set_to_list(horizontal_lines); h_it.add_list_after(&ex_verticals); // Rebuild the grid to the new size. - TBOX grid_box(bleft_, tright_); - grid_box.rotate_large(derotate); + TBOX grid_box(bleft(), tright()); + grid_box.rotate_large(rotate); Init(gridsize(), grid_box.botleft(), grid_box.topright()); + tab_grid_->Init(gridsize(), grid_box.botleft(), grid_box.topright()); column_widths_.clear(); } @@ -1464,8 +1484,8 @@ void TabFind::ResetForVerticalText(TabVector_LIST* hlines, void TabFind::ComputeDeskewVectors(FCOORD* deskew, FCOORD* reskew) { double length = vertical_skew_ % vertical_skew_; length = sqrt(length); - deskew->set_x(vertical_skew_.y() / length); - deskew->set_y(vertical_skew_.x() / length); + deskew->set_x(static_cast(vertical_skew_.y() / length)); + deskew->set_y(static_cast(vertical_skew_.x() / length)); reskew->set_x(deskew->x()); reskew->set_y(-deskew->y()); } @@ -1512,4 +1532,3 @@ void TabFind::ApplyTabConstraints() { } } // namespace tesseract. 
- diff --git a/textord/tabfind.h b/textord/tabfind.h index 57bbe531c2..f235ae401a 100644 --- a/textord/tabfind.h +++ b/textord/tabfind.h @@ -21,10 +21,19 @@ #define TESSERACT_TEXTORD_TABFIND_H__ #include "alignedblob.h" -#include "callback.h" +#include "tesscallback.h" #include "tabvector.h" #include "linefind.h" +extern BOOL_VAR_H(textord_tabfind_force_vertical_text, false, + "Force using vertical text page mode"); +extern BOOL_VAR_H(textord_tabfind_vertical_horizontal_mix, true, + "find horizontal lines such as headers in vertical page mode"); +extern double_VAR_H(textord_tabfind_vertical_text_ratio, 0.5, + "Fraction of textlines deemed vertical to use vertical page mode"); +extern double_VAR_H(textord_tabfind_aligned_gap_fraction, 0.75, + "Fraction of height used as a minimum gap for aligned blobs."); + class BLOBNBOX; class BLOBNBOX_LIST; class TO_BLOCK; @@ -33,7 +42,7 @@ struct Pix; namespace tesseract { -typedef ResultCallback1 WidthCallback; +typedef TessResultCallback1 WidthCallback; struct AlignedBlobParams; @@ -52,7 +61,8 @@ const int kColumnWidthFactor = 20; class TabFind : public AlignedBlob { public: TabFind(int gridsize, const ICOORD& bleft, const ICOORD& tright, - TabVector_LIST* vlines, int vertical_x, int vertical_y); + TabVector_LIST* vlines, int vertical_x, int vertical_y, + int resolution); virtual ~TabFind(); /** @@ -111,9 +121,26 @@ class TabFind : public AlignedBlob { int LeftEdgeForBox(const TBOX& box, bool crossing, bool extended); /** - * Compute the rotation required to deskew, and its inverse rotation. + * Return the TabVector that corresponds to the right edge for the given + * box. If there is a TabVector to the right that vertically overlaps it, + * then return it, otherwise return NULL. Note that Right and Left refer + * to the position of the TabVector, not its type, ie RightTabForBox + * returns the nearest TabVector to the right of the box, regardless of + * its type. 
+ * If a TabVector crosses right through the box (as opposed to grazing one + * edge or missing entirely), then crossing false will ignore such a line. + * Crossing true will return the line for BOTH left and right edges. + * If extended is true, then TabVectors are considered to extend to their + * extended_start/end_y, otherwise, just the startpt_ and endpt_. + * These functions make use of an internal iterator to the vectors_ list + * for speed when used repeatedly on neighbouring boxes. The caveat is + * that the iterator must be updated whenever the list is modified. */ - void ComputeDeskewVectors(FCOORD* deskew, FCOORD* reskew); + TabVector* RightTabForBox(const TBOX& box, bool crossing, bool extended); + /** + * As RightTabForBox, but finds the left TabVector instead. + */ + TabVector* LeftTabForBox(const TBOX& box, bool crossing, bool extended); /** * Return true if the given width is close to one of the common @@ -125,6 +152,11 @@ class TabFind : public AlignedBlob { * factor of 2 different. */ static bool DifferentSizes(int size1, int size2); + /** + * Return true if the sizes are more than a + * factor of 5 different. + */ + static bool VeryDifferentSizes(int size1, int size2); /** * Return a callback for testing CommonWidth. @@ -144,48 +176,27 @@ class TabFind : public AlignedBlob { /** // Accessors */ - TabVector_LIST* get_vectors() { + TabVector_LIST* vectors() { return &vectors_; } + TabVector_LIST* dead_vectors() { + return &dead_vectors_; + } /** - // Top-level function to find TabVectors in an input page block. + * Top-level function to find TabVectors in an input page block. + * Returns false if the detected skew angle is impossible. 
*/ - void FindTabVectors(int resolution, TabVector_LIST* hlines, + bool FindTabVectors(TabVector_LIST* hlines, BLOBNBOX_LIST* image_blobs, TO_BLOCK* block, - FCOORD* reskew, FCOORD* rerotate); + int min_gutter_width, + FCOORD* deskew, FCOORD* reskew); - /** // Top-level function to not find TabVectors in an input page block, // but setup for single column mode. - */ - void DontFindTabVectors(int resolution, BLOBNBOX_LIST* image_blobs, - TO_BLOCK* block, FCOORD* reskew); - - /** - * Return the TabVector that corresponds to the right edge for the given - * box. If there is a TabVector to the right that vertically overlaps it, - * then return it, otherwise return NULL. Note that Right and Left refer - * to the position of the TabVector, not its type, ie RightTabForBox - * returns the nearest TabVector to the right of the box, regardless of - * its type. - * If a TabVector crosses right through the box (as opposed to grazing one - * edge or missing entirely), then crossing false will ignore such a line. - * Crossing true will return the line for BOTH left and right edges. - * If extended is true, then TabVectors are considered to extend to their - * extended_start/end_y, otherwise, just the startpt_ and endpt_. - * These functions make use of an internal iterator to the vectors_ list - * for speed when used repeatedly on neighbouring boxes. The caveat is - * that the iterator must be updated whenever the list is modified. - */ - TabVector* RightTabForBox(const TBOX& box, bool crossing, bool extended); - /** - * As RightTabForBox, but finds the left TabVector instead. - */ - TabVector* LeftTabForBox(const TBOX& box, bool crossing, bool extended); - /** - * Helper function to setup search limits for *TabForBox. - */ + void DontFindTabVectors(BLOBNBOX_LIST* image_blobs, + TO_BLOCK* block, FCOORD* deskew, FCOORD* reskew); + // Helper function to setup search limits for *TabForBox. 
void SetupTabSearch(int x, int y, int* min_key, int* max_key); /** @@ -193,18 +204,33 @@ class TabFind : public AlignedBlob { */ ScrollView* DisplayTabVectors(ScrollView* tab_win); - private: // First part of FindTabVectors, which may be used twice if the text - // is mostly of vertical alignment. - void FindInitialTabVectors(BLOBNBOX_LIST* image_blobs, TO_BLOCK* block); + // is mostly of vertical alignment. If find_vertical_text flag is + // true, this finds vertical textlines in possibly rotated blob space. + // In other words, when the page has mostly vertical lines and is rotated, + // setting this to true will find horizontal lines on the page. + void FindInitialTabVectors(BLOBNBOX_LIST* image_blobs, TO_BLOCK* block, + int min_gutter_width); + + // Apply the given rotation to the given list of blobs. + static void RotateBlobList(const FCOORD& rotation, BLOBNBOX_LIST* blobs); + + // Flip the vertical and horizontal lines and rotate the grid ready + // for working on the rotated image. + // The min_gutter_width will be adjusted to the median gutter width between + // vertical tabs to set a better threshold for tabboxes in the 2nd pass. + void ResetForVerticalText(const FCOORD& rotate, const FCOORD& rerotate, + TabVector_LIST* horizontal_lines, + int* min_gutter_width); + private: // For each box in the grid, decide whether it is a candidate tab-stop, // and if so add it to the tab_grid_. - ScrollView* FindTabBoxes(); + ScrollView* FindTabBoxes(int min_gutter_width); // Return true if this box looks like a candidate tab stop, and set // the appropriate tab type(s) to TT_UNCONFIRMED. - bool TestBoxForTabs(BLOBNBOX* bbox); + bool TestBoxForTabs(BLOBNBOX* bbox, int min_gutter_width); // Fills the list of TabVector with the tabstops found in the grid, // and estimates the logical vertical direction. @@ -250,8 +276,10 @@ class TabFind : public AlignedBlob { // Mark blobs as being in a vertical text line where that is the case. 
void MarkVerticalText(); - // Returns true if the majority of the image is vertical text lines. - bool TextMostlyVertical(); + // Returns the median gutter width between pairs of matching tab vectors + // assuming they are sorted left-to-right. If there are too few data + // points (< kMinLinesInColumn), then 0 is returned. + int FindMedianGutterWidth(TabVector_LIST* tab_vectors); // If this box looks like it is on a textline in the given direction, // return the width of the textline-like group of blobs, and the number @@ -313,15 +341,13 @@ class TabFind : public AlignedBlob { /** * Deskew the tab vectors and blobs, computing the rotation and resetting * the storked vertical_skew_. The deskew inverse is returned in reskew. + * Returns false if the detected skew angle is impossible. */ - void Deskew(TabVector_LIST* hlines, BLOBNBOX_LIST* image_blobs, - TO_BLOCK* block, FCOORD* reskew); + bool Deskew(TabVector_LIST* hlines, BLOBNBOX_LIST* image_blobs, + TO_BLOCK* block, FCOORD* deskew, FCOORD* reskew); - /** - * Restart everything and rotate the input blobs ready for vertical text. - */ - void ResetForVerticalText(TabVector_LIST* hlines, BLOBNBOX_LIST* image_blobs, - TO_BLOCK* block, FCOORD* rerotate); + // Compute the rotation required to deskew, and its inverse rotation. + void ComputeDeskewVectors(FCOORD* deskew, FCOORD* reskew); /** * Compute and apply constraints to the end positions of TabVectors so @@ -333,7 +359,7 @@ class TabFind : public AlignedBlob { ICOORD vertical_skew_; //< Estimate of true vertical in this image. int resolution_; //< Of source image in pixels per inch. private: - ICOORD image_origin_; // Top-left of image in deskewed coords + ICOORD image_origin_; //< Top-left of image in deskewed coords TabVector_LIST vectors_; //< List of rule line and tabstops. TabVector_IT v_it_; //< Iterator for searching vectors_. TabVector_LIST dead_vectors_; //< Separators and unpartnered tab vectors. 
@@ -347,4 +373,3 @@ class TabFind : public AlignedBlob { } // namespace tesseract. #endif // TESSERACT_TEXTORD_TABFIND_H__ - diff --git a/textord/tablefind.cpp b/textord/tablefind.cpp index 346682d788..552249c5a5 100644 --- a/textord/tablefind.cpp +++ b/textord/tablefind.cpp @@ -21,7 +21,7 @@ #pragma warning(disable:4244) // Conversion warnings #endif -#include "colfind.h" +#include "tablefind.h" #include #ifdef HAVE_CONFIG_H #include "config_auto.h" @@ -29,11 +29,35 @@ #ifdef HAVE_LIBLEPT #include "allheaders.h" #endif +#include "colpartitionset.h" +#include "tablerecog.h" namespace tesseract { -// Maximum vertical spacing between neighbor partitions +// These numbers are used to calculate the global median stats. +// They just set an upper bound on the stats objects. +// Maximum vertical spacing between neighbor partitions. const int kMaxVerticalSpacing = 500; +// Maximum width of a blob in a partition. +const int kMaxBlobWidth = 500; + +// Minimum whitespace size to split a partition (measured as a multiple +// of a partition's median width). +const double kSplitPartitionSize = 2.0; +// To insert text, the partition must satisfy these size constraints +// in AllowTextPartition(). The idea is to filter noise partitions +// determined by the size compared to the global medians. +// TODO(nbeato): Need to find good numbers again. +const double kAllowTextHeight = 0.5; +const double kAllowTextWidth = 0.6; +const double kAllowTextArea = 0.8; +// The same thing applies to blobs (to filter noise). +// TODO(nbeato): These numbers are a shot in the dark... +// height and width are 0.5 * gridsize() in colfind.cpp +// area is a rough guess for the size of a period. +const double kAllowBlobHeight = 0.3; +const double kAllowBlobWidth = 0.4; +const double kAllowBlobArea = 0.05; // Minimum number of components in a text partition. 
A partition having fewer // components than that is more likely a data partition and is a candidate @@ -50,13 +74,17 @@ const double kMaxGapInTextPartition = 4.0; // factor of its median size. const double kMinMaxGapInTextPartition = 0.5; +// The amount of overlap that is "normal" for adjacent blobs in a text +// partition. This is used to calculate gap between overlapping blobs. +const double kMaxBlobOverlapFactor = 4.0; + // Maximum x-height a table partition can have as a multiple of global // median x-height const double kMaxTableCellXheight = 2.0; // Maximum line spacing between a table column header and column contents -// for merging the two -const int kMaxColumnHeaderDistance = 100; +// for merging the two (as a multiple of the partition's median_size). +const int kMaxColumnHeaderDistance = 4; // Minimum ratio of num_table_partitions to num_text_partitions in a column // block to be called it a table column @@ -77,106 +105,197 @@ const int kSideSpaceMargin = 10; // Fraction of the peak of x-projection of a table region to set the // threshold for the x-projection histogram -const double kProjectionThreshold = 0.35; +const double kSmallTableProjectionThreshold = 0.35; +const double kLargeTableProjectionThreshold = 0.45; +// Minimum number of rows required to look for more rows in the projection. +const int kLargeTableRowCount = 6; -// Minmimum number of rows in a table +// Minimum number of rows in a table const int kMinRowsInTable = 3; +// The number of "whitespace blobs" that should appear between the +// ColPartition's bounding box and the column tab stops to the left/right +// when looking for center justified tab stops. +const double kRequiredFullJustifiedSpacing = 4.0; + +// The amount of padding (multiplied by global_median_xheight_ during use) +// that is vertically added to the search adjacent leader search during +// ColPartition marking. +const int kAdjacentLeaderSearchPadding = 2; + +// Used when filtering false positives. 
When finding the last line +// of a paragraph (typically left-aligned), the previous line should have +// its center to the right of the last line by this scaled amount. +const double kParagraphEndingPreviousLineRatio = 1.3; + +// The maximum amount of whitespace allowed left of a paragraph ending. +// Do not filter a ColPartition with more than this space left of it. +const double kMaxParagraphEndingLeftSpaceMultiple = 3.0; + +// Used when filtering false positives. The last line of a paragraph +// should be preceded by a line that is predominantly text. This is the +// ratio of text to whitespace (to the right of the text) that is required +// for the previous line to be a text. +const double kMinParagraphEndingTextToWhitespaceRatio = 3.0; + +// When counting table columns, this is the required gap between two columns +// (it is multiplied by global_median_xheight_). +const double kMaxXProjectionGapFactor = 2.0; + +// Used for similarity in partitions using stroke width. Values copied +// from ColFind.cpp in Ray's CL. +const double kStrokeWidthFractionalTolerance = 0.25; +const double kStrokeWidthConstantTolerance = 2.0; + BOOL_VAR(textord_dump_table_images, false, "Paint table detection output"); BOOL_VAR(textord_show_tables, false, "Show table regions"); +BOOL_VAR(textord_tablefind_show_mark, false, + "Debug table marking steps in detail"); +BOOL_VAR(textord_tablefind_show_stats, false, + "Show page stats used in table finding"); +BOOL_VAR(textord_tablefind_recognize_tables, false, + "Enables the table recognizer for table layout and filtering."); ELISTIZE(ColSegment) CLISTIZE(ColSegment) -// Copy cleaned partitions from part_grid_ to clean_part_grid_ and -// insert dot-like noise into period_grid_ -void ColumnFinder::GetCleanPartitions(TO_BLOCK* block) { - double min_dim = block->line_size/3.0; +// Templated helper function used to create destructor callbacks for the +// BBGrid::ClearGridData() method. 
+template void DeleteObject(T *object) { + delete object; +} + +TableFinder::TableFinder() + : resolution_(0), + global_median_xheight_(0), + global_median_blob_width_(0), + global_median_ledding_(0), + left_to_right_language_(true) { +} + +TableFinder::~TableFinder() { + // ColPartitions and ColSegments created by this class for storage in grids + // need to be deleted explicitly. + clean_part_grid_.ClearGridData(&DeleteObject); + leader_and_ruling_grid_.ClearGridData(&DeleteObject); + fragmented_text_grid_.ClearGridData(&DeleteObject); + col_seg_grid_.ClearGridData(&DeleteObject); + table_grid_.ClearGridData(&DeleteObject); +} + +void TableFinder::set_left_to_right_language(bool order) { + left_to_right_language_ = order; +} + +void TableFinder::Init(int grid_size, const ICOORD& bottom_left, + const ICOORD& top_right) { // Initialize clean partitions list and grid - clean_part_grid_.Init(gridsize(), bleft(), tright()); - period_grid_.Init(gridsize(), bleft(), tright()); + clean_part_grid_.Init(grid_size, bottom_left, top_right); + leader_and_ruling_grid_.Init(grid_size, bottom_left, top_right); + fragmented_text_grid_.Init(grid_size, bottom_left, top_right); + col_seg_grid_.Init(grid_size, bottom_left, top_right); + table_grid_.Init(grid_size, bottom_left, top_right); +} + +// Copy cleaned partitions from part_grid_ to clean_part_grid_ and +// insert leaders and rulers into the leader_and_ruling_grid_ +void TableFinder::InsertCleanPartitions(ColPartitionGrid* grid, + TO_BLOCK* block) { + // Calculate stats. This lets us filter partitions in AllowTextPartition() + // and filter blobs in AllowBlob(). + SetGlobalSpacings(grid); + // Iterate the ColPartitions in the grid. - GridSearch - gsearch(&part_grid_); + ColPartitionGridSearch gsearch(grid); + gsearch.SetUniqueMode(true); gsearch.StartFullSearch(); - ColPartition* part; + ColPartition* part = NULL; while ((part = gsearch.NextFullSearch()) != NULL) { + // Reject partitions with nothing useful inside of them. 
+ if (part->blob_type() == BRT_NOISE || part->bounding_box().area() <= 0) + continue; ColPartition* clean_part = part->ShallowCopy(); + ColPartition* leader_part = NULL; + if (part->IsLineType()) { + InsertRulingPartition(clean_part); + continue; + } // Insert all non-text partitions to clean_parts if (!part->IsTextType()) { - clean_part_grid_.InsertBBox(true, true, clean_part); + InsertImagePartition(clean_part); continue; } // Insert text colpartitions after removing noisy components from them + // The leaders are split into a separate grid. BLOBNBOX_CLIST* part_boxes = part->boxes(); BLOBNBOX_C_IT pit(part_boxes); for (pit.mark_cycle_pt(); !pit.cycled_list(); pit.forward()) { BLOBNBOX *pblob = pit.data(); - if (!pblob->noise_flag()) { - clean_part->AddBox(pblob); - } else { - TBOX blob_box = pblob->bounding_box(); - if (blob_box.height() < min_dim && blob_box.width() < 2*min_dim) { - period_grid_.InsertBBox(false, false, pblob); + // Bad blobs... happens in UNLV set. + // news.3G1, page 17 (around x=6) + if (!AllowBlob(*pblob)) + continue; + if (pblob->flow() == BTFT_LEADER) { + if (leader_part == NULL) { + leader_part = part->ShallowCopy(); + leader_part->set_flow(BTFT_LEADER); } + leader_part->AddBox(pblob); + } else if (pblob->region_type() != BRT_NOISE) { + clean_part->AddBox(pblob); } } - if (!clean_part->IsEmpty()) - clean_part_grid_.InsertBBox(true, true, clean_part); - else - delete clean_part; - } - -// TODO(rays) This is the previous period blob code. Neither is completely -// satisfactory, as a more disciplined approach to noise removal would be -// better, so revisit this choice and decide what to keep when the earlier -// stages do a better job of noise removal. 
-#if 0 - BLOBNBOX_IT sit(&block->small_blobs); - BLOBNBOX_IT nit(&block->noise_blobs); - BLOBNBOX_IT it(&period_blobs_); - // Insert dot sized boxes from small_blobs into period_blobs_ - for (sit.mark_cycle_pt(); !sit.cycled_list(); sit.forward()) { - BLOBNBOX * blob = sit.data(); - TBOX blob_box = blob->bounding_box(); - if (blob_box.height() < min_dim && blob_box.width() < 2*min_dim) { - it.add_after_then_move(sit.extract()); - } - } - // Insert dot sized boxes from noise_blobs into period_blobs_ - for (nit.mark_cycle_pt(); !nit.cycled_list(); nit.forward()) { - BLOBNBOX * blob = nit.data(); - TBOX blob_box = blob->bounding_box(); - if (blob_box.height() < min_dim && blob_box.width() < 2*min_dim) { - it.add_after_then_move(nit.extract()); + clean_part->ComputeLimits(); + ColPartition* fragmented = clean_part->CopyButDontOwnBlobs(); + InsertTextPartition(clean_part); + SplitAndInsertFragmentedTextPartition(fragmented); + if (leader_part != NULL) { + // TODO(nbeato): Note that ComputeLimits does not update the column + // information. So the leader may appear to span more columns than it + // really does later on when IsInSameColumnAs gets called to test + // for adjacent leaders. + leader_part->ComputeLimits(); + InsertLeaderPartition(leader_part); } } - InsertBlobList(false, false, false, &period_blobs_, false, &period_grid_); -#endif + + // Make the partition partners better for upper and lower neighbors. 
+ clean_part_grid_.FindPartitionPartners(); + clean_part_grid_.RefinePartitionPartners(false); } // High level function to perform table detection -void ColumnFinder::LocateTables() { +void TableFinder::LocateTables(ColPartitionGrid* grid, + ColPartitionSet** all_columns, + WidthCallback* width_cb, + const FCOORD& reskew) { + // initialize spacing, neighbors, and columns + InitializePartitions(all_columns); + if (textord_show_tables) { + ScrollView* table_win = MakeWindow(0, 300, "Column Partitions & Neighbors"); + DisplayColPartitions(table_win, &clean_part_grid_, ScrollView::BLUE); + DisplayColPartitions(table_win, &leader_and_ruling_grid_, + ScrollView::AQUAMARINE); + DisplayColPartitionConnections(table_win, &clean_part_grid_, + ScrollView::ORANGE); + + table_win = MakeWindow(100, 300, "Fragmented Text"); + DisplayColPartitions(table_win, &fragmented_text_grid_, ScrollView::BLUE); + } + // mark, filter, and smooth candidate table partitions + MarkTablePartitions(); + // Make single-column blocks from good_columns_ partitions. col_segments are // moved to a grid later which takes the ownership ColSegment_LIST column_blocks; - GetColumnBlocks(&column_blocks); - - SetPartitionSpacings(); - - // Mark ColPartitions as being candidate table partition depending on - // the inter-word spacing - GridMarkTablePartitions(); - FilterFalseAlarms(); - SmoothTablePartitionRuns(); - + GetColumnBlocks(all_columns, &column_blocks); // Set the ratio of candidate table partitions in each column SetColumnsType(&column_blocks); // Move column segments to col_seg_grid_ MoveColSegmentsToGrid(&column_blocks, &col_seg_grid_); - // Detect split in column layout that might have occured due to the + // Detect split in column layout that might have occurred due to the // presence of a table. In such a case, merge the corresponding columns. 
GridMergeColumnBlocks(); @@ -191,6 +310,12 @@ void ColumnFinder::LocateTables() { ColSegment_LIST table_regions; GetTableRegions(&table_columns, &table_regions); + if (textord_tablefind_show_mark) { + ScrollView* table_win = MakeWindow(1200, 300, "Table Columns and Regions"); + DisplayColSegments(table_win, &table_columns, ScrollView::DARK_TURQUOISE); + DisplayColSegments(table_win, &table_regions, ScrollView::YELLOW); + } + // Merge table regions across columns for tables spanning multiple // columns MoveColSegmentsToGrid(&table_regions, &table_grid_); @@ -201,33 +326,208 @@ AdjustTableBoundaries(); GridMergeTableRegions(); - // Remove false alarms consiting of a single column - DeleteSingleColumnTables(); + if (textord_tablefind_recognize_tables) { + // Remove false alarms consisting of a single column + DeleteSingleColumnTables(); - if (textord_show_tables) { - ScrollView* table_win = MakeWindow(1500, 300, "Detected Tables"); - DisplayColPartitions(table_win, ScrollView::BLUE); - DisplayColSegments(&table_columns, table_win, ScrollView::GREEN); - table_grid_.DisplayBoxes(table_win); + if (textord_show_tables) { + ScrollView* table_win = MakeWindow(1200, 300, "Detected Table Locations"); + DisplayColPartitions(table_win, &clean_part_grid_, ScrollView::BLUE); + DisplayColSegments(table_win, &table_columns, ScrollView::KHAKI); + table_grid_.DisplayBoxes(table_win); + } + + // Find table grid structure and reject tables that are malformed. + RecognizeTables(); + GridMergeTableRegions(); + RecognizeTables(); + + if (textord_show_tables) { + ScrollView* table_win = MakeWindow(1400, 600, "Recognized Tables"); + DisplayColPartitions(table_win, &clean_part_grid_, + ScrollView::BLUE, ScrollView::BLUE); + table_grid_.DisplayBoxes(table_win); + } + } else { + // Remove false alarms consisting of a single column + // TODO(nbeato): verify this is a NOP after structured table rejection. + // Right now it isn't. 
If the recognize function is doing what it is + // supposed to do, this function is obsolete. + DeleteSingleColumnTables(); + + if (textord_show_tables) { + ScrollView* table_win = MakeWindow(1500, 300, "Detected Tables"); + DisplayColPartitions(table_win, &clean_part_grid_, + ScrollView::BLUE, ScrollView::BLUE); + table_grid_.DisplayBoxes(table_win); + } } if (textord_dump_table_images) - WriteToPix(); + WriteToPix(reskew); // Merge all colpartitions in table regions to make them a single // colpartition and revert types of isolated table cells not // assigned to any table to their original types. - MakeTableBlocks(); + MakeTableBlocks(grid, all_columns, width_cb); +} +// All grids have the same dimensions. The clean_part_grid_ sizes are set from +// the part_grid_ that is passed to InsertCleanPartitions, which was the same as +// the grid that is the base of ColumnFinder. Just return the clean_part_grid_ +// dimensions instead of duplicated memory. +int TableFinder::gridsize() const { + return clean_part_grid_.gridsize(); +} +int TableFinder::gridwidth() const { + return clean_part_grid_.gridwidth(); +} +int TableFinder::gridheight() const { + return clean_part_grid_.gridheight(); +} +const ICOORD& TableFinder::bleft() const { + return clean_part_grid_.bleft(); +} +const ICOORD& TableFinder::tright() const { + return clean_part_grid_.tright(); +} + +void TableFinder::InsertTextPartition(ColPartition* part) { + ASSERT_HOST(part != NULL); + if (AllowTextPartition(*part)) { + clean_part_grid_.InsertBBox(true, true, part); + } else { + delete part; + } +} +void TableFinder::InsertFragmentedTextPartition(ColPartition* part) { + ASSERT_HOST(part != NULL); + if (AllowTextPartition(*part)) { + fragmented_text_grid_.InsertBBox(true, true, part); + } else { + delete part; + } +} +void TableFinder::InsertLeaderPartition(ColPartition* part) { + ASSERT_HOST(part != NULL); + if (!part->IsEmpty() && part->bounding_box().area() > 0) { + leader_and_ruling_grid_.InsertBBox(true, 
true, part); + } else { + delete part; + } +} +void TableFinder::InsertRulingPartition(ColPartition* part) { + leader_and_ruling_grid_.InsertBBox(true, true, part); +} +void TableFinder::InsertImagePartition(ColPartition* part) { + // NOTE: If images are placed into a different grid in the future, + // the function SetPartitionSpacings needs to be updated. It should + // be the only thing that cares about image partitions. + clean_part_grid_.InsertBBox(true, true, part); +} + +// Splits a partition into its "words". The splits happen +// at locations with wide inter-blob spacing. This is useful +// because it allows the table recognize to "cut through" the +// text lines on the page. The assumption is that a table +// will have several lines with similar overlapping whitespace +// whereas text will not have this type of property. +// Note: The code Assumes that blobs are sorted by the left side x! +// This will not work (as well) if the blobs are sorted by center/right. +void TableFinder::SplitAndInsertFragmentedTextPartition(ColPartition* part) { + ASSERT_HOST(part != NULL); + // Bye bye empty partitions! + if (part->boxes()->empty()) { + delete part; + return; + } + + // The AllowBlob function prevents this. + ASSERT_HOST(part->median_width() > 0); + const double kThreshold = part->median_width() * kSplitPartitionSize; + + ColPartition* right_part = part; + bool found_split = true; + while (found_split) { + found_split = false; + BLOBNBOX_C_IT box_it(right_part->boxes()); + // Blobs are sorted left side first. If blobs overlap, + // the previous blob may have a "more right" right side. + // Account for this by always keeping the largest "right" + // so far. + int previous_right = MIN_INT32; + + // Look for the next split in the partition. 
+ for (box_it.mark_cycle_pt(); !box_it.cycled_list(); box_it.forward()) { + const TBOX& box = box_it.data()->bounding_box(); + if (previous_right != MIN_INT32 && + box.left() - previous_right > kThreshold) { + // We have a split position. Split the partition in two pieces. + // Insert the left piece in the grid and keep processing the right. + int mid_x = (box.left() + previous_right) / 2; + ColPartition* left_part = right_part; + right_part = left_part->SplitAt(mid_x); + + InsertFragmentedTextPartition(left_part); + found_split = true; + break; + } + + // The right side of the previous blobs. + previous_right = MAX(previous_right, box.right()); + } + } + // When a split is not found, the right part is minimized + // as much as possible, so process it. + InsertFragmentedTextPartition(right_part); +} + +// Some simple criteria to filter out now. We want to make sure the +// average blob size in the partition is consistent with the +// global page stats. +// The area metric will almost always pass for multi-blob partitions. +// It is useful when filtering out noise caused by an isolated blob. +bool TableFinder::AllowTextPartition(const ColPartition& part) const { + const double kHeightRequired = global_median_xheight_ * kAllowTextHeight; + const double kWidthRequired = global_median_blob_width_ * kAllowTextWidth; + const int median_area = global_median_xheight_ * global_median_blob_width_; + const double kAreaPerBlobRequired = median_area * kAllowTextArea; + // Keep comparisons strictly greater to disallow 0! + return part.median_size() > kHeightRequired && + part.median_width() > kWidthRequired && + part.bounding_box().area() > kAreaPerBlobRequired * part.boxes_count(); +} + +// Same as above, applied to blobs. Keep in mind that +// leaders, commas, and periods are important in tables. 
+bool TableFinder::AllowBlob(const BLOBNBOX& blob) const { + const TBOX& box = blob.bounding_box(); + const double kHeightRequired = global_median_xheight_ * kAllowBlobHeight; + const double kWidthRequired = global_median_blob_width_ * kAllowBlobWidth; + const int median_area = global_median_xheight_ * global_median_blob_width_; + const double kAreaRequired = median_area * kAllowBlobArea; + // Keep comparisons strictly greater to disallow 0! + return box.height() > kHeightRequired && + box.width() > kWidthRequired && + box.area() > kAreaRequired; +} + +// TODO(nbeato): The grid that makes the window doesn't seem to matter. +// The only downside is that window messages will be caught by +// clean_part_grid_ instead of a useful object. This is a temporary solution +// for the debug windows created by the TableFinder. +ScrollView* TableFinder::MakeWindow(int x, int y, const char* window_name) { + return clean_part_grid_.MakeWindow(x, y, window_name); } // Make single-column blocks from good_columns_ partitions. -void ColumnFinder::GetColumnBlocks(ColSegment_LIST* column_blocks) { - for (int i = 0; i < gridheight_; ++i) { - ColPartitionSet* columns = best_columns_[i]; +void TableFinder::GetColumnBlocks(ColPartitionSet** all_columns, + ColSegment_LIST* column_blocks) { + for (int i = 0; i < gridheight(); ++i) { + ColPartitionSet* columns = all_columns[i]; if (columns != NULL) { ColSegment_LIST new_blocks; // Get boxes from the current vertical position on the grid - columns->GetColumnBoxes(i*gridsize_, (i + 1) * gridsize_, &new_blocks); + columns->GetColumnBoxes(i * gridsize(), (i+1) * gridsize(), &new_blocks); // Merge the new_blocks boxes into column_blocks if they are well-aligned GroupColumnBlocks(&new_blocks, column_blocks); } @@ -235,8 +535,8 @@ void ColumnFinder::GetColumnBlocks(ColSegment_LIST* column_blocks) { } // Merge column segments into the current list if they are well aligned. 
-void ColumnFinder::GroupColumnBlocks(ColSegment_LIST* new_blocks, - ColSegment_LIST* column_blocks) { +void TableFinder::GroupColumnBlocks(ColSegment_LIST* new_blocks, + ColSegment_LIST* column_blocks) { ColSegment_IT src_it(new_blocks); ColSegment_IT dest_it(column_blocks); // iterate through the source list @@ -265,7 +565,7 @@ void ColumnFinder::GroupColumnBlocks(ColSegment_LIST* new_blocks, } // are the two boxes immediate neighbors along the vertical direction -bool ColumnFinder::ConsecutiveBoxes(const TBOX &b1, const TBOX &b2) { +bool TableFinder::ConsecutiveBoxes(const TBOX &b1, const TBOX &b2) { int x_margin = 20; int y_margin = 5; return (abs(b1.left() - b2.left()) < x_margin) && @@ -274,15 +574,23 @@ bool ColumnFinder::ConsecutiveBoxes(const TBOX &b1, const TBOX &b2) { abs(b2.top()-b1.bottom()) < y_margin); } +// Set up info for clean_part_grid_ partitions to be valid during detection +// code. +void TableFinder::InitializePartitions(ColPartitionSet** all_columns) { + FindNeighbors(); + SetPartitionSpacings(&clean_part_grid_, all_columns); + SetGlobalSpacings(&clean_part_grid_); +} + // Set left, right and top, bottom spacings of each colpartition. -void ColumnFinder::SetPartitionSpacings() { +void TableFinder::SetPartitionSpacings(ColPartitionGrid* grid, + ColPartitionSet** all_columns) { // Iterate the ColPartitions in the grid. 
- GridSearch - gsearch(&clean_part_grid_); + ColPartitionGridSearch gsearch(grid); gsearch.StartFullSearch(); - ColPartition* part; + ColPartition* part = NULL; while ((part = gsearch.NextFullSearch()) != NULL) { - ColPartitionSet* columns = best_columns_[gsearch.GridY()]; + ColPartitionSet* columns = all_columns[gsearch.GridY()]; TBOX box = part->bounding_box(); int y = part->MidY(); ColPartition* left_column = columns->ColumnContaining(box.left(), y); @@ -297,13 +605,65 @@ void ColumnFinder::SetPartitionSpacings() { int right_space = MAX(0, right_column->RightAtY(y) - box.right()); part->set_space_to_right(right_space); } - SetVerticalSpacing(part); + + // Look for images that may be closer. + // NOTE: used to be part_grid_, might cause issues now + ColPartitionGridSearch hsearch(grid); + hsearch.StartSideSearch(box.left(), box.bottom(), box.top()); + ColPartition* neighbor = NULL; + while ((neighbor = hsearch.NextSideSearch(true)) != NULL) { + if (neighbor->type() == PT_PULLOUT_IMAGE || + neighbor->type() == PT_FLOWING_IMAGE || + neighbor->type() == PT_HEADING_IMAGE) { + int right = neighbor->bounding_box().right(); + if (right < box.left()) { + int space = MIN(box.left() - right, part->space_to_left()); + part->set_space_to_left(space); + } + } + } + hsearch.StartSideSearch(box.left(), box.bottom(), box.top()); + neighbor = NULL; + while ((neighbor = hsearch.NextSideSearch(false)) != NULL) { + if (neighbor->type() == PT_PULLOUT_IMAGE || + neighbor->type() == PT_FLOWING_IMAGE || + neighbor->type() == PT_HEADING_IMAGE) { + int left = neighbor->bounding_box().left(); + if (left > box.right()) { + int space = MIN(left - box.right(), part->space_to_right()); + part->set_space_to_right(space); + } + } + } + + ColPartition* upper_part = part->SingletonPartner(true); + if (upper_part) { + int space = MAX(0, upper_part->bounding_box().bottom() - + part->bounding_box().bottom()); + part->set_space_above(space); + } else { + // TODO(nbeato): What constitutes a good value? 
+ // 0 is the default value when not set, explicitly noting it needs to + // be something else. + part->set_space_above(MAX_INT32); + } + + ColPartition* lower_part = part->SingletonPartner(false); + if (lower_part) { + int space = MAX(0, part->bounding_box().bottom() - + lower_part->bounding_box().bottom()); + part->set_space_below(space); + } else { + // TODO(nbeato): What constitutes a good value? + // 0 is the default value when not set, explicitly noting it needs to + // be something else. + part->set_space_below(MAX_INT32); + } } - SetGlobalSpacings(); } // Set spacing and closest neighbors above and below a given colpartition. -void ColumnFinder::SetVerticalSpacing(ColPartition* part) { +void TableFinder::SetVerticalSpacing(ColPartition* part) { TBOX box = part->bounding_box(); int top_range = MIN(box.top() + kMaxVerticalSpacing, tright().y()); int bottom_range = MAX(box.bottom() - kMaxVerticalSpacing, bleft().y()); @@ -346,44 +706,128 @@ void ColumnFinder::SetVerticalSpacing(ColPartition* part) { } // Set global spacing and x-height estimates -void ColumnFinder::SetGlobalSpacings() { +void TableFinder::SetGlobalSpacings(ColPartitionGrid* grid) { STATS xheight_stats(0, kMaxVerticalSpacing + 1); + STATS width_stats(0, kMaxBlobWidth + 1); STATS ledding_stats(0, kMaxVerticalSpacing + 1); // Iterate the ColPartitions in the grid. - GridSearch - gsearch(&clean_part_grid_); + ColPartitionGridSearch gsearch(grid); + gsearch.SetUniqueMode(true); gsearch.StartFullSearch(); - ColPartition* part; + ColPartition* part = NULL; while ((part = gsearch.NextFullSearch()) != NULL) { + // TODO(nbeato): HACK HACK HACK! medians are equal to partition length. + // ComputeLimits needs to get called somewhere outside of TableFinder + // to make sure the partitions are properly initialized. + // When this is called, SmoothPartitionPartners dies in an assert after + // table find runs. Alternative solution. 
+ // part->ComputeLimits(); if (part->IsTextType()) { - xheight_stats.add(part->median_size(), 1); + // xheight_stats.add(part->median_size(), part->boxes_count()); + // width_stats.add(part->median_width(), part->boxes_count()); + + // This loop can be removed when above issues are fixed. + // Replace it with the 2 lines commented out above. + BLOBNBOX_C_IT it(part->boxes()); + for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { + xheight_stats.add(it.data()->bounding_box().height(), 1); + width_stats.add(it.data()->bounding_box().width(), 1); + } + ledding_stats.add(part->space_above(), 1); ledding_stats.add(part->space_below(), 1); } } // Set estimates based on median of statistics obtained - global_median_xheight_ = static_cast(xheight_stats.median() + 0.5); - global_median_ledding_ = static_cast(ledding_stats.median() + 0.5); - if (textord_show_tables) { - ScrollView* stats_win = MakeWindow(500, 10, - "X-height and ledding histograms"); - xheight_stats.plot(stats_win, 10, 200, 2, 15, ScrollView::GREEN); - ledding_stats.plot(stats_win, 10, 200, 2, 15, ScrollView::RED); + set_global_median_xheight(static_cast(xheight_stats.median() + 0.5)); + set_global_median_blob_width(static_cast(width_stats.median() + 0.5)); + set_global_median_ledding(static_cast(ledding_stats.median() + 0.5)); + if (textord_tablefind_show_stats) { + const char* kWindowName = "X-height (R), X-width (G), and ledding (B)"; + ScrollView* stats_win = MakeWindow(500, 10, kWindowName); + xheight_stats.plot(stats_win, 10, 200, 2, 15, ScrollView::RED); + width_stats.plot(stats_win, 10, 200, 2, 15, ScrollView::GREEN); + ledding_stats.plot(stats_win, 10, 200, 2, 15, ScrollView::BLUE); + } +} + +void TableFinder::set_global_median_xheight(int xheight) { + global_median_xheight_ = xheight; +} +void TableFinder::set_global_median_blob_width(int width) { + global_median_blob_width_ = width; +} +void TableFinder::set_global_median_ledding(int ledding) { + global_median_ledding_ = ledding; +} + 
+void TableFinder::FindNeighbors() { + ColPartitionGridSearch gsearch(&clean_part_grid_); + gsearch.StartFullSearch(); + ColPartition* part = NULL; + while ((part = gsearch.NextFullSearch()) != NULL) { + // TODO(nbeato): Rename this function, meaning is different now. + // IT is finding nearest neighbors its own way + //SetVerticalSpacing(part); + + ColPartition* upper = part->SingletonPartner(true); + if (upper) + part->set_nearest_neighbor_above(upper); + + ColPartition* lower = part->SingletonPartner(false); + if (lower) + part->set_nearest_neighbor_below(lower); } } -// Three types of partitions are maked as table partitions: +// High level interface. Input is an unmarked ColPartitionGrid +// (namely, clean_part_grid_). Partitions are identified using local +// information and filter/smoothed. The function exit should contain +// a good sampling of the table partitions. +void TableFinder::MarkTablePartitions() { + MarkPartitionsUsingLocalInformation(); + if (textord_tablefind_show_mark) { + ScrollView* table_win = MakeWindow(300, 300, "Initial Table Partitions"); + DisplayColPartitions(table_win, &clean_part_grid_, ScrollView::BLUE); + DisplayColPartitions(table_win, &leader_and_ruling_grid_, + ScrollView::AQUAMARINE); + } + FilterFalseAlarms(); + if (textord_tablefind_show_mark) { + ScrollView* table_win = MakeWindow(600, 300, "Filtered Table Partitions"); + DisplayColPartitions(table_win, &clean_part_grid_, ScrollView::BLUE); + DisplayColPartitions(table_win, &leader_and_ruling_grid_, + ScrollView::AQUAMARINE); + } + SmoothTablePartitionRuns(); + if (textord_tablefind_show_mark) { + ScrollView* table_win = MakeWindow(900, 300, "Smoothed Table Partitions"); + DisplayColPartitions(table_win, &clean_part_grid_, ScrollView::BLUE); + DisplayColPartitions(table_win, &leader_and_ruling_grid_, + ScrollView::AQUAMARINE); + } + FilterFalseAlarms(); + if (textord_tablefind_show_mark || textord_show_tables) { + ScrollView* table_win = MakeWindow(900, 300, "Final Table 
Partitions"); + DisplayColPartitions(table_win, &clean_part_grid_, ScrollView::BLUE); + DisplayColPartitions(table_win, &leader_and_ruling_grid_, + ScrollView::AQUAMARINE); + } +} + +// These types of partitions are marked as table partitions: // 1- Partitions that have at lease one large gap between words // 2- Partitions that consist of only one word (no significant gap // between components) // 3- Partitions that vertically overlap with other partitions within the // same column. -void ColumnFinder::GridMarkTablePartitions() { +// 4- Partitions with leaders before/after them. +void TableFinder::MarkPartitionsUsingLocalInformation() { // Iterate the ColPartitions in the grid. GridSearch gsearch(&clean_part_grid_); gsearch.StartFullSearch(); - ColPartition* part; + ColPartition* part = NULL; while ((part = gsearch.NextFullSearch()) != NULL) { if (!part->IsTextType()) // Only consider text partitions continue; @@ -393,66 +837,78 @@ void ColumnFinder::GridMarkTablePartitions() { // Mark partitions with a large gap, or no significant gap as // table partitions. // Comments: It produces several false alarms at: - // - last line of a paragraph + // - last line of a paragraph (fixed) // - single word section headings // - page headers and footers // - numbered equations // - line drawing regions // TODO(faisal): detect and fix above-mentioned cases - if (HasWideOrNoInterWordGap(part)) { + if (HasWideOrNoInterWordGap(part) || + HasLeaderAdjacent(*part)) { part->set_table_type(); } } } -// Check if the partition has at lease one large gap between words or no +// Check if the partition has at least one large gap between words or no // significant gap at all -bool ColumnFinder::HasWideOrNoInterWordGap(ColPartition* part) { +bool TableFinder::HasWideOrNoInterWordGap(ColPartition* part) const { + // Should only get text partitions. 
+ ASSERT_HOST(part->IsTextType()); + // Blob access BLOBNBOX_CLIST* part_boxes = part->boxes(); - BLOBNBOX_C_IT pit(part_boxes); - + BLOBNBOX_C_IT it(part_boxes); + // Check if this is a relatively small partition (such as a single word) if (part->bounding_box().width() < kMinBoxesInTextPartition * part->median_size() && - pit.length() < kMinBoxesInTextPartition) + part_boxes->length() < kMinBoxesInTextPartition) return true; - // Make a copy of the components in the current partition and insert periods - // into it to compute gaps while taking periods into account. - BLOBNBOX_CLIST boxes; - BLOBNBOX_C_IT it(&boxes); - for (pit.mark_cycle_pt(); !pit.cycled_list(); pit.forward()) { - BLOBNBOX *pblob = pit.data(); - it.add_after_then_move(pblob); - } - // Start rectangular search to find periods in this partition - GridSearch rectsearch(&period_grid_); - TBOX part_box = part->bounding_box(); - rectsearch.StartRectSearch(part_box); - BLOBNBOX* period; - while ((period = rectsearch.NextRectSearch()) != NULL) { - // Insert a period only if it lies in a gap between two consecutive boxes - if (LiesInGap(period, &boxes)) - boxes.add_sorted(SortByBoxLeft, true, period); - } - - int current_x0; - int current_x1; + // Variables used to compute inter-blob spacing. + int current_x0 = -1; + int current_x1 = -1; int previous_x1 = -1; - int max_partition_gap = -1; - double max_gap = kMaxGapInTextPartition * part->median_size(); - double min_gap = kMinMaxGapInTextPartition * part->median_size(); + // Stores the maximum gap detected. + int largest_partition_gap_found = -1; + // Text partition gap limits. If this is text (and not a table), + // there should be at least one gap larger than min_gap and no gap + // larger than max_gap. 
+ const double max_gap = kMaxGapInTextPartition * part->median_size(); + const double min_gap = kMinMaxGapInTextPartition * part->median_size(); for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { - BLOBNBOX *blob = it.data(); + BLOBNBOX* blob = it.data(); current_x0 = blob->bounding_box().left(); current_x1 = blob->bounding_box().right(); if (previous_x1 != -1) { int gap = current_x0 - previous_x1; + + // TODO(nbeato): Boxes may overlap? Huh? + // For example, mag.3B 8003_033.3B.tif in UNLV data. The titles/authors + // on the top right of the page are filtered out with this line. + // Note 2: Iterating over blobs in a partition, so we are looking for + // spacing between the words. + if (gap < 0) { + // More likely case, the blobs slightly overlap. This can happen + // with diacritics (accents) or broken alphabet symbols (characters). + // Merge boxes together by taking max of right sides. + if (-gap < part->median_size() * kMaxBlobOverlapFactor) { + previous_x1 = MAX(previous_x1, current_x1); + continue; + } + // Extreme case, blobs overlap significantly in the same partition... + // This should not happen often (if at all), but it does. + // TODO(nbeato): investigate cases when this happens. + else { + // The behavior before was to completely ignore this case. + } + } + // If a large enough gap is found, mark it as a table cell (return true) if (gap > max_gap) return true; - if (gap > max_partition_gap) - max_partition_gap = gap; + if (gap > largest_partition_gap_found) + largest_partition_gap_found = gap; } previous_x1 = current_x1; } @@ -460,43 +916,85 @@ bool ColumnFinder::HasWideOrNoInterWordGap(ColPartition* part) { // long to be a data cell if (part->bounding_box().width() > kMaxBoxesInDataPartition * part->median_size() || - pit.length() > kMaxBoxesInDataPartition) + part_boxes->length() > kMaxBoxesInDataPartition) return false; + // A partition may be a single blob. 
In this case, it's an isolated symbol
+ // or non-text (such as a ruling or image).
+ // Detect these as table partitions? Shouldn't this be case by case?
+ // The behavior before was to ignore this, making max_partition_gap < 0
+ // and implicitly return true. Just making it explicit.
+ if (largest_partition_gap_found == -1)
+ return true;
+ // return true if the maximum gap found is smaller than the minimum allowed
- // max_gap in a text partition
- return (max_partition_gap < min_gap);
-}
-
-// Check if the period lies in a gap between consecutive boxes
-bool ColumnFinder::LiesInGap(BLOBNBOX* period, BLOBNBOX_CLIST* boxes) {
- BLOBNBOX_C_IT it(boxes);
- TBOX period_box = period->bounding_box();
- int num_boxes = it.length();
- // skip the first element since it has no gap to its left.
- it.forward();
- for (int i = 1; i < num_boxes; i++) {
- TBOX box = it.data()->bounding_box();
- TBOX previous_blob = it.data_relative(-1)->bounding_box();
- TBOX gap_box = TBOX(previous_blob.botright(), box.topleft());
- if (gap_box.major_overlap(period_box)) {
+ // max_gap in a text partition. This indicates that there is no significant
+ // space in the partition, hence it is likely a single word.
+ return largest_partition_gap_found < min_gap;
+}
+
+// A criterion for possible tables is that a table may have leaders
+// between data cells. An aggressive solution to find such tables is to
+// explicitly mark partitions that have adjacent leaders.
+// Note that this includes overlapping leaders. However, it does not
+// include leaders in different columns on the page.
+// Possible false-positives will include lists, such as a table of contents.
+// As these arise, the aggressive nature of this search may need to be
+// trimmed down.
+bool TableFinder::HasLeaderAdjacent(const ColPartition& part) {
+ if (part.flow() == BTFT_LEADER)
+ return true;
+ // Search range is left and right bounded by an offset of the
+ // median xheight. 
This offset is to allow some tolerance to the
+ // leaders on the page in the event that the alignment is still
+ // a bit off.
+ const TBOX& box = part.bounding_box();
+ const int search_size = kAdjacentLeaderSearchPadding * global_median_xheight_;
+ const int top = box.top() + search_size;
+ const int bottom = box.bottom() - search_size;
+ ColPartitionGridSearch hsearch(&leader_and_ruling_grid_);
+ for (int direction = 0; direction < 2; ++direction) {
+ bool right_to_left = (direction == 0);
+ int x = right_to_left ? box.right() : box.left();
+ hsearch.StartSideSearch(x, bottom, top);
+ ColPartition* leader = NULL;
+ while ((leader = hsearch.NextSideSearch(right_to_left)) != NULL) {
+ // This should not happen, they are in different grids.
+ ASSERT_HOST(&part != leader);
+ // The leader could be a horizontal ruling in the grid.
+ // Make sure it is actually a leader.
+ if (leader->flow() != BTFT_LEADER)
+ continue;
+ // Make sure the leader shares a page column with the partition,
+ // otherwise we are spreading across columns.
+ if (!part.IsInSameColumnAs(*leader))
+ break;
+ // There should be a significant vertical overlap
+ if (!leader->VOverlaps(part))
+ continue;
+ // Leader passed all tests, so it is adjacent.
 return true;
 }
- it.forward();
 }
+ // No leaders are adjacent to the given partition.
 return false;
 }
 
 // Filter individual text partitions marked as table partitions
 // consisting of paragraph endings, small section headings, and
 // headers and footers.
-void ColumnFinder::FilterFalseAlarms() {
+void TableFinder::FilterFalseAlarms() {
+ FilterParagraphEndings();
+ FilterHeaderAndFooter();
+ // TODO(nbeato): Fully justified text as non-table?
+}
+
+void TableFinder::FilterParagraphEndings() {
+ // Detect last line of paragraph
+ // Iterate the ColPartitions in the grid. 
- GridSearch - gsearch(&clean_part_grid_); + ColPartitionGridSearch gsearch(&clean_part_grid_); gsearch.StartFullSearch(); - ColPartition* part; + ColPartition* part = NULL; while ((part = gsearch.NextFullSearch()) != NULL) { if (part->type() != PT_TABLE) continue; // Consider only table partitions @@ -510,21 +1008,76 @@ void ColumnFinder::FilterFalseAlarms() { if (upper_part->bounding_box().width() < 2 * part->bounding_box().width()) continue; - // Paragraph ending should be left-aligned to text line above it. - if (abs(part->bounding_box().left() - upper_part->bounding_box().left()) - > global_median_xheight_) + // Check if its the last line of a paragraph. + // In most cases, a paragraph ending should be left-aligned to text line + // above it. Sometimes, it could be a 2 line paragraph, in which case + // the line above it is indented. + // To account for that, check if the partition center is to + // the left of the one above it. + int mid = (part->bounding_box().left() + part->bounding_box().right()) / 2; + int upper_mid = (upper_part->bounding_box().left() + + upper_part->bounding_box().right()) / 2; + int current_spacing = 0; // spacing of the current line to margin + int upper_spacing = 0; // spacing of the previous line to the margin + if (left_to_right_language_) { + // Left to right languages, use mid - left to figure out the distance + // the middle is from the left margin. + int left = MIN(part->bounding_box().left(), + upper_part->bounding_box().left()); + current_spacing = mid - left; + upper_spacing = upper_mid - left; + } else { + // Right to left languages, use right - mid to figure out the distance + // the middle is from the right margin. + int right = MAX(part->bounding_box().right(), + upper_part->bounding_box().right()); + current_spacing = right - mid; + upper_spacing = right - upper_mid; + } + if (current_spacing * kParagraphEndingPreviousLineRatio > upper_spacing) + continue; + + // Paragraphs should have similar fonts. 
+ if (!part->MatchingSizes(*upper_part) || + !part->MatchingStrokeWidth(*upper_part, kStrokeWidthFractionalTolerance, + kStrokeWidthConstantTolerance)) { + continue; + } + + // The last line of a paragraph should be left aligned. + // TODO(nbeato): This would be untrue if the text was right aligned. + // How often is that? + if (part->space_to_left() > + kMaxParagraphEndingLeftSpaceMultiple * part->median_size()) + continue; + // The line above it should be right aligned (assuming justified format). + // Since we can't assume justified text, we compare whitespace to text. + // The above line should have majority spanning text (or the current + // line could have fit on the previous line). So compare + // whitespace to text. + if (upper_part->bounding_box().width() < + kMinParagraphEndingTextToWhitespaceRatio * upper_part->space_to_right()) continue; + // Ledding above the line should be less than ledding below - if (part->space_above() < part->space_below() && - part->space_above() <= 2 * global_median_ledding_) - part->clear_table_type(); + if (part->space_above() >= part->space_below() || + part->space_above() > 2 * global_median_ledding_) + continue; + + // If all checks failed, it is probably text. + part->clear_table_type(); } +} + +void TableFinder::FilterHeaderAndFooter() { // Consider top-most text colpartition as header and bottom most as footer ColPartition* header = NULL; ColPartition* footer = NULL; - int max_top = -MAX_INT32; + int max_top = MIN_INT32; int min_bottom = MAX_INT32; + ColPartitionGridSearch gsearch(&clean_part_grid_); gsearch.StartFullSearch(); + ColPartition* part = NULL; while ((part = gsearch.NextFullSearch()) != NULL) { if (!part->IsTextType()) continue; // Consider only text partitions @@ -550,14 +1103,13 @@ void ColumnFinder::FilterFalseAlarms() { // TODO(faisal): This is too aggressive at the moment. The method needs to // consider spacing and alignment as well. 
Detection of false alarm table cells // should also be done as part of it. -void ColumnFinder::SmoothTablePartitionRuns() { +void TableFinder::SmoothTablePartitionRuns() { // Iterate the ColPartitions in the grid. - GridSearch - gsearch(&clean_part_grid_); + ColPartitionGridSearch gsearch(&clean_part_grid_); gsearch.StartFullSearch(); - ColPartition* part; + ColPartition* part = NULL; while ((part = gsearch.NextFullSearch()) != NULL) { - if (part->type() >= PT_TABLE) + if (part->type() >= PT_TABLE || part->type() == PT_UNKNOWN) continue; // Consider only text partitions ColPartition* upper_part = part->nearest_neighbor_above(); ColPartition* lower_part = part->nearest_neighbor_below(); @@ -566,10 +1118,27 @@ void ColumnFinder::SmoothTablePartitionRuns() { if (upper_part->type() == PT_TABLE && lower_part->type() == PT_TABLE) part->set_table_type(); } + + // Pass 2, do the opposite. If both the upper and lower neighbors + // exist and are not tables, this probably shouldn't be a table. + gsearch.StartFullSearch(); + part = NULL; + while ((part = gsearch.NextFullSearch()) != NULL) { + if (part->type() != PT_TABLE) + continue; // Consider only text partitions + ColPartition* upper_part = part->nearest_neighbor_above(); + ColPartition* lower_part = part->nearest_neighbor_below(); + + // table can't be by itself + if ((upper_part && upper_part->type() != PT_TABLE) && + (lower_part && lower_part->type() != PT_TABLE)) { + part->clear_table_type(); + } + } } // Set the type of a column segment based on the ratio of table to text cells -void ColumnFinder::SetColumnsType(ColSegment_LIST* column_blocks) { +void TableFinder::SetColumnsType(ColSegment_LIST* column_blocks) { ColSegment_IT it(column_blocks); for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { ColSegment* seg = it.data(); @@ -578,11 +1147,10 @@ void ColumnFinder::SetColumnsType(ColSegment_LIST* column_blocks) { int num_text_cells = 0; GridSearch rsearch(&clean_part_grid_); + rsearch.SetUniqueMode(true); 
rsearch.StartRectSearch(box); - ColPartition* part; + ColPartition* part = NULL; while ((part = rsearch.NextRectSearch()) != NULL) { - if (!rsearch.ReturnedSeedElement()) - continue; // Consider each partition only once if (part->type() == PT_TABLE) { num_table_cells++; } else if (part->type() == PT_FLOWING_TEXT) { @@ -603,9 +1171,8 @@ void ColumnFinder::SetColumnsType(ColSegment_LIST* column_blocks) { } // Move column blocks to grid -void ColumnFinder::MoveColSegmentsToGrid(ColSegment_LIST *segments, +void TableFinder::MoveColSegmentsToGrid(ColSegment_LIST *segments, ColSegmentGrid *col_seg_grid) { - col_seg_grid->Init(gridsize(), bleft(), tright()); ColSegment_IT it(segments); for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { ColSegment* seg = it.extract(); @@ -623,8 +1190,8 @@ void ColumnFinder::MoveColSegmentsToGrid(ColSegment_LIST *segments, // Comment: This method does not handle split due to a full page table // since table columns in this case do not have a text column on which // split decision can be based. -void ColumnFinder::GridMergeColumnBlocks() { - int margin = gridsize_; +void TableFinder::GridMergeColumnBlocks() { + int margin = gridsize(); // Iterate the Column Blocks in the grid. 
GridSearch @@ -649,11 +1216,11 @@ void ColumnFinder::GridMergeColumnBlocks() { GridSearch rectsearch(&col_seg_grid_); rectsearch.StartRectSearch(box); - ColSegment* neighbor; + ColSegment* neighbor = NULL; while ((neighbor = rectsearch.NextRectSearch()) != NULL) { if (neighbor == seg) continue; - TBOX neighbor_box = neighbor->bounding_box(); + const TBOX& neighbor_box = neighbor->bounding_box(); // If the neighbor box significantly overlaps with the current // box (due to the expansion of the current box in the // previous iteration of this loop), remove the neighbor box @@ -701,7 +1268,7 @@ void ColumnFinder::GridMergeColumnBlocks() { // column below/above it // 4- cells from two vertically adjacent tables merge together to make a // single column resulting in merging of the two tables -void ColumnFinder::GetTableColumns(ColSegment_LIST *table_columns) { +void TableFinder::GetTableColumns(ColSegment_LIST *table_columns) { ColSegment_IT it(table_columns); // Iterate the ColPartitions in the grid. GridSearch @@ -711,31 +1278,31 @@ void ColumnFinder::GetTableColumns(ColSegment_LIST *table_columns) { while ((part = gsearch.NextFullSearch()) != NULL) { if (part->inside_table_column() || part->type() != PT_TABLE) continue; // prevent a partition to be assigned to multiple columns - TBOX box = part->bounding_box(); + const TBOX& box = part->bounding_box(); ColSegment* col = new ColSegment(); col->InsertBox(box); part->set_inside_table_column(true); // Start a search below the current cell to find bottom neighbours + // Note: a full search will always process things above it first, so + // this should be starting at the highest cell and working its way down. 
GridSearch vsearch(&clean_part_grid_); vsearch.StartVerticalSearch(box.left(), box.right(), box.bottom()); - ColPartition* neighbor; + ColPartition* neighbor = NULL; bool found_neighbours = false; while ((neighbor = vsearch.NextVerticalSearch(true)) != NULL) { // only consider neighbors not assigned to any column yet if (neighbor->inside_table_column()) continue; - + // Horizontal lines should not break the flow + if (neighbor->IsHorizontalLine()) + continue; // presence of a non-table neighbor marks the end of current // table column - if (neighbor->type() != PT_TABLE) { - // Horizontal lines should not break the flow - if (neighbor->IsLineType()) - continue; - else - break; - } - TBOX neighbor_box = neighbor->bounding_box(); + if (neighbor->type() != PT_TABLE) + break; + // add the neighbor partition to the table column + const TBOX& neighbor_box = neighbor->bounding_box(); col->InsertBox(neighbor_box); neighbor->set_inside_table_column(true); found_neighbours = true; @@ -751,8 +1318,8 @@ void ColumnFinder::GetTableColumns(ColSegment_LIST *table_columns) { // Mark regions in a column that are x-bounded by the column boundaries and // y-bounded by the table columns' projection on the y-axis as table regions -void ColumnFinder::GetTableRegions(ColSegment_LIST* table_columns, - ColSegment_LIST* table_regions) { +void TableFinder::GetTableRegions(ColSegment_LIST* table_columns, + ColSegment_LIST* table_regions) { ColSegment_IT cit(table_columns); ColSegment_IT rit(table_regions); // Iterate through column blocks @@ -793,6 +1360,7 @@ void ColumnFinder::GetTableRegions(ColSegment_LIST* table_columns, if (!table_region[i - 1] && table_region[i]) { current_table_box.set_bottom(i + bleft().y()); } + // TODO(nbeato): Is it guaranteed that the last row is not a table region? 
// detect end of a table region if (table_region[i - 1] && !table_region[i]) { current_table_box.set_top(i + bleft().y()); @@ -813,18 +1381,18 @@ void ColumnFinder::GetTableRegions(ColSegment_LIST* table_columns, // TODO(faisal): A rare error occurs if there are two horizontally adjacent // tables with aligned ruling lines. In this case, line finder returns a // single line and hence the tables get merged together -void ColumnFinder::GridMergeTableRegions() { +void TableFinder::GridMergeTableRegions() { // Iterate the table regions in the grid. GridSearch gsearch(&table_grid_); gsearch.StartFullSearch(); - ColSegment* seg; + ColSegment* seg = NULL; while ((seg = gsearch.NextFullSearch()) != NULL) { bool neighbor_found = false; bool modified = false; // Modified at least once do { // Start a rectangle search x-bounded by the image and y by the table - TBOX box = seg->bounding_box(); + const TBOX& box = seg->bounding_box(); TBOX search_region(box); search_region.set_left(bleft().x()); search_region.set_right(tright().x()); @@ -832,11 +1400,11 @@ void ColumnFinder::GridMergeTableRegions() { GridSearch rectsearch(&table_grid_); rectsearch.StartRectSearch(search_region); - ColSegment* neighbor; + ColSegment* neighbor = NULL; while ((neighbor = rectsearch.NextRectSearch()) != NULL) { if (neighbor == seg) continue; - TBOX neighbor_box = neighbor->bounding_box(); + const TBOX& neighbor_box = neighbor->bounding_box(); // Check if a neighbor box has a large overlap with the table // region. This may happen as a result of merging two table // regions in the previous iteration. 
@@ -854,6 +1422,9 @@ void ColumnFinder::GridMergeTableRegions() { seg->InsertBox(neighbor_box); neighbor_found = true; modified = true; + rectsearch.RemoveBBox(); + gsearch.RepositionIterator(); + delete neighbor; } } } while (neighbor_found); @@ -868,16 +1439,21 @@ void ColumnFinder::GridMergeTableRegions() { // Decide if two table regions belong to one table based on a common // horizontal ruling line or another colpartition -bool ColumnFinder::BelongToOneTable(const TBOX &box1, const TBOX &box2) { +bool TableFinder::BelongToOneTable(const TBOX &box1, const TBOX &box2) { + // Check the obvious case. Most likely not true because overlapping boxes + // should already be merged, but seems like a good thing to do in case things + // change. + if (box1.overlap(box2)) + return true; // Check for ColPartitions spanning both table regions TBOX bbox = box1.bounding_union(box2); // Start a rect search on bbox GridSearch rectsearch(&clean_part_grid_); rectsearch.StartRectSearch(bbox); - ColPartition* part; + ColPartition* part = NULL; while ((part = rectsearch.NextRectSearch()) != NULL) { - TBOX part_box = part->bounding_box(); + const TBOX& part_box = part->bounding_box(); // return true if a colpartition spanning both table regions is found if (part_box.overlap(box1) && part_box.overlap(box2)) return true; @@ -904,60 +1480,31 @@ bool ColumnFinder::BelongToOneTable(const TBOX &box1, const TBOX &box2) { // 4- the criteria for including left out column headers also tends to // occasionally include text-lines above the tables, typically from // table caption -void ColumnFinder::AdjustTableBoundaries() { +void TableFinder::AdjustTableBoundaries() { // Iterate the table regions in the grid ColSegment_CLIST adjusted_tables; ColSegment_C_IT it(&adjusted_tables); - GridSearch - gsearch(&table_grid_); + ColSegmentGridSearch gsearch(&table_grid_); gsearch.StartFullSearch(); - ColSegment* table; - // search for horizontal ruling lines within the vertical margin - int vertical_margin 
= kRulingVerticalMargin * gridsize_; + ColSegment* table = NULL; while ((table = gsearch.NextFullSearch()) != NULL) { - TBOX table_box = table->bounding_box(); - TBOX search_box = table_box; - int top = MIN(search_box.top() + vertical_margin, tright().y()); - int bottom = MAX(search_box.bottom() - vertical_margin, bleft().y()); - search_box.set_top(top); - search_box.set_bottom(bottom); - TBOX box; - // Start a rect search on table_box - GridSearch - rectsearch(&clean_part_grid_); - rectsearch.StartRectSearch(search_box); - ColPartition* part; - while ((part = rectsearch.NextRectSearch()) != NULL) { - // Do not consider image partitions - if (part->IsImageType()) - continue; - TBOX part_box = part->bounding_box(); - // Include partition in the table if more than half of it - // is covered by the table - if (part_box.overlap_fraction(table_box) > kMinOverlapWithTable) { - box = box.bounding_union(part_box); - continue; - } - // Include a partially overlapping horizontal line only if the - // extra ColPartitions that will be included due to expansion - // have large side spacing w.r.t. columns containing them. - if (HLineBelongsToTable(part, table_box)) { - box = box.bounding_union(part_box); - } - } - IncludeLeftOutColumnHeaders(box); + const TBOX& table_box = table->bounding_box(); + TBOX grown_box = table_box; + GrowTableBox(table_box, &grown_box); // To prevent a table from expanding again, do not insert the // modified box back to the grid. Instead move it to a list and // and remove it from the grid. The list is moved later back to the grid. - if (!box.null_box()) { + if (!grown_box.null_box()) { ColSegment* col = new ColSegment(); - col->InsertBox(box); + col->InsertBox(grown_box); it.add_after_then_move(col); } gsearch.RemoveBBox(); delete table; } // clear table grid to move final tables in it + // TODO(nbeato): table_grid_ should already be empty. The above loop + // removed everything. Maybe just assert it is empty? 
table_grid_.Clear(); it.move_to_first(); // move back final tables to table_grid_ @@ -967,48 +1514,141 @@ void ColumnFinder::AdjustTableBoundaries() { } } +void TableFinder::GrowTableBox(const TBOX& table_box, TBOX* result_box) { + // TODO(nbeato): The growing code is a bit excessive right now. + // By removing these lines, the partitions considered need + // to have some overlap or be special cases. These lines could + // be added again once a check is put in place to make sure that + // growing tables don't stomp on a lot of non-table partitions. + + // search for horizontal ruling lines within the vertical margin + // int vertical_margin = kRulingVerticalMargin * gridsize(); + TBOX search_box = table_box; + // int top = MIN(search_box.top() + vertical_margin, tright().y()); + // int bottom = MAX(search_box.bottom() - vertical_margin, bleft().y()); + // search_box.set_top(top); + // search_box.set_bottom(bottom); + + GrowTableToIncludePartials(table_box, search_box, result_box); + GrowTableToIncludeLines(table_box, search_box, result_box); + IncludeLeftOutColumnHeaders(result_box); +} + +// Grow a table by increasing the size of the box to include +// partitions with significant overlap with the table. +void TableFinder::GrowTableToIncludePartials(const TBOX& table_box, + const TBOX& search_range, + TBOX* result_box) { + // Rulings are in a different grid, so search 2 grids for rulings, text, + // and table partitions that are not entirely within the new box. + for (int i = 0; i < 2; ++i) { + ColPartitionGrid* grid = (i == 0) ? &fragmented_text_grid_ : + &leader_and_ruling_grid_; + ColPartitionGridSearch rectsearch(grid); + rectsearch.StartRectSearch(search_range); + ColPartition* part = NULL; + while ((part = rectsearch.NextRectSearch()) != NULL) { + // Only include text and table types. 
+ if (part->IsImageType()) + continue; + const TBOX& part_box = part->bounding_box(); + // Include partition in the table if more than half of it + // is covered by the table + if (part_box.overlap_fraction(table_box) > kMinOverlapWithTable) { + *result_box = result_box->bounding_union(part_box); + continue; + } + } + } +} + +// Grow a table by expanding to the extents of significantly +// overlapping lines. +void TableFinder::GrowTableToIncludeLines(const TBOX& table_box, + const TBOX& search_range, + TBOX* result_box) { + ColPartitionGridSearch rsearch(&leader_and_ruling_grid_); + rsearch.SetUniqueMode(true); + rsearch.StartRectSearch(search_range); + ColPartition* part = NULL; + while ((part = rsearch.NextRectSearch()) != NULL) { + // TODO(nbeato) This should also do vertical, but column + // boundaries are breaking things. This function needs to be + // updated to allow vertical lines as well. + if (!part->IsLineType()) + continue; + // Avoid the following function call if the result of the + // function is irrelevant. + const TBOX& part_box = part->bounding_box(); + if (result_box->contains(part_box)) + continue; + // Include a partially overlapping horizontal line only if the + // extra ColPartitions that will be included due to expansion + // have large side spacing w.r.t. columns containing them. 
+ if (HLineBelongsToTable(*part, table_box)) + *result_box = result_box->bounding_union(part_box); + // TODO(nbeato): Vertical + } +} + // Checks whether the horizontal line belong to the table by looking at the // side spacing of extra ColParitions that will be included in the table // due to expansion -bool ColumnFinder::HLineBelongsToTable(ColPartition* part, - const TBOX& table_box) { - TBOX part_box = part->bounding_box(); - if (!part->IsLineType() || !part_box.major_x_overlap(table_box)) +bool TableFinder::HLineBelongsToTable(const ColPartition& part, + const TBOX& table_box) { + if (!part.IsHorizontalLine()) return false; - // Do not consider top-most horizontal line since it usually - // originates from noise - if (!part->nearest_neighbor_above()) + const TBOX& part_box = part.bounding_box(); + if (!part_box.major_x_overlap(table_box)) return false; - TBOX bbox = part_box.bounding_union(table_box); - // Start a rect search on bbox - GridSearch - rectsearch(&clean_part_grid_); - rectsearch.StartRectSearch(bbox); - ColPartition* extra_part; + // Do not consider top-most horizontal line since it usually + // originates from noise. + // TODO(nbeato): I had to comment this out because the ruling grid doesn't + // have neighbors solved. + // if (!part.nearest_neighbor_above()) + // return false; + const TBOX bbox = part_box.bounding_union(table_box); + // In the "unioned table" box (the table extents expanded by the line), + // keep track of how many partitions have significant padding to the left + // and right. If more than half of the partitions covered by the new table + // have significant spacing, the line belongs to the table and the table + // grows to include all of the partitions. 
int num_extra_partitions = 0; int extra_space_to_right = 0; int extra_space_to_left = 0; - while ((extra_part = rectsearch.NextRectSearch()) != NULL) { - if (!rectsearch.ReturnedSeedElement()) - continue; - TBOX extra_part_box = extra_part->bounding_box(); - if (extra_part_box.overlap_fraction(table_box) > 0.6) - continue; // ColPartition already in table - if (extra_part->IsImageType()) // Non-text ColPartitions do not contribute - continue; - num_extra_partitions++; - // presense of a table cell is a strong hint, so just increment the scores - // without looking at the spacing. - if (extra_part->type() == PT_TABLE || extra_part->IsLineType()) { - extra_space_to_right++; - extra_space_to_left++; - continue; + // Rulings are in a different grid, so search 2 grids for rulings, text, + // and table partitions that are introduced by the new box. + for (int i = 0; i < 2; ++i) { + ColPartitionGrid* grid = (i == 0) ? &clean_part_grid_ : + &leader_and_ruling_grid_; + // Start a rect search on bbox + ColPartitionGridSearch rectsearch(grid); + rectsearch.SetUniqueMode(true); + rectsearch.StartRectSearch(bbox); + ColPartition* extra_part = NULL; + while ((extra_part = rectsearch.NextRectSearch()) != NULL) { + // ColPartition already in table + const TBOX& extra_part_box = extra_part->bounding_box(); + if (extra_part_box.overlap_fraction(table_box) > kMinOverlapWithTable) + continue; + // Non-text ColPartitions do not contribute + if (extra_part->IsImageType()) + continue; + // Consider this partition. + num_extra_partitions++; + // presence of a table cell is a strong hint, so just increment the scores + // without looking at the spacing. 
+ if (extra_part->type() == PT_TABLE || extra_part->IsLineType()) { + extra_space_to_right++; + extra_space_to_left++; + continue; + } + int space_threshold = kSideSpaceMargin * part.median_size(); + if (extra_part->space_to_right() > space_threshold) + extra_space_to_right++; + if (extra_part->space_to_left() > space_threshold) + extra_space_to_left++; } - int space_threshold = kSideSpaceMargin * part->median_size(); - if (extra_part->space_to_right() > space_threshold) - extra_space_to_right++; - if (extra_part->space_to_left() > space_threshold) - extra_space_to_left++; } // tprintf("%d %d %d\n", // num_extra_partitions,extra_space_to_right,extra_space_to_left); @@ -1018,25 +1658,26 @@ bool ColumnFinder::HLineBelongsToTable(ColPartition* part, // Look for isolated column headers above the given table box and // include them in the table -void ColumnFinder::IncludeLeftOutColumnHeaders(TBOX& table_box) { +void TableFinder::IncludeLeftOutColumnHeaders(TBOX* table_box) { // Start a search above the current table to look for column headers - GridSearch - vsearch(&clean_part_grid_); - vsearch.StartVerticalSearch(table_box.left(), table_box.right(), - table_box.top()); - ColPartition* neighbor; + ColPartitionGridSearch vsearch(&clean_part_grid_); + vsearch.StartVerticalSearch(table_box->left(), table_box->right(), + table_box->top()); + ColPartition* neighbor = NULL; ColPartition* previous_neighbor = NULL; while ((neighbor = vsearch.NextVerticalSearch(false)) != NULL) { - int table_top = table_box.top(); - TBOX box = neighbor->bounding_box(); + // Max distance to find a table heading. 
+ const int max_distance = kMaxColumnHeaderDistance * + neighbor->median_size(); + int table_top = table_box->top(); + const TBOX& box = neighbor->bounding_box(); // Do not continue if the next box is way above - // TODO(faisal): make the threshold some factor of line spacing - if (box.bottom() - table_top > kMaxColumnHeaderDistance) + if (box.bottom() - table_top > max_distance) break; // Unconditionally include partitions of type TABLE or LINE // TODO(faisal): add some reasonable conditions here if (neighbor->type() == PT_TABLE || neighbor->IsLineType()) { - table_box.set_top(box.top()); + table_box->set_top(box.top()); previous_neighbor = NULL; continue; } @@ -1045,7 +1686,7 @@ void ColumnFinder::IncludeLeftOutColumnHeaders(TBOX& table_box) { if (previous_neighbor == NULL) { previous_neighbor = neighbor; } else { - TBOX previous_box = previous_neighbor->bounding_box(); + const TBOX& previous_box = previous_neighbor->bounding_box(); if (!box.major_y_overlap(previous_box)) break; } @@ -1056,7 +1697,7 @@ void ColumnFinder::IncludeLeftOutColumnHeaders(TBOX& table_box) { // projection on the x-axis. Projection of a real table on the x-axis // should have at least one zero-valley larger than the global median // x-height of the page. 
-void ColumnFinder::DeleteSingleColumnTables() { +void TableFinder::DeleteSingleColumnTables() { int page_width = tright().x() - bleft().x(); ASSERT_HOST(page_width > 0); // create an integer array to hold projection on x-axis @@ -1075,27 +1716,39 @@ void ColumnFinder::DeleteSingleColumnTables() { // Start a rect search on table_box GridSearch rectsearch(&clean_part_grid_); + rectsearch.SetUniqueMode(true); rectsearch.StartRectSearch(table_box); ColPartition* part; while ((part = rectsearch.NextRectSearch()) != NULL) { - if (!rectsearch.ReturnedSeedElement()) - continue; // Consider each partition only once if (!part->IsTextType()) continue; // Do not consider non-text partitions + if (part->flow() == BTFT_LEADER) + continue; // Assume leaders are in tables TBOX part_box = part->bounding_box(); // Do not consider partitions partially covered by the table if (part_box.overlap_fraction(table_box) < kMinOverlapWithTable) continue; BLOBNBOX_CLIST* part_boxes = part->boxes(); BLOBNBOX_C_IT pit(part_boxes); + + // Make sure overlapping blobs don't artificially inflate the number + // of rows in the table. This happens frequently with things such as + // decimals and split characters. Do this by assuming the column + // partition is sorted mostly left to right and just clip + // bounding boxes by the previous box's extent. 
+ int next_position_to_write = 0; + for (pit.mark_cycle_pt(); !pit.cycled_list(); pit.forward()) { BLOBNBOX *pblob = pit.data(); // ignore blob height for the purpose of projection since we // are only interested in finding valleys int xstart = pblob->bounding_box().left(); int xend = pblob->bounding_box().right(); + + xstart = MAX(xstart, next_position_to_write); for (int i = xstart; i < xend; i++) table_xprojection[i - bleft().x()]++; + next_position_to_write = xend; } } // Find largest valley between two reasonable peaks in the table @@ -1109,7 +1762,7 @@ void ColumnFinder::DeleteSingleColumnTables() { // Return true if at least one gap larger than the global x-height // exists in the horizontal projection -bool ColumnFinder::GapInXProjection(int* xprojection, int length) { +bool TableFinder::GapInXProjection(int* xprojection, int length) { // Find peak value of the histogram int peak_value = 0; for (int i = 0; i < length; i++) { @@ -1122,10 +1775,12 @@ bool ColumnFinder::GapInXProjection(int* xprojection, int length) { // number of rows in the table if (peak_value < kMinRowsInTable) return false; - double projection_threshold = kProjectionThreshold * peak_value; + double projection_threshold = kSmallTableProjectionThreshold * peak_value; + if (peak_value >= kLargeTableRowCount) + projection_threshold = kLargeTableProjectionThreshold * peak_value; // Threshold the histogram for (int i = 0; i < length; i++) { - xprojection[i] = (xprojection[i] > projection_threshold) ? 1 : 0; + xprojection[i] = (xprojection[i] >= projection_threshold) ? 
1 : 0; } // Find the largest run of zeros between two ones int largest_gap = 0; @@ -1143,72 +1798,201 @@ bool ColumnFinder::GapInXProjection(int* xprojection, int length) { run_start = -1; } } - return (largest_gap > global_median_xheight_); + return largest_gap > kMaxXProjectionGapFactor * global_median_xheight_; +} + +// Given the location of a table "guess", try to overlay a cellular +// grid in the location, adjusting the boundaries. +// TODO(nbeato): Falsely introduces: +// -headers/footers (not any worse, too much overlap destroys cells) +// -page numbers (not worse, included because maximize margins) +// -equations (nicely fit into a celluar grid, but more sparsely) +// -figures (random text box, also sparse) +// -small left-aligned text areas with overlapping positioned whitespace +// (rejected before) +// Overall, this just needs some more work. +void TableFinder::RecognizeTables() { + ScrollView* table_win = NULL; + if (textord_show_tables) { + table_win = MakeWindow(0, 0, "Table Structure"); + DisplayColPartitions(table_win, &fragmented_text_grid_, + ScrollView::BLUE, ScrollView::LIGHT_BLUE); + // table_grid_.DisplayBoxes(table_win); + } + + + TableRecognizer recognizer; + recognizer.Init(); + recognizer.set_line_grid(&leader_and_ruling_grid_); + recognizer.set_text_grid(&fragmented_text_grid_); + recognizer.set_max_text_height(global_median_xheight_ * 2.0); + recognizer.set_min_height(1.5 * gridheight()); + // Loop over all of the tables and try to fit them. + // Store the good tables here. + ColSegment_CLIST good_tables; + ColSegment_C_IT good_it(&good_tables); + + ColSegmentGridSearch gsearch(&table_grid_); + gsearch.StartFullSearch(); + ColSegment* found_table = NULL; + while ((found_table = gsearch.NextFullSearch()) != NULL) { + gsearch.RemoveBBox(); + + // The goal is to make the tables persistent in a list. + // When that happens, this will move into the search loop. 
+ const TBOX& found_box = found_table->bounding_box(); + StructuredTable* table_structure = recognizer.RecognizeTable(found_box); + + // Process a table. Good tables are inserted into the grid again later on + // We can't change boxes in the grid while it is running a search. + if (table_structure != NULL) { + if (textord_show_tables) { + table_structure->Display(table_win, ScrollView::LIME_GREEN); + } + found_table->set_bounding_box(table_structure->bounding_box()); + delete table_structure; + good_it.add_after_then_move(found_table); + } else { + delete found_table; + } + } + // TODO(nbeato): MERGE!! There is awesome info now available for merging. + + // At this point, the grid is empty. We can safely insert the good tables + // back into grid. + for (good_it.mark_cycle_pt(); !good_it.cycled_list(); good_it.forward()) + table_grid_.InsertBBox(true, true, good_it.extract()); } // Displays the column segments in some window. -void ColumnFinder::DisplayColSegments(ColSegment_LIST *segments, - ScrollView* win, - ScrollView::Color color) { +void TableFinder::DisplayColSegments(ScrollView* win, + ColSegment_LIST *segments, + ScrollView::Color color) { #ifndef GRAPHICS_DISABLED win->Pen(color); win->Brush(ScrollView::NONE); ColSegment_IT it(segments); for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { - ColSegment *col = it.data(); - TBOX box = col->bounding_box(); + ColSegment* col = it.data(); + const TBOX& box = col->bounding_box(); int left_x = box.left(); int right_x = box.right(); int top_y = box.top(); int bottom_y = box.bottom(); win->Rectangle(left_x, bottom_y, right_x, top_y); } - win->Update(); + win->UpdateWindow(); +#endif +} + +void TableFinder::DisplayColSegmentGrid(ScrollView* win, ColSegmentGrid* grid, + ScrollView::Color color) { +#ifndef GRAPHICS_DISABLED + // Iterate the ColPartitions in the grid. 
+ GridSearch + gsearch(grid); + gsearch.StartFullSearch(); + ColSegment* seg = NULL; + while ((seg = gsearch.NextFullSearch()) != NULL) { + const TBOX& box = seg->bounding_box(); + int left_x = box.left(); + int right_x = box.right(); + int top_y = box.top(); + int bottom_y = box.bottom(); + win->Brush(ScrollView::NONE); + win->Pen(color); + win->Rectangle(left_x, bottom_y, right_x, top_y); + } + win->UpdateWindow(); #endif } // Displays the colpartitions using a new coloring on an existing window. // Note: This method is only for debug purpose during development and // would not be part of checked in code -void ColumnFinder::DisplayColPartitions(ScrollView* win, - ScrollView::Color default_color) { +void TableFinder::DisplayColPartitions(ScrollView* win, + ColPartitionGrid* grid, + ScrollView::Color default_color, + ScrollView::Color table_color) { #ifndef GRAPHICS_DISABLED + ScrollView::Color color = default_color; // Iterate the ColPartitions in the grid. GridSearch - gsearch(&clean_part_grid_); + gsearch(grid); gsearch.StartFullSearch(); - ColPartition* part; - win->Brush(ScrollView::NONE); - ScrollView::Color color; + ColPartition* part = NULL; while ((part = gsearch.NextFullSearch()) != NULL) { color = default_color; - TBOX box = part->bounding_box(); -// ColPartition* upper_part = part->nearest_neighbor_above(); -// ColPartition* lower_part = part->nearest_neighbor_below(); -// if (!upper_part && !lower_part) -// color = ScrollView::ORANGE; -// else if (upper_part && !lower_part) -// color = ScrollView::CYAN; -// else if (!upper_part && lower_part) -// color = ScrollView::MAGENTA; if (part->type() == PT_TABLE) - color = ScrollView::YELLOW; + color = table_color; + const TBOX& box = part->bounding_box(); int left_x = box.left(); int right_x = box.right(); int top_y = box.top(); int bottom_y = box.bottom(); + win->Brush(ScrollView::NONE); win->Pen(color); win->Rectangle(left_x, bottom_y, right_x, top_y); } - win->Update(); + win->UpdateWindow(); #endif } +void 
TableFinder::DisplayColPartitions(ScrollView* win, + ColPartitionGrid* grid, + ScrollView::Color default_color) { + DisplayColPartitions(win, grid, default_color, ScrollView::YELLOW); +} + +void TableFinder::DisplayColPartitionConnections( + ScrollView* win, + ColPartitionGrid* grid, + ScrollView::Color color) { +#ifndef GRAPHICS_DISABLED + // Iterate the ColPartitions in the grid. + GridSearch + gsearch(grid); + gsearch.StartFullSearch(); + ColPartition* part = NULL; + while ((part = gsearch.NextFullSearch()) != NULL) { + const TBOX& box = part->bounding_box(); + int left_x = box.left(); + int right_x = box.right(); + int top_y = box.top(); + int bottom_y = box.bottom(); + + ColPartition* upper_part = part->nearest_neighbor_above(); + if (upper_part) { + TBOX upper_box = upper_part->bounding_box(); + int mid_x = (left_x + right_x) / 2; + int mid_y = (top_y + bottom_y) / 2; + int other_x = (upper_box.left() + upper_box.right()) / 2; + int other_y = (upper_box.top() + upper_box.bottom()) / 2; + win->Brush(ScrollView::NONE); + win->Pen(color); + win->Line(mid_x, mid_y, other_x, other_y); + } + ColPartition* lower_part = part->nearest_neighbor_below(); + if (lower_part) { + TBOX lower_box = lower_part->bounding_box(); + int mid_x = (left_x + right_x) / 2; + int mid_y = (top_y + bottom_y) / 2; + int other_x = (lower_box.left() + lower_box.right()) / 2; + int other_y = (lower_box.top() + lower_box.bottom()) / 2; + win->Brush(ScrollView::NONE); + win->Pen(color); + win->Line(mid_x, mid_y, other_x, other_y); + } + } + win->UpdateWindow(); +#endif +} + // Write debug image and text file. 
// Note: This method is only for debug purpose during development and // would not be part of checked in code -void ColumnFinder::WriteToPix() { +void TableFinder::WriteToPix(const FCOORD& reskew) { #ifdef HAVE_LIBLEPT // Input file must be named test1.tif PIX* pix = pixRead("test1.tif"); @@ -1229,7 +2013,7 @@ void ColumnFinder::WriteToPix() { // load colpartitions into text_box_array and table_box_array while ((part = gsearch.NextFullSearch()) != NULL) { TBOX box = part->bounding_box(); - box.rotate_large(reskew_); + box.rotate_large(reskew); BOX* lept_box = boxCreate(box.left(), img_height - box.top(), box.right() - box.left(), box.top() - box.bottom()); @@ -1252,7 +2036,7 @@ void ColumnFinder::WriteToPix() { // load table boxes to table_array and write them to text file as well while ((table = table_search.NextFullSearch()) != NULL) { TBOX box = table->bounding_box(); - box.rotate_large(reskew_); + box.rotate_large(reskew); // Since deskewing introduces negative coordinates, reskewing // might not completely recover from that since both steps enlarge // the actual box. Hence a box that undergoes deskewing/reskewing @@ -1283,13 +2067,16 @@ void ColumnFinder::WriteToPix() { // Merge all colpartitions in table regions to make them a single // colpartition and revert types of isolated table cells not // assigned to any table to their original types. 
-void ColumnFinder::MakeTableBlocks() { +void TableFinder::MakeTableBlocks(ColPartitionGrid* grid, + ColPartitionSet** all_columns, + WidthCallback* width_cb) { // Since we have table blocks already, remove table tags from all // colpartitions GridSearch - gsearch(&part_grid_); + gsearch(grid); gsearch.StartFullSearch(); - ColPartition* part; + ColPartition* part = NULL; + while ((part = gsearch.NextFullSearch()) != NULL) { if (part->type() == PT_TABLE) { part->clear_table_type(); @@ -1305,7 +2092,7 @@ void ColumnFinder::MakeTableBlocks() { TBOX table_box = table->bounding_box(); // Start a rect search on table_box GridSearch - rectsearch(&part_grid_); + rectsearch(grid); rectsearch.StartRectSearch(table_box); ColPartition* part; ColPartition* table_partition = NULL; @@ -1319,7 +2106,7 @@ void ColumnFinder::MakeTableBlocks() { if (part_box.overlap_fraction(table_box) > kMinOverlapWithTable) { rectsearch.RemoveBBox(); if (table_partition) { - table_partition->Absorb(part, WidthCB()); + table_partition->Absorb(part, width_cb); } else { table_partition = part; } @@ -1327,13 +2114,25 @@ void ColumnFinder::MakeTableBlocks() { } // Insert table colpartition back to part_grid_ if (table_partition) { - table_partition->SetPartitionType(best_columns_[table_search.GridY()]); + table_partition->SetPartitionType(resolution_, + all_columns[table_search.GridY()]); table_partition->set_table_type(); - part_grid_.InsertBBox(true, true, table_partition); + grid->InsertBBox(true, true, table_partition); } } } +//////// ColSegment code +//////// +ColSegment::ColSegment() + : ELIST_LINK(), + num_table_cells_(0), + num_text_cells_(0), + type_(COL_UNKNOWN) { +} +ColSegment::~ColSegment() { +} + // Provides a color for BBGrid to draw the rectangle. 
ScrollView::Color ColSegment::BoxColor() const { const ScrollView::Color kBoxColors[PT_COUNT] = { diff --git a/textord/tablefind.h b/textord/tablefind.h index aa472396b6..3612e48d58 100644 --- a/textord/tablefind.h +++ b/textord/tablefind.h @@ -20,8 +20,9 @@ #ifndef TESSERACT_TEXTORD_TABLEFIND_H__ #define TESSERACT_TEXTORD_TABLEFIND_H__ -#include "rect.h" +#include "colpartitiongrid.h" #include "elst.h" +#include "rect.h" namespace tesseract { @@ -34,6 +35,8 @@ enum ColSegType { COL_COUNT }; +class ColPartitionSet; + // ColSegment holds rectangular blocks that represent segmentation of a page // into regions containing single column text/table. class ColSegment; @@ -42,10 +45,8 @@ CLISTIZEH(ColSegment) class ColSegment : public ELIST_LINK { public: - ColSegment() : num_table_cells_(0), num_text_cells_(0), - type_(COL_UNKNOWN) { - } - ~ColSegment() { } + ColSegment(); + ~ColSegment(); // Simple accessors and mutators const TBOX& bounding_box() const { @@ -72,7 +73,7 @@ class ColSegment : public ELIST_LINK { bounding_box_ = other; } - int get_num_table_cells() { + int get_num_table_cells() const { return num_table_cells_; } @@ -81,7 +82,7 @@ class ColSegment : public ELIST_LINK { num_table_cells_ = n; } - int get_num_text_cells() { + int get_num_text_cells() const { return num_text_cells_; } @@ -90,7 +91,7 @@ class ColSegment : public ELIST_LINK { num_text_cells_ = n; } - ColSegType type() { + ColSegType type() const { return type_; } @@ -105,10 +106,6 @@ class ColSegment : public ELIST_LINK { void InsertBox(const TBOX& other); private: - // Initializes the bulk of the members to default values. - void Init() { - } - TBOX bounding_box_; // bounding box int num_table_cells_; int num_text_cells_; @@ -119,7 +116,319 @@ class ColSegment : public ELIST_LINK { typedef BBGrid ColSegmentGrid; +typedef GridSearch ColSegmentGridSearch; + +// TableFinder is a utility class to find a set of tables given a set of +// ColPartitions and Columns. 
The TableFinder will mark candidate ColPartitions +// based on research in "Table Detection in Heterogeneous Documents". +// Usage flow is as follows: +// TableFinder finder; +// finder.InsertCleanPartitions(/* grid info */) +// finder.LocateTables(/* ColPartitions and Columns */); +// finder.Update TODO(nbeato) +class TableFinder { + public: + // Constructor is simple initializations + TableFinder(); + ~TableFinder(); + + // Set the resolution of the connected components in ppi. + void set_resolution(int resolution) { + resolution_ = resolution; + } + // Change the reading order. Initially it is left to right. + void set_left_to_right_language(bool order); + + // Initialize + void Init(int grid_size, const ICOORD& bottom_left, const ICOORD& top_right); + + // Copy cleaned partitions from ColumnFinder's part_grid_ to this + // clean_part_grid_ and insert dot-like noise into period_grid_. + // It resizes the grids in this object to the dimensions of grid. + void InsertCleanPartitions(ColPartitionGrid* grid, TO_BLOCK* block); + + // High level function to perform table detection + // Finds tables and updates the grid object with new partitions for the + // tables. The columns and width callbacks are used to merge tables. + // The reskew argument is only used to write the tables to the out.png + // if that feature is enabled. + void LocateTables(ColPartitionGrid* grid, + ColPartitionSet** columns, + WidthCallback* width_cb, + const FCOORD& reskew); + + protected: + // Access for the grid dimensions. + // The results will not be correct until InsertCleanPartitions + // has been called. The values are taken from the grid passed as an argument + // to that function. 
+ int gridsize() const; + int gridwidth() const; + int gridheight() const; + const ICOORD& bleft() const; + const ICOORD& tright() const; + + // Makes a window for debugging, see BBGrid + ScrollView* MakeWindow(int x, int y, const char* window_name); + + //////// Functions to insert objects from the grid into the table finder. + //////// In all cases, ownership is transferred to the table finder. + // Inserts text into the table finder. + void InsertTextPartition(ColPartition* part); + void InsertFragmentedTextPartition(ColPartition* part); + void InsertLeaderPartition(ColPartition* part); + void InsertRulingPartition(ColPartition* part); + void InsertImagePartition(ColPartition* part); + void SplitAndInsertFragmentedTextPartition(ColPartition* part); + bool AllowTextPartition(const ColPartition& part) const; + bool AllowBlob(const BLOBNBOX& blob) const; + + //////// Functions that manipulate ColPartitions in the part_grid_ ///// + //////// to find tables. + //////// + + // Utility function to move segments to col_seg_grid + // Note: Move includes ownership, + // so segments will be owned by col_seg_grid + void MoveColSegmentsToGrid(ColSegment_LIST* segments, + ColSegmentGrid* col_seg_grid); + + //////// Set up code to run during table detection to correctly + //////// initialize variables on column partitions that are used later. + //////// + + // Initialize the grid and partitions + void InitializePartitions(ColPartitionSet** all_columns); + + // Set left, right and top, bottom spacings of each colpartition. + // Left/right spacings are w.r.t the column boundaries + // Top/bottom spacings are w.r.t. previous and next colpartitions + static void SetPartitionSpacings(ColPartitionGrid* grid, + ColPartitionSet** all_columns); + + // Set spacing and closest neighbors above and below a given colpartition. + void SetVerticalSpacing(ColPartition* part); + + // Set global spacing estimates. This function is dependent on the + // partition spacings.
So make sure SetPartitionSpacings is called + on the same grid before this. + void SetGlobalSpacings(ColPartitionGrid* grid); + // Access to the global median xheight. The xheight is the height + // of a lowercase 'x' character on the page. This can be viewed as the + // average height of a lowercase letter in a textline. As a result + // it is used to make assumptions about spacing between words and + // table cells. + void set_global_median_xheight(int xheight); + // Access to the global median blob width. The width is useful + // when deciding if a partition is noise. + void set_global_median_blob_width(int width); + // Access to the global median ledding. The ledding is the distance between + // two adjacent text lines. This value can be used to get a rough estimate + // for the amount of space between two lines of text. As a result, it + // is used to calculate appropriate spacing between adjacent rows of text. + void set_global_median_ledding(int ledding); + + // Updates the nearest neighbors for each ColPartition in clean_part_grid_. + // The neighbors are most likely SingletonPartner calls after the neighbors + // are assigned. This is here until it is decided to remove the + // nearest_neighbor code in ColPartition + void FindNeighbors(); + + //////// Functions to mark candidate column partitions as tables. + //////// Tables are marked as described in + //////// Table Detection in Heterogeneous Documents (2010, Shafait & Smith) + //////// + // High level function to mark partitions as table rows/cells. + // When this function is done, the column partitions in clean_part_grid_ + // should mostly be marked as tables.
+ void MarkTablePartitions(); + // Marks partitions given a local view of a single partition + void MarkPartitionsUsingLocalInformation(); + /////// Heuristics for local marking + // Check if the partition has at least one large gap between words or no + // significant gap at all + // TODO(nbeato): Make const, prevented because blobnbox array access + bool HasWideOrNoInterWordGap(ColPartition* part) const; + // Checks if a partition is adjacent to leaders on the page + bool HasLeaderAdjacent(const ColPartition& part); + // Filter individual text partitions marked as table partitions + // consisting of paragraph endings, small section headings, and + // headers and footers. + void FilterFalseAlarms(); + void FilterParagraphEndings(); + void FilterHeaderAndFooter(); + // Mark all ColPartitions as table cells that have a table cell above + // and below them + void SmoothTablePartitionRuns(); + + //////// Functions to create bounding boxes (ColSegment) objects for + //////// the columns on the page. The columns are not necessarily + //////// vertical lines, meaning if tab stops strongly suggests that + //////// a column changes horizontal position, as in the case below, + //////// The ColSegment objects will respect that after processing. + //////// + //////// _____________ + //////// Ex. | | | + //////// |_____|______| 5 boxes: 2 on this line + //////// | | | | 3 on this line + //////// |___|____|___| + //////// + + // Get Column segments from best_columns_ + void GetColumnBlocks(ColPartitionSet** columns, + ColSegment_LIST *col_segments); + + // Group Column segments into consecutive single column regions. 
+ void GroupColumnBlocks(ColSegment_LIST *current_segments, + ColSegment_LIST *col_segments); + + // Check if two boxes are consecutive within the same column + bool ConsecutiveBoxes(const TBOX &b1, const TBOX &b2); + + // Set the ratio of candidate table partitions in each column + void SetColumnsType(ColSegment_LIST* col_segments); + + // Merge Column Blocks that were split due to the presence of a table + void GridMergeColumnBlocks(); + + //////// Functions to turn marked ColPartitions into candidate tables + //////// using a modified T-Recs++ algorithm described in + //////// Applying The T-Recs Table Recognition System + //////// To The Business Letter Domain (2001, Kieninger & Dengel) + //////// + + // Merge partition cells into table columns + // Differs from paper by just looking at marked table partitions + // instead of similarity metric. + // Modified section 4.1 of paper. + void GetTableColumns(ColSegment_LIST *table_columns); + + // Finds regions within a column that potentially contain a table. + // Ie, the table columns from GetTableColumns are turned into boxes + // that span the entire page column (using ColumnBlocks found in + // earlier functions) in the x direction and the min/max extent of + // overlapping table columns in the y direction. + // Section 4.2 of paper. + void GetTableRegions(ColSegment_LIST *table_columns, + ColSegment_LIST *table_regions); + + + //////// Functions to "patch up" found tables + //////// + + // Merge table regions corresponding to tables spanning multiple columns + void GridMergeTableRegions(); + bool BelongToOneTable(const TBOX &box1, const TBOX &box2); + + // Adjust table boundaries by building a tight bounding box around all + // ColPartitions contained in it. + void AdjustTableBoundaries(); + + // Grows a table to include partitions that are partially covered + // by the table. This includes lines and text. It does not include + // noise or images. + // On entry, result_box is the minimum size of the result.
The results of the + // function will union the actual result with result_box. + void GrowTableBox(const TBOX& table_box, TBOX* result_box); + // Grow a table by increasing the size of the box to include + // partitions with significant overlap with the table. + void GrowTableToIncludePartials(const TBOX& table_box, + const TBOX& search_range, + TBOX* result_box); + // Grow a table by expanding to the extents of significantly + // overlapping lines. + void GrowTableToIncludeLines(const TBOX& table_box, const TBOX& search_range, + TBOX* result_box); + // Checks whether the horizontal line belongs to the table by looking at the + // side spacing of extra ColPartitions that will be included in the table + // due to expansion + bool HLineBelongsToTable(const ColPartition& part, const TBOX& table_box); + + // Look for isolated column headers above the given table box and + // include them in the table + void IncludeLeftOutColumnHeaders(TBOX* table_box); + + // Remove false alarms consisting of a single column + void DeleteSingleColumnTables(); + + // Return true if at least one gap larger than the global x-height + // exists in the horizontal projection + bool GapInXProjection(int* xprojection, int length); + + //////// Recognize the tables. + //////// + // This function will run the table recognizer and try to find better + // bounding boxes. The structures of the tables never leave this function + // right now. It just tries to prune and merge tables based on info it + // has available. + void RecognizeTables(); + + //////// Debugging functions. Render different structures to GUI + //////// for visual debugging / intuition. + //////// + + // Displays Colpartitions marked as table row. Overlays them on top of + // part_grid_. + void DisplayColSegments(ScrollView* win, ColSegment_LIST *cols, + ScrollView::Color color); + + // Displays the colpartitions using a new coloring on an existing window.
+ // Note: This method is only for debug purpose during development and + // would not be part of checked in code + void DisplayColPartitions(ScrollView* win, ColPartitionGrid* grid, + ScrollView::Color text_color, + ScrollView::Color table_color); + void DisplayColPartitions(ScrollView* win, ColPartitionGrid* grid, + ScrollView::Color default_color); + void DisplayColPartitionConnections(ScrollView* win, + ColPartitionGrid* grid, + ScrollView::Color default_color); + void DisplayColSegmentGrid(ScrollView* win, ColSegmentGrid* grid, + ScrollView::Color color); + + // Write ColParitions and Tables to a PIX image + // Note: This method is only for debug purpose during development and + // would not be part of checked in code + void WriteToPix(const FCOORD& reskew); + + // Merge all colpartitions in table regions to make them a single + // colpartition and revert types of isolated table cells not + // assigned to any table to their original types. + void MakeTableBlocks(ColPartitionGrid* grid, + ColPartitionSet** columns, + WidthCallback* width_cb); + + ///////////////////////////////////////////////// + // Useful objects used during table find process. + ///////////////////////////////////////////////// + // Resolution of the connected components in ppi. + int resolution_; + // Estimate of median x-height over the page + int global_median_xheight_; + // Estimate of the median blob width on the page + int global_median_blob_width_; + // Estimate of median leading on the page + int global_median_ledding_; + // Grid to hold cleaned colpartitions after removing all + // colpartitions that consist of only noise blobs, and removing + // noise blobs from remaining colpartitions. + ColPartitionGrid clean_part_grid_; + // Grid contains the leaders and ruling lines. + ColPartitionGrid leader_and_ruling_grid_; + // Grid contains the broken down column partitions. It can be thought + // of as a "word" grid. However, it usually doesn't break apart text lines. 
+ // It does break apart table data (most of the time). + ColPartitionGrid fragmented_text_grid_; + // Grid of page column blocks + ColSegmentGrid col_seg_grid_; + // Grid of detected tables + ColSegmentGrid table_grid_; + // The reading order of text. Defaults to true, for languages such as English. + bool left_to_right_language_; +}; } // namespace tesseract. diff --git a/textord/tablerecog.cpp b/textord/tablerecog.cpp new file mode 100644 index 0000000000..5f49584b13 --- /dev/null +++ b/textord/tablerecog.cpp @@ -0,0 +1,1058 @@ +/////////////////////////////////////////////////////////////////////// +// File: tablerecog.cpp +// Description: Helper class to help structure table areas. Given an bounding +// box from TableFinder, the TableRecognizer should give a +// StructuredTable (maybe a list in the future) of "good" tables +// in that area. +// Author: Nicholas Beato +// Created: Friday, Aug. 20, 2010 +// +// (C) Copyright 2009, Google Inc. +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +/////////////////////////////////////////////////////////////////////// + +#include "tablerecog.h" + +namespace tesseract { + +// The amount of space required between the ColPartitions in 2 columns +// of a non-lined table as a multiple of the median width. +const double kHorizontalSpacing = 0.30; +// The amount of space required between the ColPartitions in 2 rows +// of a non-lined table as multiples of the median height. 
+const double kVerticalSpacing = -0.2; +// The number of cells that the grid lines may intersect. +// See FindCellSplitLocations for explanation. +const int kCellSplitRowThreshold = 0; +const int kCellSplitColumnThreshold = 0; +// For "lined tables", the number of required lines. Currently a guess. +const int kLinedTableMinVerticalLines = 3; +const int kLinedTableMinHorizontalLines = 3; +// Number of columns required, as a fraction of the most columns found. +// None of these are tweaked at all. +const double kRequiredColumns = 0.7; +// The tolerance for comparing margins of potential tables. +const double kMarginFactor = 1.1; +// The first and last row should be consistent cell height. +// This factor is the first and last row cell height max. +const double kMaxRowSize = 2.5; +// Number of filled columns required to form a strong table row. +// For small tables, this is an absolute number. +const double kGoodRowNumberOfColumnsSmall[] = { 2, 2, 2, 2, 2, 3, 3 }; +const int kGoodRowNumberOfColumnsSmallSize = + sizeof(kGoodRowNumberOfColumnsSmall) / sizeof(double) - 1; +// For large tables, it is a relative number +const double kGoodRowNumberOfColumnsLarge = 0.7; +// The amount of area that must be covered in a cell by ColPartitions to +// be considered "filled" +const double kMinFilledArea = 0.35; + +//////// +//////// StructuredTable Class +//////// + +StructuredTable::StructuredTable() + : text_grid_(NULL), + line_grid_(NULL), + is_lined_(false), + space_above_(0), + space_below_(0), + space_left_(0), + space_right_(0), + median_cell_height_(0), + median_cell_width_(0), + max_text_height_(MAX_INT32) { +} + +StructuredTable::~StructuredTable() { +} + +void StructuredTable::Init() { +} + +void StructuredTable::set_text_grid(ColPartitionGrid* text_grid) { + text_grid_ = text_grid; +} +void StructuredTable::set_line_grid(ColPartitionGrid* line_grid) { + line_grid_ = line_grid; +} +void StructuredTable::set_max_text_height(int height) { + max_text_height_ = height; +} 
+bool StructuredTable::is_lined() const { + return is_lined_; +} +int StructuredTable::row_count() const { + return cell_y_.length() == 0 ? 0 : cell_y_.length() - 1; +} +int StructuredTable::column_count() const { + return cell_x_.length() == 0 ? 0 : cell_x_.length() - 1; +} +int StructuredTable::cell_count() const { + return row_count() * column_count(); +} +void StructuredTable::set_bounding_box(const TBOX& box) { + bounding_box_ = box; +} +const TBOX& StructuredTable::bounding_box() const { + return bounding_box_; +} +int StructuredTable::median_cell_height() { + return median_cell_height_; +} +int StructuredTable::median_cell_width() { + return median_cell_width_; +} +int StructuredTable::row_height(int row) const { + ASSERT_HOST(0 <= row && row < row_count()); + return cell_y_[row + 1] - cell_y_[row]; +} +int StructuredTable::column_width(int column) const { + ASSERT_HOST(0 <= column && column < column_count()); + return cell_x_[column + 1] - cell_x_[column]; +} +int StructuredTable::space_above() const { + return space_above_; +} +int StructuredTable::space_below() const { + return space_below_; +} + +// At this point, we know that the lines are contained +// by the box (by FindLinesBoundingBox). +// So try to find the cell structure and make sure it works out. +// The assumption is that all lines span the table. If this +// assumption fails, the VerifyLinedTable method will +// abort the lined table. The TableRecognizer will fall +// back on FindWhitespacedStructure. +bool StructuredTable::FindLinedStructure() { + ClearStructure(); + + // Search for all of the lines in the current box. + // Update the cellular structure with the exact lines. 
+ ColPartitionGridSearch box_search(line_grid_); + box_search.SetUniqueMode(true); + box_search.StartRectSearch(bounding_box_); + ColPartition* line = NULL; + + while ((line = box_search.NextRectSearch()) != NULL) { + if (line->IsHorizontalLine()) + cell_y_.push_back(line->MidY()); + if (line->IsVerticalLine()) + cell_x_.push_back(line->MidX()); + } + + // HasSignificantLines should guarantee cells. + // Because that code is a different class, just gracefully + // return false. This could be an assert. + if (cell_x_.length() < 3 || cell_y_.length() < 3) + return false; + + cell_x_.sort(); + cell_y_.sort(); + + // Remove duplicates that may have occurred due to split lines. + cell_x_.compact_sorted(); + cell_y_.compact_sorted(); + + // The border should be the extents of line boxes, not middle. + cell_x_[0] = bounding_box_.left(); + cell_x_[cell_x_.length() - 1] = bounding_box_.right(); + cell_y_[0] = bounding_box_.bottom(); + cell_y_[cell_y_.length() - 1] = bounding_box_.top(); + + // Remove duplicates that may have occurred due to moving the borders. + cell_x_.compact_sorted(); + cell_y_.compact_sorted(); + + CalculateMargins(); + CalculateStats(); + is_lined_ = VerifyLinedTableCells(); + return is_lined_; +} + +// Finds the cellular structure given a particular box. +bool StructuredTable::FindWhitespacedStructure() { + ClearStructure(); + FindWhitespacedColumns(); + FindWhitespacedRows(); + + if (!VerifyWhitespacedTable()) { + return false; + } else { + bounding_box_.set_left(cell_x_[0]); + bounding_box_.set_right(cell_x_[cell_x_.length() - 1]); + bounding_box_.set_bottom(cell_y_[0]); + bounding_box_.set_top(cell_y_[cell_y_.length() - 1]); + AbsorbNearbyLines(); + CalculateMargins(); + CalculateStats(); + return true; + } +} + +// Tests if a partition fits inside the table structure. +// Partitions must fully span a grid line in order to intersect it. +// This means that a partition does not intersect a line +// that it "just" touches. 
This is mainly because the assumption +// throughout the code is that "0" distance is a very very small space. +bool StructuredTable::DoesPartitionFit(const ColPartition& part) const { + const TBOX& box = part.bounding_box(); + for (int i = 0; i < cell_x_.length(); ++i) + if (box.left() < cell_x_[i] && cell_x_[i] < box.right()) + return false; + for (int i = 0; i < cell_y_.length(); ++i) + if (box.bottom() < cell_y_[i] && cell_y_[i] < box.top()) + return false; + return true; +} + +// Checks if a sub-table has multiple data cells filled. +int StructuredTable::CountFilledCells() { + return CountFilledCells(0, row_count() - 1, 0, column_count() - 1); +} +int StructuredTable::CountFilledCellsInRow(int row) { + return CountFilledCells(row, row, 0, column_count() - 1); +} +int StructuredTable::CountFilledCellsInColumn(int column) { + return CountFilledCells(0, row_count() - 1, column, column); +} +int StructuredTable::CountFilledCells(int row_start, int row_end, + int column_start, int column_end) { + ASSERT_HOST(0 <= row_start && row_start <= row_end && row_end < row_count()); + ASSERT_HOST(0 <= column_start && column_start <= column_end && + column_end < column_count()); + int cell_count = 0; + TBOX cell_box; + for (int row = row_start; row <= row_end; ++row) { + cell_box.set_bottom(cell_y_[row]); + cell_box.set_top(cell_y_[row + 1]); + for (int col = column_start; col <= column_end; ++col) { + cell_box.set_left(cell_x_[col]); + cell_box.set_right(cell_x_[col + 1]); + if (CountPartitions(cell_box) > 0) + ++cell_count; + } + } + return cell_count; +} + +// Makes sure that at least one cell in a row has substantial area filled. +// This can filter out large whitespace caused by growing tables too far +// and page numbers. 
+bool StructuredTable::VerifyRowFilled(int row) { + for (int i = 0; i < column_count(); ++i) { + double area_filled = CalculateCellFilledPercentage(row, i); + if (area_filled >= kMinFilledArea) + return true; + } + return false; +} + +// Finds the filled area in a cell. +// Assume ColPartitions do not overlap for simplicity (even though they do). +double StructuredTable::CalculateCellFilledPercentage(int row, int column) { + ASSERT_HOST(0 <= row && row <= row_count()); + ASSERT_HOST(0 <= column && column <= column_count()); + const TBOX kCellBox(cell_x_[column], cell_y_[row], + cell_x_[column + 1], cell_y_[row + 1]); + ASSERT_HOST(!kCellBox.null_box()); + + ColPartitionGridSearch gsearch(text_grid_); + gsearch.SetUniqueMode(true); + gsearch.StartRectSearch(kCellBox); + double area_covered = 0; + ColPartition* text = NULL; + while ((text = gsearch.NextRectSearch()) != NULL) { + if (text->IsTextType()) + area_covered += text->bounding_box().intersection(kCellBox).area(); + } + return MIN(1.0, area_covered / kCellBox.area()); +} + +void StructuredTable::Display(ScrollView* window, ScrollView::Color color) { +#ifndef GRAPHICS_DISABLED + window->Brush(ScrollView::NONE); + window->Pen(color); + window->Rectangle(bounding_box_.left(), bounding_box_.bottom(), + bounding_box_.right(), bounding_box_.top()); + for (int i = 0; i < cell_x_.length(); i++) { + window->Line(cell_x_[i], bounding_box_.bottom(), + cell_x_[i], bounding_box_.top()); + } + for (int i = 0; i < cell_y_.length(); i++) { + window->Line(bounding_box_.left(), cell_y_[i], + bounding_box_.right(), cell_y_[i]); + } + window->UpdateWindow(); +#endif +} + +// Clear structure information. +void StructuredTable::ClearStructure() { + cell_x_.clear(); + cell_y_.clear(); + is_lined_ = false; + space_above_ = 0; + space_below_ = 0; + space_left_ = 0; + space_right_ = 0; + median_cell_height_ = 0; + median_cell_width_ = 0; +} + +// When a table has lines, the lines should not intersect any partitions. 
+// The following function makes sure the previous assumption is met. +bool StructuredTable::VerifyLinedTableCells() { + // Function only called when lines exist. + ASSERT_HOST(cell_y_.length() >= 2 && cell_x_.length() >= 2); + for (int i = 0; i < cell_y_.length(); ++i) { + if (CountHorizontalIntersections(cell_y_[i]) > 0) + return false; + } + for (int i = 0; i < cell_x_.length(); ++i) { + if (CountVerticalIntersections(cell_x_[i]) > 0) + return false; + } + return true; +} + +// TODO(nbeato): Could be much better than this. +// Examples: +// - Calculate the percentage of filled cells. +// - Calculate the average number of ColPartitions per cell. +// - Calculate the number of cells per row with partitions. +// - Check if ColPartitions in adjacent cells are similar. +// - Check that all columns are at least a certain width. +// - etc. +bool StructuredTable::VerifyWhitespacedTable() { + // criteria for a table, must be at least 2x3 or 3x2 + return row_count() >= 2 && column_count() >= 2 && cell_count() >= 6; +} + +// Finds vertical splits in the ColPartitions of text_grid_ by considering +// all possible "good" guesses. A good guess is just the left/right sides of +// the partitions, since these locations will uniquely define the +// extremal values where the splits can occur. The split happens +// in the middle of the two nearest partitions. +void StructuredTable::FindWhitespacedColumns() { + // Set of the extents of all partitions on the page. + GenericVectorEqEq<int> left_sides; + GenericVectorEqEq<int> right_sides; + + // Look at each text partition. We want to find the partitions + // that have extremal left/right sides. These will give us a basis + // for the table columns. 
+ ColPartitionGridSearch gsearch(text_grid_); + gsearch.SetUniqueMode(true); + gsearch.StartRectSearch(bounding_box_); + ColPartition* text = NULL; + while ((text = gsearch.NextRectSearch()) != NULL) { + if (!text->IsTextType()) + continue; + + ASSERT_HOST(text->bounding_box().left() < text->bounding_box().right()); + int spacing = static_cast<int>(text->median_width() * + kHorizontalSpacing / 2.0 + 0.5); + left_sides.push_back(text->bounding_box().left() - spacing); + right_sides.push_back(text->bounding_box().right() + spacing); + } + // It causes disaster below, so avoid it! + if (left_sides.length() == 0 || right_sides.length() == 0) + return; + + // Since data may be inserted in grid order, we sort the left/right sides. + left_sides.sort(); + right_sides.sort(); + + // At this point, in the "merged list", we expect to have a left side, + // followed by either more left sides or a right side. The last number + // should be a right side. We find places where the splits occur by looking + // for "valleys". If we want to force gap sizes or allow overlap, change + // the spacing above. If you want to let lines "slice" partitions as long + // as it is infrequent, change the following function. + FindCellSplitLocations(left_sides, right_sides, kCellSplitColumnThreshold, + &cell_x_); +} + +// Finds horizontal splits in the ColPartitions of text_grid_ by considering +// all possible "good" guesses. A good guess is just the bottom/top sides of +// the partitions, since these locations will uniquely define the +// extremal values where the splits can occur. The split happens +// in the middle of the two nearest partitions. +void StructuredTable::FindWhitespacedRows() { + // Set of the extents of all partitions on the page. + GenericVectorEqEq<int> bottom_sides; + GenericVectorEqEq<int> top_sides; + // We will be "shrinking" partitions, so keep the min/max around to + // make sure the bottom/top lines do not intersect text. 
+ int min_bottom = MAX_INT32; + int max_top = MIN_INT32; + + // Look at each text partition. We want to find the partitions + // that have extremal bottom/top sides. These will give us a basis + // for the table rows. Because the textlines can be skewed and close due + // to warping, the height of the partitions is toned down a little bit. + ColPartitionGridSearch gsearch(text_grid_); + gsearch.SetUniqueMode(true); + gsearch.StartRectSearch(bounding_box_); + ColPartition* text = NULL; + while ((text = gsearch.NextRectSearch()) != NULL) { + if (!text->IsTextType()) + continue; + + ASSERT_HOST(text->bounding_box().bottom() < text->bounding_box().top()); + min_bottom = MIN(min_bottom, text->bounding_box().bottom()); + max_top = MAX(max_top, text->bounding_box().top()); + + // Ignore "tall" text partitions, as these are usually false positive + // vertical text or multiple lines pulled together. + if (text->bounding_box().height() > max_text_height_) + continue; + + int spacing = static_cast<int>(text->bounding_box().height() * + kVerticalSpacing / 2.0 + 0.5); + int bottom = text->bounding_box().bottom() - spacing; + int top = text->bounding_box().top() + spacing; + // For horizontal text, the factor can be negative. This should + // probably cause a warning or failure. I haven't actually checked if + // it happens. + if (bottom >= top) + continue; + + bottom_sides.push_back(bottom); + top_sides.push_back(top); + } + // It causes disaster below, so avoid it! + if (bottom_sides.length() == 0 || top_sides.length() == 0) + return; + + // Since data may be inserted in grid order, we sort the bottom/top sides. + bottom_sides.sort(); + top_sides.sort(); + + // At this point, in the "merged list", we expect to have a bottom side, + // followed by either more bottom sides or a top side. The last number + // should be a top side. We find places where the splits occur by looking + // for "valleys". If we want to force gap sizes or allow overlap, change + // the spacing above. 
If you want to let lines "slice" partitions as long + // as it is infrequent, change the following function. + FindCellSplitLocations(bottom_sides, top_sides, kCellSplitRowThreshold, + &cell_y_); + + // Recover the min/max correctly since it was shifted. + cell_y_[0] = min_bottom; + cell_y_[cell_y_.length() - 1] = max_top; +} + +void StructuredTable::CalculateMargins() { + space_above_ = MAX_INT32; + space_below_ = MAX_INT32; + space_right_ = MAX_INT32; + space_left_ = MAX_INT32; + UpdateMargins(text_grid_); + UpdateMargins(line_grid_); +} +// Finds the nearest partition in grid to the table +// boundaries and updates the margin. +void StructuredTable::UpdateMargins(ColPartitionGrid* grid) { + int below = FindVerticalMargin(grid, bounding_box_.bottom(), true); + space_below_ = MIN(space_below_, below); + int above = FindVerticalMargin(grid, bounding_box_.top(), false); + space_above_ = MIN(space_above_, above); + int left = FindHorizontalMargin(grid, bounding_box_.left(), true); + space_left_ = MIN(space_left_, left); + int right = FindHorizontalMargin(grid, bounding_box_.right(), false); + space_right_ = MIN(space_right_, right); +} +int StructuredTable::FindVerticalMargin(ColPartitionGrid* grid, int border, + bool decrease) const { + ColPartitionGridSearch gsearch(grid); + gsearch.SetUniqueMode(true); + gsearch.StartVerticalSearch(bounding_box_.left(), bounding_box_.right(), + border); + ColPartition* part = NULL; + while ((part = gsearch.NextVerticalSearch(decrease)) != NULL) { + if (!part->IsTextType() && !part->IsHorizontalLine()) + continue; + int distance = decrease ? 
border - part->bounding_box().top() + : part->bounding_box().bottom() - border; + if (distance >= 0) + return distance; + } + return MAX_INT32; +} +int StructuredTable::FindHorizontalMargin(ColPartitionGrid* grid, int border, + bool decrease) const { + ColPartitionGridSearch gsearch(grid); + gsearch.SetUniqueMode(true); + gsearch.StartSideSearch(border, bounding_box_.bottom(), bounding_box_.top()); + ColPartition* part = NULL; + while ((part = gsearch.NextSideSearch(decrease)) != NULL) { + if (!part->IsTextType() && !part->IsVerticalLine()) + continue; + int distance = decrease ? border - part->bounding_box().right() + : part->bounding_box().left() - border; + if (distance >= 0) + return distance; + } + return MAX_INT32; +} + +void StructuredTable::CalculateStats() { + const int kMaxCellHeight = 1000; + const int kMaxCellWidth = 1000; + STATS height_stats(0, kMaxCellHeight + 1); + STATS width_stats(0, kMaxCellWidth + 1); + + for (int i = 0; i < row_count(); ++i) + height_stats.add(row_height(i), column_count()); + for (int i = 0; i < column_count(); ++i) + width_stats.add(column_width(i), row_count()); + + median_cell_height_ = static_cast<int>(height_stats.median() + 0.5); + median_cell_width_ = static_cast<int>(width_stats.median() + 0.5); +} + +// Looks for grid lines near the current bounding box and +// grows the bounding box to include them if no intersections +// will occur as a result. This is necessary because the margins +// are calculated relative to the closest line/text. If the +// line isn't absorbed, the margin will be the distance to the line. +void StructuredTable::AbsorbNearbyLines() { + ColPartitionGridSearch gsearch(line_grid_); + gsearch.SetUniqueMode(true); + + // Is the closest line above good? Loop multiple times for tables with + // multi-line (sometimes 2) borders. Limit the number of lines by + // making sure they stay within a table cell or so. 
+ ColPartition* line = NULL; + gsearch.StartVerticalSearch(bounding_box_.left(), bounding_box_.right(), + bounding_box_.top()); + while ((line = gsearch.NextVerticalSearch(false)) != NULL) { + if (!line->IsHorizontalLine()) + break; + TBOX text_search(bounding_box_.left(), bounding_box_.top() + 1, + bounding_box_.right(), line->MidY()); + if (text_search.height() > median_cell_height_ * 2) + break; + if (CountPartitions(text_search) > 0) + break; + bounding_box_.set_top(line->MidY()); + } + // As above, is the closest line below good? + line = NULL; + gsearch.StartVerticalSearch(bounding_box_.left(), bounding_box_.right(), + bounding_box_.bottom()); + while ((line = gsearch.NextVerticalSearch(true)) != NULL) { + if (!line->IsHorizontalLine()) + break; + TBOX text_search(bounding_box_.left(), line->MidY(), + bounding_box_.right(), bounding_box_.bottom() - 1); + if (text_search.height() > median_cell_height_ * 2) + break; + if (CountPartitions(text_search) > 0) + break; + bounding_box_.set_bottom(line->MidY()); + } + // TODO(nbeato): vertical lines +} + + +// This function will find all "0 valleys" (of any length) given two +// arrays. The arrays are the mins and maxes of partitions (either +// left and right or bottom and top). Since the min/max lists are generated +// with pairs of increasing integers, we can make some assumptions in +// the function about ordering of the overall list, which are shown in the +// asserts. +// The algorithm works as follows: +// While there are numbers to process, take the smallest number. +// If it is from the min_list, increment the "hill" counter. +// Otherwise, decrement the "hill" counter. +// In the process of doing this, keep track of "crossing" the +// desired height. +// The first/last items are extremal values of the list and known. +// NOTE: This function assumes the lists are sorted! 
+void StructuredTable::FindCellSplitLocations(const GenericVector<int>& min_list, + const GenericVector<int>& max_list, + int max_merged, + GenericVector<int>* locations) { + locations->clear(); + ASSERT_HOST(min_list.length() == max_list.length()); + if (min_list.length() == 0) + return; + ASSERT_HOST(min_list.get(0) < max_list.get(0)); + ASSERT_HOST(min_list.get(min_list.length() - 1) < + max_list.get(max_list.length() - 1)); + + locations->push_back(min_list.get(0)); + int min_index = 0; + int max_index = 0; + int stacked_partitions = 0; + int last_cross_position = MAX_INT32; + // max_index will expire after min_index. + // However, we can't "increase" the hill size if min_index expired. + // So finish processing when min_index expires. + while (min_index < min_list.length()) { + // Increase the hill count. + if (min_list[min_index] < max_list[max_index]) { + ++stacked_partitions; + if (last_cross_position != MAX_INT32 && + stacked_partitions > max_merged) { + int mid = (last_cross_position + min_list[min_index]) / 2; + locations->push_back(mid); + last_cross_position = MAX_INT32; + } + ++min_index; + } else { + // Decrease the hill count. + --stacked_partitions; + if (last_cross_position == MAX_INT32 && + stacked_partitions <= max_merged) { + last_cross_position = max_list[max_index]; + } + ++max_index; + } + } + locations->push_back(max_list.get(max_list.length() - 1)); +} + +// Counts the number of partitions in the table +// box that intersect the given x value. +int StructuredTable::CountVerticalIntersections(int x) { + int count = 0; + // Make a small box to keep the search time down. 
+ const int kGridSize = text_grid_->gridsize(); + TBOX vertical_box = bounding_box_; + vertical_box.set_left(x - kGridSize); + vertical_box.set_right(x + kGridSize); + + ColPartitionGridSearch gsearch(text_grid_); + gsearch.SetUniqueMode(true); + gsearch.StartRectSearch(vertical_box); + ColPartition* text = NULL; + while ((text = gsearch.NextRectSearch()) != NULL) { + if (!text->IsTextType()) + continue; + const TBOX& box = text->bounding_box(); + if (box.left() < x && x < box.right()) + ++count; + } + return count; +} + +// Counts the number of partitions in the table +// box that intersect the given y value. +int StructuredTable::CountHorizontalIntersections(int y) { + int count = 0; + // Make a small box to keep the search time down. + const int kGridSize = text_grid_->gridsize(); + TBOX horizontal_box = bounding_box_; + horizontal_box.set_bottom(y - kGridSize); + horizontal_box.set_top(y + kGridSize); + + ColPartitionGridSearch gsearch(text_grid_); + gsearch.SetUniqueMode(true); + gsearch.StartRectSearch(horizontal_box); + ColPartition* text = NULL; + while ((text = gsearch.NextRectSearch()) != NULL) { + if (!text->IsTextType()) + continue; + + const TBOX& box = text->bounding_box(); + if (box.bottom() < y && y < box.top()) + ++count; + } + return count; +} + +// Counts how many text partitions are in this box. +// This is used to count partitions in cells, as that can indicate +// how "strong" a potential table row/column (or even full table) actually is. 
+int StructuredTable::CountPartitions(const TBOX& box) { + ColPartitionGridSearch gsearch(text_grid_); + gsearch.SetUniqueMode(true); + gsearch.StartRectSearch(box); + int count = 0; + ColPartition* text = NULL; + while ((text = gsearch.NextRectSearch()) != NULL) { + if (text->IsTextType()) + ++count; + } + return count; +} + +//////// +//////// TableRecognizer Class +//////// + +TableRecognizer::TableRecognizer() + : text_grid_(NULL), + line_grid_(NULL), + min_height_(0), + min_width_(0), + max_text_height_(MAX_INT32) { +} + +TableRecognizer::~TableRecognizer() { +} + +void TableRecognizer::Init() { +} + +void TableRecognizer::set_text_grid(ColPartitionGrid* text_grid) { + text_grid_ = text_grid; +} +void TableRecognizer::set_line_grid(ColPartitionGrid* line_grid) { + line_grid_ = line_grid; +} +void TableRecognizer::set_min_height(int height) { + min_height_ = height; +} +void TableRecognizer::set_min_width(int width) { + min_width_ = width; +} +void TableRecognizer::set_max_text_height(int height) { + max_text_height_ = height; +} + +StructuredTable* TableRecognizer::RecognizeTable(const TBOX& guess) { + StructuredTable* table = new StructuredTable(); + table->Init(); + table->set_text_grid(text_grid_); + table->set_line_grid(line_grid_); + table->set_max_text_height(max_text_height_); + + // Try to solve this simple case, a table with *both* + // vertical and horizontal lines. + if (RecognizeLinedTable(guess, table)) + return table; + + // Fallback to whitespace if that failed. + // TODO(nbeato): Break this apart to take advantage of horizontal + // lines or vertical lines when present. + if (RecognizeWhitespacedTable(guess, table)) + return table; + + // No table found... 
+ delete table; + return NULL; +} + +bool TableRecognizer::RecognizeLinedTable(const TBOX& guess_box, + StructuredTable* table) { + if (!HasSignificantLines(guess_box)) + return false; + TBOX line_bound = guess_box; + if (!FindLinesBoundingBox(&line_bound)) + return false; + table->set_bounding_box(line_bound); + return table->FindLinedStructure(); +} + +// Quick implementation. Just count the number of lines in the box. +// A better implementation would count intersections and look for connected +// components. It could even go as far as finding similar length lines. +// To account for these possible issues, the VerifyLinedTableCells function +// will reject lined tables that cause intersections with text on the page. +// TODO(nbeato): look for "better" lines +bool TableRecognizer::HasSignificantLines(const TBOX& guess) { + ColPartitionGridSearch box_search(line_grid_); + box_search.SetUniqueMode(true); + box_search.StartRectSearch(guess); + ColPartition* line = NULL; + int vertical_count = 0; + int horizontal_count = 0; + + while ((line = box_search.NextRectSearch()) != NULL) { + if (line->IsHorizontalLine()) + ++horizontal_count; + if (line->IsVerticalLine()) + ++vertical_count; + } + + return vertical_count >= kLinedTableMinVerticalLines && + horizontal_count >= kLinedTableMinHorizontalLines; +} + +// Given a bounding box with a bunch of horizontal / vertical lines, +// we just find the extents of all of these lines iteratively. +// The box will be at least as large as guess. This +// could possibly be a bad assumption. +// It is guaranteed to halt in at most O(n * gridarea) where n +// is the number of lines. +// The assumption is that growing the box iteratively will add lines +// several times, but eventually we'll find the extents. +// +// For tables, the approach is a bit aggressive, a single line (which could be +// noise or a column ruling) can destroy the table inside. +// +// TODO(nbeato): This is a quick first implementation. 
+// A better implementation would actually look for consistency +// in extents of the lines and find the extents using lines +// that clearly describe the table. This would allow the +// lines to "vote" for height/width. An approach like +// this would solve issues with page layout rulings. +// I haven't looked for these issues yet, so I can't even +// say they happen confidently. +bool TableRecognizer::FindLinesBoundingBox(TBOX* bounding_box) { + // The first iteration will tell us if there are lines + // present and shrink the box to a minimal iterative size. + if (!FindLinesBoundingBoxIteration(bounding_box)) + return false; + + // Keep growing until the area of the table stabilizes. + // The box can only get bigger, increasing area. + bool changed = true; + while (changed) { + changed = false; + int old_area = bounding_box->area(); + bool check = FindLinesBoundingBoxIteration(bounding_box); + // At this point, the function will return true. + ASSERT_HOST(check); + ASSERT_HOST(bounding_box->area() >= old_area); + changed = (bounding_box->area() > old_area); + } + + return true; +} + +bool TableRecognizer::FindLinesBoundingBoxIteration(TBOX* bounding_box) { + // Search for all of the lines in the current box, keeping track of extents. + ColPartitionGridSearch box_search(line_grid_); + box_search.SetUniqueMode(true); + box_search.StartRectSearch(*bounding_box); + ColPartition* line = NULL; + bool first_line = true; + + while ((line = box_search.NextRectSearch()) != NULL) { + if (line->IsLineType()) { + if (first_line) { + // The first iteration can shrink the box. + *bounding_box = line->bounding_box(); + first_line = false; + } else { + *bounding_box += line->bounding_box(); + } + } + } + return !first_line; +} + +// The goal of this function is to move the table boundaries around and find +// a table that maximizes the whitespace around the table while maximizing +// the cellular structure. 
As a result, it gets confused by headers, footers, +// and merged columns (text that crosses columns). There is a tolerance +// that allows a few partitions to count towards potential cell merges. +// It's the max_merged parameter to FindPartitionLocations. +// It can work, but it needs some false positive remove on boundaries. +// For now, the grid structure must not intersect any partitions. +// Also, small tolerance is added to the horizontal lines for tightly packed +// tables. The tolerance is added by adjusting the bounding boxes of the +// partitions (in FindHorizontalPartitions). The current implementation +// only adjusts the vertical extents of the table. +// +// Also note. This was hacked at a lot. It could probably use some +// more hacking at to find a good set of border conditions and then a +// nice clean up. +bool TableRecognizer::RecognizeWhitespacedTable(const TBOX& guess_box, + StructuredTable* table) { + TBOX best_box = guess_box; // Best borders known. + int best_below = 0; // Margin size above best table. + int best_above = 0; // Margin size below best table. + TBOX adjusted = guess_box; // The search box. + + // We assume that the guess box is somewhat accurate, so we don't allow + // the adjusted border to pass half of the guessed area. This prevents + // "negative" tables from forming. + const int kMidGuessY = (guess_box.bottom() + guess_box.top()) / 2; + // Keeps track of the most columns in an accepted table. The resulting table + // may be less than the max, but we don't want to stray too far. + int best_cols = 0; + // Make sure we find a good border. + bool found_good_border = false; + + // Find the bottom of the table by trying a few different locations. For + // each location, the top, left, and right are fixed. We start the search + // in a smaller table to favor best_cols getting a good estimate sooner. 
+ int last_bottom = MAX_INT32; + int bottom = NextHorizontalSplit(guess_box.left(), guess_box.right(), + kMidGuessY - min_height_ / 2, true); + int top = NextHorizontalSplit(guess_box.left(), guess_box.right(), + kMidGuessY + min_height_ / 2, false); + adjusted.set_top(top); + + // Headers/footers can be spaced far from everything. + // Make sure that the space below is greater than the space above + // the lowest row. + int previous_below = 0; + const int kMaxChances = 10; + int chances = kMaxChances; + while (bottom != last_bottom) { + adjusted.set_bottom(bottom); + + if (adjusted.height() >= min_height_) { + // Try to fit the grid on the current box. We give it a chance + // if the number of columns didn't significantly drop. + table->set_bounding_box(adjusted); + if (table->FindWhitespacedStructure() && + table->column_count() >= best_cols * kRequiredColumns) { + if (false && IsWeakTableRow(table, 0)) { + // Currently buggy, but was looking promising so disabled. + --chances; + } else { + // We favor 2 things, + // 1- Adding rows that have partitioned data. + // 2- Better margins (to find header/footer). + // For better tables, we just look for multiple cells in the + // bottom row with data in them. + // For margins, the space below the last row should + // be better than a table with the last row removed. 
+ chances = kMaxChances; + double max_row_height = kMaxRowSize * table->median_cell_height(); + if ((table->space_below() * kMarginFactor >= best_below && + table->space_below() >= previous_below) || + (table->CountFilledCellsInRow(0) > 1 && + table->row_height(0) < max_row_height)) { + best_box.set_bottom(bottom); + best_below = table->space_below(); + best_cols = MAX(table->column_count(), best_cols); + found_good_border = true; + } + } + previous_below = table->space_below(); + } else { + --chances; + } + } + if (chances <= 0) + break; + + last_bottom = bottom; + bottom = NextHorizontalSplit(guess_box.left(), guess_box.right(), + last_bottom, true); + } + if (!found_good_border) + return false; + + // TODO(nbeato) comments: follow modified code above... put it in a function! + found_good_border = false; + int last_top = MIN_INT32; + top = NextHorizontalSplit(guess_box.left(), guess_box.right(), + kMidGuessY + min_height_ / 2, false); + int previous_above = 0; + chances = kMaxChances; + + adjusted.set_bottom(best_box.bottom()); + while (last_top != top) { + adjusted.set_top(top); + if (adjusted.height() >= min_height_) { + table->set_bounding_box(adjusted); + if (table->FindWhitespacedStructure() && + table->column_count() >= best_cols * kRequiredColumns) { + int last_row = table->row_count() - 1; + if (false && IsWeakTableRow(table, last_row)) { + // Currently buggy, but was looking promising so disabled. 
+ --chances; + } else { + chances = kMaxChances; + double max_row_height = kMaxRowSize * table->median_cell_height(); + if ((table->space_above() * kMarginFactor >= best_above && + table->space_above() >= previous_above) || + (table->CountFilledCellsInRow(last_row) > 1 && + table->row_height(last_row) < max_row_height)) { + best_box.set_top(top); + best_above = table->space_above(); + best_cols = MAX(table->column_count(), best_cols); + found_good_border = true; + } + } + previous_above = table->space_above(); + } else { + --chances; + } + } + if (chances <= 0) + break; + + last_top = top; + top = NextHorizontalSplit(guess_box.left(), guess_box.right(), + last_top, false); + } + + if (!found_good_border) + return false; + + // If we get here, this shouldn't happen. It can be an assert, but + // I haven't tested it enough to make it crash things. + if (best_box.null_box()) + return false; + + // Given the best locations, fit the box to those locations. + table->set_bounding_box(best_box); + return table->FindWhitespacedStructure(); +} + +// Finds the closest value to y that can safely cause a horizontal +// split in the partitions. +// This function has been buggy and not as reliable as I would've +// liked. I suggest finding all of the splits using the +// FindPartitionLocations once and then just keeping the results +// of that function cached somewhere. 
+int TableRecognizer::NextHorizontalSplit(int left, int right, int y, + bool top_to_bottom) { + ColPartitionGridSearch gsearch(text_grid_); + gsearch.SetUniqueMode(true); + gsearch.StartVerticalSearch(left, right, y); + ColPartition* text = NULL; + int last_y = y; + while ((text = gsearch.NextVerticalSearch(top_to_bottom)) != NULL) { + if (!text->IsTextType() || !text->IsHorizontalType()) + continue; + if (text->bounding_box().height() > max_text_height_) + continue; + + const TBOX& text_box = text->bounding_box(); + if (top_to_bottom && (last_y >= y || last_y <= text_box.top())) { + last_y = MIN(last_y, text_box.bottom()); + continue; + } + if (!top_to_bottom && (last_y <= y || last_y >= text_box.bottom())) { + last_y = MAX(last_y, text_box.top()); + continue; + } + + return last_y; + } + // If none is found, we at least want to preserve the min/max, + // which defines the overlap of y with the last partition in the grid. + return last_y; +} + +// Code is buggy right now. It is disabled in the calling function. +// It seems like sometimes the row that is passed in is not correct +// sometimes (like a phantom row is introduced). There's something going +// on in the cell_y_ data member before this is called... not certain. 
+bool TableRecognizer::IsWeakTableRow(StructuredTable* table, int row) { + if (!table->VerifyRowFilled(row)) + return false; + + double threshold = 0.0; + if (table->column_count() > kGoodRowNumberOfColumnsSmallSize) + threshold = table->column_count() * kGoodRowNumberOfColumnsLarge; + else + threshold = kGoodRowNumberOfColumnsSmall[table->column_count()]; + + return table->CountFilledCellsInRow(row) < threshold; +} + +} // namespace tesseract diff --git a/textord/tablerecog.h b/textord/tablerecog.h new file mode 100644 index 0000000000..d83e96aff4 --- /dev/null +++ b/textord/tablerecog.h @@ -0,0 +1,378 @@ +/////////////////////////////////////////////////////////////////////// +// File: tablerecog.h +// Description: Functions to detect structure of tables. +// Author: Nicholas Beato +// Created: Aug 17, 2010 +// +// (C) Copyright 2010, Google Inc. +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +/////////////////////////////////////////////////////////////////////// + +#ifndef TABLERECOG_H_ +#define TABLERECOG_H_ + +#include "colpartitiongrid.h" +#include "genericvector.h" + +namespace tesseract { + +// There are 2 classes in this file. They have 2 different purposes. +// - StructuredTable contains the methods to find the structure given +// a specific bounding box and grow that structure. +// - TableRecognizer contains the methods to adjust the possible positions +// of a table without worrying about structure. 
+// +// To use these classes, the assumption is that the TableFinder will +// have a guess of the location of a table (or possibly over/undersegmented +// tables). The TableRecognizer is responsible for finding the table boundaries +// at a high level. The StructuredTable class is responsible for determining +// the structure of the table and trying to maximize its bounds while retaining +// the structure. +// (The latter part is not implemented yet, but that was the goal). +// +// While on the boundary discussion, keep in mind that this is a first pass. +// There should eventually be some things like internal structure checks, +// and, more importantly, surrounding text flow checks. +// + +// Usage: +// The StructuredTable class contains methods to query a potential table. +// It has functions to find structure, count rows, find ColPartitions that +// intersect gridlines, etc. It is not meant to blindly find a table. It +// is meant to start with a known table location and enhance it. +// Usage: +// ColPartitionGrid text_grid, line_grid; // init +// TBOX table_box; // known location of table location +// +// StructuredTable table; +// table.Init(); // construction code +// table.set_text_grid(/* text */); // These 2 grids can be the same! +// table.set_line_grid(/* lines */); +// table.set_min_text_height(10); // Filter vertical and tall text. +// // IMPORTANT! The table needs to be told where it is! +// table.set_bounding_box(table_box); // Set initial table location. +// if (table.FindWhitespacedStructure()) { +// // process table +// table.column_count(); // number of columns +// table.row_count(); // number of rows +// table.cells_count(); // number of cells +// table.bounding_box(); // updated bounding box +// // etc. +// } +// +class StructuredTable { + public: + StructuredTable(); + ~StructuredTable(); + + // Initialization code. Must be called after the constructor. + void Init(); + + // Sets the grids used by the table. 
These can be changed between + // calls to Recognize. They are treated as read-only data. + void set_text_grid(ColPartitionGrid* text); + void set_line_grid(ColPartitionGrid* lines); + // Filters text partitions that are ridiculously tall to prevent + // merging rows. + void set_max_text_height(int height); + + // Basic accessors. Some are treated as attributes despite having indirect + // representation. + bool is_lined() const; + int row_count() const; + int column_count() const; + int cell_count() const; + void set_bounding_box(const TBOX& box); + const TBOX& bounding_box() const; + int median_cell_height(); + int median_cell_width(); + int row_height(int row) const; + int column_width(int column) const; + int space_above() const; + int space_below() const; + + // Given enough horizontal and vertical lines in a region, create this table + // based on the structure given by the lines. Return true if it worked out. + // Code assumes the lines exist. It is the caller's responsibility to check + // for lines and find an appropriate bounding box. + bool FindLinedStructure(); + + // The main subroutine for finding generic table structure. The function + // finds the grid structure in the given box. Returns true if a good grid + // exists, implying that "this" table is valid. + bool FindWhitespacedStructure(); + + //////// + //////// Functions to query table info. + //////// + + // Returns true if inserting part into the table does not cause any + // cell merges. + bool DoesPartitionFit(const ColPartition& part) const; + // Checks if a sub-table has multiple data cells filled. + int CountFilledCells(); + int CountFilledCellsInRow(int row); + int CountFilledCellsInColumn(int column); + int CountFilledCells(int row_start, int row_end, + int column_start, int column_end); + + // Makes sure that at least one cell in a row has substantial area filled. + // This can filter out large whitespace caused by growing tables too far + // and page numbers. 
+ // (currently bugged for some reason). + bool VerifyRowFilled(int row); + // Finds the filled area in a cell. + double CalculateCellFilledPercentage(int row, int column); + + // Debug display, draws the table in the given color. If the table is not + // valid, the table and "best" grid lines are still drawn in the given color. + void Display(ScrollView* window, ScrollView::Color color); + + protected: + // Clear the structure information. + void ClearStructure(); + + //////// + //////// Lined tables + //////// + + // Verifies the lines do not intersect partitions. This happens when + // the lines are in column boundaries and extend the full page. As a result, + // the grid lines go through column text. The condition is detectable. + bool VerifyLinedTableCells(); + + //////// + //////// Tables with whitespace + //////// + + // This is the function to change if you want to filter resulting tables + // better. Right now it just checks for a minimum cell count and such. + // You could add things like maximum number of ColPartitions per cell or + // similar. + bool VerifyWhitespacedTable(); + // Find the columns of a table using whitespace. + void FindWhitespacedColumns(); + // Find the rows of a table using whitespace. + void FindWhitespacedRows(); + + //////// + //////// Functions to provide information about the table. + //////// + + // Calculates the whitespace around the table using the table boundary and + // the supplied grids (set_text_grid and set_line_grid). + void CalculateMargins(); + // Update the table margins with the supplied grid. This is + // only called by calculate margins to use multiple grid sources. + void UpdateMargins(ColPartitionGrid* grid); + int FindVerticalMargin(ColPartitionGrid* grid, int start_x, + bool decrease) const; + int FindHorizontalMargin(ColPartitionGrid* grid, int start_y, + bool decrease) const; + // Calculates stats on the table, namely the median cell height and width. 
+ void CalculateStats(); + + //////// + //////// Functions to try to "fix" some table errors. + //////// + + // Given a whitespaced table, this looks for bordering lines that might + // be page layout boxes around the table. It is necessary to get the margins + // correct on the table. If the lines are not joined, the margins will be + // the distance to the line, which is not right. + void AbsorbNearbyLines(); + + // Nice utility function for finding partition gaps. You feed it a sorted + // list of all of the mins/maxes of the partitions in the table, and it gives + // you the gaps (middle). This works for both vertical and horizontal + // gaps. + // + // If you want to allow slight overlap in the division and the partitions, + // just scale down the partitions before inserting them in the list. + // Likewise, you can force at least some space between partitions. + // This trick is how the horizontal partitions are done (since the page + // skew could make it hard to find splits in the text). + // + // As a result, "0 distance" between closest partitions causes a gap. + // This is not a programmatic assumption. It is intentional and simplifies + // things. + // + // "max_merged" indicates both the minimum number of stacked partitions + // to cause a cell (add 1 to it), and the maximum number of partitions that + // a grid line can intersect. For example, if max_merged is 0, then lines + // are inserted wherever space exists between partitions. If it is 2, + // lines may intersect 2 partitions at most, but you also need at least + // 2 partitions to generate a line. + static void FindCellSplitLocations(const GenericVector& min_list, + const GenericVector& max_list, + int max_merged, + GenericVector* locations); + + //////// + //////// Utility function for table queries + //////// + + // Counts the number of ColPartitions that intersect vertical cell + // division at this x value. Used by VerifyLinedTable. 
+ int CountVerticalIntersections(int x); + int CountHorizontalIntersections(int y); + + // Counts how many text partitions are in this box. + int CountPartitions(const TBOX& box); + + //////// + //////// Data members. + //////// + + // Input data, used as read only data to make decisions. + ColPartitionGrid* text_grid_; // Text ColPartitions + ColPartitionGrid* line_grid_; // Line ColPartitions + // Table structure. + // bounding box is a convenient external representation. + // cell_x_ and cell_y_ indicate the grid lines. + TBOX bounding_box_; // Bounding box + GenericVectorEqEq cell_x_; // Locations of vertical divisions (sorted) + GenericVectorEqEq cell_y_; // Locations of horizontal divisions (sorted) + bool is_lined_; // Is the table backed up by a line structure + // Table margins, set via CalculateMargins + int space_above_; + int space_below_; + int space_left_; + int space_right_; + int median_cell_height_; + int median_cell_width_; + // Filters, used to prevent awkward partitions from destroying structure. + int max_text_height_; +}; + +class TableRecognizer { + public: + TableRecognizer(); + ~TableRecognizer(); + + // Initialization code. Must be called after the constructor. + void Init(); + + //////// + //////// Pre-recognize methods to initial table constraints. + //////// + + // Sets the grids used by the table. These can be changed between + // calls to Recognize. They are treated as read-only data. + void set_text_grid(ColPartitionGrid* text); + void set_line_grid(ColPartitionGrid* lines); + // Sets some additional constraints on the table. + void set_min_height(int height); + void set_min_width(int width); + // Filters text partitions that are ridiculously tall to prevent + // merging rows. Note that "filters" refers to allowing horizontal + // cells to slice through them on the premise that they were + // merged text rows during previous layout. 
+ void set_max_text_height(int height); + + // Given a guess location, the RecognizeTable function will try to find a + // structured grid in the area. On success, it will return a new + // StructuredTable (and assumes you will delete it). Otherwise, + // NULL is returned. + // + // Keep in mind, this may "overgrow" or "undergrow" the size of guess. + // Ideally, there is either a one-to-one correspondence between + // the guess and table or no table at all. This is not the best of + // assumptions right now, but was made to try to keep things simple in + // the first pass. + // + // If a line structure is available on the page in the given region, + // the table will use the linear structure as it is. + // Otherwise, it will try to maximize the whitespace around it while keeping + // a grid structure. This is somewhat working. + // + // Since the combination of adjustments can get high, effort was + // originally made to keep the number of adjustments linear in the number + // of partitions. The underlying structure finding code used to be + // much more complex. I don't know how necessary this constraint is anymore. + // The evaluation of a possible table is kept within O(nlogn) in the size of + // the table (where size is the number of partitions in the table). + // As a result, the algorithm is capable of O(n^2 log n). Depending + // on the grid search size, it may be higher. + // + // Last note: it is possible to just try all partition boundaries at a high + // level O(n^4) and do a verification scheme (at least O(nlogn)). If there + // are 200 partitions on a page, this could be too costly. Effort could go + // into pruning the search, but I opted for something quicker. I'm confident + // that the independent adjustments can get similar results and keep the + // complexity down. However, the other approach could work without using + // TableFinder at all if it is fast enough. It comes down to properly + // deciding what is a table. 
The code currently relies on TableFinder's + guess to the location of a table for that. + StructuredTable* RecognizeTable(const TBOX& guess_box); + + protected: + //////// + //////// Lined tables + //////// + + // Returns true if the given box has a lined table within it. The + // table argument will be updated with the table if the table exists. + bool RecognizeLinedTable(const TBOX& guess_box, StructuredTable* table); + // Returns true if the given box has a large number of horizontal and + // vertical lines present. If so, we assume the extent of these lines + // uniquely defines a table and find that table via SolveLinedTable. + bool HasSignificantLines(const TBOX& guess); + + // Given enough horizontal and vertical lines in a region, find a bounding + // box that encloses all of them (as well as newly introduced lines). + // The bounding box is the smallest box that encloses the lines in guess + // without having any lines sticking out of it. + // bounding_box is an in/out parameter. + // On input, it is the extents of the box to search. + // On output, it is the resulting bounding box. + bool FindLinesBoundingBox(TBOX* bounding_box); + // Iteration in above search. + // bounding_box is an in/out parameter. + // On input, it is the extents of the box to search. + // On output, it is the resulting bounding box. + bool FindLinesBoundingBoxIteration(TBOX* bounding_box); + + //////// + //////// Generic "whitespaced" tables + //////// + + // Returns true if the given box has a whitespaced table within it. The + // table argument will be updated if the table exists. Also note + // that this method will fail if the guess_box center is not + // mostly within the table. + bool RecognizeWhitespacedTable(const TBOX& guess_box, StructuredTable* table); + + // Finds the location of a horizontal split relative to y. + // This function is mostly unused now. If the SolveWhitespacedTable + // changes much, it can be removed. 
Note, it isn't really as reliable + // as I thought. I went with alternatives for most of the other uses. + int NextHorizontalSplit(int left, int right, int y, bool top_to_bottom); + + // Indicates that a table row is weak. This means that it has + // many missing data cells or very large cell heights compared. + // to the rest of the table. + static bool IsWeakTableRow(StructuredTable* table, int row); + + // Input data, used as read only data to make decisions. + ColPartitionGrid* text_grid_; // Text ColPartitions + ColPartitionGrid* line_grid_; // Line ColPartitions + // Table constraints, a "good" table must satisfy these. + int min_height_; + int min_width_; + // Filters, used to prevent awkward partitions from destroying structure. + int max_text_height_; // Horizontal lines may intersect taller text. +}; + +} // namespace tesseract + +#endif /* TABLERECOG_H_ */ diff --git a/textord/tabvector.cpp b/textord/tabvector.cpp index b16d3ce7cd..d98d8350e2 100644 --- a/textord/tabvector.cpp +++ b/textord/tabvector.cpp @@ -47,8 +47,12 @@ const int kSimilarRaggedDist = 50; const int kMaxFillinMultiple = 11; // Min fraction of mean gutter size to allow a gutter on a good tab blob. const double kMinGutterFraction = 0.5; -// Max fraction of mean blob width allowed for vertical gaps in vertical text. 
-const double kVerticalTextGapFraction = 0.5; + +double_VAR(textord_tabvector_vertical_gap_fraction, 0.5, + "max fraction of mean blob width allowed for vertical gaps in vertical text"); + +double_VAR(textord_tabvector_vertical_box_ratio, 0.5, + "Fraction of box matches required to declare a line vertical"); ELISTIZE(TabConstraint) @@ -194,7 +198,7 @@ TabVector* TabVector::FitVector(TabAlignment alignment, ICOORD vertical, TabVector::TabVector(const TabVector& src, TabAlignment alignment, const ICOORD& vertical_skew, BLOBNBOX* blob) : extended_ymin_(src.extended_ymin_), extended_ymax_(src.extended_ymax_), - sort_key_(0), percent_score_(0), + sort_key_(0), percent_score_(0), mean_width_(0), needs_refit_(true), needs_evaluation_(true), alignment_(alignment), top_constraints_(NULL), bottom_constraints_(NULL) { BLOBNBOX_C_IT it(&boxes_); @@ -214,6 +218,21 @@ TabVector::TabVector(const TabVector& src, TabAlignment alignment, Print("Constructed a new tab vector:"); } +// Copies basic attributes of a tab vector for simple operations. +// Copies things such startpt, endpt, range. +// Does not copy things such as partners, boxes, or constraints. +// This is useful if you only need vector information for processing, such +// as in the table detection code. +TabVector* TabVector::ShallowCopy() const { + TabVector* copy = new TabVector(); + copy->startpt_ = startpt_; + copy->endpt_ = endpt_; + copy->alignment_ = alignment_; + copy->extended_ymax_ = extended_ymax_; + copy->extended_ymin_ = extended_ymin_; + return copy; +} + // Extend this vector to include the supplied blob if it doesn't // already have it. void TabVector::ExtendToBox(BLOBNBOX* new_blob) { @@ -250,10 +269,18 @@ void TabVector::SetYEnd(int end_y) { endpt_.set_y(end_y); } -// Rotate the ends by the given vector. +// Rotate the ends by the given vector. Auto flip start and end if needed. 
void TabVector::Rotate(const FCOORD& rotation) { startpt_.rotate(rotation); endpt_.rotate(rotation); + int dx = endpt_.x() - startpt_.x(); + int dy = endpt_.y() - startpt_.y(); + if ((dy < 0 && abs(dy) > abs(dx)) || (dx < 0 && abs(dx) > abs(dy))) { + // Need to flip start/end. + ICOORD tmp = startpt_; + startpt_ = endpt_; + endpt_ = tmp; + } } // Setup the initial constraints, being the limits of @@ -488,10 +515,11 @@ void TabVector::Print(const char* prefix) { if (this == NULL) { tprintf("%s \n", prefix); } else { - tprintf("%s %s (%d,%d)->(%d,%d) s=%d, sort key=%d, boxes=%d, partners=%d\n", + tprintf("%s %s (%d,%d)->(%d,%d) w=%d s=%d, sort key=%d, boxes=%d," + " partners=%d\n", prefix, kAlignmentNames[alignment_], startpt_.x(), startpt_.y(), endpt_.x(), endpt_.y(), - percent_score_, sort_key_, + mean_width_, percent_score_, sort_key_, boxes_.length(), partners_.length()); } } @@ -550,6 +578,7 @@ void TabVector::FitAndEvaluateIfNeeded(const ICOORD& vertical, // A second pass then further filters boxes by requiring that the gutter // width be a minimum fraction of the mean gutter along the line. void TabVector::Evaluate(const ICOORD& vertical, TabFind* finder) { + bool debug = false; needs_evaluation_ = false; int length = endpt_.y() - startpt_.y(); if (length == 0 || boxes_.empty()) { @@ -582,6 +611,11 @@ void TabVector::Evaluate(const ICOORD& vertical, TabFind* finder) { BLOBNBOX* bbox = it.data(); const TBOX& box = bbox->bounding_box(); int mid_y = (box.top() + box.bottom()) / 2; + if (TabFind::WithinTestRegion(2, XAtY(box.bottom()), box.bottom())) { + if (!debug) + Print("Starting evaluation"); + debug = true; + } // A good box is one where the nearest neighbour on the inside is closer // than half the distance to the nearest neighbour on the outside // (of the putative column). 
@@ -617,10 +651,11 @@ void TabVector::Evaluate(const ICOORD& vertical, TabFind* finder) { double size2 = sqrt(static_cast(box.area())); if (vertical_gap < kMaxFillinMultiple * MIN(size1, size2)) good_length += vertical_gap; - if (TabFind::WithinTestRegion(2, tab_x, mid_y)) + if (debug) { tprintf("Box and prev good, gap=%d, target %g, goodlength=%d\n", vertical_gap, kMaxFillinMultiple * MIN(size1, size2), good_length); + } } else { // Adjust the start to the first good box. SetYStart(box.bottom()); @@ -628,7 +663,7 @@ prev_good_box = &box; } else { // Get rid of boxes that are not good. - if (TabFind::WithinTestRegion(2, tab_x, mid_y)) { + if (debug) { tprintf("Bad Box (%d,%d)->(%d,%d) with gutter %d, ndist %d\n", box.left(), box.bottom(), box.right(), box.top(), gutter_width, neighbour_gap); @@ -637,6 +672,9 @@ deleted_a_box = true; } } + if (debug) { + Print("Evaluating:"); + } // If there are any good boxes, do it again, except this time get rid of // boxes that have a gutter that is a small fraction of the mean gutter. // This filters out ends that run into a coincidental gap in the text. @@ -697,14 +735,23 @@ } // (Re)Fit a line to the stored points. Returns false if the line -// is degenerate. +// is degenerate. Although the TabVector code mostly doesn't care about the +// direction of lines, XAtY would give silly results for a horizontal line. +// The class is mostly aimed at use for vertical lines representing +// horizontal tab stops. bool TabVector::Fit(ICOORD vertical, bool force_parallel) { needs_refit_ = false; - if (boxes_.empty() && !force_parallel) { + if (boxes_.empty()) { // Don't refit something with no boxes, as that only happens // in Evaluate, and we don't want to end up with a zero vector. - // If we are forcing parallel, then that is OK. 
- return false; + if (!force_parallel) + return false; + // If we are forcing parallel, then we just need to set the sort_key_. + ICOORD midpt = startpt_; + midpt += endpt_; + midpt /= 2; + sort_key_ = SortKey(vertical, midpt.x(), midpt.y()); + return startpt_.y() != endpt_.y(); } if (!force_parallel && !IsRagged()) { // Use a fitted line as the vertical. @@ -734,9 +781,13 @@ bool TabVector::Fit(ICOORD vertical, bool force_parallel) { BLOBNBOX_C_IT it(&boxes_); // Choose a line parallel to the vertical such that all boxes are on the // correct side of it. + mean_width_ = 0; + int width_count = 0; for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { BLOBNBOX* bbox = it.data(); TBOX box = bbox->bounding_box(); + mean_width_ += box.width(); + ++width_count; int x1 = IsRightTab() ? box.right() : box.left(); // Test both the bottom and the top, as one will be more extreme, depending // on the direction of skew. @@ -757,11 +808,8 @@ bool TabVector::Fit(ICOORD vertical, bool force_parallel) { if (it.at_last()) end_y = top_y; } - if (boxes_.empty()) { - ICOORD midpt = startpt_; - midpt += endpt_; - midpt /= 2; - sort_key_ = SortKey(vertical, midpt.x(), midpt.y()); + if (width_count > 0) { + mean_width_ = (mean_width_ + width_count - 1) / width_count; } endpt_ = startpt_ + vertical; needs_evaluation_ = true; @@ -776,6 +824,15 @@ bool TabVector::Fit(ICOORD vertical, bool force_parallel) { return false; } +// Returns the singleton partner if there is one, or NULL otherwise. +TabVector* TabVector::GetSinglePartner() { + if (!partners_.singleton()) + return NULL; + TabVector_C_IT partner_it(&partners_); + TabVector* partner = partner_it.data(); + return partner; +} + // Return the partner of this TabVector if the vector qualifies as // being a vertical text line, otherwise NULL. 
TabVector* TabVector::VerticalTextlinePartner() { @@ -787,6 +844,10 @@ TabVector* TabVector::VerticalTextlinePartner() { BLOBNBOX_C_IT box_it2(&partner->boxes_); // Count how many boxes are also in the other list. // At the same time, gather the mean width and median vertical gap. + if (textord_debug_tabfind > 1) { + Print("Testing for vertical text"); + partner->Print(" partner"); + } int num_matched = 0; int num_unmatched = 0; int total_widths = 0; @@ -815,33 +876,27 @@ TabVector* TabVector::VerticalTextlinePartner() { total_widths += box.width(); prev_bbox = bbox; } + double avg_width = total_widths * 1.0 / (num_unmatched + num_matched); + double max_gap = textord_tabvector_vertical_gap_fraction * avg_width; + int min_box_match = static_cast((num_matched + num_unmatched) * + textord_tabvector_vertical_box_ratio); + bool is_vertical = (gaps.get_total() > 0 && + num_matched >= min_box_match && + gaps.median() <= max_gap); if (textord_debug_tabfind > 1) { - Print("Testing for vertical text"); - tprintf("gaps=%d, matched=%d, unmatched=%d, median gap=%.2f, width=%.2f\n", - gaps.get_total(), num_matched, num_unmatched, - gaps.median(), - total_widths * 1.0 / (num_unmatched + num_matched)); - } - if (gaps.get_total() == 0 || num_matched <= num_unmatched) { - return NULL; - } - // It qualifies if the median gap is less than kVerticalTextGapFraction * - // mean width. - if (gaps.median() >= total_widths * kVerticalTextGapFraction / - (num_unmatched + num_matched)) { - return NULL; + tprintf("gaps=%d, matched=%d, unmatched=%d, min_match=%d " + "median gap=%.2f, width=%.2f max_gap=%.2f Vertical=%s\n", + gaps.get_total(), num_matched, num_unmatched, min_box_match, + gaps.median(), avg_width, max_gap, is_vertical?"Yes":"No"); } - if (textord_debug_tabfind > 1) { - tprintf("Vertical text found\n"); - } - return partner; + return (is_vertical) ? partner : NULL; } // The constructor is private. 
TabVector::TabVector(int extended_ymin, int extended_ymax, TabAlignment alignment, BLOBNBOX_CLIST* boxes) : extended_ymin_(extended_ymin), extended_ymax_(extended_ymax), - sort_key_(0), percent_score_(0), + sort_key_(0), percent_score_(0), mean_width_(0), needs_refit_(true), needs_evaluation_(true), alignment_(alignment), top_constraints_(NULL), bottom_constraints_(NULL) { BLOBNBOX_C_IT it(&boxes_); @@ -884,4 +939,3 @@ void TabVector::Delete(TabVector* replacement) { } // namespace tesseract. - diff --git a/textord/tabvector.h b/textord/tabvector.h index 0b0b83e9bc..bc8440da6b 100644 --- a/textord/tabvector.h +++ b/textord/tabvector.h @@ -26,8 +26,6 @@ #include "rect.h" #include "bbgrid.h" -#undef TA_CENTER - class BLOBNBOX; class ScrollView; @@ -35,12 +33,18 @@ CLISTIZEH(BLOBNBOX) namespace tesseract { + +extern double_VAR_H(textord_tabvector_vertical_gap_fraction, 0.5, + "Max fraction of mean blob width allowed for vertical gaps in vertical text"); +extern double_VAR_H(textord_tabvector_vertical_box_ratio, 0.5, + "Fraction of box matches required to declare a line vertical"); + // The alignment type that a tab vector represents. // Keep this enum synced with kAlignmentNames in tabvector.cpp. enum TabAlignment { TA_LEFT_ALIGNED, TA_LEFT_RAGGED, - TA_CENTER, + TA_CENTER_JUSTIFIED, TA_RIGHT_ALIGNED, TA_RIGHT_RAGGED, TA_SEPARATOR, @@ -53,6 +57,7 @@ class TabFind; class TabVector; class TabConstraint; typedef BBGrid BlobGrid; +typedef GridSearch BlobGridSearch; ELIST2IZEH(TabVector) CLISTIZEH(TabVector) @@ -133,6 +138,13 @@ class TabVector : public ELIST2_LINK { TabVector(const TabVector& src, TabAlignment alignment, const ICOORD& vertical_skew, BLOBNBOX* blob); + // Copies basic attributes of a tab vector for simple operations. + // Copies things such startpt, endpt, range, width. + // Does not copy things such as partners, boxes, or constraints. + // This is useful if you only need vector information for processing, such + // as in the table detection code. 
+ TabVector* ShallowCopy() const; + // Simple accessors. const ICOORD& startpt() const { return startpt_; @@ -149,6 +161,9 @@ class TabVector : public ELIST2_LINK { int sort_key() const { return sort_key_; } + int mean_width() const { + return mean_width_; + } void set_top_constraints(TabConstraint_LIST* constraints) { top_constraints_ = constraints; } @@ -158,6 +173,12 @@ class TabVector : public ELIST2_LINK { TabVector_CLIST* partners() { return &partners_; } + void set_startpt(const ICOORD& start) { + startpt_ = start; + } + void set_endpt(const ICOORD& end) { + endpt_ = end; + } // Inline quasi-accessors that require some computation. @@ -197,6 +218,10 @@ class TabVector : public ELIST2_LINK { bool IsSeparator() const { return alignment_ == TA_SEPARATOR; } + // Return true if this is a center aligned tab stop. + bool IsCenterTab() const { + return alignment_ == TA_CENTER_JUSTIFIED; + } // Return true if this is a ragged tab top, either left or right. bool IsRagged() const { return alignment_ == TA_LEFT_RAGGED || alignment_ == TA_RIGHT_RAGGED; @@ -323,13 +348,21 @@ class TabVector : public ELIST2_LINK { void Evaluate(const ICOORD& vertical, TabFind* finder); // (Re)Fit a line to the stored points. Returns false if the line - // is degenerate. + // is degenerate. Although the TabVector code mostly doesn't care about the + // direction of lines, XAtY would give silly results for a horizontal line. + // The class is mostly aimed at use for vertical lines representing + // horizontal tab stops. bool Fit(ICOORD vertical, bool force_parallel); // Return the partner of this TabVector if the vector qualifies as // being a vertical text line, otherwise NULL. TabVector* VerticalTextlinePartner(); + // Return the matching tabvector if there is exactly one partner, or + // NULL otherwise. This can be used after matching is done, e.g. by + // VerticalTextlinePartner(), without checking if the line is vertical.
+ TabVector* GetSinglePartner(); + private: // Constructor is private as the static factory is the external way // to build a TabVector. @@ -354,6 +387,8 @@ class TabVector : public ELIST2_LINK { int sort_key_; // Result of Evaluate 0-100. Coverage of line with good boxes. int percent_score_; + // The mean width of the blobs. Meaningful only for separator lines. + int mean_width_; // True if the boxes_ list has been modified, so a refit is needed. bool needs_refit_; // True if a fit has been done, so re-evaluation is needed. @@ -373,5 +408,3 @@ class TabVector : public ELIST2_LINK { } // namespace tesseract. #endif // TESSERACT_TEXTORD_TABVECTOR_H__ - - diff --git a/textord/tessout.h b/textord/tessout.h deleted file mode 100644 index eecdf54709..0000000000 --- a/textord/tessout.h +++ /dev/null @@ -1,76 +0,0 @@ -/********************************************************************** - * File: tessout.h (Formerly tessconv.h) - * Description: Code to convert from tesseract data to mithras data. - * Author: Ray Smith - * Created: Tue Oct 22 12:54:38 BST 1991 - * - * (C) Copyright 1991, Hewlett-Packard Ltd. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. 
- * - **********************************************************************/ - -#ifndef TESSOUT_H -#define TESSOUT_H - -#include "ocrblock.h" -#include "tessclas.h" -#include "statistc.h" -#include "notdll.h" - -BOOL8 make_blocks_from_rows( //make thresholds - TEXTROW *tessrows, //old style rows - const char *name, //name of image - ICOORD page_tr, //page size - BOOL8 copy_poly, //true to copy poly - BLOCK_IT *block_it //blocks to make - ); -ROW *convert_row( //convert a row - TEXTROW *tessrow, //row to convert - BOOL8 do_shift, //true do do shift - inT16 &top, //top of row - inT16 &bottom //bottom of row - ); -void convert_words( //convert a row - TEXTROW *tessrow, //row to convert - BOOL8 do_shift, //true do do shift - ROW *row //destination - ); -PBLOB *convert_blob( //convert a blob - TBLOB *tblob, //blob to convert - BOOL8 do_shift //true do do shift - ); -void convert_outline( //convert a outline - TESSLINE *tessline, //outline to convert - BOOL8 do_shift, //true do do shift - BOOL8 reverse, //reverse it - OUTLINE_IT *it //output list - ); -void accumulate_word_stats( //get stats - TWERD *word, //word to do - STATS *kern_stats, //kerning - ICOORD &bleft, //corners - ICOORD &tright); -void blob_bounding_box( //get bounding box - TBLOB *blob, //blob to do - inT16 &xmin, //bounding box - inT16 &ymin, - inT16 &xmax, //of blob - inT16 &ymax); -void free_blob( //free tess blob - TBLOB *blob //blob to free - ); -void free_tree( //free outlines - TESSLINE *outline //outlines to free - ); -void free_outline( //free one - TESSLINE *outline //outline to free - ); -#endif diff --git a/textord/textord.cpp b/textord/textord.cpp new file mode 100644 index 0000000000..c1f63e5f4b --- /dev/null +++ b/textord/textord.cpp @@ -0,0 +1,358 @@ +/////////////////////////////////////////////////////////////////////// +// File: textord.cpp +// Description: The top-level text line and word finding functionality. 
+// Author: Ray Smith +// Created: Fri Mar 13 14:43:01 PDT 2009 +// +// (C) Copyright 2009, Google Inc. +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +/////////////////////////////////////////////////////////////////////// + +#include "drawtord.h" +#include "textord.h" +#include "makerow.h" +#include "pageres.h" +#include "tordmain.h" +#include "wordseg.h" + +namespace tesseract { + +Textord::Textord(CCStruct* ccstruct) + : ccstruct_(ccstruct), + // makerow.cpp /////////////////////////////////////////// + BOOL_MEMBER(textord_single_height_mode, false, + "Script has no xheight, so use a single mode", + ccstruct_->params()), + // tospace.cpp /////////////////////////////////////////// + BOOL_MEMBER(tosp_old_to_method, false, "Space stats use prechopping?", + ccstruct_->params()), + BOOL_MEMBER(tosp_old_to_constrain_sp_kn, false, + "Constrain relative values of inter and intra-word gaps for " + "old_to_method.", + ccstruct_->params()), + BOOL_MEMBER(tosp_only_use_prop_rows, true, + "Block stats to use fixed pitch rows?", + ccstruct_->params()), + BOOL_MEMBER(tosp_force_wordbreak_on_punct, false, + "Force word breaks on punct to break long lines in non-space " + "delimited langs", + ccstruct_->params()), + BOOL_MEMBER(tosp_use_pre_chopping, false, + "Space stats use prechopping?", + ccstruct_->params()), + BOOL_MEMBER(tosp_old_to_bug_fix, false, "Fix suspected bug in old code", + ccstruct_->params()), + BOOL_MEMBER(tosp_block_use_cert_spaces, true, + "Only stat 
OBVIOUS spaces", + ccstruct_->params()), + BOOL_MEMBER(tosp_row_use_cert_spaces, true, "Only stat OBVIOUS spaces", + ccstruct_->params()), + BOOL_MEMBER(tosp_narrow_blobs_not_cert, true, + "Only stat OBVIOUS spaces", + ccstruct_->params()), + BOOL_MEMBER(tosp_row_use_cert_spaces1, true, "Only stat OBVIOUS spaces", + ccstruct_->params()), + BOOL_MEMBER(tosp_recovery_isolated_row_stats, true, + "Use row alone when inadequate cert spaces", + ccstruct_->params()), + BOOL_MEMBER(tosp_only_small_gaps_for_kern, false, "Better guess", + ccstruct_->params()), + BOOL_MEMBER(tosp_all_flips_fuzzy, false, "Pass ANY flip to context?", + ccstruct_->params()), + BOOL_MEMBER(tosp_fuzzy_limit_all, true, + "Dont restrict kn->sp fuzzy limit to tables", + ccstruct_->params()), + BOOL_MEMBER(tosp_stats_use_xht_gaps, true, + "Use within xht gap for wd breaks", + ccstruct_->params()), + BOOL_MEMBER(tosp_use_xht_gaps, true, "Use within xht gap for wd breaks", + ccstruct_->params()), + BOOL_MEMBER(tosp_only_use_xht_gaps, false, + "Only use within xht gap for wd breaks", + ccstruct_->params()), + BOOL_MEMBER(tosp_rule_9_test_punct, false, + "Dont chng kn to space next to punct", + ccstruct_->params()), + BOOL_MEMBER(tosp_flip_fuzz_kn_to_sp, true, "Default flip", + ccstruct_->params()), + BOOL_MEMBER(tosp_flip_fuzz_sp_to_kn, true, "Default flip", + ccstruct_->params()), + BOOL_MEMBER(tosp_improve_thresh, false, "Enable improvement heuristic", + ccstruct_->params()), + INT_MEMBER(tosp_debug_level, 0, "Debug data", + ccstruct_->params()), + INT_MEMBER(tosp_enough_space_samples_for_median, 3, + "or should we use mean", + ccstruct_->params()), + INT_MEMBER(tosp_redo_kern_limit, 10, + "No.samples reqd to reestimate for row", + ccstruct_->params()), + INT_MEMBER(tosp_few_samples, 40, + "No.gaps reqd with 1 large gap to treat as a table", + ccstruct_->params()), + INT_MEMBER(tosp_short_row, 20, + "No.gaps reqd with few cert spaces to use certs", + ccstruct_->params()), + 
INT_MEMBER(tosp_sanity_method, 1, "How to avoid being silly", + ccstruct_->params()), + double_MEMBER(tosp_old_sp_kn_th_factor, 2.0, + "Factor for defining space threshold in terms of space and " + "kern sizes", + ccstruct_->params()), + double_MEMBER(tosp_threshold_bias1, 0, + "how far between kern and space?", + ccstruct_->params()), + double_MEMBER(tosp_threshold_bias2, 0, + "how far between kern and space?", + ccstruct_->params()), + double_MEMBER(tosp_narrow_fraction, 0.3, "Fract of xheight for narrow", + ccstruct_->params()), + double_MEMBER(tosp_narrow_aspect_ratio, 0.48, + "narrow if w/h less than this", + ccstruct_->params()), + double_MEMBER(tosp_wide_fraction, 0.52, "Fract of xheight for wide", + ccstruct_->params()), + double_MEMBER(tosp_wide_aspect_ratio, 0.0, "wide if w/h less than this", + ccstruct_->params()), + double_MEMBER(tosp_fuzzy_space_factor, 0.6, + "Fract of xheight for fuzz sp", + ccstruct_->params()), + double_MEMBER(tosp_fuzzy_space_factor1, 0.5, + "Fract of xheight for fuzz sp", + ccstruct_->params()), + double_MEMBER(tosp_fuzzy_space_factor2, 0.72, + "Fract of xheight for fuzz sp", + ccstruct_->params()), + double_MEMBER(tosp_gap_factor, 0.83, "gap ratio to flip sp->kern", + ccstruct_->params()), + double_MEMBER(tosp_kern_gap_factor1, 2.0, "gap ratio to flip kern->sp", + ccstruct_->params()), + double_MEMBER(tosp_kern_gap_factor2, 1.3, "gap ratio to flip kern->sp", + ccstruct_->params()), + double_MEMBER(tosp_kern_gap_factor3, 2.5, "gap ratio to flip kern->sp", + ccstruct_->params()), + double_MEMBER(tosp_ignore_big_gaps, -1, "xht multiplier", + ccstruct_->params()), + double_MEMBER(tosp_ignore_very_big_gaps, 3.5, "xht multiplier", + ccstruct_->params()), + double_MEMBER(tosp_rep_space, 1.6, "rep gap multiplier for space", + ccstruct_->params()), + double_MEMBER(tosp_enough_small_gaps, 0.65, + "Fract of kerns reqd for isolated row stats", + ccstruct_->params()), + double_MEMBER(tosp_table_kn_sp_ratio, 2.25, + "Min difference of kn & sp 
in table", + ccstruct_->params()), + double_MEMBER(tosp_table_xht_sp_ratio, 0.33, + "Expect spaces bigger than this", + ccstruct_->params()), + double_MEMBER(tosp_table_fuzzy_kn_sp_ratio, 3.0, + "Fuzzy if less than this", + ccstruct_->params()), + double_MEMBER(tosp_fuzzy_kn_fraction, 0.5, "New fuzzy kn alg", + ccstruct_->params()), + double_MEMBER(tosp_fuzzy_sp_fraction, 0.5, "New fuzzy sp alg", + ccstruct_->params()), + double_MEMBER(tosp_min_sane_kn_sp, 1.5, + "Dont trust spaces less than this time kn", + ccstruct_->params()), + double_MEMBER(tosp_init_guess_kn_mult, 2.2, + "Thresh guess - mult kn by this", + ccstruct_->params()), + double_MEMBER(tosp_init_guess_xht_mult, 0.28, + "Thresh guess - mult xht by this", + ccstruct_->params()), + double_MEMBER(tosp_max_sane_kn_thresh, 5.0, + "Multiplier on kn to limit thresh", + ccstruct_->params()), + double_MEMBER(tosp_flip_caution, 0.0, + "Dont autoflip kn to sp when large separation", + ccstruct_->params()), + double_MEMBER(tosp_large_kerning, 0.19, + "Limit use of xht gap with large kns", + ccstruct_->params()), + double_MEMBER(tosp_dont_fool_with_small_kerns, -1, + "Limit use of xht gap with odd small kns", + ccstruct_->params()), + double_MEMBER(tosp_near_lh_edge, 0, + "Dont reduce box if the top left is non blank", + ccstruct_->params()), + double_MEMBER(tosp_silly_kn_sp_gap, 0.2, + "Dont let sp minus kn get too small", + ccstruct_->params()), + double_MEMBER(tosp_pass_wide_fuzz_sp_to_context, 0.75, + "How wide fuzzies need context", + ccstruct_->params()), + // tordmain.cpp /////////////////////////////////////////// + BOOL_MEMBER(textord_no_rejects, false, "Don't remove noise blobs", + ccstruct_->params()), + BOOL_MEMBER(textord_show_blobs, false, "Display unsorted blobs", + ccstruct_->params()), + BOOL_MEMBER(textord_show_boxes, false, "Display unsorted blobs", + ccstruct_->params()), + INT_MEMBER(textord_max_noise_size, 7, "Pixel size of noise", + ccstruct_->params()), + 
double_MEMBER(textord_blob_size_bigile, 95, "Percentile for large blobs", + ccstruct_->params()), + double_MEMBER(textord_noise_area_ratio, 0.7, + "Fraction of bounding box for noise", + ccstruct_->params()), + double_MEMBER(textord_blob_size_smallile, 20, + "Percentile for small blobs", + ccstruct_->params()), + double_MEMBER(textord_initialx_ile, 0.75, + "Ile of sizes for xheight guess", + ccstruct_->params()), + double_MEMBER(textord_initialasc_ile, 0.90, + "Ile of sizes for xheight guess", + ccstruct_->params()), + INT_MEMBER(textord_noise_sizefraction, 10, + "Fraction of size for maxima", + ccstruct_->params()), + double_MEMBER(textord_noise_sizelimit, 0.5, + "Fraction of x for big t count", + ccstruct_->params()), + INT_MEMBER(textord_noise_translimit, 16, "Transitions for normal blob", + ccstruct_->params()), + double_MEMBER(textord_noise_normratio, 2.0, + "Dot to norm ratio for deletion", + ccstruct_->params()), + BOOL_MEMBER(textord_noise_rejwords, true, "Reject noise-like words", + ccstruct_->params()), + BOOL_MEMBER(textord_noise_rejrows, true, "Reject noise-like rows", + ccstruct_->params()), + double_MEMBER(textord_noise_syfract, 0.2, + "xh fract height error for norm blobs", + ccstruct_->params()), + double_MEMBER(textord_noise_sxfract, 0.4, + "xh fract width error for norm blobs", + ccstruct_->params()), + double_MEMBER(textord_noise_hfract, 1.0/64, + "Height fraction to discard outlines as speckle noise", + ccstruct_->params()), + INT_MEMBER(textord_noise_sncount, 1, "super norm blobs to save row", + ccstruct_->params()), + double_MEMBER(textord_noise_rowratio, 6.0, + "Dot to norm ratio for deletion", + ccstruct_->params()), + BOOL_MEMBER(textord_noise_debug, false, "Debug row garbage detector", + ccstruct_->params()), + double_MEMBER(textord_blshift_maxshift, 0.00, "Max baseline shift", + ccstruct_->params()), + double_MEMBER(textord_blshift_xfraction, 9.99, + "Min size of baseline shift", + ccstruct_->params()) { +} + +Textord::~Textord() { +} + 
+// Make the textlines and words inside each block. +void Textord::TextordPage(PageSegMode pageseg_mode, + int width, int height, Pix* pix, + BLOCK_LIST* blocks, TO_BLOCK_LIST* to_blocks) { + page_tr_.set_x(width); + page_tr_.set_y(height); + if (to_blocks->empty()) { + // AutoPageSeg was not used, so we need to find_components first. + find_components(pix, blocks, to_blocks); + } else { + // AutoPageSeg does not need to find_components as it did that already. + // Filter_blobs sets up the TO_BLOCKs the same as find_components does. + filter_blobs(page_tr_, to_blocks, true); + } + + ASSERT_HOST(!to_blocks->empty()); + if (pageseg_mode == PSM_SINGLE_BLOCK_VERT_TEXT) { + const FCOORD anticlockwise90(0.0f, 1.0f); + const FCOORD clockwise90(0.0f, -1.0f); + TO_BLOCK_IT it(to_blocks); + for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { + TO_BLOCK* to_block = it.data(); + BLOCK* block = to_block->block; + // Create a fake poly_block in block from its bounding box. + block->set_poly_block(new POLY_BLOCK(block->bounding_box(), + PT_VERTICAL_TEXT)); + // Rotate the to_block along with its contained block and blobnbox lists. + to_block->rotate(anticlockwise90); + // Set the block's rotation values to obey the convention followed in + // layout analysis for vertical text. + block->set_re_rotation(clockwise90); + block->set_classify_rotation(clockwise90); + } + } + + TO_BLOCK_IT to_block_it(to_blocks); + TO_BLOCK* to_block = to_block_it.data(); + // Make the rows in the block. + float gradient; + // Do it the old fashioned way. + if (PSM_LINE_FIND_ENABLED(pageseg_mode)) { + gradient = make_rows(page_tr_, to_blocks); + } else { + // SINGLE_LINE, SINGLE_WORD and SINGLE_CHAR all need a single row. + gradient = make_single_row(page_tr_, to_block, to_blocks); + } + // Now fit baselines. For now only old mode is available. + fit_rows(gradient, page_tr_, to_blocks); + // Now make the words in the lines. 
+ if (PSM_WORD_FIND_ENABLED(pageseg_mode)) { + // SINGLE_LINE uses the old word maker on the single line. + make_words(this, page_tr_, gradient, blocks, to_blocks); + } else { + // SINGLE_WORD and SINGLE_CHAR cram all the blobs into a + // single word, and in SINGLE_CHAR mode, all the outlines + // go in a single blob. + TO_BLOCK* to_block = to_block_it.data(); + make_single_word(pageseg_mode == PSM_SINGLE_CHAR, + to_block->get_rows(), to_block->block->row_list()); + } + cleanup_blocks(blocks); // Remove empties. +#ifndef GRAPHICS_DISABLED + close_to_win(); +#endif +} + +// If we were supposed to return only a single textline, and there is more +// than one, clean up and leave only the best. +void Textord::CleanupSingleRowResult(PageSegMode pageseg_mode, + PAGE_RES* page_res) { + if (PSM_LINE_FIND_ENABLED(pageseg_mode)) + return; // No cleanup required. + PAGE_RES_IT it(page_res); + // Find the best row, being the greatest mean word conf. + float row_total_conf = 0.0f; + int row_word_count = 0; + ROW_RES* best_row = NULL; + float best_conf = 0.0f; + for (it.restart_page(); it.word() != NULL; it.forward()) { + WERD_RES* word = it.word(); + row_total_conf += word->best_choice->certainty(); + ++row_word_count; + if (it.next_row() != it.row()) { + row_total_conf /= row_word_count; + if (best_row == NULL || best_conf < row_total_conf) { + best_row = it.row(); + best_conf = row_total_conf; + } + row_total_conf = 0.0f; + row_word_count = 0; + } + } + // Now eliminate any word not in the best row. + for (it.restart_page(); it.word() != NULL; it.forward()) { + if (it.row() != best_row) + it.DeleteCurrentWord(); + } +} + +} // namespace tesseract. 
diff --git a/textord/textord.h b/textord/textord.h new file mode 100644 index 0000000000..5443b2ef9a --- /dev/null +++ b/textord/textord.h @@ -0,0 +1,345 @@ +/////////////////////////////////////////////////////////////////////// +// File: textord.h +// Description: The Textord class definition gathers text line and word +// finding functionality. +// Author: Ray Smith +// Created: Fri Mar 13 14:29:01 PDT 2009 +// +// (C) Copyright 2009, Google Inc. +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +/////////////////////////////////////////////////////////////////////// + +#ifndef TESSERACT_TEXTORD_TEXTORD_H__ +#define TESSERACT_TEXTORD_TEXTORD_H__ + +#include "ccstruct.h" +#include "blobbox.h" +#include "gap_map.h" +#include "notdll.h" +#include "publictypes.h" // For PageSegMode. + +class FCOORD; +class BLOCK_LIST; +class PAGE_RES; +class TO_BLOCK; +class TO_BLOCK_LIST; +class ScrollView; + +namespace tesseract { + +class Textord { + public: + explicit Textord(CCStruct* ccstruct); + ~Textord(); + + // Make the textlines and words inside each block. + void TextordPage(PageSegMode pageseg_mode, + int width, int height, Pix* pix, + BLOCK_LIST* blocks, TO_BLOCK_LIST* to_blocks); + + // If we were supposed to return only a single textline, and there is more + // than one, clean up and leave only the best. 
+ void CleanupSingleRowResult(PageSegMode pageseg_mode, PAGE_RES* page_res); + + // tospace.cpp /////////////////////////////////////////// + void to_spacing( + ICOORD page_tr, //topright of page + TO_BLOCK_LIST *blocks //blocks on page + ); + ROW *make_prop_words(TO_ROW *row, // row to make + FCOORD rotation // for drawing + ); + ROW *make_blob_words(TO_ROW *row, // row to make + FCOORD rotation // for drawing + ); + // tordmain.cpp /////////////////////////////////////////// + void find_components(Pix* pix, BLOCK_LIST *blocks, TO_BLOCK_LIST *to_blocks); + void filter_blobs(ICOORD page_tr, TO_BLOCK_LIST *blocks, BOOL8 testing_on); + private: + // For underlying memory management and other utilities. + CCStruct* ccstruct_; + + // The size of the input image. + ICOORD page_tr_; + + // makerow.cpp /////////////////////////////////////////// + // Make the textlines inside each block. + void MakeRows(PageSegMode pageseg_mode, const FCOORD& skew, + int width, int height, TO_BLOCK_LIST* to_blocks); + // Make the textlines inside a single block. 
+ void MakeBlockRows(int min_spacing, int max_spacing, + const FCOORD& skew, TO_BLOCK* block, + ScrollView* win); + + void fit_rows(float gradient, ICOORD page_tr, TO_BLOCK_LIST *blocks); + void cleanup_rows_fitting(ICOORD page_tr, // top right + TO_BLOCK *block, // block to do + float gradient, // gradient to fit + FCOORD rotation, // for drawing + inT32 block_edge, // edge of block + BOOL8 testing_on); // correct orientation + void compute_block_xheight(TO_BLOCK *block, float gradient); + void compute_row_xheight(TO_ROW *row, // row to do + const FCOORD& rotation, + float gradient, // global skew + int block_line_size); + void make_spline_rows(TO_BLOCK *block, // block to do + float gradient, // gradient to fit + FCOORD rotation, // for drawing + inT32 block_edge, // edge of block + BOOL8 testing_on); + + //// oldbasel.cpp //////////////////////////////////////// + void make_old_baselines(TO_BLOCK *block, // block to do + BOOL8 testing_on, // correct orientation + float gradient); + void correlate_lines(TO_BLOCK *block, float gradient); + void correlate_neighbours(TO_BLOCK *block, // block rows are in. + TO_ROW **rows, // rows of block. + int rowcount); // no of rows to do. + int correlate_with_stats(TO_ROW **rows, // rows of block. + int rowcount, // no of rows to do. 
+ TO_BLOCK* block); + void find_textlines(TO_BLOCK *block, // block row is in + TO_ROW *row, // row to do + int degree, // required approximation + QSPLINE *spline); // starting spline + // tospace.cpp /////////////////////////////////////////// + //DEBUG USE ONLY + void block_spacing_stats(TO_BLOCK *block, + GAPMAP *gapmap, + BOOL8 &old_text_ord_proportional, + //resulting estimate + inT16 &block_space_gap_width, + //resulting estimate + inT16 &block_non_space_gap_width + ); + void row_spacing_stats(TO_ROW *row, + GAPMAP *gapmap, + inT16 block_idx, + inT16 row_idx, + //estimate for block + inT16 block_space_gap_width, + //estimate for block + inT16 block_non_space_gap_width + ); + void old_to_method(TO_ROW *row, + STATS *all_gap_stats, + STATS *space_gap_stats, + STATS *small_gap_stats, + inT16 block_space_gap_width, + //estimate for block + inT16 block_non_space_gap_width + ); + BOOL8 isolated_row_stats(TO_ROW *row, + GAPMAP *gapmap, + STATS *all_gap_stats, + BOOL8 suspected_table, + inT16 block_idx, + inT16 row_idx); + inT16 stats_count_under(STATS *stats, inT16 threshold); + void improve_row_threshold(TO_ROW *row, STATS *all_gap_stats); + BOOL8 make_a_word_break(TO_ROW *row, // row being made + TBOX blob_box, // for next_blob // how many blanks? 
+ inT16 prev_gap, + TBOX prev_blob_box, + inT16 real_current_gap, + inT16 within_xht_current_gap, + TBOX next_blob_box, + inT16 next_gap, + uinT8 &blanks, + BOOL8 &fuzzy_sp, + BOOL8 &fuzzy_non, + BOOL8& prev_gap_was_a_space, + BOOL8& break_at_next_gap); + BOOL8 narrow_blob(TO_ROW *row, TBOX blob_box); + BOOL8 wide_blob(TO_ROW *row, TBOX blob_box); + BOOL8 suspected_punct_blob(TO_ROW *row, TBOX box); + void peek_at_next_gap(TO_ROW *row, + BLOBNBOX_IT box_it, + TBOX &next_blob_box, + inT16 &next_gap, + inT16 &next_within_xht_gap); + void mark_gap(TBOX blob, //blob following gap + inT16 rule, // heuristic id + inT16 prev_gap, + inT16 prev_blob_width, + inT16 current_gap, + inT16 next_blob_width, + inT16 next_gap); + float find_mean_blob_spacing(WERD *word); + BOOL8 ignore_big_gap(TO_ROW *row, + inT32 row_length, + GAPMAP *gapmap, + inT16 left, + inT16 right); + //get bounding box + TBOX reduced_box_next(TO_ROW *row, //current row + BLOBNBOX_IT *it //iterator to blobds + ); + TBOX reduced_box_for_blob(BLOBNBOX *blob, TO_ROW *row, inT16 *left_above_xht); + // tordmain.cpp /////////////////////////////////////////// + float filter_noise_blobs(BLOBNBOX_LIST *src_list, + BLOBNBOX_LIST *noise_list, + BLOBNBOX_LIST *small_list, + BLOBNBOX_LIST *large_list); + void cleanup_blocks(BLOCK_LIST *blocks); + BOOL8 clean_noise_from_row(ROW *row); + void clean_noise_from_words(ROW *row); + // Remove outlines that are a tiny fraction in either width or height + // of the word height. 
+ void clean_small_noise_from_words(ROW *row); + public: + // makerow.cpp /////////////////////////////////////////// + BOOL_VAR_H(textord_single_height_mode, false, + "Script has no xheight, so use a single mode for horizontal text"); + // tospace.cpp /////////////////////////////////////////// + BOOL_VAR_H(tosp_old_to_method, false, "Space stats use prechopping?"); + BOOL_VAR_H(tosp_old_to_constrain_sp_kn, false, + "Constrain relative values of inter and intra-word gaps for " + "old_to_method."); + BOOL_VAR_H(tosp_only_use_prop_rows, true, + "Block stats to use fixed pitch rows?"); + BOOL_VAR_H(tosp_force_wordbreak_on_punct, false, + "Force word breaks on punct to break long lines in non-space " + "delimited langs"); + BOOL_VAR_H(tosp_use_pre_chopping, false, + "Space stats use prechopping?"); + BOOL_VAR_H(tosp_old_to_bug_fix, false, + "Fix suspected bug in old code"); + BOOL_VAR_H(tosp_block_use_cert_spaces, true, + "Only stat OBVIOUS spaces"); + BOOL_VAR_H(tosp_row_use_cert_spaces, true, + "Only stat OBVIOUS spaces"); + BOOL_VAR_H(tosp_narrow_blobs_not_cert, true, + "Only stat OBVIOUS spaces"); + BOOL_VAR_H(tosp_row_use_cert_spaces1, true, + "Only stat OBVIOUS spaces"); + BOOL_VAR_H(tosp_recovery_isolated_row_stats, true, + "Use row alone when inadequate cert spaces"); + BOOL_VAR_H(tosp_only_small_gaps_for_kern, false, "Better guess"); + BOOL_VAR_H(tosp_all_flips_fuzzy, false, "Pass ANY flip to context?"); + BOOL_VAR_H(tosp_fuzzy_limit_all, true, + "Dont restrict kn->sp fuzzy limit to tables"); + BOOL_VAR_H(tosp_stats_use_xht_gaps, true, + "Use within xht gap for wd breaks"); + BOOL_VAR_H(tosp_use_xht_gaps, true, + "Use within xht gap for wd breaks"); + BOOL_VAR_H(tosp_only_use_xht_gaps, false, + "Only use within xht gap for wd breaks"); + BOOL_VAR_H(tosp_rule_9_test_punct, false, + "Dont chng kn to space next to punct"); + BOOL_VAR_H(tosp_flip_fuzz_kn_to_sp, true, "Default flip"); + BOOL_VAR_H(tosp_flip_fuzz_sp_to_kn, true, "Default flip"); + 
BOOL_VAR_H(tosp_improve_thresh, false, + "Enable improvement heuristic"); + INT_VAR_H(tosp_debug_level, 0, "Debug data"); + INT_VAR_H(tosp_enough_space_samples_for_median, 3, + "or should we use mean"); + INT_VAR_H(tosp_redo_kern_limit, 10, + "No.samples reqd to reestimate for row"); + INT_VAR_H(tosp_few_samples, 40, + "No.gaps reqd with 1 large gap to treat as a table"); + INT_VAR_H(tosp_short_row, 20, + "No.gaps reqd with few cert spaces to use certs"); + INT_VAR_H(tosp_sanity_method, 1, "How to avoid being silly"); + double_VAR_H(tosp_old_sp_kn_th_factor, 2.0, + "Factor for defining space threshold in terms of space and " + "kern sizes"); + double_VAR_H(tosp_threshold_bias1, 0, + "how far between kern and space?"); + double_VAR_H(tosp_threshold_bias2, 0, + "how far between kern and space?"); + double_VAR_H(tosp_narrow_fraction, 0.3, + "Fract of xheight for narrow"); + double_VAR_H(tosp_narrow_aspect_ratio, 0.48, + "narrow if w/h less than this"); + double_VAR_H(tosp_wide_fraction, 0.52, "Fract of xheight for wide"); + double_VAR_H(tosp_wide_aspect_ratio, 0.0, + "wide if w/h less than this"); + double_VAR_H(tosp_fuzzy_space_factor, 0.6, + "Fract of xheight for fuzz sp"); + double_VAR_H(tosp_fuzzy_space_factor1, 0.5, + "Fract of xheight for fuzz sp"); + double_VAR_H(tosp_fuzzy_space_factor2, 0.72, + "Fract of xheight for fuzz sp"); + double_VAR_H(tosp_gap_factor, 0.83, "gap ratio to flip sp->kern"); + double_VAR_H(tosp_kern_gap_factor1, 2.0, + "gap ratio to flip kern->sp"); + double_VAR_H(tosp_kern_gap_factor2, 1.3, + "gap ratio to flip kern->sp"); + double_VAR_H(tosp_kern_gap_factor3, 2.5, + "gap ratio to flip kern->sp"); + double_VAR_H(tosp_ignore_big_gaps, -1, "xht multiplier"); + double_VAR_H(tosp_ignore_very_big_gaps, 3.5, "xht multiplier"); + double_VAR_H(tosp_rep_space, 1.6, "rep gap multiplier for space"); + double_VAR_H(tosp_enough_small_gaps, 0.65, + "Fract of kerns reqd for isolated row stats"); + double_VAR_H(tosp_table_kn_sp_ratio, 2.25, + "Min 
difference of kn & sp in table"); + double_VAR_H(tosp_table_xht_sp_ratio, 0.33, + "Expect spaces bigger than this"); + double_VAR_H(tosp_table_fuzzy_kn_sp_ratio, 3.0, + "Fuzzy if less than this"); + double_VAR_H(tosp_fuzzy_kn_fraction, 0.5, "New fuzzy kn alg"); + double_VAR_H(tosp_fuzzy_sp_fraction, 0.5, "New fuzzy sp alg"); + double_VAR_H(tosp_min_sane_kn_sp, 1.5, + "Dont trust spaces less than this time kn"); + double_VAR_H(tosp_init_guess_kn_mult, 2.2, + "Thresh guess - mult kn by this"); + double_VAR_H(tosp_init_guess_xht_mult, 0.28, + "Thresh guess - mult xht by this"); + double_VAR_H(tosp_max_sane_kn_thresh, 5.0, + "Multiplier on kn to limit thresh"); + double_VAR_H(tosp_flip_caution, 0.0, + "Dont autoflip kn to sp when large separation"); + double_VAR_H(tosp_large_kerning, 0.19, + "Limit use of xht gap with large kns"); + double_VAR_H(tosp_dont_fool_with_small_kerns, -1, + "Limit use of xht gap with odd small kns"); + double_VAR_H(tosp_near_lh_edge, 0, + "Dont reduce box if the top left is non blank"); + double_VAR_H(tosp_silly_kn_sp_gap, 0.2, + "Dont let sp minus kn get too small"); + double_VAR_H(tosp_pass_wide_fuzz_sp_to_context, 0.75, + "How wide fuzzies need context"); + // tordmain.cpp /////////////////////////////////////////// + BOOL_VAR_H(textord_no_rejects, false, "Don't remove noise blobs"); + BOOL_VAR_H(textord_show_blobs, false, "Display unsorted blobs"); + BOOL_VAR_H(textord_show_boxes, false, "Display boxes"); + INT_VAR_H(textord_max_noise_size, 7, "Pixel size of noise"); + double_VAR_H(textord_blob_size_bigile, 95, "Percentile for large blobs"); + double_VAR_H(textord_noise_area_ratio, 0.7, + "Fraction of bounding box for noise"); + double_VAR_H(textord_blob_size_smallile, 20, "Percentile for small blobs"); + double_VAR_H(textord_initialx_ile, 0.75, "Ile of sizes for xheight guess"); + double_VAR_H(textord_initialasc_ile, 0.90, "Ile of sizes for xheight guess"); + INT_VAR_H(textord_noise_sizefraction, 10, "Fraction of size for maxima"); + 
double_VAR_H(textord_noise_sizelimit, 0.5, "Fraction of x for big t count"); + INT_VAR_H(textord_noise_translimit, 16, "Transitions for normal blob"); + double_VAR_H(textord_noise_normratio, 2.0, "Dot to norm ratio for deletion"); + BOOL_VAR_H(textord_noise_rejwords, true, "Reject noise-like words"); + BOOL_VAR_H(textord_noise_rejrows, true, "Reject noise-like rows"); + double_VAR_H(textord_noise_syfract, 0.2, "xh fract error for norm blobs"); + double_VAR_H(textord_noise_sxfract, 0.4, + "xh fract width error for norm blobs"); + double_VAR_H(textord_noise_hfract, 1.0/64, + "Height fraction to discard outlines as speckle noise"); + INT_VAR_H(textord_noise_sncount, 1, "super norm blobs to save row"); + double_VAR_H(textord_noise_rowratio, 6.0, "Dot to norm ratio for deletion"); + BOOL_VAR_H(textord_noise_debug, FALSE, "Debug row garbage detector"); + double_VAR_H(textord_blshift_maxshift, 0.00, "Max baseline shift"); + double_VAR_H(textord_blshift_xfraction, 9.99, "Min size of baseline shift"); +}; +} // namespace tesseract. 
+ +#endif // TESSERACT_TEXTORD_TEXTORD_H__ diff --git a/textord/textord.vcproj b/textord/textord.vcproj deleted file mode 100755 index 831a524185..0000000000 --- a/textord/textord.vcproj +++ /dev/null @@ -1,793 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/textord/topitch.cpp b/textord/topitch.cpp index 60cf6e3843..3c4da0fcf1 100644 --- a/textord/topitch.cpp +++ b/textord/topitch.cpp @@ -23,18 +23,16 @@ #endif #include "stderr.h" #include "blobbox.h" -#include "lmedsq.h" #include "statistc.h" #include "drawtord.h" #include "makerow.h" #include "pitsync1.h" #include "pithsync.h" -#include "blobcmpl.h" #include "tovars.h" #include "wordseg.h" #include "topitch.h" #include "secname.h" -#include "tesseractclass.h" +#include "helpers.h" // Include automatically generated configuration file if running autoconf. #ifdef HAVE_CONFIG_H @@ -74,14 +72,11 @@ EXTERN double_VAR (textord_balance_factor, 1.0, * result for each row in the TO_ROW class. 
**********************************************************************/ -void compute_fixed_pitch( //determine pitch - ICOORD page_tr, //top right - TO_BLOCK_LIST *port_blocks, //input list - float gradient, //page skew - FCOORD rotation, //for drawing - BOOL8 testing_on, //correct orientation - tesseract::Tesseract* tess - ) { +void compute_fixed_pitch(ICOORD page_tr, // top right + TO_BLOCK_LIST *port_blocks, // input list + float gradient, // page skew + FCOORD rotation, // for drawing + BOOL8 testing_on) { // correct orientation TO_BLOCK_IT block_it; //iterator TO_BLOCK *block; //current block; TO_ROW_IT row_it; //row iterator @@ -101,7 +96,7 @@ void compute_fixed_pitch( //determine pitch for (block_it.mark_cycle_pt (); !block_it.cycled_list (); block_it.forward ()) { block = block_it.data (); - compute_block_pitch(block, rotation, block_index, testing_on, tess); + compute_block_pitch(block, rotation, block_index, testing_on); block_index++; } @@ -309,14 +304,11 @@ void fix_row_pitch(TO_ROW *bad_row, // row to fix * Decide whether each block is fixed pitch individually. 
**********************************************************************/ -void compute_block_pitch( //process each block - TO_BLOCK *block, //input list - FCOORD rotation, //for drawing - inT32 block_index, //block number - BOOL8 testing_on, //correct orientation - tesseract::Tesseract* tess - ) { - TBOX block_box; //bounding box +void compute_block_pitch(TO_BLOCK *block, // input list + FCOORD rotation, // for drawing + inT32 block_index, // block number + BOOL8 testing_on) { // correct orientation + TBOX block_box; //bounding box block_box = block->block->bounding_box (); if (testing_on && textord_debug_pitch_test) { @@ -336,8 +328,7 @@ void compute_block_pitch( //process each block block->pr_space = block->pr_nonsp * textord_spacesize_ratioprop; if (!block->get_rows ()->empty ()) { ASSERT_HOST (block->xheight > 0); - if (textord_repeat_extraction) - find_repeated_chars(block, textord_show_initial_words &&testing_on, tess); + find_repeated_chars(block, textord_show_initial_words && testing_on); #ifndef GRAPHICS_DISABLED if (textord_show_initial_words && testing_on) //overlap_picture_ops(TRUE); @@ -771,7 +762,7 @@ BOOL8 row_pitch_stats( //find line stats cluster_stats[gap_index + 1].get_total ()); tprintf ("\n"); } - qsort (gaps, cluster_count, sizeof (float), sort_floats2); + qsort (gaps, cluster_count, sizeof (float), sort_floats); //Try to find proportional non-space and space for row. lower = row->xheight * words_default_prop_nonspace; @@ -1756,39 +1747,14 @@ void print_pitch_sd( //find fp cells occupation, res2, initial_pitch, row->fixed_pitch, row->all_caps); } - -/********************************************************************** - * sort_floats - * - * qsort function to sort 2 floats. 
- **********************************************************************/ - -int sort_floats2( //qsort function - const void *arg1, //ptrs to floats - const void *arg2) { - float diff; //difference - - diff = *((float *) arg1) - *((float *) arg2); - if (diff > 0) - return 1; - else if (diff < 0) - return -1; - else - return 0; -} - - /********************************************************************** * find_repeated_chars * - * Find 4 or more adjacent chars which are the same and put them + * Extract marked leader blobs and put them * into words in advance of fixed pitch checking and word generation. **********************************************************************/ -void find_repeated_chars( //search for equal chars - TO_BLOCK *block, //block to search - BOOL8 testing_on, //dbug mode - tesseract::Tesseract* tess - ) { +void find_repeated_chars(TO_BLOCK *block, // Block to search. + BOOL8 testing_on) { // Debug mode. TO_ROW *row; BLOBNBOX_IT box_it; BLOBNBOX_IT search_it; // forward search @@ -1804,7 +1770,7 @@ void find_repeated_chars( //search for equal chars box_it.set_to_list(row->blob_list()); if (box_it.empty()) continue; // no blobs in this row if (!row->rep_chars_marked()) { - mark_repeated_chars(row, block->xheight, tess); + mark_repeated_chars(row); } if (row->num_repeated_sets() == 0) continue; // nothing to do for this row word_it.set_to_list(&row->rep_words); @@ -1823,21 +1789,14 @@ void find_repeated_chars( //search for equal chars // After the call to make_real_word() all the blobs from this // repeated set will be removed from the blob list. box_it will be // set to point to the blob after the end of the extracted sequence. 
- word = make_real_word(&box_it, blobcount, - box_it.at_first(), false, false, 1); -#ifndef GRAPHICS_DISABLED - if (testing_on) { - word_box = word->bounding_box(); - tprintf("Found repeated word of %d blobs from (%d,%d)->(%d,%d)\n", - blobcount, word_box.left(), word_box.bottom(), - word_box.right(), word_box.top()); - //perimeter_color_index(to_win, RED); - to_win->Pen(255,0,0); - //interior_style(to_win, INT_HOLLOW, TRUE); - to_win->Rectangle(word_box.left(), word_box.bottom(), - word_box.right(), word_box.top()); + word = make_real_word(&box_it, blobcount, box_it.at_first(), 1); + if (!box_it.empty() && box_it.data()->joined_to_prev()) { + tprintf("Bad box joined to prev at"); + box_it.data()->bounding_box().print(); + tprintf("After repeated word:"); + word->bounding_box().print(); } -#endif + ASSERT_HOST(box_it.empty() || !box_it.data()->joined_to_prev()); word->set_flag(W_REP_CHAR, true); word->set_flag(W_DONT_CHOP, true); word_it.add_after_then_move(word); diff --git a/textord/topitch.h b/textord/topitch.h index ae0e0fd522..bedbb04dd9 100644 --- a/textord/topitch.h +++ b/textord/topitch.h @@ -43,14 +43,11 @@ extern double_VAR_H (textord_projection_scale, 0.125, extern double_VAR_H (textord_balance_factor, 2.0, "Ding rate for unbalanced char cells"); -void compute_fixed_pitch( //determine pitch - ICOORD page_tr, //top right - TO_BLOCK_LIST *port_blocks, //input list - float gradient, //page skew - FCOORD rotation, //for drawing - BOOL8 testing_on, //correct orientation - tesseract::Tesseract* tess - ); +void compute_fixed_pitch(ICOORD page_tr, // top right + TO_BLOCK_LIST *port_blocks, // input list + float gradient, // page skew + FCOORD rotation, // for drawing + BOOL8 testing_on); // correct orientation void fix_row_pitch( //get some value TO_ROW *bad_row, //row to fix TO_BLOCK *bad_block, //block of bad_row @@ -58,13 +55,10 @@ void fix_row_pitch( //get some value inT32 row_target, //number of row inT32 block_target //number of block ); -void 
compute_block_pitch( //process each block - TO_BLOCK *block, //input list - FCOORD rotation, //for drawing - inT32 block_index, //block number - BOOL8 testing_on, //correct orientation - tesseract::Tesseract* tess - ); +void compute_block_pitch( TO_BLOCK *block, // input list + FCOORD rotation, // for drawing + inT32 block_index, // block number + BOOL8 testing_on); // correct orientation BOOL8 compute_rows_pitch( //find line stats TO_BLOCK *block, //block to do inT32 block_index, //block number @@ -184,14 +178,8 @@ void print_pitch_sd( //find fp cells float space_size, float initial_pitch //guess at pitch ); -int sort_floats2( //qsort function - const void *arg1, //ptrs to floats - const void *arg2); -void find_repeated_chars( //search for equal chars - TO_BLOCK *block, //block to search - BOOL8 testing_on, //dbug mode - tesseract::Tesseract* tess - ); +void find_repeated_chars(TO_BLOCK *block, // Block to search. + BOOL8 testing_on); // Debug mode. void plot_fp_word( //draw block of words TO_BLOCK *block, //block to draw float pitch, //pitch to draw with diff --git a/textord/tordmain.cpp b/textord/tordmain.cpp index b2e581b02f..2cbfde4d4f 100644 --- a/textord/tordmain.cpp +++ b/textord/tordmain.cpp @@ -22,19 +22,18 @@ #endif #include "stderr.h" #include "globaloc.h" -#include "tessout.h" #include "blread.h" #include "blobbox.h" +#include "ccstruct.h" #include "edgblob.h" #include "drawtord.h" #include "makerow.h" #include "wordseg.h" -#include "ocrclass.h" #include "genblob.h" #include "imgs.h" +#include "textord.h" #include "tordmain.h" #include "secname.h" -#include "tesseractclass.h" // Include automatically generated configuration file if running autoconf. 
#ifdef HAVE_CONFIG_H @@ -55,154 +54,26 @@ const ERRCODE BLOCKLESS_BLOBS = "Warning:some blobs assigned to no block"; #undef EXTERN #define EXTERN -EXTERN BOOL_VAR (textord_no_rejects, FALSE, "Don't remove noise blobs"); -EXTERN BOOL_VAR (textord_show_blobs, FALSE, "Display unsorted blobs"); -EXTERN BOOL_VAR (textord_show_boxes, FALSE, "Display unsorted blobs"); -EXTERN BOOL_VAR (textord_new_initial_xheight, TRUE, -"Use test xheight mechanism"); -EXTERN BOOL_VAR (textord_exit_after, FALSE, "Exit after completing textord"); -EXTERN INT_VAR (textord_max_noise_size, 7, "Pixel size of noise"); -EXTERN double_VAR (textord_blob_size_bigile, 95, -"Percentile for large blobs"); -EXTERN double_VAR (textord_noise_area_ratio, 0.7, -"Fraction of bounding box for noise"); -EXTERN double_VAR (textord_blob_size_smallile, 20, -"Percentile for small blobs"); -EXTERN double_VAR (textord_initialx_ile, 0.75, -"Ile of sizes for xheight guess"); -EXTERN double_VAR (textord_initialasc_ile, 0.90, -"Ile of sizes for xheight guess"); -EXTERN INT_VAR (textord_noise_sizefraction, 10, -"Fraction of size for maxima"); -EXTERN double_VAR (textord_noise_sizelimit, 0.5, -"Fraction of x for big t count"); -EXTERN INT_VAR (textord_noise_translimit, 16, "Transitions for normal blob"); -EXTERN double_VAR (textord_noise_normratio, 2.0, -"Dot to norm ratio for deletion"); -EXTERN BOOL_VAR (textord_noise_rejwords, TRUE, "Reject noise-like words"); -EXTERN BOOL_VAR (textord_noise_rejrows, TRUE, "Reject noise-like rows"); -EXTERN double_VAR (textord_noise_syfract, 0.2, -"xh fract error for norm blobs"); -EXTERN double_VAR (textord_noise_sxfract, 0.4, -"xh fract width error for norm blobs"); -EXTERN double_VAR(textord_noise_hfract, 1.0/64, -"Height fraction to discard outlines as speckle noise"); -EXTERN INT_VAR (textord_noise_sncount, 1, "super norm blobs to save row"); -EXTERN double_VAR (textord_noise_rowratio, 6.0, -"Dot to norm ratio for deletion"); - -EXTERN BOOL_VAR (textord_noise_debug, FALSE, "Debug 
row garbage detector"); -EXTERN double_VAR (textord_blshift_maxshift, 0.00, "Max baseline shift"); -EXTERN double_VAR (textord_blshift_xfraction, 9.99, -"Min size of baseline shift"); -EXTERN STRING_EVAR (tessedit_image_ext, ".tif", "Externsion for image file"); - -#ifndef EMBEDDED -EXTERN clock_t previous_cpu; -#endif - -extern BOOL_VAR_H (polygon_tess_approximation, TRUE, -"Do tess poly instead of grey scale"); - #define MAX_NEAREST_DIST 600 //for block skew stats -#define MAX_BLOB_TRANSITIONS100 //for nois stats - -extern IMAGE page_image; //must be defined somewhere -extern BOOL_VAR_H (interactive_mode, TRUE, "Run interactively?"); -extern /*"C" */ ETEXT_DESC *global_monitor; //progress monitor - -/********************************************************************** - * find_components - * - * Find the C_OUTLINEs of the connected components in each block, put them - * in C_BLOBs, and filter them by size, putting the different size - * grades on different lists in the matching TO_BLOCK in port_blocks. - **********************************************************************/ - -void find_components( - BLOCK_LIST *blocks, - TO_BLOCK_LIST *land_blocks, - TO_BLOCK_LIST *port_blocks, - TBOX *page_box) { - BLOCK *block; //current block - PDBLK_CLIST pd_blocks; //copy of list - BLOCK_IT block_it = blocks; //iterator - PDBLK_C_IT pd_it = &pd_blocks; //iterator - IMAGE thresh_image; //thresholded - - int width = page_image.get_xsize(); - int height = page_image.get_ysize(); - if (width > MAX_INT16 || height > MAX_INT16) { - tprintf("Input image too large! (%d, %d)\n", width, height); - return; // Can't handle it. 
- } - - ICOORD page_tr(width, height); - block_it.set_to_list (blocks); - if (global_monitor != NULL) - global_monitor->ocr_alive = TRUE; - - set_global_loc_code(LOC_EDGE_PROG); - if (!page_image.white_high ()) - invert_image(&page_image); - -#ifndef EMBEDDED - previous_cpu = clock (); -#endif - - for (block_it.mark_cycle_pt(); !block_it.cycled_list(); - block_it.forward()) { - block = block_it.data(); - if (block->poly_block() == NULL || - block->poly_block()->IsText()) { -#ifndef GRAPHICS_DISABLED - extract_edges(NULL, &page_image, &page_image, page_tr, block); -#else - extract_edges(&page_image, &page_image, page_tr, block); -#endif - *page_box += block->bounding_box (); - } - } - if (global_monitor != NULL) { - global_monitor->ocr_alive = TRUE; - global_monitor->progress = 10; - } - - assign_blobs_to_blocks2(blocks, land_blocks, port_blocks); - if (global_monitor != NULL) - global_monitor->ocr_alive = TRUE; - filter_blobs (page_box->topright (), land_blocks, textord_test_landscape); -#ifndef EMBEDDED - previous_cpu = clock (); -#endif - filter_blobs (page_box->topright (), port_blocks, !textord_test_landscape); - if (global_monitor != NULL) - global_monitor->ocr_alive = TRUE; -} /********************************************************************** * SetBlobStrokeWidth * * Set the horizontal and vertical stroke widths in the blob. **********************************************************************/ -void SetBlobStrokeWidth(bool debug, BLOBNBOX* blob) { +void SetBlobStrokeWidth(Pix* pix, BLOBNBOX* blob) { #ifdef HAVE_LIBLEPT // Cut the blob rectangle into a Pix. - // TODO(rays) make the page_image a Pix so this is more direct. 
+ int pix_height = pixGetHeight(pix); const TBOX& box = blob->bounding_box(); - IMAGE blob_im; int width = box.width(); int height = box.height(); - blob_im.create(width, height, 1); - copy_sub_image(&page_image, box.left(), box.bottom(), width, height, - &blob_im, 0, 0, false); - Pix* pix = blob_im.ToPix(); - Pix* dist_pix = pixDistanceFunction(pix, 4, 8, L_BOUNDARY_BG); - if (debug) { - pixWrite("cutpix.png", pix, IFF_PNG); - pixWrite("distpix.png", dist_pix, IFF_PNG); - } - pixDestroy(&pix); + Box* blob_pix_box = boxCreate(box.left(), pix_height - box.top(), + width, height); + Pix* pix_blob = pixClipRectangle(pix, blob_pix_box, NULL); + boxDestroy(&blob_pix_box); + Pix* dist_pix = pixDistanceFunction(pix_blob, 4, 8, L_BOUNDARY_BG); + pixDestroy(&pix_blob); // Compute the stroke widths. uinT32* data = pixGetData(dist_pix); int wpl = pixGetWpl(dist_pix); @@ -232,9 +103,6 @@ void SetBlobStrokeWidth(bool debug, BLOBNBOX* blob) { pixel = next_pixel; } } - if (debug) { - h_stats.print(stderr, true); - } // Vertical width of stroke. STATS v_stats(0, height + 1); for (int x = 0; x < width; ++x) { @@ -261,9 +129,6 @@ void SetBlobStrokeWidth(bool debug, BLOBNBOX* blob) { pixel = next_pixel; } } - if (debug) { - v_stats.print(stderr, true); - } pixDestroy(&dist_pix); // Store the horizontal and vertical width in the blob, keeping both // widths if there is enough information, otherwse only the one with @@ -271,18 +136,6 @@ void SetBlobStrokeWidth(bool debug, BLOBNBOX* blob) { // If there are insufficent samples, store zero, rather than using // 2*area/perimeter, as the numbers that gives do not match the numbers // from the distance method. 
- if (debug) { - tprintf("box=%d,%d->%d,%d, hcount=%d, vcount=%d, target=%d\n", - box.left(), box.bottom(), box.right(), box.top(), - h_stats.get_total(), v_stats.get_total(), (width+height) /4); - tprintf("hstats median=%f, lq=%f, uq=%f, sd=%f\n", - h_stats.median(), h_stats.ile(0.25f), h_stats.ile(0.75f), - h_stats.sd()); - tprintf("vstats median=%f, lq=%f, uq=%f, sd=%f\n", - v_stats.median(), v_stats.ile(0.25f), v_stats.ile(0.75f), - v_stats.sd()); - - } if (h_stats.get_total() >= (width + height) / 4) { blob->set_horz_stroke_width(h_stats.ile(0.5f)); if (v_stats.get_total() >= (width + height) / 4) @@ -316,11 +169,9 @@ void SetBlobStrokeWidth(bool debug, BLOBNBOX* blob) { * Make a list of TO_BLOCKs for portrait and landscape orientation. **********************************************************************/ -void assign_blobs_to_blocks2( // split into groups +void assign_blobs_to_blocks2(Pix* pix, BLOCK_LIST *blocks, // blocks to process - TO_BLOCK_LIST *land_blocks, // ** unused ** - TO_BLOCK_LIST *port_blocks // output list - ) { + TO_BLOCK_LIST *port_blocks) { // output list BLOCK *block; // current block BLOBNBOX *newblob; // created blob C_BLOB *blob; // current blob @@ -332,7 +183,7 @@ void assign_blobs_to_blocks2( // split into groups TO_BLOCK *port_block; // created block for (block_it.mark_cycle_pt(); !block_it.cycled_list(); block_it.forward()) { - block = block_it.data (); + block = block_it.data(); port_block = new TO_BLOCK(block); // Convert the good outlines to block->blob_list @@ -341,7 +192,7 @@ void assign_blobs_to_blocks2( // split into groups for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) { blob = blob_it.extract(); newblob = new BLOBNBOX(blob); // Convert blob to BLOBNBOX. 
- SetBlobStrokeWidth(false, newblob); + SetBlobStrokeWidth(pix, newblob); port_box_it.add_after_then_move(newblob); } @@ -353,7 +204,7 @@ void assign_blobs_to_blocks2( // split into groups for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) { blob = blob_it.extract(); newblob = new BLOBNBOX(blob); // Convert blob to BLOBNBOX. - SetBlobStrokeWidth(false, newblob); + SetBlobStrokeWidth(pix, newblob); port_box_it.add_after_then_move(newblob); } @@ -361,6 +212,39 @@ void assign_blobs_to_blocks2( // split into groups } } +namespace tesseract { +/********************************************************************** + * find_components + * + * Find the C_OUTLINEs of the connected components in each block, put them + * in C_BLOBs, and filter them by size, putting the different size + * grades on different lists in the matching TO_BLOCK in to_blocks. + **********************************************************************/ + +void Textord::find_components(Pix* pix, BLOCK_LIST *blocks, + TO_BLOCK_LIST *to_blocks) { + int width = pixGetWidth(pix); + int height = pixGetHeight(pix); + if (width > MAX_INT16 || height > MAX_INT16) { + tprintf("Input image too large! (%d, %d)\n", width, height); + return; // Can't handle it. + } + + set_global_loc_code(LOC_EDGE_PROG); + + BLOCK_IT block_it(blocks); // iterator + for (block_it.mark_cycle_pt(); !block_it.cycled_list(); + block_it.forward()) { + BLOCK* block = block_it.data(); + if (block->poly_block() == NULL || block->poly_block()->IsText()) { + extract_edges(pix, block); + } + } + + assign_blobs_to_blocks2(pix, blocks, to_blocks); + ICOORD page_tr(width, height); + filter_blobs(page_tr, to_blocks, !textord_test_landscape); +} /********************************************************************** * filter_blobs @@ -368,27 +252,26 @@ void assign_blobs_to_blocks2( // split into groups * Sort the blobs into sizes in all the blocks for later work. 
**********************************************************************/ -void filter_blobs( //split into groups - ICOORD page_tr, //top right - TO_BLOCK_LIST *blocks, //output list - BOOL8 testing_on //for plotting - ) { - TO_BLOCK_IT block_it = blocks; //destination iterator - TO_BLOCK *block; //created block +void Textord::filter_blobs(ICOORD page_tr, // top right + TO_BLOCK_LIST *blocks, // output list + BOOL8 testing_on) { // for plotting + TO_BLOCK_IT block_it = blocks; // destination iterator + TO_BLOCK *block; // created block if (to_win != NULL) to_win->Clear(); - for (block_it.mark_cycle_pt (); !block_it.cycled_list (); - block_it.forward ()) { - block = block_it.data (); - block->line_size = filter_noise_blobs (&block->blobs, + for (block_it.mark_cycle_pt(); !block_it.cycled_list(); + block_it.forward()) { + block = block_it.data(); + block->line_size = filter_noise_blobs(&block->blobs, &block->noise_blobs, &block->small_blobs, &block->large_blobs); - block->line_spacing = - block->line_size * (textord_merge_desc + textord_merge_x + - textord_merge_asc + - textord_merge_asc) / textord_merge_x; + block->line_spacing = block->line_size * + (tesseract::CCStruct::kDescenderFraction + + tesseract::CCStruct::kXHeightFraction + + 2 * tesseract::CCStruct::kAscenderFraction) / + tesseract::CCStruct::kXHeightFraction; block->line_size *= textord_min_linesize; block->max_blob_size = block->line_size * textord_excess_blobsize; #ifndef GRAPHICS_DISABLED @@ -400,76 +283,26 @@ void filter_blobs( //split into groups if (textord_show_boxes && testing_on) { if (to_win == NULL) create_to_win(page_tr); - plot_box_list (to_win, &block->noise_blobs, ScrollView::WHITE); - plot_box_list (to_win, &block->small_blobs, ScrollView::WHITE); - plot_box_list (to_win, &block->large_blobs, ScrollView::WHITE); - plot_box_list (to_win, &block->blobs, ScrollView::WHITE); + plot_box_list(to_win, &block->noise_blobs, ScrollView::WHITE); + plot_box_list(to_win, &block->small_blobs, 
ScrollView::WHITE); + plot_box_list(to_win, &block->large_blobs, ScrollView::WHITE); + plot_box_list(to_win, &block->blobs, ScrollView::WHITE); } #endif } } - /********************************************************************** * filter_noise_blobs * * Move small blobs to a separate list. **********************************************************************/ -float filter_noise_blobs( //separate noise - BLOBNBOX_LIST *src_list, //origonal list - BLOBNBOX_LIST *noise_list, //noise list - BLOBNBOX_LIST *small_list, //small blobs - BLOBNBOX_LIST *large_list //large blobs - ) { - inT16 height; //height of blob - inT16 width; //of blob - BLOBNBOX_IT src_it = src_list; //iterators - BLOBNBOX_IT noise_it = noise_list; - BLOBNBOX_IT small_it = small_list; - BLOBNBOX_IT large_it = large_list; - STATS size_stats (0, MAX_NEAREST_DIST); - //blob heights - if (textord_new_initial_xheight) - return filter_noise_blobs2 (src_list, noise_list, small_list, large_list); - float min_y; //size limits - float max_y; - float max_x; - - for (src_it.mark_cycle_pt (); !src_it.cycled_list (); src_it.forward ()) { - if (src_it.data ()->bounding_box ().height () < textord_max_noise_size) - noise_it.add_after_then_move (src_it.extract ()); - } - for (src_it.mark_cycle_pt (); !src_it.cycled_list (); src_it.forward ()) { - size_stats.add (src_it.data ()->bounding_box ().height (), 1); - } - min_y = floor (size_stats.ile (textord_blob_size_smallile / 100.0)); - max_y = ceil (size_stats.ile (textord_blob_size_bigile / 100.0)); - max_x = ceil (size_stats.ile (0.5) * textord_width_limit); - for (src_it.mark_cycle_pt (); !src_it.cycled_list (); src_it.forward ()) { - height = src_it.data ()->bounding_box ().height (); - width = src_it.data ()->bounding_box ().width (); - if (height < min_y) - small_it.add_after_then_move (src_it.extract ()); - else if (height > max_y || width > max_x) - large_it.add_after_then_move (src_it.extract ()); - } - return size_stats.ile (textord_initialx_ile); -} - - 
-/********************************************************************** - * filter_noise_blobs2 - * - * Move small blobs to a separate list. - **********************************************************************/ - -float filter_noise_blobs2( //separate noise - BLOBNBOX_LIST *src_list, //origonal list - BLOBNBOX_LIST *noise_list, //noise list - BLOBNBOX_LIST *small_list, //small blobs - BLOBNBOX_LIST *large_list //large blobs - ) { +float Textord::filter_noise_blobs( + BLOBNBOX_LIST *src_list, // original list + BLOBNBOX_LIST *noise_list, // noise list + BLOBNBOX_LIST *small_list, // small blobs + BLOBNBOX_LIST *large_list) { // large blobs inT16 height; //height of blob inT16 width; //of blob BLOBNBOX *blob; //current blob @@ -497,10 +330,11 @@ float filter_noise_blobs2( //separate noise size_stats.add (src_it.data ()->bounding_box ().height (), 1); } initial_x = size_stats.ile (textord_initialx_ile); - max_y = - ceil (initial_x * - (textord_merge_desc + textord_merge_x + - 2 * textord_merge_asc) / textord_merge_x); + max_y = ceil(initial_x * + (tesseract::CCStruct::kDescenderFraction + + tesseract::CCStruct::kXHeightFraction + + 2 * tesseract::CCStruct::kAscenderFraction) / + tesseract::CCStruct::kXHeightFraction); min_y = floor (initial_x / 2); max_x = ceil (initial_x * textord_width_limit); small_it.move_to_first (); @@ -526,82 +360,65 @@ float filter_noise_blobs2( //separate noise max_height = size_stats.ile (textord_initialasc_ile); // printf("max_y=%g, min_y=%g, initial_x=%g, max_height=%g,", // max_y,min_y,initial_x,max_height); - max_height *= textord_merge_x / (textord_merge_x + textord_merge_asc); + max_height *= tesseract::CCStruct::kXHeightCapRatio; if (max_height > initial_x) initial_x = max_height; // printf(" ret=%g\n",initial_x); return initial_x; } - -/********************************************************************** - * textord_page - * - * Textord the list of blobs and return a list of proper blocks. 
- **********************************************************************/ - -void textord_page( //make rows & words - ICOORD page_tr, //top right - BLOCK_LIST *blocks, //block list - TO_BLOCK_LIST *land_blocks, //rotated for landscape - TO_BLOCK_LIST *port_blocks, //output list - tesseract::Tesseract* tess - ) { - float gradient; //global skew - - set_global_loc_code(LOC_TEXT_ORD_ROWS); - gradient = make_rows (page_tr, blocks, land_blocks, port_blocks, tess); - if (global_monitor != NULL) { - global_monitor->ocr_alive = TRUE; - global_monitor->progress = 20; - } - set_global_loc_code(LOC_TEXT_ORD_WORDS); - make_words(page_tr, gradient, blocks, land_blocks, port_blocks, tess); - if (global_monitor != NULL) { - global_monitor->ocr_alive = TRUE; - global_monitor->progress = 30; - } - cleanup_blocks(blocks); //remove empties -#ifndef GRAPHICS_DISABLED - close_to_win(); -#endif - if (textord_exit_after && !interactive_mode) - exit (0); -} - /********************************************************************** * cleanup_blocks * * Delete empty blocks, rows from the page. 
**********************************************************************/ -void cleanup_blocks( //remove empties - BLOCK_LIST *blocks //list - ) { +void Textord::cleanup_blocks( //remove empties + BLOCK_LIST *blocks //list + ) { BLOCK_IT block_it = blocks; //iterator ROW_IT row_it; //row iterator + int num_rows = 0; + int num_rows_all = 0; + int num_blocks = 0; + int num_blocks_all = 0; for (block_it.mark_cycle_pt (); !block_it.cycled_list (); - block_it.forward ()) { + block_it.forward ()) { + num_rows = 0; + num_rows_all = 0; row_it.set_to_list (block_it.data ()->row_list ()); for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) { + ++num_rows_all; clean_small_noise_from_words(row_it.data()); if ((textord_noise_rejrows - && !row_it.data ()->word_list ()->empty () - && clean_noise_from_row (row_it.data ())) - || row_it.data ()->word_list ()->empty ()) + && !row_it.data ()->word_list ()->empty () + && clean_noise_from_row (row_it.data ())) + || row_it.data ()->word_list ()->empty ()) delete row_it.extract ();//lose empty row else { if (textord_noise_rejwords) clean_noise_from_words (row_it.data ()); if (textord_blshift_maxshift >= 0) - tweak_row_baseline (row_it.data ()); + tweak_row_baseline(row_it.data(), + textord_blshift_maxshift, + textord_blshift_xfraction); + ++num_rows; } } - if (block_it.data ()->row_list ()->empty ()) { - delete block_it.extract ();//lose empty block + if (block_it.data()->row_list()->empty() && + (block_it.data()->poly_block() == NULL || + block_it.data()->poly_block()->IsText())) { + delete block_it.extract(); // Lose empty text blocks but not other types. 
+ } else { + ++num_blocks; } + ++num_blocks_all; + if (textord_noise_debug) + tprintf("cleanup_blocks: # rows = %d / %d\n", num_rows, num_rows_all); } + if (textord_noise_debug) + tprintf("cleanup_blocks: # blocks = %d / %d\n", num_blocks, num_blocks_all); } @@ -611,9 +428,9 @@ void cleanup_blocks( //remove empties * Move blobs of words from rows of garbage into the reject blobs list. **********************************************************************/ -BOOL8 clean_noise_from_row( //remove empties - ROW *row //row to clean - ) { +BOOL8 Textord::clean_noise_from_row( //remove empties + ROW *row //row to clean + ) { BOOL8 testing_on; TBOX blob_box; //bounding box C_BLOB *blob; //current blob @@ -678,7 +495,7 @@ BOOL8 clean_noise_from_row( //remove empties blob_box.width () > blob_box.height ()? blob_box.width () : blob_box.height (); if (blob_size >= textord_noise_sizelimit * row->x_height () - && blob_size < row->x_height () * 2) { + && blob_size < row->x_height () * 2) { trans_threshold = blob_size / textord_noise_sizefraction; trans_count = blob->count_transitions (trans_threshold); if (trans_count < textord_noise_translimit) @@ -713,16 +530,15 @@ BOOL8 clean_noise_from_row( //remove empties && dot_count > norm_count * textord_noise_rowratio && dot_count > 2; } - /********************************************************************** * clean_noise_from_words * * Move blobs of words from rows of garbage into the reject blobs list. **********************************************************************/ -void clean_noise_from_words( //remove empties - ROW *row //row to clean - ) { +void Textord::clean_noise_from_words( //remove empties + ROW *row //row to clean + ) { TBOX blob_box; //bounding box inT8 *word_dud; //was it chucked C_BLOB *blob; //current blob @@ -833,7 +649,7 @@ void clean_noise_from_words( //remove empties // Remove outlines that are a tiny fraction in either width or height // of the word height. 
-void clean_small_noise_from_words(ROW *row) { +void Textord::clean_small_noise_from_words(ROW *row) { WERD_IT word_it(row->word_list()); for (word_it.mark_cycle_pt(); !word_it.cycled_list(); word_it.forward()) { WERD* word = word_it.data(); @@ -864,7 +680,7 @@ void clean_small_noise_from_words(ROW *row) { } } } - +} // tesseract /********************************************************************** * tweak_row_baseline @@ -873,10 +689,10 @@ void clean_small_noise_from_words(ROW *row) { * close enough. **********************************************************************/ -void tweak_row_baseline( //remove empties - ROW *row //row to clean - ) { - TBOX blob_box; //bounding box +void tweak_row_baseline(ROW *row, + double blshift_maxshift, + double blshift_xfraction) { + TBOX blob_box; //bounding box C_BLOB *blob; //current blob WERD *word; //current word inT32 blob_count; //no of blobs @@ -922,9 +738,8 @@ void tweak_row_baseline( //remove empties ydiff = -ydiff / row->x_height (); else ydiff = ydiff / row->x_height (); - if (ydiff < textord_blshift_maxshift - && blob_box.height () / row->x_height () > - textord_blshift_xfraction) { + if (ydiff < blshift_maxshift + && blob_box.height () / row->x_height () > blshift_xfraction) { if (xstarts[dest_index] >= x_centre) xstarts[dest_index] = blob_box.left (); coeffs[dest_index * 3] = 0; @@ -981,7 +796,6 @@ void tweak_row_baseline( //remove empties free_mem(coeffs); } - /********************************************************************** * blob_y_order * @@ -1011,4 +825,3 @@ inT32 blob_y_order( //sort function return 0; } } - diff --git a/textord/tordmain.h b/textord/tordmain.h index ed56dcd29d..9868ccbbdb 100644 --- a/textord/tordmain.h +++ b/textord/tordmain.h @@ -21,56 +21,17 @@ #define TORDMAIN_H #include -#include "varable.h" +#include "params.h" #include "ocrblock.h" -#include "tessclas.h" +#include "blobs.h" #include "blobbox.h" #include "notdll.h" +struct Pix; namespace tesseract { class Tesseract; } -extern 
BOOL_VAR_H (textord_show_blobs, FALSE, "Display unsorted blobs"); -extern BOOL_VAR_H (textord_new_initial_xheight, TRUE, -"Use test xheight mechanism"); -extern BOOL_VAR_H (textord_exit_after, FALSE, -"Exit after completing textord"); -extern INT_VAR_H (textord_max_noise_size, 7, "Pixel size of noise"); -extern double_VAR_H (textord_blob_size_bigile, 95, -"Percentile for large blobs"); -extern double_VAR_H (textord_noise_area_ratio, 0.7, -"Fraction of bounding box for noise"); -extern double_VAR_H (textord_blob_size_smallile, 20, -"Percentile for small blobs"); -extern double_VAR_H (textord_initialx_ile, 0.75, -"Ile of sizes for xheight guess"); -extern double_VAR_H (textord_initialasc_ile, 0.90, -"Ile of sizes for xheight guess"); -extern INT_VAR_H (textord_noise_sizefraction, 10, -"Fraction of size for maxima"); -extern double_VAR_H (textord_noise_sizelimit, 0.5, -"Fraction of x for big t count"); -extern INT_VAR_H (textord_noise_translimit, 16, -"Transitions for normal blob"); -extern double_VAR_H (textord_noise_normratio, 2.0, -"Dot to norm ratio for deletion"); -extern BOOL_VAR_H (textord_noise_rejwords, TRUE, "Reject noise-like words"); -extern BOOL_VAR_H (textord_noise_rejrows, TRUE, "Reject noise-like rows"); -extern double_VAR_H (textord_noise_syfract, 0.2, -"xh fract error for norm blobs"); -extern double_VAR_H (textord_noise_sxfract, 0.4, -"xh fract width error for norm blobs"); -extern INT_VAR_H (textord_noise_sncount, 1, "super norm blobs to save row"); -extern double_VAR_H (textord_noise_rowratio, 6.0, -"Dot to norm ratio for deletion"); -extern BOOL_VAR_H (textord_noise_debug, FALSE, "Debug row garbage detector"); -extern double_VAR_H (textord_blshift_maxshift, 0.00, "Max baseline shift"); -extern double_VAR_H (textord_blshift_xfraction, 9.99, -"Min size of baseline shift"); - //xiaofan -extern STRING_EVAR_H (tessedit_image_ext, ".tif", "Externsion for image file"); -extern clock_t previous_cpu; void make_blocks_from_blobs( //convert & textord TBLOB 
*tessblobs, //tess style input const char *filename, //blob file @@ -78,34 +39,9 @@ void make_blocks_from_blobs( //convert & textord BOOL8 do_shift, //shift tess coords BLOCK_LIST *blocks //block list ); -void find_components( // find components in blocks - BLOCK_LIST *blocks, - TO_BLOCK_LIST *land_blocks, - TO_BLOCK_LIST *port_blocks, - TBOX *page_box); -void SetBlobStrokeWidth(bool debug, BLOBNBOX* blob); -void assign_blobs_to_blocks2( //split into groups - BLOCK_LIST *blocks, //blocks to process - TO_BLOCK_LIST *land_blocks, //rotated for landscape - TO_BLOCK_LIST *port_blocks //output list - ); -void filter_blobs( //split into groups - ICOORD page_tr, //top right - TO_BLOCK_LIST *blocks, //output list - BOOL8 testing_on //for plotting - ); -float filter_noise_blobs( //separate noise - BLOBNBOX_LIST *src_list, //origonal list - BLOBNBOX_LIST *noise_list, //noise list - BLOBNBOX_LIST *small_list, //small blobs - BLOBNBOX_LIST *large_list //large blobs - ); -float filter_noise_blobs2( //separate noise - BLOBNBOX_LIST *src_list, //origonal list - BLOBNBOX_LIST *noise_list, //noise list - BLOBNBOX_LIST *small_list, //small blobs - BLOBNBOX_LIST *large_list //large blobs - ); +void SetBlobStrokeWidth(Pix* pix, BLOBNBOX* blob); +void assign_blobs_to_blocks2(Pix* pix, BLOCK_LIST *blocks, + TO_BLOCK_LIST *port_blocks); void textord_page( //make rows & words ICOORD page_tr, //top right BLOCK_LIST *blocks, //block list @@ -113,21 +49,9 @@ void textord_page( //make rows & words TO_BLOCK_LIST *port_blocks, //output list tesseract::Tesseract* ); -void cleanup_blocks( //remove empties - BLOCK_LIST *blocks //list - ); -BOOL8 clean_noise_from_row( //remove empties - ROW *row //row to clean - ); -void clean_noise_from_words( //remove empties - ROW *row //row to clean - ); -// Remove outlines that are a tiny fraction in either width or height -// of the word height. 
-void clean_small_noise_from_words(ROW *row); -void tweak_row_baseline( //remove empties - ROW *row //row to clean - ); +void tweak_row_baseline(ROW *row, + double blshift_maxshift, + double blshift_xfraction); inT32 blob_y_order( //sort function void *item1, //items to compare void *item2); diff --git a/textord/tospace.cpp b/textord/tospace.cpp index 062fc18863..1fe1a29163 100644 --- a/textord/tospace.cpp +++ b/textord/tospace.cpp @@ -1,7 +1,26 @@ +/********************************************************************** + * tospace.cpp + * + * Compute fuzzy word spacing thresholds for each row. + * I.e. set : max_nonspace + * space_threshold + * min_space + * kern_size + * space_size + * for each row. + * ONLY FOR PROPORTIONAL BLOCKS - FIXED PITCH IS ASSUMED ALREADY DONE + * + * Note: functions in this file were originally not members of any + * class or enclosed by any namespace. Now they are all static members + * of the Textord class. + * + **********************************************************************/ + +#include "textord.h" #include "mfcpch.h" #include "tovars.h" #include "drawtord.h" -#include "tospace.h" +#include "textord.h" #include "ndminx.h" #include "statistc.h" @@ -10,128 +29,25 @@ #include "config_auto.h" #endif -BOOL_VAR(tosp_old_to_method, FALSE, "Space stats use prechopping?"); -BOOL_VAR(tosp_only_use_prop_rows, TRUE, -"Block stats to use fixed pitch rows?"); -BOOL_VAR(tosp_force_wordbreak_on_punct, FALSE, -"Force word breaks on punct to break long lines in non-space delimited langs"); -BOOL_VAR(tosp_use_pre_chopping, FALSE, -"Space stats use prechopping?"); -BOOL_VAR(tosp_old_to_bug_fix, FALSE, "Fix suspected bug in old code"); -BOOL_VAR(tosp_block_use_cert_spaces, TRUE, -"Only stat OBVIOUS spaces"); -BOOL_VAR(tosp_row_use_cert_spaces, TRUE, "Only stat OBVIOUS spaces"); -BOOL_VAR(tosp_narrow_blobs_not_cert, TRUE, -"Only stat OBVIOUS spaces"); -BOOL_VAR(tosp_row_use_cert_spaces1, TRUE, "Only stat OBVIOUS spaces"); 
-BOOL_VAR(tosp_recovery_isolated_row_stats, TRUE, -"Use row alone when inadequate cert spaces"); -BOOL_VAR(tosp_only_small_gaps_for_kern, FALSE, "Better guess"); -BOOL_VAR(tosp_all_flips_fuzzy, FALSE, "Pass ANY flip to context?"); -BOOL_VAR(tosp_fuzzy_limit_all, TRUE, -"Dont restrict kn->sp fuzzy limit to tables"); -BOOL_VAR(tosp_stats_use_xht_gaps, TRUE, -"Use within xht gap for wd breaks"); -BOOL_VAR(tosp_use_xht_gaps, TRUE, "Use within xht gap for wd breaks"); -BOOL_VAR(tosp_only_use_xht_gaps, FALSE, -"Only use within xht gap for wd breaks"); -BOOL_VAR(tosp_rule_9_test_punct, FALSE, -"Dont chng kn to space next to punct"); -BOOL_VAR(tosp_flip_fuzz_kn_to_sp, TRUE, "Default flip"); -BOOL_VAR(tosp_flip_fuzz_sp_to_kn, TRUE, "Default flip"); -BOOL_VAR(tosp_improve_thresh, FALSE, "Enable improvement heuristic"); -INT_VAR(tosp_debug_level, 0, "Debug data"); -INT_VAR(tosp_enough_space_samples_for_median, 3, -"or should we use mean"); -INT_VAR(tosp_redo_kern_limit, 10, -"No.samples reqd to reestimate for row"); -INT_VAR(tosp_few_samples, 40, -"No.gaps reqd with 1 large gap to treat as a table"); -INT_VAR(tosp_short_row, 20, -"No.gaps reqd with few cert spaces to use certs"); -INT_VAR(tosp_sanity_method, 1, "How to avoid being silly"); -double_VAR(tosp_threshold_bias1, 0, -"how far between kern and space?"); -double_VAR(tosp_threshold_bias2, 0, -"how far between kern and space?"); -double_VAR(tosp_narrow_fraction, 0.3, "Fract of xheight for narrow"); -double_VAR(tosp_narrow_aspect_ratio, 0.48, -"narrow if w/h less than this"); -double_VAR(tosp_wide_fraction, 0.52, "Fract of xheight for wide"); -double_VAR(tosp_wide_aspect_ratio, 0.0, "wide if w/h less than this"); -double_VAR(tosp_fuzzy_space_factor, 0.6, -"Fract of xheight for fuzz sp"); -double_VAR(tosp_fuzzy_space_factor1, 0.5, -"Fract of xheight for fuzz sp"); -double_VAR(tosp_fuzzy_space_factor2, 0.72, -"Fract of xheight for fuzz sp"); -double_VAR(tosp_gap_factor, 0.83, "gap ratio to flip sp->kern"); 
-double_VAR(tosp_kern_gap_factor1, 2.0, "gap ratio to flip kern->sp"); -double_VAR(tosp_kern_gap_factor2, 1.3, "gap ratio to flip kern->sp"); -double_VAR(tosp_kern_gap_factor3, 2.5, "gap ratio to flip kern->sp"); -double_VAR(tosp_ignore_big_gaps, -1, "xht multiplier"); -double_VAR(tosp_ignore_very_big_gaps, 3.5, "xht multiplier"); -double_VAR(tosp_rep_space, 1.6, "rep gap multiplier for space"); -double_VAR(tosp_enough_small_gaps, 0.65, -"Fract of kerns reqd for isolated row stats"); -double_VAR(tosp_table_kn_sp_ratio, 2.25, -"Min difference of kn & sp in table"); -double_VAR(tosp_table_xht_sp_ratio, 0.33, -"Expect spaces bigger than this"); -double_VAR(tosp_table_fuzzy_kn_sp_ratio, 3.0, -"Fuzzy if less than this"); -double_VAR(tosp_fuzzy_kn_fraction, 0.5, "New fuzzy kn alg"); -double_VAR(tosp_fuzzy_sp_fraction, 0.5, "New fuzzy sp alg"); -double_VAR(tosp_min_sane_kn_sp, 1.5, -"Dont trust spaces less than this time kn"); -double_VAR(tosp_init_guess_kn_mult, 2.2, -"Thresh guess - mult kn by this"); -double_VAR(tosp_init_guess_xht_mult, 0.28, -"Thresh guess - mult xht by this"); -double_VAR(tosp_max_sane_kn_thresh, 5.0, -"Multiplier on kn to limit thresh"); -double_VAR(tosp_flip_caution, 0.0, -"Dont autoflip kn to sp when large separation"); -double_VAR(tosp_large_kerning, 0.19, -"Limit use of xht gap with large kns"); -double_VAR(tosp_dont_fool_with_small_kerns, -1, -"Limit use of xht gap with odd small kns"); -double_VAR(tosp_near_lh_edge, 0, -"Dont reduce box if the top left is non blank"); -double_VAR(tosp_silly_kn_sp_gap, 0.2, -"Dont let sp minus kn get too small"); -double_VAR(tosp_pass_wide_fuzz_sp_to_context, 0.75, -"How wide fuzzies need context"); - #define MAXSPACING 128 /*max expected spacing in pix */ -/********************************************************************** - * to_spacing - * - * Compute fuzzy word spacing thresholds for each row. - * I.e. set : max_nonspace - * space_threshold - * min_space - * kern_size - * space_size for each row. 
- * ONLY FOR PROPORTIONAL BLOCKS - FIXED PITCH IS ASSUMED ALREADY DONE - **********************************************************************/ -void to_spacing( //set spacing - ICOORD page_tr, //topright of page - TO_BLOCK_LIST *blocks //blocks on page - ) { +namespace tesseract { +void Textord::to_spacing( + ICOORD page_tr, //topright of page + TO_BLOCK_LIST *blocks //blocks on page + ) { TO_BLOCK_IT block_it; //iterator TO_BLOCK *block; //current block; TO_ROW_IT row_it; //row iterator TO_ROW *row; //current row int block_index; //block number int row_index; //row number - inT16 block_space_gap_width; //Estimated width of real spaces for whole block - //Estimate width ofnon space gaps for whole block + //estimated width of real spaces for whole block + inT16 block_space_gap_width; + //estimated width of non space gaps for whole block inT16 block_non_space_gap_width; - //Old fixed/prop result - BOOL8 old_text_ord_proportional; - GAPMAP *gapmap = NULL; //map of big vert gaps in blk + BOOL8 old_text_ord_proportional;//old fixed/prop result + GAPMAP *gapmap = NULL; //map of big vert gaps in blk block_it.set_to_list (blocks); block_index = 1; @@ -144,6 +60,16 @@ void to_spacing( //set spacing old_text_ord_proportional, block_space_gap_width, block_non_space_gap_width); + // Make sure relative values of block-level space and non-space gap + // widths are reasonable. The ratio of 1:3 is also used in + // block_spacing_stats, to corrrect the block_space_gap_width + // Useful for arabic and hindi, when the non-space gap width is + // often over-estimated and should not be trusted. A similar ratio + // is found in block_spacing_stats. 
+ if (tosp_old_to_method && tosp_old_to_constrain_sp_kn && + (float) block_space_gap_width / block_non_space_gap_width < 3.0) { + block_non_space_gap_width = (inT16) floor (block_space_gap_width / 3.0); + } row_it.set_to_list (block->get_rows ()); row_index = 1; for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) { @@ -183,13 +109,13 @@ void to_spacing( //set spacing * block_spacing_stats() *************************************************************************/ -void block_spacing_stats( //DEBUG USE ONLY - TO_BLOCK *block, - GAPMAP *gapmap, - BOOL8 &old_text_ord_proportional, - inT16 &block_space_gap_width, //resulting estimate - inT16 &block_non_space_gap_width //resulting estimate - ) { +void Textord::block_spacing_stats( + TO_BLOCK *block, + GAPMAP *gapmap, + BOOL8 &old_text_ord_proportional, + inT16 &block_space_gap_width, //resulting estimate + inT16 &block_non_space_gap_width //resulting estimate + ) { TO_ROW_IT row_it; //row iterator TO_ROW *row; //current row BLOBNBOX_IT blob_it; //iterator @@ -240,12 +166,12 @@ void block_spacing_stats( //DEBUG USE ONLY minwidth = blob_box.width (); gap_width = blob_box.left () - prev_blob_box.right (); if (!ignore_big_gap (row, row_length, gapmap, - prev_blob_box.right (), blob_box.left ())) { + prev_blob_box.right (), blob_box.left ())) { all_gap_stats.add (gap_width, 1); centre_to_centre = (blob_box.left () + blob_box.right () - (prev_blob_box.left () + - prev_blob_box.right ())) / 2; + prev_blob_box.right ())) / 2; //DEBUG centre_to_centre_stats.add (centre_to_centre, 1); // DEBUG @@ -321,7 +247,7 @@ void block_spacing_stats( //DEBUG USE ONLY If tosp_use_cert_spaces is enabled, the estimate of the space gap is restricted to obvious spaces - those wider than half the xht or those with wide blobs on both sides - i.e not things that are suspect 1's or - punctiation that is sometimes widely spaced. + punctuation that is sometimes widely spaced. 
*/ if (!tosp_block_use_cert_spaces || (gap_width > @@ -356,16 +282,15 @@ void block_spacing_stats( //DEBUG USE ONLY * Set values for min_space, max_non_space based on row stats only * If failure - return 0 values. *************************************************************************/ - -void row_spacing_stats( //estimate for block - TO_ROW *row, - GAPMAP *gapmap, - inT16 block_idx, - inT16 row_idx, - inT16 block_space_gap_width, - inT16 block_non_space_gap_width //estimate for block - ) { - //iterator +void Textord::row_spacing_stats( + TO_ROW *row, + GAPMAP *gapmap, + inT16 block_idx, + inT16 row_idx, + inT16 block_space_gap_width, //estimate for block + inT16 block_non_space_gap_width //estimate for block + ) { + //iterator BLOBNBOX_IT blob_it = row->blob_list (); STATS all_gap_stats (0, MAXSPACING); STATS cert_space_gap_stats (0, MAXSPACING); @@ -396,7 +321,7 @@ void row_spacing_stats( //estimate for block block_non_space_gap_width + inT16 (floor (0.5 + tosp_threshold_bias1 * (block_space_gap_width - - block_non_space_gap_width))); + block_non_space_gap_width))); else real_space_threshold = //Old TO method (block_space_gap_width + block_non_space_gap_width) / 2; @@ -443,8 +368,8 @@ void row_spacing_stats( //estimate for block } } suspected_table = (large_gap_count > 1) || - ((large_gap_count > 0) && - (all_gap_stats.get_total () <= tosp_few_samples)); + ((large_gap_count > 0) && + (all_gap_stats.get_total () <= tosp_few_samples)); /* Now determine row kern size, space size and threshold */ @@ -452,17 +377,17 @@ void row_spacing_stats( //estimate for block tosp_enough_space_samples_for_median) || ((suspected_table || all_gap_stats.get_total () <= tosp_short_row) && - cert_space_gap_stats.get_total () > 0)) + cert_space_gap_stats.get_total () > 0)) { old_to_method(row, &all_gap_stats, &cert_space_gap_stats, &small_gap_stats, block_space_gap_width, block_non_space_gap_width); - else { + } else { if (!tosp_recovery_isolated_row_stats || - !isolated_row_stats 
(row, gapmap, &all_gap_stats, suspected_table, - block_idx, row_idx)) { + !isolated_row_stats (row, gapmap, &all_gap_stats, suspected_table, + block_idx, row_idx)) { if (tosp_row_use_cert_spaces && (tosp_debug_level > 5)) tprintf ("B:%d R:%d -- Inadequate certain spaces.\n", block_idx, row_idx); @@ -474,7 +399,8 @@ void row_spacing_stats( //estimate for block else row->kern_size = block_non_space_gap_width; row->space_threshold = - inT32 (floor ((row->space_size + row->kern_size) / 2)); + inT32 (floor ((row->space_size + row->kern_size) / + tosp_old_sp_kn_th_factor)); } else old_to_method(row, @@ -494,7 +420,7 @@ void row_spacing_stats( //estimate for block if (tosp_sanity_method == 0) { if (suspected_table && (row->space_size < tosp_table_kn_sp_ratio * row->kern_size)) { - if (tosp_debug_level > 0) + if (tosp_debug_level > 5) tprintf ("B:%d R:%d -- DONT BELIEVE SPACE %3.2f %d %3.2f.\n", block_idx, row_idx, row->kern_size, row->space_threshold, row->space_size); @@ -516,20 +442,21 @@ void row_spacing_stats( //estimate for block sane_space = MAX (tosp_min_sane_kn_sp * MAX (row->kern_size, 2.5), row->xheight / 2); - if (tosp_debug_level > 0) + if (tosp_debug_level > 5) tprintf ("B:%d R:%d -- DONT BELIEVE SPACE %3.2f %d %3.2f -> %3.2f.\n", block_idx, row_idx, row->kern_size, row->space_threshold, row->space_size, sane_space); row->space_size = sane_space; row->space_threshold = - inT32 (floor ((row->space_size + row->kern_size) / 2)); + inT32 (floor ((row->space_size + row->kern_size) / + tosp_old_sp_kn_th_factor)); } /* NEVER let threshold get VERY far away from kern */ sane_threshold = inT32 (floor (tosp_max_sane_kn_thresh * MAX (row->kern_size, 2.5))); if (row->space_threshold > sane_threshold) { - if (tosp_debug_level > 0) + if (tosp_debug_level > 5) tprintf ("B:%d R:%d -- DONT BELIEVE THRESH %3.2f %d %3.2f->%d.\n", block_idx, row_idx, row->kern_size, @@ -546,7 +473,7 @@ void row_spacing_stats( //estimate for block if ((row->space_size < sane_space) || 
(row->space_threshold < sane_threshold)) { - if (tosp_debug_level > 0) + if (tosp_debug_level > 5) tprintf ("B:%d R:%d -- SUSPECT NO SPACES %3.2f %d %3.2f.\n", block_idx, row_idx, row->kern_size, @@ -627,15 +554,16 @@ void row_spacing_stats( //estimate for block (inT32) ceil (tosp_table_fuzzy_kn_sp_ratio * row->kern_size)); - if ((tosp_fuzzy_kn_fraction > 0) && (row->kern_size < row->space_threshold)) + if ((tosp_fuzzy_kn_fraction > 0) && (row->kern_size < row->space_threshold)) { row->max_nonspace = (inT32) floor (0.5 + row->kern_size + tosp_fuzzy_kn_fraction * (row->space_threshold - row->kern_size)); - - if (row->max_nonspace > row->space_threshold) + } + if (row->max_nonspace > row->space_threshold) { //Dont be silly row->max_nonspace = row->space_threshold; + } if (tosp_debug_level > 5) tprintf @@ -644,17 +572,21 @@ void row_spacing_stats( //estimate for block block_space_gap_width, real_space_threshold, row->kern_size, row->max_nonspace, row->space_threshold, row->min_space, row->space_size); + if (tosp_debug_level > 10) + tprintf("row->kern_size = %3.2f, row->space_size = %3.2f, " + "row->space_threshold = %d\n", + row->kern_size, row->space_size, row->space_threshold); } - -void old_to_method( //estimate for block - TO_ROW *row, - STATS *all_gap_stats, - STATS *space_gap_stats, - STATS *small_gap_stats, - inT16 block_space_gap_width, - inT16 block_non_space_gap_width //estimate for block - ) { +void Textord::old_to_method( + TO_ROW *row, + STATS *all_gap_stats, + STATS *space_gap_stats, + STATS *small_gap_stats, + inT16 block_space_gap_width, //estimate for block + inT16 block_non_space_gap_width //estimate for block + ) { + /* First, estimate row space size */ /* Old to condition was > 2 */ if (space_gap_stats->get_total () >= tosp_enough_space_samples_for_median) { //Adequate samples @@ -684,25 +616,27 @@ void old_to_method( //estimate for block if (row->space_size < (block_non_space_gap_width * 3) + 1) row->space_size = (block_non_space_gap_width * 3) + 
1; } - else + else { //Use block default row->space_size = block_space_gap_width; + } + /* Next, estimate row kern size */ if ((tosp_only_small_gaps_for_kern) && (small_gap_stats->get_total () > tosp_redo_kern_limit)) row->kern_size = small_gap_stats->median (); else if (all_gap_stats->get_total () > tosp_redo_kern_limit) row->kern_size = all_gap_stats->median (); - else - //old TO -SAME FOR ALL ROWS + else //old TO -SAME FOR ALL ROWS row->kern_size = block_non_space_gap_width; - if (tosp_threshold_bias2 > 0) + /* Finally, estimate row space threshold */ + if (tosp_threshold_bias2 > 0) { row->space_threshold = - inT32 (floor (0.5 + row->kern_size + - tosp_threshold_bias2 * (row->space_size - - row->kern_size))); - else + inT32 (floor (0.5 + row->kern_size + + tosp_threshold_bias2 * (row->space_size - + row->kern_size))); + } else { /* NOTE old text ord uses (space_size + kern_size + 1)/2 as the threshold and holds this in a float. The use is with a >= test @@ -712,7 +646,22 @@ void old_to_method( //estimate for block and kern_size.) 
*/ row->space_threshold = - inT32 (floor ((row->space_size + row->kern_size) / 2)); + inT32 (floor ((row->space_size + row->kern_size) / 2)); + } + + // Apply the same logic and ratios as in row_spacing_stats to + // restrict relative values of the row's space_size, kern_size, and + // space_threshold + if (tosp_old_to_constrain_sp_kn && tosp_sanity_method == 1 && + ((row->space_size < + tosp_min_sane_kn_sp * MAX (row->kern_size, 2.5)) || + ((row->space_size - row->kern_size) < + tosp_silly_kn_sp_gap * row->xheight))) { + if (row->kern_size > 2.5) + row->kern_size = row->space_size / tosp_min_sane_kn_sp; + row->space_threshold = inT32 (floor ((row->space_size + row->kern_size) / + tosp_old_sp_kn_th_factor)); + } } @@ -720,18 +669,17 @@ void old_to_method( //estimate for block * isolated_row_stats() * Set values for min_space, max_non_space based on row stats only *************************************************************************/ - -BOOL8 isolated_row_stats(TO_ROW *row, - GAPMAP *gapmap, - STATS *all_gap_stats, - BOOL8 suspected_table, - inT16 block_idx, - inT16 row_idx) { +BOOL8 Textord::isolated_row_stats(TO_ROW *row, + GAPMAP *gapmap, + STATS *all_gap_stats, + BOOL8 suspected_table, + inT16 block_idx, + inT16 row_idx) { float kern_estimate; float crude_threshold_estimate; inT16 small_gaps_count; inT16 total; - //iterator + //iterator BLOBNBOX_IT blob_it = row->blob_list (); STATS cert_space_gap_stats (0, MAXSPACING); STATS all_space_gap_stats (0, MAXSPACING); @@ -819,7 +767,7 @@ BOOL8 isolated_row_stats(TO_ROW *row, if ((row->kern_size >= row->space_threshold) || (row->space_threshold >= row->space_size) || (row->space_threshold <= 0)) { - if (tosp_debug_level > 0) + if (tosp_debug_level > 5) tprintf ("B:%d R:%d -- Isolated row stats SANITY FAILURE: %f %d %f\n", block_idx, row_idx, row->kern_size, row->space_threshold, row->space_size); @@ -836,8 +784,7 @@ BOOL8 isolated_row_stats(TO_ROW *row, return TRUE; } - -inT16 stats_count_under(STATS *stats, inT16 
threshold) { +inT16 Textord::stats_count_under(STATS *stats, inT16 threshold) { inT16 index; inT16 total = 0; @@ -862,8 +809,7 @@ inT16 stats_count_under(STATS *stats, inT16 threshold) { * try moving the default threshold to within this band but leave the * fuzzy limit calculation as at present. *************************************************************************/ - -void improve_row_threshold(TO_ROW *row, STATS *all_gap_stats) { +void Textord::improve_row_threshold(TO_ROW *row, STATS *all_gap_stats) { float sp = row->space_size; float kn = row->kern_size; inT16 reqd_zero_width = 0; @@ -937,11 +883,10 @@ void improve_row_threshold(TO_ROW *row, STATS *all_gap_stats) { * * Convert a TO_BLOCK to a BLOCK. **********************************************************************/ - -ROW *make_prop_words( //find lines - TO_ROW *row, //row to make - FCOORD rotation //for drawing - ) { +ROW *Textord::make_prop_words( + TO_ROW *row, // row to make + FCOORD rotation // for drawing + ) { BOOL8 bol; //start of line /* prev_ values are for start of word being built. non prev_ values are for the gap between the word being built and the next one. 
*/ @@ -951,6 +896,8 @@ ROW *make_prop_words( //find lines BOOL8 fuzzy_sp; //probably space BOOL8 fuzzy_non; //probably not uinT8 blanks; //in front of word + BOOL8 prev_gap_was_a_space = FALSE; + BOOL8 break_at_next_gap = FALSE; ROW *real_row; //output row OUTLINE_IT out_it; //outlines C_OUTLINE_IT cout_it; @@ -979,9 +926,7 @@ ROW *make_prop_words( //find lines inT16 current_within_xht_gap = MAX_INT16; inT16 next_within_xht_gap = MAX_INT16; inT16 word_count = 0; - static inT16 row_count = 0; - row_count++; rep_char_it.set_to_list (&(row->rep_words)); if (!rep_char_it.empty ()) { next_rep_char_word_right = @@ -1098,7 +1043,9 @@ ROW *make_prop_words( //find lines make_a_word_break(row, blob_box, prev_gap_arg, prev_blob_box, current_gap, current_within_xht_gap, next_blob_box, next_gap_arg, - blanks, fuzzy_sp, fuzzy_non) || + blanks, fuzzy_sp, fuzzy_non, + prev_gap_was_a_space, + break_at_next_gap) || box_it.at_first()) { /* Form a new word out of the blobs collected */ if (!blob_it.empty ()) { @@ -1234,9 +1181,9 @@ ROW *make_prop_words( //find lines //put words in row word_it.add_list_after (&words); real_row->recalc_bounding_box (); - if (tosp_debug_level > 9) { - tprintf ("Row %d Made %d words in row ((%d,%d)(%d,%d))\n", - row_count, + + if (tosp_debug_level > 4) { + tprintf ("Row: Made %d words in row ((%d,%d)(%d,%d))\n", word_count, real_row->bounding_box ().left (), real_row->bounding_box ().bottom (), @@ -1254,11 +1201,10 @@ ROW *make_prop_words( //find lines * Converts words into blobs so that each blob is a single character. * Used for chopper test. 
**********************************************************************/ - -ROW *make_blob_words( // find lines - TO_ROW *row, // row to make - FCOORD rotation // for drawing - ) { +ROW *Textord::make_blob_words( + TO_ROW *row, // row to make + FCOORD rotation // for drawing + ) { bool bol; // start of line ROW *real_row; // output row C_OUTLINE_IT cout_it; @@ -1272,9 +1218,6 @@ ROW *make_blob_words( // find lines TBOX blob_box; // bounding box BLOBNBOX_IT box_it; // iterator inT16 word_count = 0; - static inT16 row_count = 0; - - row_count++; cblob_it.set_to_list(&cblobs); box_it.set_to_list(row->blob_list()); @@ -1323,9 +1266,8 @@ ROW *make_blob_words( // find lines //put words in row word_it.add_list_after(&words); real_row->recalc_bounding_box(); - if (tosp_debug_level > 9) { - tprintf ("Row %d Made %d words in row ((%d,%d)(%d,%d))\n", - row_count, + if (tosp_debug_level > 4) { + tprintf ("Row:Made %d words in row ((%d,%d)(%d,%d))\n", word_count, real_row->bounding_box().left(), real_row->bounding_box().bottom(), @@ -1337,20 +1279,20 @@ ROW *make_blob_words( // find lines return NULL; } -BOOL8 make_a_word_break( //decide on word break - TO_ROW *row, //row being made - TBOX blob_box, //for next_blob //how many blanks? - inT16 prev_gap, - TBOX prev_blob_box, - inT16 real_current_gap, - inT16 within_xht_current_gap, - TBOX next_blob_box, - inT16 next_gap, - uinT8 &blanks, - BOOL8 &fuzzy_sp, - BOOL8 &fuzzy_non) { - static BOOL8 prev_gap_was_a_space = FALSE; - static BOOL8 break_at_next_gap = FALSE; +BOOL8 Textord::make_a_word_break( + TO_ROW *row, // row being made + TBOX blob_box, // for next_blob // how many blanks? 
+ inT16 prev_gap, + TBOX prev_blob_box, + inT16 real_current_gap, + inT16 within_xht_current_gap, + TBOX next_blob_box, + inT16 next_gap, + uinT8 &blanks, + BOOL8 &fuzzy_sp, + BOOL8 &fuzzy_non, + BOOL8& prev_gap_was_a_space, + BOOL8& break_at_next_gap) { BOOL8 space; inT16 current_gap; float fuzzy_sp_to_kn_limit; @@ -1574,9 +1516,9 @@ BOOL8 make_a_word_break( //decide on word break /* Heuristics to turn dubious kerns to spaces */ /* TRIED THIS BUT IT MADE THINGS WORSE if ( prev_gap == MAX_INT16 ) - prev_gap = 0; //start of row + prev_gap = 0; // start of row if ( next_gap == MAX_INT16 ) - next_gap = 0; //end of row + next_gap = 0; // end of row */ if ((prev_blob_box.width () > 0) && (next_blob_box.width () > 0) && @@ -1602,15 +1544,15 @@ BOOL8 make_a_word_break( //decide on word break prev_gap, prev_blob_box.width (), current_gap, next_blob_box.width (), next_gap); #endif - } - else if ((prev_blob_box.width () > 0) && - (next_blob_box.width () > 0) && - (current_gap >= - tosp_kern_gap_factor2 * MAX (prev_gap, next_gap)) && - !(narrow_blob (row, prev_blob_box) || - suspected_punct_blob (row, prev_blob_box)) && - !(narrow_blob (row, next_blob_box) || - suspected_punct_blob (row, next_blob_box))) { + } else if (prev_blob_box.width() > 0 && + next_blob_box.width() > 0 && + current_gap > 5 && // Rule 9 handles small gap, big ratio. + current_gap >= + tosp_kern_gap_factor2 * MAX(prev_gap, next_gap) && + !(narrow_blob(row, prev_blob_box) || + suspected_punct_blob(row, prev_blob_box)) && + !(narrow_blob(row, next_blob_box) || + suspected_punct_blob(row, next_blob_box))) { space = TRUE; fuzzy_non = TRUE; #ifndef GRAPHICS_DISABLED @@ -1635,25 +1577,25 @@ BOOL8 make_a_word_break( //decide on word break #endif } } + if (tosp_debug_level > 10) + tprintf("word break = %d current_gap = %d, prev_gap = %d, " + "next_gap = %d\n", space ? 
1 : 0, current_gap, + prev_gap, next_gap); prev_gap_was_a_space = space && !(fuzzy_non); return space; } } - -BOOL8 narrow_blob(TO_ROW *row, TBOX blob_box) { +BOOL8 Textord::narrow_blob(TO_ROW *row, TBOX blob_box) { BOOL8 result; - result = ((blob_box.width () <= tosp_narrow_fraction * row->xheight) || (((float) blob_box.width () / blob_box.height ()) <= tosp_narrow_aspect_ratio)); return result; } - -BOOL8 wide_blob(TO_ROW *row, TBOX blob_box) { +BOOL8 Textord::wide_blob(TO_ROW *row, TBOX blob_box) { BOOL8 result; - if (tosp_wide_fraction > 0) { if (tosp_wide_aspect_ratio > 0) result = ((blob_box.width () >= tosp_wide_fraction * row->xheight) && @@ -1667,14 +1609,11 @@ BOOL8 wide_blob(TO_ROW *row, TBOX blob_box) { return result; } - -BOOL8 suspected_punct_blob(TO_ROW *row, TBOX box) { +BOOL8 Textord::suspected_punct_blob(TO_ROW *row, TBOX box) { BOOL8 result; float baseline; float blob_x_centre; - /* Find baseline of centre of blob */ - blob_x_centre = (box.right () + box.left ()) / 2.0; baseline = row->baseline.y (blob_x_centre); @@ -1685,12 +1624,11 @@ BOOL8 suspected_punct_blob(TO_ROW *row, TBOX box) { } -void peek_at_next_gap( //A COPY FOR PEEKING - TO_ROW *row, - BLOBNBOX_IT box_it, - TBOX &next_blob_box, - inT16 &next_gap, - inT16 &next_within_xht_gap) { +void Textord::peek_at_next_gap(TO_ROW *row, + BLOBNBOX_IT box_it, + TBOX &next_blob_box, + inT16 &next_gap, + inT16 &next_within_xht_gap) { TBOX next_reduced_blob_box; TBOX bit_beyond; BLOBNBOX_IT reduced_box_it = box_it; @@ -1712,14 +1650,14 @@ void peek_at_next_gap( //A COPY FOR PEEKING #ifndef GRAPHICS_DISABLED -void mark_gap( //Debug stuff - TBOX blob, //blob following gap - inT16 rule, // heuristic id - inT16 prev_gap, - inT16 prev_blob_width, - inT16 current_gap, - inT16 next_blob_width, - inT16 next_gap) { +void Textord::mark_gap( + TBOX blob, // blob following gap + inT16 rule, // heuristic id + inT16 prev_gap, + inT16 prev_blob_width, + inT16 current_gap, + inT16 next_blob_width, + inT16 next_gap) 
{ ScrollView::Color col; //of ellipse marking flipped gap switch (rule) { @@ -1778,7 +1716,7 @@ void mark_gap( //Debug stuff //y centre blob.bottom () + blob.height () / 2.0f); } - if (tosp_debug_level > 0) + if (tosp_debug_level > 5) tprintf (" (%d,%d) Sp<->Kn Rule %d %d %d %d %d\n", blob.left () - current_gap / 2, blob.bottom (), rule, prev_gap, prev_blob_width, current_gap, @@ -1786,8 +1724,7 @@ void mark_gap( //Debug stuff } #endif - -float find_mean_blob_spacing(WERD *word) { +float Textord::find_mean_blob_spacing(WERD *word) { PBLOB_IT blob_it; C_BLOB_IT cblob_it; TBOX blob_box; @@ -1832,11 +1769,11 @@ float find_mean_blob_spacing(WERD *word) { } -BOOL8 ignore_big_gap(TO_ROW *row, - inT32 row_length, - GAPMAP *gapmap, - inT16 left, - inT16 right) { +BOOL8 Textord::ignore_big_gap(TO_ROW *row, + inT32 row_length, + GAPMAP *gapmap, + inT16 left, + inT16 right) { inT16 gap = right - left + 1; if (tosp_ignore_big_gaps > 999) @@ -1871,11 +1808,10 @@ BOOL8 ignore_big_gap(TO_ROW *row, * Then move the iterator on to the start of the next blob. * DONT reduce the box for small things - eg punctuation. 
**********************************************************************/ - -TBOX reduced_box_next( //get bounding box - TO_ROW *row, //current row - BLOBNBOX_IT *it //iterator to blobds - ) { +TBOX Textord::reduced_box_next( + TO_ROW *row, // current row + BLOBNBOX_IT *it // iterator to blobds + ) { BLOBNBOX *blob; //current blob BLOBNBOX *head_blob; //place to store box TBOX full_box; //full blob boundg box @@ -1947,8 +1883,10 @@ TBOX reduced_box_next( //get bounding box * NOTE that we need to rotate all the coordinates as * find_blob_limits finds the y min and max within a specified x band *************************************************************************/ - -TBOX reduced_box_for_blob(BLOBNBOX *blob, TO_ROW *row, inT16 *left_above_xht) { +TBOX Textord::reduced_box_for_blob( + BLOBNBOX *blob, + TO_ROW *row, + inT16 *left_above_xht) { float baseline; float blob_x_centre; float left_limit; @@ -2038,4 +1976,4 @@ TBOX reduced_box_for_blob(BLOBNBOX *blob, TO_ROW *row, inT16 *left_above_xht) { return TBOX (ICOORD ((inT16) floor (left_limit), blob_box.bottom ()), ICOORD ((inT16) ceil (right_limit), blob_box.top ())); } - +} // namespace tesseract diff --git a/textord/tospace.h b/textord/tospace.h deleted file mode 100644 index 5719416776..0000000000 --- a/textord/tospace.h +++ /dev/null @@ -1,201 +0,0 @@ -/********************************************************************** - * to_spacing - * - * Compute fuzzy word spacing thresholds for each row. 
- **********************************************************************/ - -#ifndef TOSPACE_H -#define TOSPACE_H - -#include "blobbox.h" -#include "gap_map.h" -#include "statistc.h" -#include "notdll.h" - -extern BOOL_VAR_H(tosp_old_to_method, FALSE, "Space stats use prechopping?"); -extern BOOL_VAR_H(tosp_only_use_prop_rows, TRUE, -"Block stats to use fixed pitch rows?"); -extern BOOL_VAR_H(tosp_use_pre_chopping, FALSE, -"Space stats use prechopping?"); -extern BOOL_VAR_H(tosp_old_to_bug_fix, FALSE, -"Fix suspected bug in old code"); -extern BOOL_VAR_H(tosp_block_use_cert_spaces, TRUE, -"Only stat OBVIOUS spaces"); -extern BOOL_VAR_H(tosp_row_use_cert_spaces, TRUE, -"Only stat OBVIOUS spaces"); -extern BOOL_VAR_H(tosp_narrow_blobs_not_cert, TRUE, -"Only stat OBVIOUS spaces"); -extern BOOL_VAR_H(tosp_row_use_cert_spaces1, TRUE, -"Only stat OBVIOUS spaces"); -extern BOOL_VAR_H(tosp_recovery_isolated_row_stats, TRUE, -"Use row alone when inadequate cert spaces"); -extern BOOL_VAR_H(tosp_force_wordbreak_on_punct, FALSE, -"Force word breaks on punct to break long lines in non-space delimited langs"); -extern BOOL_VAR_H(tosp_only_small_gaps_for_kern, FALSE, "Better guess"); -extern BOOL_VAR_H(tosp_all_flips_fuzzy, FALSE, "Pass ANY flip to context?"); -extern BOOL_VAR_H(tosp_fuzzy_limit_all, TRUE, -"Dont restrict kn->sp fuzzy limit to tables"); -extern BOOL_VAR_H(tosp_stats_use_xht_gaps, TRUE, -"Use within xht gap for wd breaks"); -extern BOOL_VAR_H(tosp_use_xht_gaps, TRUE, -"Use within xht gap for wd breaks"); -extern BOOL_VAR_H(tosp_only_use_xht_gaps, FALSE, -"Only use within xht gap for wd breaks"); -extern BOOL_VAR_H(tosp_rule_9_test_punct, FALSE, -"Dont chng kn to space next to punct"); -extern BOOL_VAR_H(tosp_flip_fuzz_kn_to_sp, TRUE, "Default flip"); -extern BOOL_VAR_H(tosp_flip_fuzz_sp_to_kn, TRUE, "Default flip"); -extern BOOL_VAR_H(tosp_improve_thresh, FALSE, -"Enable improvement heuristic"); -extern INT_VAR_H(tosp_debug_level, 0, "Debug data"); -extern 
INT_VAR_H(tosp_enough_space_samples_for_median, 3, -"or should we use mean"); -extern INT_VAR_H(tosp_redo_kern_limit, 10, -"No.samples reqd to reestimate for row"); -extern INT_VAR_H(tosp_few_samples, 40, -"No.gaps reqd with 1 large gap to treat as a table"); -extern INT_VAR_H(tosp_short_row, 20, -"No.gaps reqd with few cert spaces to use certs"); -extern INT_VAR_H(tosp_sanity_method, 1, "How to avoid being silly"); -extern double_VAR_H(tosp_threshold_bias1, 0, -"how far between kern and space?"); -extern double_VAR_H(tosp_threshold_bias2, 0, -"how far between kern and space?"); -extern double_VAR_H(tosp_narrow_fraction, 0.3, -"Fract of xheight for narrow"); -extern double_VAR_H(tosp_narrow_aspect_ratio, 0.48, -"narrow if w/h less than this"); -extern double_VAR_H(tosp_wide_fraction, 0.52, "Fract of xheight for wide"); -extern double_VAR_H(tosp_wide_aspect_ratio, 0.0, -"wide if w/h less than this"); -extern double_VAR_H(tosp_fuzzy_space_factor, 0.6, -"Fract of xheight for fuzz sp"); -extern double_VAR_H(tosp_fuzzy_space_factor1, 0.5, -"Fract of xheight for fuzz sp"); -extern double_VAR_H(tosp_fuzzy_space_factor2, 0.72, -"Fract of xheight for fuzz sp"); -extern double_VAR_H(tosp_gap_factor, 0.83, "gap ratio to flip sp->kern"); -extern double_VAR_H(tosp_kern_gap_factor1, 2.0, -"gap ratio to flip kern->sp"); -extern double_VAR_H(tosp_kern_gap_factor2, 1.3, -"gap ratio to flip kern->sp"); -extern double_VAR_H(tosp_kern_gap_factor3, 2.5, -"gap ratio to flip kern->sp"); -extern double_VAR_H(tosp_ignore_big_gaps, -1, "xht multiplier"); -extern double_VAR_H(tosp_ignore_very_big_gaps, 3.5, "xht multiplier"); -extern double_VAR_H(tosp_rep_space, 1.6, "rep gap multiplier for space"); -extern double_VAR_H(tosp_enough_small_gaps, 0.65, -"Fract of kerns reqd for isolated row stats"); -extern double_VAR_H(tosp_table_kn_sp_ratio, 2.25, -"Min difference of kn & sp in table"); -extern double_VAR_H(tosp_table_xht_sp_ratio, 0.33, -"Expect spaces bigger than this"); -extern 
double_VAR_H(tosp_table_fuzzy_kn_sp_ratio, 3.0, -"Fuzzy if less than this"); -extern double_VAR_H(tosp_fuzzy_kn_fraction, 0.5, "New fuzzy kn alg"); -extern double_VAR_H(tosp_fuzzy_sp_fraction, 0.5, "New fuzzy sp alg"); -extern double_VAR_H(tosp_min_sane_kn_sp, 1.5, -"Dont trust spaces less than this time kn"); -extern double_VAR_H(tosp_init_guess_kn_mult, 2.2, -"Thresh guess - mult kn by this"); -extern double_VAR_H(tosp_init_guess_xht_mult, 0.28, -"Thresh guess - mult xht by this"); -extern double_VAR_H(tosp_max_sane_kn_thresh, 5.0, -"Multiplier on kn to limit thresh"); -extern double_VAR_H(tosp_flip_caution, 0.0, -"Dont autoflip kn to sp when large separation"); -extern double_VAR_H(tosp_large_kerning, 0.19, -"Limit use of xht gap with large kns"); -extern double_VAR_H(tosp_dont_fool_with_small_kerns, -1, -"Limit use of xht gap with odd small kns"); -extern double_VAR_H(tosp_near_lh_edge, 0, -"Dont reduce box if the top left is non blank"); -extern double_VAR_H(tosp_silly_kn_sp_gap, 0.2, -"Dont let sp minus kn get too small"); -extern double_VAR_H(tosp_pass_wide_fuzz_sp_to_context, 0.75, -"How wide fuzzies need context"); - -void to_spacing( //set spacing - ICOORD page_tr, //topright of page - TO_BLOCK_LIST *blocks //blocks on page - ); - //DEBUG USE ONLY -void block_spacing_stats(TO_BLOCK *block, - GAPMAP *gapmap, - BOOL8 &old_text_ord_proportional, - inT16 &block_space_gap_width, //resulting estimate - inT16 &block_non_space_gap_width //resulting estimate - ); - //estimate for block -void row_spacing_stats(TO_ROW *row, - GAPMAP *gapmap, - inT16 block_idx, - inT16 row_idx, - inT16 block_space_gap_width, - inT16 block_non_space_gap_width //estimate for block - ); - //estimate for block -void old_to_method(TO_ROW *row, - STATS *all_gap_stats, - STATS *space_gap_stats, - STATS *small_gap_stats, - inT16 block_space_gap_width, - inT16 block_non_space_gap_width //estimate for block - ); -BOOL8 isolated_row_stats(TO_ROW *row, - GAPMAP *gapmap, - STATS *all_gap_stats, - 
BOOL8 suspected_table, - inT16 block_idx, - inT16 row_idx); -inT16 stats_count_under(STATS *stats, inT16 threshold); -void improve_row_threshold(TO_ROW *row, STATS *all_gap_stats); -ROW *make_prop_words( // find lines - TO_ROW *row, // row to make - FCOORD rotation // for drawing - ); -ROW *make_blob_words( // find lines - TO_ROW *row, // row to make - FCOORD rotation // for drawing - ); -BOOL8 make_a_word_break( // decide on word break - TO_ROW *row, // row being made - TBOX blob_box, // for next_blob // how many blanks? - inT16 prev_gap, - TBOX prev_blob_box, - inT16 real_current_gap, - inT16 within_xht_current_gap, - TBOX next_blob_box, - inT16 next_gap, - uinT8 &blanks, - BOOL8 &fuzzy_sp, - BOOL8 &fuzzy_non); -BOOL8 narrow_blob(TO_ROW *row, TBOX blob_box); -BOOL8 wide_blob(TO_ROW *row, TBOX blob_box); -BOOL8 suspected_punct_blob(TO_ROW *row, TBOX box); - //A COPY FOR PEEKING -void peek_at_next_gap(TO_ROW *row, - BLOBNBOX_IT box_it, - TBOX &next_blob_box, - inT16 &next_gap, - inT16 &next_within_xht_gap); -void mark_gap( //Debug stuff - TBOX blob, //blob following gap - inT16 rule, // heuristic id - inT16 prev_gap, - inT16 prev_blob_width, - inT16 current_gap, - inT16 next_blob_width, - inT16 next_gap); -float find_mean_blob_spacing(WERD *word); -BOOL8 ignore_big_gap(TO_ROW *row, - inT32 row_length, - GAPMAP *gapmap, - inT16 left, - inT16 right); -TBOX reduced_box_next( //get bounding box - TO_ROW *row, //current row - BLOBNBOX_IT *it //iterator to blobds - ); -TBOX reduced_box_for_blob(BLOBNBOX *blob, TO_ROW *row, inT16 *left_above_xht); -#endif diff --git a/textord/tovars.cpp b/textord/tovars.cpp index 15b6bad0f8..6e0c929105 100644 --- a/textord/tovars.cpp +++ b/textord/tovars.cpp @@ -18,7 +18,8 @@ **********************************************************************/ #include "mfcpch.h" -#include "tovars.h" +#include "tovars.h" +#include "params.h" #define EXTERN @@ -31,16 +32,10 @@ EXTERN BOOL_VAR (textord_blocksall_prop, FALSE, "Moan about fixed pitch 
blocks"); EXTERN BOOL_VAR (textord_blocksall_testing, FALSE, "Dump stats when moaning"); EXTERN BOOL_VAR (textord_test_mode, FALSE, "Do current test"); -EXTERN BOOL_VAR (textord_repeat_extraction, TRUE, "Extract repeated chars"); EXTERN INT_VAR (textord_dotmatrix_gap, 3, "Max pixel gap for broken pixed pitch"); -EXTERN INT_VAR (textord_repeat_threshold, 4, -"Min multiple for repeated char"); EXTERN INT_VAR (textord_debug_block, 0, "Block to do debug on"); EXTERN INT_VAR (textord_pitch_range, 2, "Max range test on pitch"); -EXTERN double_VAR (textord_repeat_rating, 6, "Min rating for equal blobs"); -EXTERN double_VAR(textord_repch_width_variance, 0.2, -"Max width change of gap/blob for repeated chars"); EXTERN double_VAR (textord_wordstats_smooth_factor, 0.05, "Smoothing gap stats"); EXTERN double_VAR (textord_width_smooth_factor, 0.10, diff --git a/textord/tovars.h b/textord/tovars.h index 62406dbcec..e419ea4d8d 100644 --- a/textord/tovars.h +++ b/textord/tovars.h @@ -20,7 +20,7 @@ #ifndef TOVARS_H #define TOVARS_H -#include "varable.h" +#include "params.h" #include "notdll.h" extern BOOL_VAR_H (textord_show_initial_words, FALSE, @@ -34,16 +34,10 @@ extern BOOL_VAR_H (textord_blocksall_prop, FALSE, extern BOOL_VAR_H (textord_blocksall_testing, FALSE, "Dump stats when moaning"); extern BOOL_VAR_H (textord_test_mode, FALSE, "Do current test"); -extern BOOL_VAR_H (textord_repeat_extraction, TRUE, "Extract repeated chars"); extern INT_VAR_H (textord_dotmatrix_gap, 3, "Max pixel gap for broken pixed pitch"); -extern INT_VAR_H (textord_repeat_threshold, 4, -"Min multiple for repeated char"); extern INT_VAR_H (textord_debug_block, 0, "Block to do debug on"); extern INT_VAR_H (textord_pitch_range, 2, "Max range test on pitch"); -extern double_VAR_H (textord_repeat_rating, 6, "Min rating for equal blobs"); -extern double_VAR_H(textord_repch_width_variance, 0.2, -"Max width change of gap/blob for repeated chars"); extern double_VAR_H (textord_wordstats_smooth_factor, 0.05, 
"Smoothing gap stats"); extern double_VAR_H (textord_width_smooth_factor, 0.10, diff --git a/textord/wordseg.cpp b/textord/wordseg.cpp index 633d341533..2a411a5fb1 100644 --- a/textord/wordseg.cpp +++ b/textord/wordseg.cpp @@ -23,16 +23,13 @@ #endif #include "stderr.h" #include "blobbox.h" -#include "ocrclass.h" -#include "lmedsq.h" #include "statistc.h" #include "drawtord.h" #include "makerow.h" #include "pitsync1.h" -#include "blobcmpl.h" #include "tovars.h" #include "topitch.h" -#include "tospace.h" +#include "textord.h" #include "fpchop.h" #include "wordseg.h" @@ -43,12 +40,11 @@ #define EXTERN -EXTERN BOOL_VAR (textord_fp_chopping, TRUE, "Do fixed pitch chopping"); -EXTERN BOOL_VAR (textord_force_make_prop_words, FALSE, - "Force proportional word segmentation on all rows"); -EXTERN BOOL_VAR (textord_chopper_test, FALSE, - "Chopper is being tested."); -extern /*"C" */ ETEXT_DESC *global_monitor; //progress monitor +EXTERN BOOL_VAR(textord_fp_chopping, TRUE, "Do fixed pitch chopping"); +EXTERN BOOL_VAR(textord_force_make_prop_words, FALSE, + "Force proportional word segmentation on all rows"); +EXTERN BOOL_VAR(textord_chopper_test, FALSE, + "Chopper is being tested."); #define FIXED_WIDTH_MULTIPLE 5 #define BLOCK_STATS_CLUSTERS 10 @@ -57,75 +53,70 @@ extern /*"C" */ ETEXT_DESC *global_monitor; //progress monitor /** * @name make_single_word * - * Arrange the blobs into one word. There is no fixed pitch detection. + * For each row, arrange the blobs into one word. There is no fixed + * pitch detection. */ void make_single_word(bool one_blob, TO_ROW_LIST *rows, ROW_LIST* real_rows) { TO_ROW_IT to_row_it(rows); - TO_ROW* row = to_row_it.data(); - // The blobs have to come out of the BLOBNBOX into the C_BLOB_LIST ready - // to create the word. 
- C_BLOB_LIST cblobs; - C_BLOB_IT cblob_it(&cblobs); - BLOBNBOX_IT box_it(row->blob_list()); - for (;!box_it.empty(); box_it.forward()) { - BLOBNBOX* bblob= box_it.extract(); - if (bblob->joined_to_prev() || (one_blob && !cblob_it.empty())) { - if (bblob->cblob() != NULL) { - C_OUTLINE_IT cout_it(cblob_it.data()->out_list()); - cout_it.move_to_last(); - cout_it.add_list_after(bblob->cblob()->out_list()); - delete bblob->cblob(); + ROW_IT row_it(real_rows); + for (to_row_it.mark_cycle_pt(); !to_row_it.cycled_list(); + to_row_it.forward()) { + TO_ROW* row = to_row_it.data(); + // The blobs have to come out of the BLOBNBOX into the C_BLOB_LIST ready + // to create the word. + C_BLOB_LIST cblobs; + C_BLOB_IT cblob_it(&cblobs); + BLOBNBOX_IT box_it(row->blob_list()); + for (;!box_it.empty(); box_it.forward()) { + BLOBNBOX* bblob= box_it.extract(); + if (bblob->joined_to_prev() || (one_blob && !cblob_it.empty())) { + if (bblob->cblob() != NULL) { + C_OUTLINE_IT cout_it(cblob_it.data()->out_list()); + cout_it.move_to_last(); + cout_it.add_list_after(bblob->cblob()->out_list()); + delete bblob->cblob(); + } + } else { + if (bblob->cblob() != NULL) + cblob_it.add_after_then_move(bblob->cblob()); } - } else { - if (bblob->cblob() != NULL) - cblob_it.add_after_then_move(bblob->cblob()); delete bblob; } + // Convert the TO_ROW to a ROW. + ROW* real_row = new ROW(row, static_cast(row->kern_size), + static_cast(row->space_size)); + WERD_IT word_it(real_row->word_list()); + WERD* word = new WERD(&cblobs, 0, NULL); + word->set_flag(W_BOL, TRUE); + word->set_flag(W_EOL, TRUE); + word->set_flag(W_DONT_CHOP, one_blob); + word_it.add_after_then_move(word); + row_it.add_after_then_move(real_row); } - // Convert the TO_ROW to a ROW. 
- ROW* real_row = new ROW(row, static_cast(row->kern_size), - static_cast(row->space_size)); - WERD_IT word_it(real_row->word_list()); - WERD* word = new WERD(&cblobs, 0, NULL); - word->set_flag(W_BOL, TRUE); - word->set_flag(W_EOL, TRUE); - word_it.add_after_then_move(word); - ROW_IT row_it(real_rows); - row_it.add_after_then_move(real_row); } /** - * @name make_words + * make_words * * Arrange the blobs into words. */ -void make_words( //make words - ICOORD page_tr, //top right - float gradient, //page skew - BLOCK_LIST *blocks, //block list - TO_BLOCK_LIST *land_blocks, //rotated for landscape - TO_BLOCK_LIST *port_blocks, //output list - tesseract::Tesseract* tess - ) { - TO_BLOCK_IT block_it; //iterator - TO_BLOCK *block; //current block; - - compute_fixed_pitch(page_tr, port_blocks, gradient, FCOORD (0.0f, -1.0f), - !(BOOL8) textord_test_landscape, tess); - if (global_monitor != NULL) { - global_monitor->ocr_alive = TRUE; - global_monitor->progress = 25; - } - to_spacing(page_tr, port_blocks); - block_it.set_to_list (port_blocks); - for (block_it.mark_cycle_pt (); !block_it.cycled_list (); - block_it.forward ()) { - block = block_it.data (); - // set_row_spaces(block,FCOORD(1,0),!(BOOL8)textord_test_landscape); - //make proper classes - make_real_words (block, FCOORD (1.0f, 0.0f)); +void make_words(tesseract::Textord *textord, + ICOORD page_tr, // top right + float gradient, // page skew + BLOCK_LIST *blocks, // block list + TO_BLOCK_LIST *port_blocks) { // output list + TO_BLOCK_IT block_it; // iterator + TO_BLOCK *block; // current block + + compute_fixed_pitch(page_tr, port_blocks, gradient, FCOORD(0.0f, -1.0f), + !(BOOL8) textord_test_landscape); + textord->to_spacing(page_tr, port_blocks); + block_it.set_to_list(port_blocks); + for (block_it.mark_cycle_pt(); !block_it.cycled_list(); block_it.forward()) { + block = block_it.data(); + make_real_words(textord, block, FCOORD(1.0f, 0.0f)); } } @@ -518,7 +509,8 @@ inT32 row_words2( //compute space size * 
Convert a TO_BLOCK to a BLOCK. */ -void make_real_words( //find lines +void make_real_words( + tesseract::Textord *textord, TO_BLOCK *block, //block to do FCOORD rotation //for drawing ) { @@ -541,11 +533,11 @@ void make_real_words( //find lines // leave the entire line as one word. We can force consistent chopping // with force_make_prop_words flag. if (textord_chopper_test) { - real_row = make_blob_words (row, rotation); + real_row = textord->make_blob_words (row, rotation); } else if (textord_force_make_prop_words || row->pitch_decision == PITCH_DEF_PROP || row->pitch_decision == PITCH_CORR_PROP) { - real_row = make_prop_words (row, rotation); + real_row = textord->make_prop_words (row, rotation); } else if (row->pitch_decision == PITCH_DEF_FIXED || row->pitch_decision == PITCH_CORR_FIXED) { real_row = fixed_pitch_words (row, rotation); @@ -610,71 +602,59 @@ ROW *make_rep_words( //make a row * list of BLOBNBOXs. */ -WERD *make_real_word( //make a WERD - BLOBNBOX_IT *box_it, //iterator +WERD *make_real_word(BLOBNBOX_IT *box_it, //iterator inT32 blobcount, //no of blobs to use BOOL8 bol, //start of line - BOOL8 fuzzy_sp, //fuzzy space - BOOL8 fuzzy_non, //fuzzy non-space uinT8 blanks //no of blanks ) { - OUTLINE_IT out_it; //outlines + OUTLINE_IT out_it; // outlines C_OUTLINE_IT cout_it; - PBLOB_LIST blobs; //blobs in word + PBLOB_LIST blobs; // blobs in word C_BLOB_LIST cblobs; - PBLOB_IT blob_it = &blobs; //iterator + PBLOB_IT blob_it = &blobs; // iterator C_BLOB_IT cblob_it = &cblobs; - WERD *word; //new word - BLOBNBOX *bblob; //current blob - inT32 blobindex; //in row + WERD *word; // new word + BLOBNBOX *bblob; // current blob + inT32 blobindex; // in row for (blobindex = 0; blobindex < blobcount; blobindex++) { - bblob = box_it->extract (); - if (bblob->joined_to_prev ()) { - if (bblob->blob () != NULL) { - out_it.set_to_list (blob_it.data ()->out_list ()); - out_it.move_to_last (); - out_it.add_list_after (bblob->blob ()->out_list ()); - delete bblob->blob 
(); + bblob = box_it->extract(); + if (bblob->joined_to_prev()) { + if (bblob->blob() != NULL) { + out_it.set_to_list(blob_it.data()->out_list()); + out_it.move_to_last(); + out_it.add_list_after(bblob->blob()->out_list()); + delete bblob->blob(); } - else if (bblob->cblob () != NULL) { - cout_it.set_to_list (cblob_it.data ()->out_list ()); - cout_it.move_to_last (); - cout_it.add_list_after (bblob->cblob ()->out_list ()); - delete bblob->cblob (); + else if (bblob->cblob() != NULL) { + cout_it.set_to_list(cblob_it.data()->out_list()); + cout_it.move_to_last(); + cout_it.add_list_after(bblob->cblob()->out_list()); + delete bblob->cblob(); } } else { - if (bblob->blob () != NULL) - blob_it.add_after_then_move (bblob->blob ()); - else if (bblob->cblob () != NULL) - cblob_it.add_after_then_move (bblob->cblob ()); + if (bblob->blob() != NULL) + blob_it.add_after_then_move(bblob->blob()); + else if (bblob->cblob() != NULL) + cblob_it.add_after_then_move(bblob->cblob()); } delete bblob; - box_it->forward (); //next one + box_it->forward(); // next one } if (blanks < 1) blanks = 1; - if (!blob_it.empty ()) { - //make real word - word = new WERD (&blobs, blanks, NULL); - } - else { - word = new WERD (&cblobs, blanks, NULL); - } - if (bol) { - word->set_flag (W_BOL, TRUE); - } - if (fuzzy_sp) - //probably space - word->set_flag (W_FUZZY_SP, TRUE); - else if (fuzzy_non) - //probably not - word->set_flag (W_FUZZY_NON, TRUE); - if (box_it->at_first ()) { - word->set_flag (W_EOL, TRUE);//at end of line - } + + if (blob_it.empty()) + word = new WERD(&cblobs, blanks, NULL); + else + word = new WERD(&blobs, blanks, NULL); + + if (bol) + word->set_flag(W_BOL, TRUE); + if (box_it->at_first()) + word->set_flag(W_EOL, TRUE); // at end of line + return word; } - diff --git a/textord/wordseg.h b/textord/wordseg.h index 42217a2329..eae4e640dd 100644 --- a/textord/wordseg.h +++ b/textord/wordseg.h @@ -20,9 +20,10 @@ #ifndef WORDSEG_H #define WORDSEG_H -#include "varable.h" +#include 
"params.h" #include "blobbox.h" #include "notdll.h" +#include "textord.h" namespace tesseract { class Tesseract; @@ -35,14 +36,11 @@ extern BOOL_VAR_H (textord_chopper_test, FALSE, "Chopper is being tested."); void make_single_word(bool one_blob, TO_ROW_LIST *rows, ROW_LIST* real_rows); -void make_words( //make words - ICOORD page_tr, //top right - float gradient, //page skew - BLOCK_LIST *blocks, //block list - TO_BLOCK_LIST *land_blocks, //rotated for landscape - TO_BLOCK_LIST *port_blocks, //output list - tesseract::Tesseract* - ); +void make_words(tesseract::Textord *textord, + ICOORD page_tr, // top right + float gradient, // page skew + BLOCK_LIST *blocks, // block list + TO_BLOCK_LIST *port_blocks); // output list void set_row_spaces( //find space sizes TO_BLOCK *block, //block to do FCOORD rotation, //for drawing @@ -62,7 +60,8 @@ inT32 row_words2( //compute space size FCOORD rotation, //for drawing BOOL8 testing_on //for debug ); -void make_real_words( //find lines +void make_real_words( + tesseract::Textord *textord, TO_BLOCK *block, //block to do FCOORD rotation //for drawing ); @@ -74,9 +73,6 @@ WERD *make_real_word( //make a WERD BLOBNBOX_IT *box_it, //iterator inT32 blobcount, //no of blobs to use BOOL8 bol, //start of line - BOOL8 fuzzy_sp, //fuzzy space - BOOL8 fuzzy_non, //fuzzy non-space uinT8 blanks //no of blanks ); #endif - diff --git a/training/Makefile.am b/training/Makefile.am index 489983d802..d9e050b825 100644 --- a/training/Makefile.am +++ b/training/Makefile.am @@ -7,18 +7,19 @@ AM_CPPFLAGS = \ -I$(top_srcdir)/wordrec -I$(top_srcdir)/cutil \ -I$(top_srcdir)/textord -EXTRA_DIST = unicharset_extractor.vcproj wordlist2dawg.vcproj \ - cntraining.vcproj mftraining.vcproj - include_HEADERS = \ - commontraining.h mergenf.h name2char.h + commontraining.h mergenf.h tessopt.h -lib_LTLIBRARIES = libtesseract_training.la +lib_LTLIBRARIES = libtesseract_training.la libtesseract_tessopt.la libtesseract_training_la_SOURCES = \ - name2char.cpp 
commontraining.cpp + commontraining.cpp +libtesseract_tessopt_la_SOURCES = \ + tessopt.cpp libtesseract_training_la_LDFLAGS = -version-info $(GENERIC_LIBRARY_VERSION) +libtesseract_tessopt_la_LDFLAGS = -version-info $(GENERIC_LIBRARY_VERSION) + bin_PROGRAMS = cntraining combine_tessdata mftraining unicharset_extractor wordlist2dawg combine_tessdata_SOURCES = combine_tessdata.cpp combine_tessdata_LDADD = \ @@ -27,6 +28,7 @@ combine_tessdata_LDADD = \ cntraining_SOURCES = cntraining.cpp cntraining_LDADD = \ libtesseract_training.la \ + libtesseract_tessopt.la \ ../textord/libtesseract_textord.la \ ../classify/libtesseract_classify.la \ ../dict/libtesseract_dict.la \ @@ -35,12 +37,15 @@ cntraining_LDADD = \ ../cutil/libtesseract_cutil.la \ ../viewer/libtesseract_viewer.la \ ../ccmain/libtesseract_main.la \ + ../cube/libtesseract_cube.la \ + ../neural_networks/runtime/libtesseract_neural.la \ ../wordrec/libtesseract_wordrec.la \ ../ccutil/libtesseract_ccutil.la mftraining_SOURCES = mftraining.cpp mergenf.cpp mftraining_LDADD = \ libtesseract_training.la \ + libtesseract_tessopt.la \ ../textord/libtesseract_textord.la \ ../classify/libtesseract_classify.la \ ../dict/libtesseract_dict.la \ @@ -49,15 +54,19 @@ mftraining_LDADD = \ ../cutil/libtesseract_cutil.la \ ../viewer/libtesseract_viewer.la \ ../ccmain/libtesseract_main.la \ + ../cube/libtesseract_cube.la \ + ../neural_networks/runtime/libtesseract_neural.la \ ../wordrec/libtesseract_wordrec.la \ ../ccutil/libtesseract_ccutil.la unicharset_extractor_SOURCES = unicharset_extractor.cpp unicharset_extractor_LDADD = \ - ../ccutil/libtesseract_ccutil.la + libtesseract_tessopt.la \ + ../ccutil/libtesseract_ccutil.la wordlist2dawg_SOURCES = wordlist2dawg.cpp wordlist2dawg_LDADD = \ + libtesseract_tessopt.la \ ../classify/libtesseract_classify.la \ ../dict/libtesseract_dict.la \ ../ccstruct/libtesseract_ccstruct.la \ @@ -65,6 +74,8 @@ wordlist2dawg_LDADD = \ ../cutil/libtesseract_cutil.la \ ../viewer/libtesseract_viewer.la \ 
../ccmain/libtesseract_main.la \ + ../cube/libtesseract_cube.la \ + ../neural_networks/runtime/libtesseract_neural.la \ ../wordrec/libtesseract_wordrec.la \ ../textord/libtesseract_textord.la \ ../ccutil/libtesseract_ccutil.la diff --git a/training/Makefile.in b/training/Makefile.in index c48db2a619..aa2dc10739 100644 --- a/training/Makefile.in +++ b/training/Makefile.in @@ -75,8 +75,16 @@ am__base_list = \ am__installdirs = "$(DESTDIR)$(libdir)" "$(DESTDIR)$(bindir)" \ "$(DESTDIR)$(includedir)" LTLIBRARIES = $(lib_LTLIBRARIES) +libtesseract_tessopt_la_LIBADD = +am_libtesseract_tessopt_la_OBJECTS = tessopt.lo +libtesseract_tessopt_la_OBJECTS = \ + $(am_libtesseract_tessopt_la_OBJECTS) +libtesseract_tessopt_la_LINK = $(LIBTOOL) --tag=CXX $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=link $(CXXLD) $(AM_CXXFLAGS) \ + $(CXXFLAGS) $(libtesseract_tessopt_la_LDFLAGS) $(LDFLAGS) -o \ + $@ libtesseract_training_la_LIBADD = -am_libtesseract_training_la_OBJECTS = name2char.lo commontraining.lo +am_libtesseract_training_la_OBJECTS = commontraining.lo libtesseract_training_la_OBJECTS = \ $(am_libtesseract_training_la_OBJECTS) libtesseract_training_la_LINK = $(LIBTOOL) --tag=CXX \ @@ -87,13 +95,14 @@ PROGRAMS = $(bin_PROGRAMS) am_cntraining_OBJECTS = cntraining.$(OBJEXT) cntraining_OBJECTS = $(am_cntraining_OBJECTS) cntraining_DEPENDENCIES = libtesseract_training.la \ - ../textord/libtesseract_textord.la \ + libtesseract_tessopt.la ../textord/libtesseract_textord.la \ ../classify/libtesseract_classify.la \ ../dict/libtesseract_dict.la \ ../ccstruct/libtesseract_ccstruct.la \ ../image/libtesseract_image.la ../cutil/libtesseract_cutil.la \ ../viewer/libtesseract_viewer.la \ - ../ccmain/libtesseract_main.la \ + ../ccmain/libtesseract_main.la ../cube/libtesseract_cube.la \ + ../neural_networks/runtime/libtesseract_neural.la \ ../wordrec/libtesseract_wordrec.la \ ../ccutil/libtesseract_ccutil.la am_combine_tessdata_OBJECTS = combine_tessdata.$(OBJEXT) @@ -102,26 +111,30 @@ 
combine_tessdata_DEPENDENCIES = ../ccutil/libtesseract_ccutil.la am_mftraining_OBJECTS = mftraining.$(OBJEXT) mergenf.$(OBJEXT) mftraining_OBJECTS = $(am_mftraining_OBJECTS) mftraining_DEPENDENCIES = libtesseract_training.la \ - ../textord/libtesseract_textord.la \ + libtesseract_tessopt.la ../textord/libtesseract_textord.la \ ../classify/libtesseract_classify.la \ ../dict/libtesseract_dict.la \ ../ccstruct/libtesseract_ccstruct.la \ ../image/libtesseract_image.la ../cutil/libtesseract_cutil.la \ ../viewer/libtesseract_viewer.la \ - ../ccmain/libtesseract_main.la \ + ../ccmain/libtesseract_main.la ../cube/libtesseract_cube.la \ + ../neural_networks/runtime/libtesseract_neural.la \ ../wordrec/libtesseract_wordrec.la \ ../ccutil/libtesseract_ccutil.la am_unicharset_extractor_OBJECTS = unicharset_extractor.$(OBJEXT) unicharset_extractor_OBJECTS = $(am_unicharset_extractor_OBJECTS) -unicharset_extractor_DEPENDENCIES = ../ccutil/libtesseract_ccutil.la +unicharset_extractor_DEPENDENCIES = libtesseract_tessopt.la \ + ../ccutil/libtesseract_ccutil.la am_wordlist2dawg_OBJECTS = wordlist2dawg.$(OBJEXT) wordlist2dawg_OBJECTS = $(am_wordlist2dawg_OBJECTS) -wordlist2dawg_DEPENDENCIES = ../classify/libtesseract_classify.la \ +wordlist2dawg_DEPENDENCIES = libtesseract_tessopt.la \ + ../classify/libtesseract_classify.la \ ../dict/libtesseract_dict.la \ ../ccstruct/libtesseract_ccstruct.la \ ../image/libtesseract_image.la ../cutil/libtesseract_cutil.la \ ../viewer/libtesseract_viewer.la \ - ../ccmain/libtesseract_main.la \ + ../ccmain/libtesseract_main.la ../cube/libtesseract_cube.la \ + ../neural_networks/runtime/libtesseract_neural.la \ ../wordrec/libtesseract_wordrec.la \ ../textord/libtesseract_textord.la \ ../ccutil/libtesseract_ccutil.la @@ -138,13 +151,14 @@ CXXLD = $(CXX) CXXLINK = $(LIBTOOL) --tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \ --mode=link $(CXXLD) $(AM_CXXFLAGS) $(CXXFLAGS) $(AM_LDFLAGS) \ $(LDFLAGS) -o $@ -SOURCES = $(libtesseract_training_la_SOURCES) 
$(cntraining_SOURCES) \ +SOURCES = $(libtesseract_tessopt_la_SOURCES) \ + $(libtesseract_training_la_SOURCES) $(cntraining_SOURCES) \ + $(combine_tessdata_SOURCES) $(mftraining_SOURCES) \ + $(unicharset_extractor_SOURCES) $(wordlist2dawg_SOURCES) +DIST_SOURCES = $(libtesseract_tessopt_la_SOURCES) \ + $(libtesseract_training_la_SOURCES) $(cntraining_SOURCES) \ $(combine_tessdata_SOURCES) $(mftraining_SOURCES) \ $(unicharset_extractor_SOURCES) $(wordlist2dawg_SOURCES) -DIST_SOURCES = $(libtesseract_training_la_SOURCES) \ - $(cntraining_SOURCES) $(combine_tessdata_SOURCES) \ - $(mftraining_SOURCES) $(unicharset_extractor_SOURCES) \ - $(wordlist2dawg_SOURCES) RECURSIVE_TARGETS = all-recursive check-recursive dvi-recursive \ html-recursive info-recursive install-data-recursive \ install-dvi-recursive install-exec-recursive \ @@ -298,7 +312,6 @@ libdir = @libdir@ libexecdir = @libexecdir@ localedir = @localedir@ localstatedir = @localstatedir@ -lt_ECHO = @lt_ECHO@ mandir = @mandir@ mkdir_p = @mkdir_p@ oldincludedir = @oldincludedir@ @@ -323,17 +336,18 @@ AM_CPPFLAGS = \ -I$(top_srcdir)/wordrec -I$(top_srcdir)/cutil \ -I$(top_srcdir)/textord -EXTRA_DIST = unicharset_extractor.vcproj wordlist2dawg.vcproj \ - cntraining.vcproj mftraining.vcproj - include_HEADERS = \ - commontraining.h mergenf.h name2char.h + commontraining.h mergenf.h tessopt.h -lib_LTLIBRARIES = libtesseract_training.la +lib_LTLIBRARIES = libtesseract_training.la libtesseract_tessopt.la libtesseract_training_la_SOURCES = \ - name2char.cpp commontraining.cpp + commontraining.cpp + +libtesseract_tessopt_la_SOURCES = \ + tessopt.cpp libtesseract_training_la_LDFLAGS = -version-info $(GENERIC_LIBRARY_VERSION) +libtesseract_tessopt_la_LDFLAGS = -version-info $(GENERIC_LIBRARY_VERSION) combine_tessdata_SOURCES = combine_tessdata.cpp combine_tessdata_LDADD = \ ../ccutil/libtesseract_ccutil.la @@ -341,6 +355,7 @@ combine_tessdata_LDADD = \ cntraining_SOURCES = cntraining.cpp cntraining_LDADD = \ 
libtesseract_training.la \ + libtesseract_tessopt.la \ ../textord/libtesseract_textord.la \ ../classify/libtesseract_classify.la \ ../dict/libtesseract_dict.la \ @@ -349,12 +364,15 @@ cntraining_LDADD = \ ../cutil/libtesseract_cutil.la \ ../viewer/libtesseract_viewer.la \ ../ccmain/libtesseract_main.la \ + ../cube/libtesseract_cube.la \ + ../neural_networks/runtime/libtesseract_neural.la \ ../wordrec/libtesseract_wordrec.la \ ../ccutil/libtesseract_ccutil.la mftraining_SOURCES = mftraining.cpp mergenf.cpp mftraining_LDADD = \ libtesseract_training.la \ + libtesseract_tessopt.la \ ../textord/libtesseract_textord.la \ ../classify/libtesseract_classify.la \ ../dict/libtesseract_dict.la \ @@ -363,15 +381,19 @@ mftraining_LDADD = \ ../cutil/libtesseract_cutil.la \ ../viewer/libtesseract_viewer.la \ ../ccmain/libtesseract_main.la \ + ../cube/libtesseract_cube.la \ + ../neural_networks/runtime/libtesseract_neural.la \ ../wordrec/libtesseract_wordrec.la \ ../ccutil/libtesseract_ccutil.la unicharset_extractor_SOURCES = unicharset_extractor.cpp unicharset_extractor_LDADD = \ - ../ccutil/libtesseract_ccutil.la + libtesseract_tessopt.la \ + ../ccutil/libtesseract_ccutil.la wordlist2dawg_SOURCES = wordlist2dawg.cpp wordlist2dawg_LDADD = \ + libtesseract_tessopt.la \ ../classify/libtesseract_classify.la \ ../dict/libtesseract_dict.la \ ../ccstruct/libtesseract_ccstruct.la \ @@ -379,6 +401,8 @@ wordlist2dawg_LDADD = \ ../cutil/libtesseract_cutil.la \ ../viewer/libtesseract_viewer.la \ ../ccmain/libtesseract_main.la \ + ../cube/libtesseract_cube.la \ + ../neural_networks/runtime/libtesseract_neural.la \ ../wordrec/libtesseract_wordrec.la \ ../textord/libtesseract_textord.la \ ../ccutil/libtesseract_ccutil.la @@ -448,6 +472,8 @@ clean-libLTLIBRARIES: echo "rm -f \"$${dir}/so_locations\""; \ rm -f "$${dir}/so_locations"; \ done +libtesseract_tessopt.la: $(libtesseract_tessopt_la_OBJECTS) $(libtesseract_tessopt_la_DEPENDENCIES) + $(libtesseract_tessopt_la_LINK) -rpath $(libdir) 
$(libtesseract_tessopt_la_OBJECTS) $(libtesseract_tessopt_la_LIBADD) $(LIBS) libtesseract_training.la: $(libtesseract_training_la_OBJECTS) $(libtesseract_training_la_DEPENDENCIES) $(libtesseract_training_la_LINK) -rpath $(libdir) $(libtesseract_training_la_OBJECTS) $(libtesseract_training_la_LIBADD) $(LIBS) install-binPROGRAMS: $(bin_PROGRAMS) @@ -520,7 +546,7 @@ distclean-compile: @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/commontraining.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mergenf.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mftraining.Po@am__quote@ -@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/name2char.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/tessopt.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/unicharset_extractor.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/wordlist2dawg.Po@am__quote@ diff --git a/training/cntraining.cpp b/training/cntraining.cpp index d59a320b54..0bffbaf793 100644 --- a/training/cntraining.cpp +++ b/training/cntraining.cpp @@ -1,14 +1,14 @@ /****************************************************************************** -** Filename: cntraining.cpp -** Purpose: Generates a normproto and pffmtable. -** Author: Dan Johnson -** Revisment: Christy Russon -** History: Fri Aug 18 08:53:50 1989, DSJ, Created. -** 5/25/90, DSJ, Adapted to multiple feature types. -** Tuesday, May 17, 1998 Changes made to make feature specific and -** simplify structures. First step in simplifying training process. +** Filename: cntraining.cpp +** Purpose: Generates a normproto and pffmtable. +** Author: Dan Johnson +** Revisment: Christy Russon +** History: Fri Aug 18 08:53:50 1989, DSJ, Created. +** 5/25/90, DSJ, Adapted to multiple feature types. +** Tuesday, May 17, 1998 Changes made to make feature specific and +** simplify structures. First step in simplifying training process. ** - ** (c) Copyright Hewlett-Packard Company, 1988. 
+ ** (c) Copyright Hewlett-Packard Company, 1988. ** Licensed under the Apache License, Version 2.0 (the "License"); ** you may not use this file except in compliance with the License. ** You may obtain a copy of the License at @@ -22,7 +22,7 @@ /**---------------------------------------------------------------------------- - Include Files and Type Defines + Include Files and Type Defines ----------------------------------------------------------------------------**/ #include "oldlist.h" #include "efio.h" @@ -30,10 +30,8 @@ #include "featdefs.h" #include "tessopt.h" #include "ocrfeatures.h" -#include "general.h" #include "clusttool.h" #include "cluster.h" -#include "name2char.h" #include #include #include @@ -41,294 +39,244 @@ #include "commontraining.h" #define PROGRAM_FEATURE_TYPE "cn" -#define MINSD (1.0f / 64.0f) - -int row_number; /* cjn: fixes link problem */ /**---------------------------------------------------------------------------- - Public Function Prototypes + Public Function Prototypes ----------------------------------------------------------------------------**/ int main ( - int argc, - char **argv); + int argc, + char **argv); /**---------------------------------------------------------------------------- - Private Function Prototypes + Private Function Prototypes ----------------------------------------------------------------------------**/ -void ReadTrainingSamples ( - FILE *File, - LIST* TrainingSamples); void WriteNormProtos ( - char *Directory, - LIST LabeledProtoList, - CLUSTERER *Clusterer); + char *Directory, + LIST LabeledProtoList, + CLUSTERER *Clusterer); /* PARAMDESC *ConvertToPARAMDESC( - PARAM_DESC* Param_Desc, - int N); + PARAM_DESC* Param_Desc, + int N); */ void WriteProtos( - FILE *File, - uinT16 N, - LIST ProtoList, - BOOL8 WriteSigProtos, - BOOL8 WriteInsigProtos); + FILE *File, + uinT16 N, + LIST ProtoList, + BOOL8 WriteSigProtos, + BOOL8 WriteInsigProtos); 
/**---------------------------------------------------------------------------- - Global Data Definitions and Declarations + Global Data Definitions and Declarations ----------------------------------------------------------------------------**/ /* global variable to hold configuration parameters to control clustering */ //-M 0.025 -B 0.05 -I 0.8 -C 1e-3 -CLUSTERCONFIG Config = +CLUSTERCONFIG Config = { elliptical, 0.025, 0.05, 0.8, 1e-3, 0 }; /**---------------------------------------------------------------------------- - Public Code + Public Code ----------------------------------------------------------------------------**/ /*---------------------------------------------------------------------------*/ int main ( - int argc, - char **argv) + int argc, + char **argv) /* -** Parameters: -** argc number of command line arguments -** argv array of command line arguments -** Globals: none -** Operation: -** This program reads in a text file consisting of feature -** samples from a training page in the following format: +** Parameters: +** argc number of command line arguments +** argv array of command line arguments +** Globals: none +** Operation: +** This program reads in a text file consisting of feature +** samples from a training page in the following format: ** -** FontName CharName NumberOfFeatureTypes(N) -** FeatureTypeName1 NumberOfFeatures(M) -** Feature1 -** ... -** FeatureM -** FeatureTypeName2 NumberOfFeatures(M) -** Feature1 -** ... -** FeatureM -** ... -** FeatureTypeNameN NumberOfFeatures(M) -** Feature1 -** ... -** FeatureM -** FontName CharName ... +** FontName CharName NumberOfFeatureTypes(N) +** FeatureTypeName1 NumberOfFeatures(M) +** Feature1 +** ... +** FeatureM +** FeatureTypeName2 NumberOfFeatures(M) +** Feature1 +** ... +** FeatureM +** ... +** FeatureTypeNameN NumberOfFeatures(M) +** Feature1 +** ... +** FeatureM +** FontName CharName ... ** -** It then appends these samples into a separate file for each -** character. 
The name of the file is +** It then appends these samples into a separate file for each +** character. The name of the file is ** -** DirectoryName/FontName/CharName.FeatureTypeName +** DirectoryName/FontName/CharName.FeatureTypeName ** -** The DirectoryName can be specified via a command -** line argument. If not specified, it defaults to the -** current directory. The format of the resulting files is: +** The DirectoryName can be specified via a command +** line argument. If not specified, it defaults to the +** current directory. The format of the resulting files is: ** -** NumberOfFeatures(M) -** Feature1 -** ... -** FeatureM -** NumberOfFeatures(M) -** ... +** NumberOfFeatures(M) +** Feature1 +** ... +** FeatureM +** NumberOfFeatures(M) +** ... ** -** The output files each have a header which describes the -** type of feature which the file contains. This header is -** in the format required by the clusterer. A command line -** argument can also be used to specify that only the first -** N samples of each class should be used. -** Return: none -** Exceptions: none -** History: Fri Aug 18 08:56:17 1989, DSJ, Created. +** The output files each have a header which describes the +** type of feature which the file contains. This header is +** in the format required by the clusterer. A command line +** argument can also be used to specify that only the first +** N samples of each class should be used. +** Return: none +** Exceptions: none +** History: Fri Aug 18 08:56:17 1989, DSJ, Created. 
*/ { - char *PageName; - FILE *TrainingPage; - LIST CharList = NIL; - CLUSTERER *Clusterer = NULL; - LIST ProtoList = NIL; - LIST NormProtoList = NIL; - LIST pCharList; - LABELEDLIST CharSample; + char *PageName; + FILE *TrainingPage; + LIST CharList = NIL_LIST; + CLUSTERER *Clusterer = NULL; + LIST ProtoList = NIL_LIST; + LIST NormProtoList = NIL_LIST; + LIST pCharList; + LABELEDLIST CharSample; + FEATURE_DEFS_STRUCT FeatureDefs; + InitFeatureDefs(&FeatureDefs); - ParseArguments (argc, argv); - while ((PageName = GetNextFilename(argc, argv)) != NULL) - { - printf ("Reading %s ...\n", PageName); - TrainingPage = Efopen (PageName, "r"); - ReadTrainingSamples (TrainingPage, &CharList); - fclose (TrainingPage); - //WriteTrainingSamples (Directory, CharList); - } - printf("Clustering ...\n"); - pCharList = CharList; - iterate(pCharList) - { - //Cluster - CharSample = (LABELEDLIST) first_node (pCharList); - //printf ("\nClustering %s ...", CharSample->Label); - Clusterer = SetUpForClustering(CharSample, PROGRAM_FEATURE_TYPE); - float SavedMinSamples = Config.MinSamples; - Config.MagicSamples = CharSample->SampleCount; - while (Config.MinSamples > 0.001) { - ProtoList = ClusterSamples(Clusterer, &Config); - if (NumberOfProtos(ProtoList, 1, 0) > 0) - break; - else { - Config.MinSamples *= 0.95; - printf("0 significant protos for %s." 
- " Retrying clustering with MinSamples = %f%%\n", - CharSample->Label, Config.MinSamples); - } - } - Config.MinSamples = SavedMinSamples; - AddToNormProtosList(&NormProtoList, ProtoList, CharSample->Label); - } - FreeTrainingSamples (CharList); - if (Clusterer == NULL) // To avoid a SIGSEGV - return 1; - WriteNormProtos (Directory, NormProtoList, Clusterer); - FreeClusterer(Clusterer); - FreeProtoList(&ProtoList); - FreeNormProtoList(NormProtoList); - printf ("\n"); + ParseArguments(argc, argv); + int num_fonts = 0; + while ((PageName = GetNextFilename(argc, argv)) != NULL) { + printf("Reading %s ...\n", PageName); + TrainingPage = Efopen(PageName, "r"); + ReadTrainingSamples(FeatureDefs, PROGRAM_FEATURE_TYPE, + 100, 1.0f / 64.0f, 0.0f, NULL, TrainingPage, &CharList); + fclose(TrainingPage); + ++num_fonts; + } + printf("Clustering ...\n"); + // To allow an individual font to form a separate cluster, + // reduce the min samples: + // Config.MinSamples = 0.5 / num_fonts; + pCharList = CharList; + iterate(pCharList) { + //Cluster + CharSample = (LABELEDLIST)first_node(pCharList); + Clusterer = + SetUpForClustering(FeatureDefs, CharSample, PROGRAM_FEATURE_TYPE); + float SavedMinSamples = Config.MinSamples; + // To disable the tendency to produce a single cluster for all fonts, + // make MagicSamples an impossible to achieve number: + // Config.MagicSamples = CharSample->SampleCount * 10; + Config.MagicSamples = CharSample->SampleCount; + while (Config.MinSamples > 0.001) { + ProtoList = ClusterSamples(Clusterer, &Config); + if (NumberOfProtos(ProtoList, 1, 0) > 0) { + break; + } else { + Config.MinSamples *= 0.95; + printf("0 significant protos for %s." 
+ " Retrying clustering with MinSamples = %f%%\n", + CharSample->Label, Config.MinSamples); + } + } + Config.MinSamples = SavedMinSamples; + AddToNormProtosList(&NormProtoList, ProtoList, CharSample->Label); + } + FreeTrainingSamples(CharList); + if (Clusterer == NULL) // To avoid a SIGSEGV + return 1; + WriteNormProtos (Directory, NormProtoList, Clusterer); + FreeClusterer(Clusterer); + FreeProtoList(&ProtoList); + FreeNormProtoList(NormProtoList); + printf ("\n"); return 0; -} // main +} // main /**---------------------------------------------------------------------------- - Private Code + Private Code ----------------------------------------------------------------------------**/ -/*---------------------------------------------------------------------------*/ -void ReadTrainingSamples ( - FILE *File, - LIST* TrainingSamples) - -/* -** Parameters: -** File open text file to read samples from -** Globals: none -** Operation: -** This routine reads training samples from a file and -** places them into a data structure which organizes the -** samples by FontName and CharName. It then returns this -** data structure. -** Return: none -** Exceptions: none -** History: Fri Aug 18 13:11:39 1989, DSJ, Created. -** Tue May 17 1998 simplifications to structure, illiminated -** font, and feature specification levels of structure. 
-*/ - -{ - char unichar[UNICHAR_LEN + 1]; - LABELEDLIST CharSample; - FEATURE_SET FeatureSamples; - CHAR_DESC CharDesc; - int Type, i; - - while (fscanf (File, "%s %s", CTFontName, unichar) == 2) { - CharSample = FindList (*TrainingSamples, unichar); - if (CharSample == NULL) { - CharSample = NewLabeledList (unichar); - *TrainingSamples = push (*TrainingSamples, CharSample); - } - CharDesc = ReadCharDescription (File); - Type = ShortNameToFeatureType(PROGRAM_FEATURE_TYPE); - FeatureSamples = CharDesc->FeatureSets[Type]; - for (int feature = 0; feature < FeatureSamples->NumFeatures; ++feature) { - FEATURE f = FeatureSamples->Features[feature]; - for (int dim =0; dim < f->Type->NumParams; ++dim) - f->Params[dim] += UniformRandomNumber(-MINSD, MINSD); - } - CharSample->List = push (CharSample->List, FeatureSamples); - CharSample->SampleCount++; - for (i = 0; i < CharDesc->NumFeatureSets; i++) - if (Type != i) - FreeFeatureSet(CharDesc->FeatureSets[i]); - free (CharDesc); - } -} // ReadTrainingSamples /*----------------------------------------------------------------------------*/ void WriteNormProtos ( - char *Directory, - LIST LabeledProtoList, - CLUSTERER *Clusterer) + char *Directory, + LIST LabeledProtoList, + CLUSTERER *Clusterer) /* -** Parameters: -** Directory directory to place sample files into -** Operation: -** This routine writes the specified samples into files which -** are organized according to the font name and character name -** of the samples. -** Return: none -** Exceptions: none -** History: Fri Aug 18 16:17:06 1989, DSJ, Created. +** Parameters: +** Directory directory to place sample files into +** Operation: +** This routine writes the specified samples into files which +** are organized according to the font name and character name +** of the samples. +** Return: none +** Exceptions: none +** History: Fri Aug 18 16:17:06 1989, DSJ, Created. 
*/ { - FILE *File; - char Filename[MAXNAMESIZE]; - LABELEDLIST LabeledProto; - int N; + FILE *File; + char Filename[MAXNAMESIZE]; + LABELEDLIST LabeledProto; + int N; - strcpy (Filename, ""); - if (Directory != NULL) - { - strcat (Filename, Directory); - strcat (Filename, "/"); - } - strcat (Filename, "normproto"); - printf ("\nWriting %s ...", Filename); - File = Efopen (Filename, "w"); - fprintf(File,"%0d\n",Clusterer->SampleSize); - WriteParamDesc(File,Clusterer->SampleSize,Clusterer->ParamDesc); - iterate(LabeledProtoList) - { - LabeledProto = (LABELEDLIST) first_node (LabeledProtoList); - N = NumberOfProtos(LabeledProto->List, - ShowSignificantProtos, ShowInsignificantProtos); - if (N < 1) { - printf ("\nError! Not enough protos for %s: %d protos" - " (%d significant protos" - ", %d insignificant protos)\n", - LabeledProto->Label, N, - NumberOfProtos(LabeledProto->List, 1, 0), - NumberOfProtos(LabeledProto->List, 0, 1)); - exit(1); - } - fprintf(File, "\n%s %d\n", LabeledProto->Label, N); - WriteProtos(File, Clusterer->SampleSize, LabeledProto->List, - ShowSignificantProtos, ShowInsignificantProtos); - } - fclose (File); + strcpy (Filename, ""); + if (Directory != NULL) + { + strcat (Filename, Directory); + strcat (Filename, "/"); + } + strcat (Filename, "normproto"); + printf ("\nWriting %s ...", Filename); + File = Efopen (Filename, "w"); + fprintf(File,"%0d\n",Clusterer->SampleSize); + WriteParamDesc(File,Clusterer->SampleSize,Clusterer->ParamDesc); + iterate(LabeledProtoList) + { + LabeledProto = (LABELEDLIST) first_node (LabeledProtoList); + N = NumberOfProtos(LabeledProto->List, true, false); + if (N < 1) { + printf ("\nError! 
Not enough protos for %s: %d protos" + " (%d significant protos" + ", %d insignificant protos)\n", + LabeledProto->Label, N, + NumberOfProtos(LabeledProto->List, 1, 0), + NumberOfProtos(LabeledProto->List, 0, 1)); + exit(1); + } + fprintf(File, "\n%s %d\n", LabeledProto->Label, N); + WriteProtos(File, Clusterer->SampleSize, LabeledProto->List, true, false); + } + fclose (File); -} // WriteNormProtos +} // WriteNormProtos /*-------------------------------------------------------------------------*/ void WriteProtos( - FILE *File, - uinT16 N, - LIST ProtoList, - BOOL8 WriteSigProtos, - BOOL8 WriteInsigProtos) + FILE *File, + uinT16 N, + LIST ProtoList, + BOOL8 WriteSigProtos, + BOOL8 WriteInsigProtos) { - PROTOTYPE *Proto; + PROTOTYPE *Proto; - // write prototypes - iterate(ProtoList) - { - Proto = (PROTOTYPE *) first_node ( ProtoList ); - if (( Proto->Significant && WriteSigProtos ) || - ( ! Proto->Significant && WriteInsigProtos ) ) - WritePrototype( File, N, Proto ); - } -} // WriteProtos + // write prototypes + iterate(ProtoList) + { + Proto = (PROTOTYPE *) first_node ( ProtoList ); + if (( Proto->Significant && WriteSigProtos ) || + ( ! Proto->Significant && WriteInsigProtos ) ) + WritePrototype( File, N, Proto ); + } +} // WriteProtos diff --git a/training/combine_tessdata.cpp b/training/combine_tessdata.cpp index 5997e72579..ba8934683e 100644 --- a/training/combine_tessdata.cpp +++ b/training/combine_tessdata.cpp @@ -81,7 +81,7 @@ int main(int argc, char **argv) { strcmp(argv[1], "-u") == 0)) { // Initialize TessdataManager with the data in the given traineddata file. tesseract::TessdataManager tm; - tm.Init(argv[2]); + tm.Init(argv[2], 0); printf("Extracting tessdata components from %s\n", argv[2]); if (strcmp(argv[1], "-e") == 0) { for (i = 3; i < argc; ++i) { @@ -115,7 +115,7 @@ int main(int argc, char **argv) { // Initialize TessdataManager with the data in the given traineddata file. 
tesseract::TessdataManager tm; - tm.Init(traineddata_filename.string()); + tm.Init(traineddata_filename.string(), 0); // Write the updated traineddata file. tm.OverwriteComponents(new_traineddata_filename, argv+3, argc-3); diff --git a/training/commontraining.cpp b/training/commontraining.cpp index 65e3b44669..6553f389e6 100644 --- a/training/commontraining.cpp +++ b/training/commontraining.cpp @@ -31,6 +31,7 @@ #define round(x,frag)(floor(x/frag+.5)*frag) + // Global Variables. char *Directory = NULL; @@ -39,21 +40,14 @@ const char *OutputUnicharsetFile = NULL; const char *InputFontInfoFile = NULL; -// globals used to control what information is saved in the output file -BOOL8 ShowSignificantProtos = TRUE; -BOOL8 ShowInsignificantProtos = FALSE; - FLOAT32 RoundingAccuracy = 0.0f; char CTFontName[MAXNAMESIZE]; const char* test_ch = ""; -// The unicharset used during training -UNICHARSET unicharset_training; - /*---------------------------------------------------------------------------*/ -void ParseArguments(int argc, char **argv) +void ParseArguments(int argc, char **argv) { /* ** Parameters: ** argc number of command line arguments to parse @@ -68,8 +62,6 @@ void ParseArguments(int argc, char **argv) ** This routine parses the command line arguments that were ** passed to the program. The legal arguments are: ** -d "turn off display of samples" - ** -p "turn off significant protos" - ** -n "turn off insignificant proto" ** -S [ spherical | elliptical | mixed | automatic ] ** -M MinSamples "min samples per prototype (%)" ** -B MaxIllegal "max illegal chars per cluster (%)" @@ -84,25 +76,13 @@ void ParseArguments(int argc, char **argv) ** Exceptions: Illegal options terminate the program. ** History: 7/24/89, DSJ, Created. 
*/ - -{ - int Option; - int ParametersRead; - BOOL8 Error; + int Option; + int ParametersRead; + BOOL8 Error; Error = FALSE; - while (( Option = tessopt( argc, argv, "F:O:U:R:D:C:I:M:B:S:n:p" )) != EOF ) - { - switch ( Option ) - { - case 'n': - sscanf(tessoptarg,"%d", &ParametersRead); - ShowInsignificantProtos = ParametersRead; - break; - case 'p': - sscanf(tessoptarg,"%d", &ParametersRead); - ShowSignificantProtos = ParametersRead; - break; + while ((Option = tessopt(argc, argv, "F:O:U:R:D:C:I:M:B:S")) != EOF) { + switch (Option) { case 'C': ParametersRead = sscanf( tessoptarg, "%lf", &(Config.Confidence) ); if ( ParametersRead != 1 ) Error = TRUE; @@ -253,16 +233,93 @@ LABELEDLIST NewLabeledList ( LabeledList = (LABELEDLIST) Emalloc (sizeof (LABELEDLISTNODE)); LabeledList->Label = (char*)Emalloc (strlen (Label)+1); strcpy (LabeledList->Label, Label); - LabeledList->List = NIL; + LabeledList->List = NIL_LIST; LabeledList->SampleCount = 0; + LabeledList->font_sample_count = 0; return (LabeledList); } /* NewLabeledList */ +/*---------------------------------------------------------------------------*/ +void ReadTrainingSamples(const FEATURE_DEFS_STRUCT& feature_defs, + const char *feature_name, int max_samples, + float linear_spread, float circular_spread, + UNICHARSET* unicharset, + FILE* file, LIST* training_samples) { +/* +** Parameters: +** file open text file to read samples from +** Globals: none +** Operation: +** This routine reads training samples from a file and +** places them into a data structure which organizes the +** samples by FontName and CharName. It then returns this +** data structure. +** Return: none +** Exceptions: none +** History: Fri Aug 18 13:11:39 1989, DSJ, Created. +** Tue May 17 1998 simplifications to structure, illiminated +** font, and feature specification levels of structure. 
+*/ + char unichar[UNICHAR_LEN + 1]; + LABELEDLIST char_sample; + FEATURE_SET feature_samples; + CHAR_DESC char_desc; + int i; + int feature_type = ShortNameToFeatureType(feature_defs, feature_name); + // Description of feature of type feature_type. + const FEATURE_DESC_STRUCT* f_desc = feature_defs.FeatureDesc[feature_type]; + + // Zero out the font_sample_count for all the classes. + LIST it = *training_samples; + iterate(it) { + char_sample = reinterpret_cast(first_node(it)); + char_sample->font_sample_count = 0; + } + + while (fscanf(file, "%s %s", CTFontName, unichar) == 2) { + if (unicharset != NULL && !unicharset->contains_unichar(unichar)) { + unicharset->unichar_insert(unichar); + if (unicharset->size() > MAX_NUM_CLASSES) { + tprintf("Error: Size of unicharset in training is " + "greater than MAX_NUM_CLASSES\n"); + exit(1); + } + } + char_sample = FindList(*training_samples, unichar); + if (char_sample == NULL) { + char_sample = NewLabeledList(unichar); + *training_samples = push(*training_samples, char_sample); + } + char_desc = ReadCharDescription(feature_defs, file); + feature_samples = char_desc->FeatureSets[feature_type]; + if (char_sample->font_sample_count < max_samples || max_samples <= 0) { + for (int feature = 0; feature < feature_samples->NumFeatures; ++feature) { + FEATURE f = feature_samples->Features[feature]; + for (int dim =0; dim < f->Type->NumParams; ++dim) + f->Params[dim] += f_desc->ParamDesc[dim].Circular + ? 
UniformRandomNumber(-circular_spread, circular_spread) + : UniformRandomNumber(-linear_spread, linear_spread); + } + char_sample->List = push(char_sample->List, feature_samples); + char_sample->SampleCount++; + char_sample->font_sample_count++; + } else { + FreeFeatureSet(feature_samples); + } + for (i = 0; i < char_desc->NumFeatureSets; i++) { + if (feature_type != i) + FreeFeatureSet(char_desc->FeatureSets[i]); + } + free(char_desc); + } +} // ReadTrainingSamples + /*---------------------------------------------------------------------------*/ void WriteTrainingSamples ( - char *Directory, - LIST CharList, + const FEATURE_DEFS_STRUCT &FeatureDefs, + char *Directory, + LIST CharList, const char* program_feature_type) /* @@ -279,7 +336,7 @@ void WriteTrainingSamples ( */ { - LABELEDLIST CharSample; + LABELEDLIST char_sample; FEATURE_SET FeatureSet; LIST FeatureList; FILE *File; @@ -288,7 +345,7 @@ void WriteTrainingSamples ( iterate (CharList) // iterate thru all of the fonts { - CharSample = (LABELEDLIST) first_node (CharList); + char_sample = (LABELEDLIST) first_node (CharList); // construct the full pathname for the current samples file strcpy (Filename, ""); @@ -299,7 +356,7 @@ void WriteTrainingSamples ( } strcat (Filename, CTFontName); strcat (Filename, "/"); - strcat (Filename, CharSample->Label); + strcat (Filename, char_sample->Label); strcat (Filename, "."); strcat (Filename, program_feature_type); printf ("\nWriting %s ...", Filename); @@ -313,7 +370,7 @@ void WriteTrainingSamples ( WriteOldParamDesc( File, FeatureDefs.FeatureDesc[ShortNameToFeatureType( - program_feature_type)]); + FeatureDefs, program_feature_type)]); } else { @@ -322,7 +379,7 @@ void WriteTrainingSamples ( } // append samples onto the file - FeatureList = CharSample->List; + FeatureList = char_sample->List; NumSamples = 0; iterate (FeatureList) { @@ -351,7 +408,7 @@ void FreeTrainingSamples ( */ { - LABELEDLIST CharSample; + LABELEDLIST char_sample; FEATURE_SET FeatureSet; LIST 
FeatureList; @@ -359,14 +416,14 @@ void FreeTrainingSamples ( // printf ("FreeTrainingSamples...\n"); iterate (CharList) /* iterate thru all of the fonts */ { - CharSample = (LABELEDLIST) first_node (CharList); - FeatureList = CharSample->List; + char_sample = (LABELEDLIST) first_node (CharList); + FeatureList = char_sample->List; iterate (FeatureList) /* iterate thru all of the classes */ { FeatureSet = (FEATURE_SET) first_node (FeatureList); FreeFeatureSet (FeatureSet); } - FreeLabeledList (CharSample); + FreeLabeledList (char_sample); } destroy (CharList); @@ -398,12 +455,13 @@ void FreeLabeledList ( /*---------------------------------------------------------------------------*/ CLUSTERER *SetUpForClustering( - LABELEDLIST CharSample, + const FEATURE_DEFS_STRUCT &FeatureDefs, + LABELEDLIST char_sample, const char* program_feature_type) /* ** Parameters: - ** CharSample: LABELEDLIST that holds all the feature information for a + ** char_sample: LABELEDLIST that holds all the feature information for a ** given character. 
** Globals: ** None @@ -427,17 +485,12 @@ CLUSTERER *SetUpForClustering( inT32 CharID; LIST FeatureList = NULL; FEATURE_SET FeatureSet = NULL; - FEATURE_DESC FeatureDesc = NULL; - // PARAM_DESC* ParamDesc; - FeatureDesc = - FeatureDefs.FeatureDesc[ShortNameToFeatureType(program_feature_type)]; - N = FeatureDesc->NumParams; - // ParamDesc = ConvertToPARAMDESC(FeatureDesc->ParamDesc, N); - Clusterer = MakeClusterer(N,FeatureDesc->ParamDesc); - // free(ParamDesc); + int desc_index = ShortNameToFeatureType(FeatureDefs, program_feature_type); + N = FeatureDefs.FeatureDesc[desc_index]->NumParams; + Clusterer = MakeClusterer(N, FeatureDefs.FeatureDesc[desc_index]->ParamDesc); - FeatureList = CharSample->List; + FeatureList = char_sample->List; CharID = 0; iterate(FeatureList) { @@ -560,7 +613,7 @@ LIST RemoveInsignificantProtos( int N) { - LIST NewProtoList = NIL; + LIST NewProtoList = NIL_LIST; LIST pProtoList; PROTOTYPE* Proto; PROTOTYPE* NewProto; @@ -682,9 +735,7 @@ void FreeLabeledClassList ( } /* FreeLabeledClassList */ /** SetUpForFloat2Int **************************************************/ -void SetUpForFloat2Int( - LIST LabeledClassList) -{ +void SetUpForFloat2Int(const UNICHARSET& unicharset, LIST LabeledClassList) { MERGE_CLASS MergeClass; CLASS_TYPE Class; int NumProtos; @@ -703,8 +754,7 @@ void SetUpForFloat2Int( { UnicityTableEqEq font_set; MergeClass = (MERGE_CLASS) first_node (LabeledClassList); - Class = &TrainingData[unicharset_training.unichar_to_id( - MergeClass->Label)]; + Class = &TrainingData[unicharset.unichar_to_id(MergeClass->Label)]; NumProtos = MergeClass->Class->NumProtos; NumConfigs = MergeClass->Class->NumConfigs; font_set.move(&MergeClass->Class->font_set); @@ -766,12 +816,12 @@ void FreeNormProtoList ( LIST CharList) { - LABELEDLIST CharSample; + LABELEDLIST char_sample; iterate (CharList) /* iterate thru all of the fonts */ { - CharSample = (LABELEDLIST) first_node (CharList); - FreeLabeledList (CharSample); + char_sample = (LABELEDLIST) 
first_node (CharList); + FreeLabeledList (char_sample); } destroy (CharList); diff --git a/training/commontraining.h b/training/commontraining.h index 6ccd29609d..a668dfa136 100644 --- a/training/commontraining.h +++ b/training/commontraining.h @@ -17,20 +17,16 @@ #include "oldlist.h" #include "cluster.h" #include "intproto.h" - +#include "featdefs.h" ////////////////////////////////////////////////////////////////////////////// // Macros //////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////// #define MAXNAMESIZE 80 -#define MINSD_ANGLE (1.0f / 64.0f) - ////////////////////////////////////////////////////////////////////////////// // Globals /////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////// -extern BOOL8 ShowSignificantProtos; -extern BOOL8 ShowInsignificantProtos; extern BOOL8 ShowAllSamples; // Must be defined in the file that "implements" commonTraining facilities. 
@@ -48,9 +44,6 @@ extern const char *OutputUnicharsetFile; extern const char *InputFontInfoFile; -// The unicharset used during training -extern UNICHARSET unicharset_training; - ////////////////////////////////////////////////////////////////////////////// // Structs /////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////// @@ -58,6 +51,7 @@ typedef struct { char *Label; int SampleCount; + int font_sample_count; LIST List; } LABELEDLISTNODE, *LABELEDLIST; @@ -87,9 +81,16 @@ LABELEDLIST FindList( LABELEDLIST NewLabeledList( const char *Label); +void ReadTrainingSamples(const FEATURE_DEFS_STRUCT& feature_defs, + const char *feature_name, int max_samples, + float linear_spread, float circular_spread, + UNICHARSET* unicharset, + FILE* file, LIST* training_samples); + void WriteTrainingSamples( - char *Directory, - LIST CharList, + const FEATURE_DEFS_STRUCT &FeatureDefs, + char *Directory, + LIST CharList, const char *program_feature_type); void FreeTrainingSamples( @@ -102,6 +103,7 @@ void FreeLabeledClassList( LIST ClassListList); CLUSTERER *SetUpForClustering( + const FEATURE_DEFS_STRUCT &FeatureDefs, LABELEDLIST CharSample, const char *program_feature_type); @@ -130,8 +132,7 @@ MERGE_CLASS NewLabeledClass( void FreeTrainingSamples( LIST CharList); -void SetUpForFloat2Int( - LIST LabeledClassList); +void SetUpForFloat2Int(const UNICHARSET& unicharset, LIST LabeledClassList); void Normalize( float *Values); diff --git a/training/mergenf.cpp b/training/mergenf.cpp index be05226514..402533cbba 100644 --- a/training/mergenf.cpp +++ b/training/mergenf.cpp @@ -15,11 +15,8 @@ ** See the License for the specific language governing permissions and ** limitations under the License. 
******************************************************************************/ -/*---------------------------------------------------------------------------- - Include Files and Type Defines ------------------------------------------------------------------------------*/ #include "mergenf.h" -#include "general.h" +#include "host.h" #include "efio.h" #include "clusttool.h" #include "cluster.h" @@ -30,16 +27,13 @@ #include "const.h" #include "featdefs.h" #include "intproto.h" -#include "varable.h" +#include "params.h" #include #include #include -/*---------------------------------------------------------------------------- - Variables ------------------------------------------------------------------------------*/ /*-------------------once in subfeat---------------------------------*/ double_VAR(training_angle_match_scale, 1.0, "Angle Match Scale ..."); @@ -54,15 +48,6 @@ double_VAR(training_orthogonal_bbox_pad, 2.5, "Orthogonal bounding box pad ...") double_VAR(training_angle_pad, 45.0, "Angle pad ..."); -/*---------------------------------------------------------------------------- - Global Data Definitions and Declarations -----------------------------------------------------------------------------*/ -//int row_number; /* kludge due to linking problems */ - -/*---------------------------------------------------------------------------- - Public Code -----------------------------------------------------------------------------*/ -/*---------------------------------------------------------------------------*/ /** * Compare protos p1 and p2 and return an estimate of the * worst evidence rating that will result for any part of p1 @@ -130,7 +115,6 @@ FLOAT32 CompareProtos(PROTO p1, PROTO p2) { } /* CompareProtos */ -/*---------------------------------------------------------------------------*/ /** * This routine computes a proto which is the weighted * average of protos p1 and p2. 
The new proto is returned @@ -150,8 +134,7 @@ void ComputeMergedProto (PROTO p1, PROTO p2, FLOAT32 w1, FLOAT32 w2, - PROTO MergedProto) -{ + PROTO MergedProto) { FLOAT32 TotalWeight; TotalWeight = w1 + w2; @@ -165,7 +148,6 @@ void ComputeMergedProto (PROTO p1, FillABC(MergedProto); } /* ComputeMergedProto */ -/*---------------------------------------------------------------------------*/ /** * This routine searches thru all of the prototypes in * Class and returns the id of the proto which would provide @@ -211,7 +193,6 @@ int FindClosestExistingProto(CLASS_TYPE Class, int NumMerged[], return BestProto; } /* FindClosestExistingProto */ -/*---------------------------------------------------------------------------*/ /** * This fills in the fields of the New proto based on the * fields of the Old proto. @@ -263,9 +244,7 @@ FLOAT32 SubfeatureEvidence(FEATURE Feature, PROTO Proto) { * approximation. The equation that represents the transform is: * 1 / (1 + (sim / midpoint) ^ curl) */ -FLOAT32 EvidenceOf ( - register FLOAT32 Similarity) -{ +double EvidenceOf (double Similarity) { Similarity /= training_similarity_midpoint; @@ -274,13 +253,11 @@ FLOAT32 EvidenceOf ( else if (training_similarity_curl == 2) Similarity = Similarity * Similarity; else - Similarity = static_cast(pow(static_cast(Similarity), - training_similarity_curl)); + Similarity = pow (Similarity, training_similarity_curl); return (1.0 / (1.0 + Similarity)); } -/*---------------------------------------------------------------------------*/ /** * This routine returns TRUE if Feature would be matched * by a fast match table built from Proto. @@ -321,7 +298,6 @@ BOOL8 DummyFastMatch ( Feature->Params[PicoFeatY]); } /* DummyFastMatch */ -/*----------------------------------------------------------------------------*/ /** * This routine computes a bounding box that encloses the * specified proto along with some padding. 
The @@ -359,7 +335,6 @@ void ComputePaddedBoundingBox (PROTO Proto, FLOAT32 TangentPad, } /* ComputePaddedBoundingBox */ -/*--------------------------------------------------------------------------*/ /** * Return TRUE if point (X,Y) is inside of Rectangle. * diff --git a/training/mergenf.h b/training/mergenf.h index 55c9b8e43b..d0920a63f4 100644 --- a/training/mergenf.h +++ b/training/mergenf.h @@ -78,8 +78,8 @@ FLOAT32 SubfeatureEvidence ( FEATURE Feature, PROTO Proto); -FLOAT32 EvidenceOf ( - register FLOAT32 Similarity); +double EvidenceOf ( + register double Similarity); BOOL8 DummyFastMatch ( FEATURE Feature, @@ -95,5 +95,3 @@ BOOL8 PointInside ( FRECT *Rectangle, FLOAT32 X, FLOAT32 Y); - -extern FEATURE_DESC_STRUCT PicoFeatDesc; diff --git a/training/mftraining.cpp b/training/mftraining.cpp index 33118d581e..6cd745caf3 100644 --- a/training/mftraining.cpp +++ b/training/mftraining.cpp @@ -1,18 +1,18 @@ /****************************************************************************** -** Filename: mftraining.c -** Purpose: Separates training pages into files for each character. -** Strips from files only the features and there parameters of - the feature type mf. -** Author: Dan Johnson -** Revisment: Christy Russon -** Environment: HPUX 6.5 -** Library: HPUX 6.5 -** History: Fri Aug 18 08:53:50 1989, DSJ, Created. -** 5/25/90, DSJ, Adapted to multiple feature types. -** Tuesday, May 17, 1998 Changes made to make feature specific and -** simplify structures. First step in simplifying training process. +** Filename: mftraining.c +** Purpose: Separates training pages into files for each character. +** Strips from files only the features and there parameters of + the feature type mf. +** Author: Dan Johnson +** Revisment: Christy Russon +** Environment: HPUX 6.5 +** Library: HPUX 6.5 +** History: Fri Aug 18 08:53:50 1989, DSJ, Created. +** 5/25/90, DSJ, Adapted to multiple feature types. 
+** Tuesday, May 17, 1998 Changes made to make feature specific and +** simplify structures. First step in simplifying training process. ** - ** (c) Copyright Hewlett-Packard Company, 1988. + ** (c) Copyright Hewlett-Packard Company, 1988. ** Licensed under the Apache License, Version 2.0 (the "License"); ** you may not use this file except in compliance with the License. ** You may obtain a copy of the License at @@ -24,7 +24,7 @@ ** limitations under the License. ******************************************************************************/ /**---------------------------------------------------------------------------- - Include Files and Type Defines + Include Files and Type Defines ----------------------------------------------------------------------------**/ #include "oldlist.h" #include "efio.h" @@ -33,7 +33,6 @@ #include "tessopt.h" #include "ocrfeatures.h" #include "mf.h" -#include "general.h" #include "clusttool.h" #include "cluster.h" #include "protos.h" @@ -41,7 +40,6 @@ #include "tprintf.h" #include "const.h" #include "mergenf.h" -#include "name2char.h" #include "intproto.h" #include "freelist.h" #include "efio.h" @@ -63,52 +61,40 @@ #endif #define PROGRAM_FEATURE_TYPE "mf" -#define MINSD (1.0f / 128.0f) static const char* kInputUnicharsetFile = "unicharset"; static const char* kOutputUnicharsetFile = "mfunicharset"; /**---------------------------------------------------------------------------- - Public Function Prototypes + Public Function Prototypes ----------------------------------------------------------------------------**/ int main ( - int argc, - char **argv); + int argc, + char **argv); /**---------------------------------------------------------------------------- - Private Function Prototypes + Private Function Prototypes ----------------------------------------------------------------------------**/ -LIST ReadTrainingSamples ( - FILE *File); - -void WriteClusteredTrainingSamples ( - char *Directory, - LIST ProtoList, - CLUSTERER 
*Clusterer, - LABELEDLIST CharSample); -/**/ -void WriteMergedTrainingSamples( - char *Directory, - LIST ClassList); void WriteMicrofeat( - char *Directory, - LIST ClassList); + char *Directory, + LIST ClassList); void WriteProtos( - FILE* File, - MERGE_CLASS MergeClass); + FILE* File, + MERGE_CLASS MergeClass); void WriteConfigs( - FILE* File, - CLASS_TYPE Class); + FILE* File, + CLASS_TYPE Class); /* PARAMDESC *ConvertToPARAMDESC( - PARAM_DESC* Param_Desc, - int N); + PARAM_DESC* Param_Desc, + int N); */ -void WritePFFMTable(INT_TEMPLATES Templates, const char* filename); +void WritePFFMTable(INT_TEMPLATES Templates, const UNICHARSET& unicharset, + const char* filename); // global variable to hold configuration parameters to control clustering // -M 0.40 -B 0.05 -I 1.0 -C 1e-6. @@ -117,7 +103,7 @@ CLUSTERCONFIG Config = /*---------------------------------------------------------------------------- - Public Code + Public Code -----------------------------------------------------------------------------*/ void DisplayProtoList(const char* ch, LIST protolist) { void* window = c_create_window("Char samples", 50, 200, @@ -160,55 +146,56 @@ char* new_dup(const char* str) { /*---------------------------------------------------------------------------*/ int main (int argc, char **argv) { /* -** Parameters: -** argc number of command line arguments -** argv array of command line arguments -** Globals: none -** Operation: -** This program reads in a text file consisting of feature -** samples from a training page in the following format: +** Parameters: +** argc number of command line arguments +** argv array of command line arguments +** Globals: none +** Operation: +** This program reads in a text file consisting of feature +** samples from a training page in the following format: ** -** FontName CharName NumberOfFeatureTypes(N) -** FeatureTypeName1 NumberOfFeatures(M) -** Feature1 -** ... -** FeatureM -** FeatureTypeName2 NumberOfFeatures(M) -** Feature1 -** ... 
-** FeatureM -** ... -** FeatureTypeNameN NumberOfFeatures(M) -** Feature1 -** ... -** FeatureM -** FontName CharName ... +** FontName CharName NumberOfFeatureTypes(N) +** FeatureTypeName1 NumberOfFeatures(M) +** Feature1 +** ... +** FeatureM +** FeatureTypeName2 NumberOfFeatures(M) +** Feature1 +** ... +** FeatureM +** ... +** FeatureTypeNameN NumberOfFeatures(M) +** Feature1 +** ... +** FeatureM +** FontName CharName ... ** -** The result of this program is a binary inttemp file used by -** the OCR engine. -** Return: none -** Exceptions: none -** History: Fri Aug 18 08:56:17 1989, DSJ, Created. -** Mon May 18 1998, Christy Russson, Revistion started. +** The result of this program is a binary inttemp file used by +** the OCR engine. +** Return: none +** Exceptions: none +** History: Fri Aug 18 08:56:17 1989, DSJ, Created. +** Mon May 18 1998, Christy Russson, Revistion started. */ - char *PageName; - FILE *TrainingPage; - FILE *OutFile; - LIST CharList; - CLUSTERER *Clusterer = NULL; - LIST ProtoList = NIL; + char *PageName; + FILE *TrainingPage; + FILE *OutFile; + CLUSTERER *Clusterer = NULL; + LIST ProtoList = NIL_LIST; LABELEDLIST CharSample; - PROTOTYPE *Prototype; - LIST ClassList = NIL; - int Cid, Pid; - PROTO Proto; - PROTO_STRUCT DummyProto; - BIT_VECTOR Config2; - MERGE_CLASS MergeClass; - INT_TEMPLATES IntTemplates; + PROTOTYPE *Prototype; + LIST ClassList = NIL_LIST; + int Cid, Pid; + PROTO Proto; + PROTO_STRUCT DummyProto; + BIT_VECTOR Config2; + MERGE_CLASS MergeClass; + INT_TEMPLATES IntTemplates; LIST pCharList, pProtoList; char Filename[MAXNAMESIZE]; - tesseract::Classify classify; + tesseract::Classify *classify = new tesseract::Classify(); + FEATURE_DEFS_STRUCT FeatureDefs; + InitFeatureDefs(&FeatureDefs); ParseArguments (argc, argv); if (InputUnicharsetFile == NULL) { @@ -218,12 +205,13 @@ int main (int argc, char **argv) { OutputUnicharsetFile = kOutputUnicharsetFile; } + UNICHARSET unicharset_training; if 
(!unicharset_training.load_from_file(InputUnicharsetFile)) { fprintf(stderr, "Failed to load unicharset from file %s\n" "Building unicharset for mftraining from scratch...\n", InputUnicharsetFile); unicharset_training.clear(); - // Space character needed to represent NIL classification. + // Space character needed to represent NIL_LIST classification. unicharset_training.unichar_insert(" "); } @@ -247,10 +235,11 @@ int main (int argc, char **argv) { (fixed << 2) + (serif << 3) + (fraktur << 4); - if (!classify.get_fontinfo_table().contains(fontinfo)) { - classify.get_fontinfo_table().push_back(fontinfo); + if (!classify->get_fontinfo_table().contains(fontinfo)) { + classify->get_fontinfo_table().push_back(fontinfo); } else { fprintf(stderr, "Font %s already defined\n", fontinfo.name); + delete classify; return 1; } } @@ -283,25 +272,29 @@ int main (int argc, char **argv) { FontInfo fontinfo; fontinfo.name = short_name; fontinfo.properties = 0; // Not used to lookup in the table - if (!classify.get_fontinfo_table().contains(fontinfo)) { - fontinfo_id = classify.get_fontinfo_table().push_back(fontinfo); + if (!classify->get_fontinfo_table().contains(fontinfo)) { + fontinfo_id = classify->get_fontinfo_table().push_back(fontinfo); printf("%s has no defined properties.\n", short_name); } else { - fontinfo_id = classify.get_fontinfo_table().get_id(fontinfo); + fontinfo_id = classify->get_fontinfo_table().get_id(fontinfo); // Update the properties field - fontinfo = classify.get_fontinfo_table().get(fontinfo_id); + fontinfo = classify->get_fontinfo_table().get(fontinfo_id); delete[] short_name; } TrainingPage = Efopen (PageName, "r"); - CharList = ReadTrainingSamples (TrainingPage); + LIST char_list = NIL_LIST; + ReadTrainingSamples(FeatureDefs, PROGRAM_FEATURE_TYPE, + 0, 1.0f / 128.0f, 1.0f / 64.0f, &unicharset_training, + TrainingPage, &char_list); fclose (TrainingPage); //WriteTrainingSamples (Directory, CharList); - pCharList = CharList; + pCharList = char_list; 
iterate(pCharList) { //Cluster CharSample = (LABELEDLIST) first_node (pCharList); // printf ("\nClustering %s ...", CharSample->Label); - Clusterer = SetUpForClustering(CharSample, PROGRAM_FEATURE_TYPE); + Clusterer = + SetUpForClustering(FeatureDefs, CharSample, PROGRAM_FEATURE_TYPE); Config.MagicSamples = CharSample->SampleCount; ProtoList = ClusterSamples(Clusterer, &Config); CleanUpUnusedData(ProtoList); @@ -311,8 +304,8 @@ int main (int argc, char **argv) { Clusterer, &Config); if (strcmp(test_ch, CharSample->Label) == 0) DisplayProtoList(test_ch, ProtoList); - ProtoList = RemoveInsignificantProtos(ProtoList, ShowSignificantProtos, - ShowInsignificantProtos, + ProtoList = RemoveInsignificantProtos(ProtoList, true, + false, Clusterer->SampleSize); FreeClusterer(Clusterer); MergeClass = FindClass (ClassList, CharSample->Label); @@ -347,13 +340,12 @@ int main (int argc, char **argv) { } FreeProtoList (&ProtoList); } - FreeTrainingSamples (CharList); + FreeTrainingSamples(char_list); } - //WriteMergedTrainingSamples(Directory,ClassList); WriteMicrofeat(Directory, ClassList); - SetUpForFloat2Int(ClassList); - IntTemplates = classify.CreateIntTemplates(TrainingData, - unicharset_training); + SetUpForFloat2Int(unicharset_training, ClassList); + IntTemplates = classify->CreateIntTemplates(TrainingData, + unicharset_training); strcpy (Filename, ""); if (Directory != NULL) { strcat (Filename, Directory); @@ -365,7 +357,7 @@ int main (int argc, char **argv) { #else OutFile = Efopen (Filename, "wb"); #endif - classify.WriteIntTemplates(OutFile, IntTemplates, unicharset_training); + classify->WriteIntTemplates(OutFile, IntTemplates, unicharset_training); fclose (OutFile); strcpy (Filename, ""); if (Directory != NULL) { @@ -374,7 +366,7 @@ int main (int argc, char **argv) { } strcat (Filename, "pffmtable"); // Now create pffmtable. - WritePFFMTable(IntTemplates, Filename); + WritePFFMTable(IntTemplates, unicharset_training, Filename); // Write updated unicharset to a file. 
if (!unicharset_training.save_to_file(OutputUnicharsetFile)) { fprintf(stderr, "Failed to save unicharset to file %s\n", @@ -383,238 +375,95 @@ int main (int argc, char **argv) { } printf ("Done!\n"); /**/ FreeLabeledClassList (ClassList); + delete classify; + if (test_ch[0] != '\0') { + // If we are displaying debug window(s), wait for the user to look at them. + while (getchar() != '\n'); + } return 0; -} /* main */ +} /* main */ /**---------------------------------------------------------------------------- - Private Code + Private Code ----------------------------------------------------------------------------**/ -/*---------------------------------------------------------------------------*/ -LIST ReadTrainingSamples ( - FILE *File) - -/* -** Parameters: -** File open text file to read samples from -** Globals: none -** Operation: -** This routine reads training samples from a file and -** places them into a data structure which organizes the -** samples by FontName and CharName. It then returns this -** data structure. -** Return: none -** Exceptions: none -** History: Fri Aug 18 13:11:39 1989, DSJ, Created. -** Tue May 17 1998 simplifications to structure, illiminated -** font, and feature specification levels of structure. 
-*/ - -{ - char unichar[UNICHAR_LEN + 1]; - LABELEDLIST CharSample; - FEATURE_SET FeatureSamples; - LIST TrainingSamples = NIL; - CHAR_DESC CharDesc; - int Type, i; - - while (fscanf (File, "%s %s", CTFontName, unichar) == 2) { - if (!unicharset_training.contains_unichar(unichar)) { - unicharset_training.unichar_insert(unichar); - if (unicharset_training.size() > MAX_NUM_CLASSES) { - cprintf("Error: Size of unicharset of mftraining is " - "greater than MAX_NUM_CLASSES\n"); - exit(1); - } - } - CharSample = FindList (TrainingSamples, unichar); - if (CharSample == NULL) { - CharSample = NewLabeledList (unichar); - TrainingSamples = push (TrainingSamples, CharSample); - } - CharDesc = ReadCharDescription (File); - Type = ShortNameToFeatureType(PROGRAM_FEATURE_TYPE); - FeatureSamples = CharDesc->FeatureSets[Type]; - for (int feature = 0; feature < FeatureSamples->NumFeatures; ++feature) { - FEATURE f = FeatureSamples->Features[feature]; - for (int dim =0; dim < f->Type->NumParams; ++dim) - f->Params[dim] += dim == MFDirection ? - UniformRandomNumber(-MINSD_ANGLE, MINSD_ANGLE) : - UniformRandomNumber(-MINSD, MINSD); - } - CharSample->List = push (CharSample->List, FeatureSamples); - CharSample->SampleCount++; - for (i = 0; i < CharDesc->NumFeatureSets; i++) - if (Type != i) - FreeFeatureSet(CharDesc->FeatureSets[i]); - free (CharDesc); - } - return (TrainingSamples); - -} /* ReadTrainingSamples */ - - -/*----------------------------------------------------------------------------*/ -void WriteClusteredTrainingSamples ( - char *Directory, - LIST ProtoList, - CLUSTERER *Clusterer, - LABELEDLIST CharSample) - -/* -** Parameters: -** Directory directory to place sample files into -** Operation: -** This routine writes the specified samples into files which -** are organized according to the font name and character name -** of the samples. -** Return: none -** Exceptions: none -** History: Fri Aug 18 16:17:06 1989, DSJ, Created. 
-*/ - -{ - FILE *File; - char Filename[MAXNAMESIZE]; - - strcpy (Filename, ""); - if (Directory != NULL) - { - strcat (Filename, Directory); - strcat (Filename, "/"); - } - strcat (Filename, CTFontName); - strcat (Filename, "/"); - strcat (Filename, CharSample->Label); - strcat (Filename, "."); - strcat (Filename, PROGRAM_FEATURE_TYPE); - strcat (Filename, ".p"); - printf ("\nWriting %s ...", Filename); - File = Efopen (Filename, "w"); - WriteProtoList(File, Clusterer->SampleSize, Clusterer->ParamDesc, - ProtoList, ShowSignificantProtos, ShowInsignificantProtos); - fclose (File); - -} /* WriteClusteredTrainingSamples */ -/*---------------------------------------------------------------------------*/ -void WriteMergedTrainingSamples( - char *Directory, - LIST ClassList) - -{ - FILE *File; - char Filename[MAXNAMESIZE]; - MERGE_CLASS MergeClass; - - iterate (ClassList) - { - MergeClass = (MERGE_CLASS) first_node (ClassList); - strcpy (Filename, ""); - if (Directory != NULL) - { - strcat (Filename, Directory); - strcat (Filename, "/"); - } - strcat (Filename, "Merged/"); - strcat (Filename, MergeClass->Label); - strcat (Filename, PROTO_SUFFIX); - printf ("\nWriting Merged %s ...", Filename); - File = Efopen (Filename, "w"); - WriteOldProtoFile (File, MergeClass->Class); - fclose (File); - - strcpy (Filename, ""); - if (Directory != NULL) - { - strcat (Filename, Directory); - strcat (Filename, "/"); - } - strcat (Filename, "Merged/"); - strcat (Filename, MergeClass->Label); - strcat (Filename, CONFIG_SUFFIX); - printf ("\nWriting Merged %s ...", Filename); - File = Efopen (Filename, "w"); - WriteOldConfigFile (File, MergeClass->Class); - fclose (File); - } - -} // WriteMergedTrainingSamples /*--------------------------------------------------------------------------*/ void WriteMicrofeat( - char *Directory, - LIST ClassList) + char *Directory, + LIST ClassList) { - FILE *File; - char Filename[MAXNAMESIZE]; - MERGE_CLASS MergeClass; - - strcpy (Filename, ""); - if 
(Directory != NULL) - { - strcat (Filename, Directory); - strcat (Filename, "/"); - } - strcat (Filename, "Microfeat"); - File = Efopen (Filename, "w"); - printf ("\nWriting Merged %s ...", Filename); - iterate(ClassList) - { - MergeClass = (MERGE_CLASS) first_node (ClassList); - WriteProtos(File, MergeClass); - WriteConfigs(File, MergeClass->Class); - } - fclose (File); + FILE *File; + char Filename[MAXNAMESIZE]; + MERGE_CLASS MergeClass; + + strcpy (Filename, ""); + if (Directory != NULL) + { + strcat (Filename, Directory); + strcat (Filename, "/"); + } + strcat (Filename, "Microfeat"); + File = Efopen (Filename, "w"); + printf ("\nWriting Merged %s ...", Filename); + iterate(ClassList) + { + MergeClass = (MERGE_CLASS) first_node (ClassList); + WriteProtos(File, MergeClass); + WriteConfigs(File, MergeClass->Class); + } + fclose (File); } // WriteMicrofeat /*---------------------------------------------------------------------------*/ void WriteProtos( - FILE* File, - MERGE_CLASS MergeClass) + FILE* File, + MERGE_CLASS MergeClass) { - float Values[3]; - int i; - PROTO Proto; - - fprintf(File, "%s\n", MergeClass->Label); - fprintf(File, "%d\n", MergeClass->Class->NumProtos); - for(i=0; i < MergeClass->Class->NumProtos; i++) - { - Proto = ProtoIn(MergeClass->Class,i); - fprintf(File, "\t%8.4f %8.4f %8.4f %8.4f ", Proto->X, Proto->Y, - Proto->Length, Proto->Angle); - Values[0] = Proto->X; - Values[1] = Proto->Y; - Values[2] = Proto->Angle; - Normalize(Values); - fprintf(File, "%8.4f %8.4f %8.4f\n", Values[0], Values[1], Values[2]); - } + float Values[3]; + int i; + PROTO Proto; + + fprintf(File, "%s\n", MergeClass->Label); + fprintf(File, "%d\n", MergeClass->Class->NumProtos); + for(i=0; i < MergeClass->Class->NumProtos; i++) + { + Proto = ProtoIn(MergeClass->Class,i); + fprintf(File, "\t%8.4f %8.4f %8.4f %8.4f ", Proto->X, Proto->Y, + Proto->Length, Proto->Angle); + Values[0] = Proto->X; + Values[1] = Proto->Y; + Values[2] = Proto->Angle; + Normalize(Values); + 
fprintf(File, "%8.4f %8.4f %8.4f\n", Values[0], Values[1], Values[2]); + } } // WriteProtos /*----------------------------------------------------------------------------*/ void WriteConfigs( - FILE* File, - CLASS_TYPE Class) + FILE* File, + CLASS_TYPE Class) { - BIT_VECTOR Config; - int i, j, WordsPerConfig; - - WordsPerConfig = WordsInVectorOfSize(Class->NumProtos); - fprintf(File, "%d %d\n", Class->NumConfigs,WordsPerConfig); - for(i=0; i < Class->NumConfigs; i++) - { - Config = Class->Configurations[i]; - for(j=0; j < WordsPerConfig; j++) - fprintf(File, "%08x ", Config[j]); - fprintf(File, "\n"); - } - fprintf(File, "\n"); + BIT_VECTOR Config; + int i, j, WordsPerConfig; + + WordsPerConfig = WordsInVectorOfSize(Class->NumProtos); + fprintf(File, "%d %d\n", Class->NumConfigs,WordsPerConfig); + for(i=0; i < Class->NumConfigs; i++) + { + Config = Class->Configurations[i]; + for(j=0; j < WordsPerConfig; j++) + fprintf(File, "%08x ", Config[j]); + fprintf(File, "\n"); + } + fprintf(File, "\n"); } // WriteConfigs /*--------------------------------------------------------------------------*/ -void WritePFFMTable(INT_TEMPLATES Templates, const char* filename) { +void WritePFFMTable(INT_TEMPLATES Templates, const UNICHARSET& unicharset, + const char* filename) { FILE* fp = Efopen(filename, "wb"); /* then write out each class */ for (int i = 0; i < Templates->NumClasses; i++) { @@ -622,7 +471,7 @@ void WritePFFMTable(INT_TEMPLATES Templates, const char* filename) { // Todo: Test with min instead of max // int MaxLength = LengthForConfigId(Class, 0); int MaxLength = 0; - const char *unichar = unicharset_training.id_to_unichar(i); + const char *unichar = unicharset.id_to_unichar(i); if (strcmp(unichar, " ") == 0) { unichar = "NULL"; } else if (Class->NumConfigs == 0) { diff --git a/training/name2char.cpp b/training/name2char.cpp deleted file mode 100644 index 1bc447fd2f..0000000000 --- a/training/name2char.cpp +++ /dev/null @@ -1,164 +0,0 @@ 
-/****************************************************************************** -** Filename: name2char.c -** Purpose: Routines to convert between classes and class names. -** Author: Dan Johnson -** History: Fri Feb 23 08:03:09 1990, DSJ, Created. -** - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. -******************************************************************************/ -/*---------------------------------------------------------------------------- - Include Files and Type Defines ------------------------------------------------------------------------------*/ -#include "name2char.h" -#include "matchdefs.h" -#include "danerror.h" -#include - -#define ILLEGALCHARNAME 6001 - -/*---------------------------------------------------------------------------- - Global Data Definitions and Declarations ------------------------------------------------------------------------------*/ -/** character ID (ascii code) to character name mapping */ -static const char *NameList[]={ - "!bang", - "\"doubleq", - "#hash", - "$dollar", - "%percent", - "&and", - "'quote", - "(lround", - ")rround", - "*asterisk", - "+plus", - ",comma", - "-minus", - ".dot", - "/slash", - ":colon", - ";semic", - "greater", - "?question", - "@at", - "[lsquare", - "\\backsl", - "]rsquare", - "^uparr", - "_unders", - "`grave", - "{lbrace", - "|bar", - "}rbrace", - "~tilde", - "AcA", - "BcB", - "CcC", - "DcD", - "EcE", - "FcF", - "GcG", - "HcH", - "IcI", - "JcJ", - "KcK", - "LcL", - "McM", - 
"NcN", - "OcO", - "PcP", - "QcQ", - "RcR", - "ScS", - "TcT", - "UcU", - "VcV", - "WcW", - "XcX", - "YcY", - "ZcZ", - NULL - }; - - -/*---------------------------------------------------------------------------- - Public Code ------------------------------------------------------------------------------*/ -/*---------------------------------------------------------------------------*/ -/** - * This routine converts the specified character name to - * an ascii character. - * - * @param CharName character name to convert to a character - * - * Globals: - * - NameList lookup table for name to char mapping - * - * @return Ascii character that corresponds to the character name. - * @note Exceptions: ILLEGALCHARNAME - * @note History: Sat Aug 26 12:26:54 1989, DSJ, Created. - */ -CLASS_ID NameToChar (char CharName[]) -{ - int i; - - // look for name in table and return character if found - for ( i = 0; NameList[i] != NULL; i++ ) - if ( strcmp (CharName, &NameList[i][1]) == 0) - return (NameList[i][0]); - if ( strlen (CharName) == 1 ) - return (CharName[0]); //name is not in table but is a single character - else //illegal character - { - DoError (ILLEGALCHARNAME, "Illegal character name"); - return 0; - } -} /* NameToChar */ - -/*---------------------------------------------------------------------------*/ -void CharToName ( - CLASS_ID Char, - char CharName[]) - -/* -** Parameters: -** Char character to map to a character name -** CharName string to copy character name into -** Globals: -** NameList lookup table for char to name mapping -** Operation: -** This routine converts the specified ascii character to a -** character name. This is convenient for representing -** characters which might have special meaning to operating -** system shells or other programs (e.g. "*?&><" etc.). -** Return: none -** Exceptions: none -** History: Sat Aug 26 12:51:02 1989, DSJ, Created. 
-*/ - -{ - int i; - - /* look for character in table and return a copy of its name if found */ - for ( i = 0; NameList[i] != NULL; i++ ) - if ( Char == NameList[i][0] ) - { - strcpy ( CharName, &NameList[i][1] ); - return; - } - - /* if the character is not in the table, then use it as the name */ - CharName[0] = Char; - CharName[1] = 0; - -} /* CharToName */ diff --git a/training/name2char.h b/training/name2char.h deleted file mode 100644 index 8cb1f3cde2..0000000000 --- a/training/name2char.h +++ /dev/null @@ -1,38 +0,0 @@ -/****************************************************************************** -** Filename: name2char.h -** Purpose: Routines to convert between classes and class names. -** Author: Dan Johnson -** History: Fri Feb 23 08:10:40 1990, DSJ, Created. -** - ** (c) Copyright Hewlett-Packard Company, 1988. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. 
-******************************************************************************/ -#ifndef __NAME2CHAR__ -#define __NAME2CHAR__ - -/**---------------------------------------------------------------------------- - Include Files and Type Defines -----------------------------------------------------------------------------**/ -#include "matchdefs.h" - - -/**---------------------------------------------------------------------------- - Public Function Prototypes -----------------------------------------------------------------------------**/ -CLASS_ID NameToChar ( - char CharName[]); - -void CharToName ( - CLASS_ID Char, - char CharName[]); - - -#endif diff --git a/ccutil/tessopt.cpp b/training/tessopt.cpp similarity index 100% rename from ccutil/tessopt.cpp rename to training/tessopt.cpp diff --git a/ccutil/tessopt.h b/training/tessopt.h similarity index 100% rename from ccutil/tessopt.h rename to training/tessopt.h diff --git a/training/unicharset_extractor.cpp b/training/unicharset_extractor.cpp index 6cc055db41..cd250d5216 100644 --- a/training/unicharset_extractor.cpp +++ b/training/unicharset_extractor.cpp @@ -82,12 +82,12 @@ void set_properties(UNICHARSET *unicharset, const char* const c_string) { if (iswlower(wc)) { unicharset->set_islower(id, 1); unicharset->set_other_case(id, wc_to_unichar_id(*unicharset, - towupper(wc))); + towupper(wc))); } if (iswupper(wc)) { unicharset->set_isupper(id, 1); unicharset->set_other_case(id, wc_to_unichar_id(*unicharset, - towlower(wc))); + towlower(wc))); } } if (iswdigit(wc)) @@ -142,7 +142,9 @@ int main(int argc, char** argv) { int x_min, y_min, x_max, y_max; char c_string[kBoxReadBufSize]; - while (read_next_box(box_file, c_string, &x_min, &y_min, &x_max, &y_max)) { + int line_number = 0; + while (read_next_box(&line_number, box_file, c_string, + &x_min, &y_min, &x_max, &y_max)) { unicharset.unichar_insert(c_string); set_properties(&unicharset, c_string); } diff --git a/training/wordlist2dawg.cpp 
b/training/wordlist2dawg.cpp index ffcc4c8e44..bf8691964c 100644 --- a/training/wordlist2dawg.cpp +++ b/training/wordlist2dawg.cpp @@ -24,34 +24,46 @@ #include "classify.h" #include "dawg.h" +#include "dict.h" #include "emalloc.h" #include "freelist.h" +#include "helpers.h" +#include "serialis.h" #include "trie.h" #include "unicharset.h" static const int kMaxNumEdges = 10000000; int main(int argc, char** argv) { - if (!(argc == 4 || (argc == 5 && strcmp(argv[1], "-t") == 0))) { - printf("Usage: %s [-t] word_list_file dawg_file unicharset_file\n", argv[0]); + int min_word_length; + int max_word_length; + if (!(argc == 4 || (argc == 5 && strcmp(argv[1], "-t") == 0) || + (argc == 7 && strcmp(argv[1], "-l") == 0 && + sscanf(argv[2], "%d", &min_word_length) == 1 && + sscanf(argv[3], "%d", &max_word_length) == 1))) { + printf("Usage: %s [-t | -l min_len max_len] word_list_file" + " dawg_file unicharset_file", argv[0]); return 1; } - tesseract::Classify classify; + tesseract::Classify *classify = new tesseract::Classify(); int argv_index = 0; if (argc == 5) ++argv_index; + if (argc == 7) argv_index += 3; const char* wordlist_filename = argv[++argv_index]; const char* dawg_filename = argv[++argv_index]; const char* unicharset_file = argv[++argv_index]; - if (!classify.getDict().getUnicharset().load_from_file(unicharset_file)) { + if (!classify->getDict().getUnicharset().load_from_file(unicharset_file)) { tprintf("Failed to load unicharset from '%s'\n", unicharset_file); + delete classify; return 1; } - const UNICHARSET &unicharset = classify.getDict().getUnicharset(); + const UNICHARSET &unicharset = classify->getDict().getUnicharset(); if (argc == 4) { tesseract::Trie trie( // the first 3 arguments are not used in this case tesseract::DAWG_TYPE_WORD, "", SYSTEM_DAWG_PERM, - kMaxNumEdges, unicharset.size()); + kMaxNumEdges, unicharset.size(), + classify->getDict().dawg_debug_level); printf("Reading word list from '%s'\n", wordlist_filename); if 
(!trie.read_word_list(wordlist_filename, unicharset)) { printf("Failed to read word list from '%s'\n", wordlist_filename); @@ -62,14 +74,74 @@ int main(int argc, char** argv) { printf("Writing squished DAWG to '%s'\n", dawg_filename); dawg->write_squished_dawg(dawg_filename); delete dawg; - } else { + } else if (argc == 5) { printf("Loading dawg DAWG from '%s'\n", dawg_filename); tesseract::SquishedDawg words( dawg_filename, // these 3 arguments are not used in this case - tesseract::DAWG_TYPE_WORD, "", SYSTEM_DAWG_PERM); + tesseract::DAWG_TYPE_WORD, "", SYSTEM_DAWG_PERM, + classify->getDict().dawg_debug_level); printf("Checking word list from '%s'\n", wordlist_filename); words.check_for_words(wordlist_filename, unicharset, true); + } else if (argc == 7) { + // Place words of different lengths in separate Dawgs. + char str[CHARS_PER_LINE]; + FILE *word_file = fopen(wordlist_filename, "r"); + if (word_file == NULL) { + printf("Failed to open wordlist file %s\n", wordlist_filename); + exit(1); + } + FILE *dawg_file = fopen(dawg_filename, "wb"); + if (dawg_file == NULL) { + printf("Failed to open dawg output file %s\n", dawg_filename); + exit(1); + } + printf("Reading word list from '%s'\n", wordlist_filename); + GenericVector trie_vec; + int i; + for (i = min_word_length; i <= max_word_length; ++i) { + trie_vec.push_back(new tesseract::Trie( + // the first 3 arguments are not used in this case + tesseract::DAWG_TYPE_WORD, "", SYSTEM_DAWG_PERM, + kMaxNumEdges, unicharset.size(), + classify->getDict().dawg_debug_level)); + } + while (fgets(str, CHARS_PER_LINE, word_file) != NULL) { + chomp_string(str); // remove newline + WERD_CHOICE word(str, unicharset); + if (word.length() >= min_word_length && + word.length() <= max_word_length && + !word.contains_unichar_id(INVALID_UNICHAR_ID)) { + tesseract::Trie *curr_trie = trie_vec[word.length()-min_word_length]; + if (!curr_trie->word_in_dawg(word)) { + curr_trie->add_word_to_dawg(word); + if 
(classify->getDict().dawg_debug_level > 1) { + tprintf("Added word %s of length %d\n", str, word.length()); + } + if (!curr_trie->word_in_dawg(word)) { + tprintf("Error: word '%s' not in DAWG after adding it\n", str); + exit(1); + } + } + } + } + fclose(word_file); + printf("Writing fixed length dawgs to '%s'\n", dawg_filename); + GenericVector dawg_vec; + for (i = 0; i <= max_word_length; ++i) { + dawg_vec.push_back(i < min_word_length ? NULL : + trie_vec[i-min_word_length]->trie_to_dawg()); + } + tesseract::Dict::WriteFixedLengthDawgs( + dawg_vec, max_word_length - min_word_length + 1, + classify->getDict().dawg_debug_level, dawg_file); + fclose(dawg_file); + dawg_vec.delete_data_pointers(); + trie_vec.delete_data_pointers(); + } else { // should never get here + printf("Invalid command-line options\n"); + exit(1); } + delete classify; return 0; } diff --git a/viewer/Makefile.am b/viewer/Makefile.am index 58d672a640..ae6c560946 100644 --- a/viewer/Makefile.am +++ b/viewer/Makefile.am @@ -1,8 +1,6 @@ SUBDIRS = AM_CPPFLAGS = -EXTRA_DIST = viewer.vcproj - include_HEADERS = \ scrollview.h svmnode.h svutil.h svpaint.cpp diff --git a/viewer/Makefile.in b/viewer/Makefile.in index efa6474247..af43904083 100644 --- a/viewer/Makefile.in +++ b/viewer/Makefile.in @@ -244,7 +244,6 @@ libdir = @libdir@ libexecdir = @libexecdir@ localedir = @localedir@ localstatedir = @localstatedir@ -lt_ECHO = @lt_ECHO@ mandir = @mandir@ mkdir_p = @mkdir_p@ oldincludedir = @oldincludedir@ @@ -262,7 +261,6 @@ top_builddir = @top_builddir@ top_srcdir = @top_srcdir@ SUBDIRS = AM_CPPFLAGS = -EXTRA_DIST = viewer.vcproj include_HEADERS = \ scrollview.h svmnode.h svutil.h svpaint.cpp diff --git a/viewer/scrollview.cpp b/viewer/scrollview.cpp index 9e66317087..b7545135f4 100644 --- a/viewer/scrollview.cpp +++ b/viewer/scrollview.cpp @@ -124,7 +124,18 @@ void* ScrollView::MessageReceiver(void* a) { cur->parameter[strlen(p)] = '\0'; } cur->type = static_cast(ev_type); - cur->y = 
cur->window->TranslateYCoordinate(cur->y); + // Correct selection coordinates so x,y is the min pt and size is +ve. + if (cur->x_size > 0) + cur->x -= cur->x_size; + else + cur->x_size = -cur->x_size; + if (cur->y_size > 0) + cur->y -= cur->y_size; + else + cur->y_size = -cur->y_size; + // Returned y will be the bottom-left if y is reversed. + if (cur->window->y_axis_is_reversed_) + cur->y = cur->window->TranslateYCoordinate(cur->y + cur->y_size); cur->counter = counter_event_id; // Increase by 2 since we will also create an SVET_ANY event from cur, // which will have a counter_id of cur + 1 (and thus gets processed diff --git a/vs2008/StdAfx.cpp b/vs2008/StdAfx.cpp deleted file mode 100644 index 01d3ad9820..0000000000 --- a/vs2008/StdAfx.cpp +++ /dev/null @@ -1,8 +0,0 @@ -// stdafx.cpp : source file that includes just the standard includes -// tessdll.pch will be the pre-compiled header -// stdafx.obj will contain the pre-compiled type information - -#include "stdafx.h" - -// TODO: reference any additional headers you need in STDAFX.H -// and not in this file diff --git a/vs2008/StdAfx.h b/vs2008/StdAfx.h deleted file mode 100644 index ffcb5f28cb..0000000000 --- a/vs2008/StdAfx.h +++ /dev/null @@ -1,24 +0,0 @@ -// stdafx.h : include file for standard system include files, -// or project specific include files that are used frequently, but -// are changed infrequently -// - -#if !defined(AFX_STDAFX_H__C038B272_58EA_4509_8B59_C963F64AF1D8__INCLUDED_) -#define AFX_STDAFX_H__C038B272_58EA_4509_8B59_C963F64AF1D8__INCLUDED_ - -#if _MSC_VER > 1000 -#pragma once -#endif // _MSC_VER > 1000 - - -// Insert your headers here -#define WIN32_LEAN_AND_MEAN // Exclude rarely-used stuff from Windows headers - -#include - -// TODO: reference additional headers your program requires here - -//{{AFX_INSERT_LOCATION}} -// Microsoft Visual C++ will insert additional declarations immediately before the previous line. 
- -#endif // !defined(AFX_STDAFX_H__C038B272_58EA_4509_8B59_C963F64AF1D8__INCLUDED_) diff --git a/vs2008/ccmain.vcproj b/vs2008/ccmain.vcproj index 63daae562a..c1a6b3cefe 100644 --- a/vs2008/ccmain.vcproj +++ b/vs2008/ccmain.vcproj @@ -41,12 +41,12 @@ @@ -106,11 +106,11 @@ AdditionalOptions="/wd4244 /wd4018 /wd4305 /wd4800 /wd4996" Optimization="2" EnableIntrinsicFunctions="true" - AdditionalIncludeDirectories="..\ccutil;..\cutil;..\viewer;..\image;..\ccstruct;..\dict;..\classify;..\wordrec;..\textord;..\vs2008\include;..\vs2008\include\leptonica" - PreprocessorDefinitions="WIN32;NDEBUG;_LIB;__MSW32__;_CRT_SECURE_NO_WARNINGS;HAVE_LIBLEPT;LEPTONLIB_IMPORTS" + AdditionalIncludeDirectories="..\ccutil;..\cutil;..\viewer;..\image;..\ccstruct;..\dict;..\classify;..\wordrec;..\textord;..\vs2008\include;..\vs2008\include\leptonica;..\neural_networks\runtime;..\cube;..\vs2008\port" + PreprocessorDefinitions="USE_STD_NAMESPACE;WIN32;NDEBUG;_LIB;__MSW32__;_CRT_SECURE_NO_WARNINGS;HAVE_LIBLEPT;" RuntimeLibrary="2" EnableFunctionLevelLinking="true" - UsePrecompiledHeader="2" + UsePrecompiledHeader="0" WarningLevel="3" DebugInformationFormat="3" /> @@ -170,8 +170,8 @@ AdditionalOptions="/wd4244 /wd4018 /wd4305 /wd4800 /wd4996" Optimization="2" EnableIntrinsicFunctions="true" - AdditionalIncludeDirectories="..\ccutil;..\cutil;..\viewer;..\image;..\ccstruct;..\dict;..\classify;..\wordrec;..\textord;..\vs2008\include;..\vs2008\include\leptonica" - PreprocessorDefinitions="WIN32;NDEBUG;_LIB;__MSW32__;_CRT_SECURE_NO_WARNINGS;HAVE_LIBLEPT;LEPTONLIB_IMPORTS" + AdditionalIncludeDirectories="..\ccutil;..\cutil;..\viewer;..\image;..\ccstruct;..\dict;..\classify;..\wordrec;..\textord;..\vs2008\include;..\vs2008\include\leptonica;..\cube;..\neural_networks\runtime;..\vs2008\port" + PreprocessorDefinitions="USE_STD_NAMESPACE;WIN32;NDEBUG;_LIB;__MSW32__;_CRT_SECURE_NO_WARNINGS;HAVE_LIBLEPT;LEPTONLIB_IMPORTS" RuntimeLibrary="2" EnableFunctionLevelLinking="true" UsePrecompiledHeader="0" @@ 
-243,34 +243,6 @@ /> - - - - - - - - - - - @@ -299,62 +271,6 @@ /> - - - - - - - - - - - - - - - - - - - - - - @@ -384,7 +300,7 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - @@ -624,7 +439,6 @@ > @@ -633,7 +447,6 @@ > @@ -694,6 +507,10 @@ /> + + @@ -723,32 +540,8 @@ - - - - - - - - - + + @@ -893,6 +690,10 @@ /> + + @@ -1033,37 +834,6 @@ /> - - - - - - - - - - - @@ -1098,40 +868,20 @@ Filter="h;hpp;hxx;hm;inl;inc;xsd" UniqueIdentifier="{93995380-89BD-4b04-88EB-625FBE52EBFB}" > - - - - - - - - - - - - @@ -1159,11 +905,7 @@ > - - - - diff --git a/vs2008/ccstruct.vcproj b/vs2008/ccstruct.vcproj index 110c02b96b..c9f0775035 100644 --- a/vs2008/ccstruct.vcproj +++ b/vs2008/ccstruct.vcproj @@ -41,7 +41,7 @@ + + @@ -379,38 +383,11 @@ > - - - - - - - - - - - - - - - - - - + + @@ -1095,6 +1049,14 @@ /> + + + + @@ -1241,6 +1203,10 @@ RelativePath="..\ccstruct\blread.h" > + + @@ -1257,6 +1223,10 @@ RelativePath="..\ccstruct\detlinefit.h" > + + @@ -1273,16 +1243,12 @@ RelativePath="..\ccstruct\ipoints.h" > - - + + @@ -1361,6 +1331,14 @@ RelativePath="..\ccstruct\rejctmap.h" > + + + + diff --git a/vs2008/ccutil.vcproj b/vs2008/ccutil.vcproj index 26ad2ec99e..4c429af378 100644 --- a/vs2008/ccutil.vcproj +++ b/vs2008/ccutil.vcproj @@ -41,7 +41,7 @@ - - - - - - - - - - - - - - - - - - - - - - @@ -832,37 +770,6 @@ RelativePath="..\ccutil\unicharset.cpp" > - - - - - - - - - - - - - @@ -921,6 +824,10 @@ RelativePath="..\ccutil\genericvector.h" > + + @@ -985,6 +892,10 @@ RelativePath="..\ccutil\ocrshell.h" > + + @@ -993,6 +904,10 @@ RelativePath="..\ccutil\qrsequence.h" > + + @@ -1045,10 +960,6 @@ RelativePath="..\ccutil\unicity_table.h" > - - - - @@ -275,10 +271,6 @@ RelativePath="..\classify\fxdefs.cpp" > - - @@ -353,10 +345,6 @@ RelativePath="..\classify\adaptive.h" > - - @@ -413,14 +401,6 @@ RelativePath="..\classify\fxdefs.h" > - - - - diff --git a/vs2008/cntraining.vcproj b/vs2008/cntraining.vcproj index 1b82ee00d1..7a3b9ca669 100644 --- 
a/vs2008/cntraining.vcproj +++ b/vs2008/cntraining.vcproj @@ -336,37 +336,6 @@ RelativePath="..\training\commontraining.cpp" > - - - - - - - - - - - diff --git a/vs2008/cube.vcproj b/vs2008/cube.vcproj new file mode 100644 index 0000000000..392e3945bf --- /dev/null +++ b/vs2008/cube.vcproj @@ -0,0 +1,518 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/vs2008/cutil.vcproj b/vs2008/cutil.vcproj index cf585f9fdc..e872a7a14a 100644 --- a/vs2008/cutil.vcproj +++ b/vs2008/cutil.vcproj @@ -243,10 +243,6 @@ RelativePath="..\cutil\freelist.cpp" > - - @@ -309,14 +305,6 @@ RelativePath="..\cutil\freelist.h" > - - - - diff --git a/vs2008/dict.vcproj b/vs2008/dict.vcproj index 18ca00bd2a..e5714e514c 100644 --- a/vs2008/dict.vcproj +++ b/vs2008/dict.vcproj @@ -215,18 +215,10 @@ Filter="cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx" UniqueIdentifier="{4FC737F1-C7A5-4376-A066-2A32D752A2FF}" > - - - - @@ -269,22 +261,6 @@ Filter="h;hpp;hxx;hm;inl;inc;xsd" UniqueIdentifier="{93995380-89BD-4b04-88EB-625FBE52EBFB}" > - - - - - - - - @@ -297,10 +273,6 @@ RelativePath="..\dict\matchdefs.h" > - - diff --git a/vs2008/dlltest/Makefile.am b/vs2008/dlltest/Makefile.am deleted file mode 100644 index 504e6aebbe..0000000000 --- a/vs2008/dlltest/Makefile.am +++ /dev/null @@ -1,2 +0,0 @@ -include_HEADERS = dlltest.cpp dlltest.dsp dlltest.vcproj - diff --git a/vs2008/dlltest/Makefile.in b/vs2008/dlltest/Makefile.in deleted file mode 100644 index 36290c0bdf..0000000000 --- a/vs2008/dlltest/Makefile.in +++ /dev/null @@ -1,463 +0,0 @@ -# Makefile.in generated by automake 1.11.1 from Makefile.am. 
-# @configure_input@ - -# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, -# 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation, -# Inc. -# This Makefile.in is free software; the Free Software Foundation -# gives unlimited permission to copy and/or distribute it, -# with or without modifications, as long as this notice is preserved. - -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY, to the extent permitted by law; without -# even the implied warranty of MERCHANTABILITY or FITNESS FOR A -# PARTICULAR PURPOSE. - -@SET_MAKE@ - -VPATH = @srcdir@ -pkgdatadir = $(datadir)/@PACKAGE@ -pkgincludedir = $(includedir)/@PACKAGE@ -pkglibdir = $(libdir)/@PACKAGE@ -pkglibexecdir = $(libexecdir)/@PACKAGE@ -am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd -install_sh_DATA = $(install_sh) -c -m 644 -install_sh_PROGRAM = $(install_sh) -c -install_sh_SCRIPT = $(install_sh) -c -INSTALL_HEADER = $(INSTALL_DATA) -transform = $(program_transform_name) -NORMAL_INSTALL = : -PRE_INSTALL = : -POST_INSTALL = : -NORMAL_UNINSTALL = : -PRE_UNINSTALL = : -POST_UNINSTALL = : -build_triplet = @build@ -host_triplet = @host@ -subdir = vs2008/dlltest -DIST_COMMON = $(include_HEADERS) $(srcdir)/Makefile.am \ - $(srcdir)/Makefile.in -ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 -am__aclocal_m4_deps = $(top_srcdir)/acinclude.m4 \ - $(top_srcdir)/configure.ac -am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ - $(ACLOCAL_M4) -mkinstalldirs = $(SHELL) $(top_srcdir)/config/mkinstalldirs -CONFIG_HEADER = $(top_builddir)/config_auto.h -CONFIG_CLEAN_FILES = -CONFIG_CLEAN_VPATH_FILES = -SOURCES = -DIST_SOURCES = -am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`; -am__vpath_adj = case $$p in \ - $(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \ - *) f=$$p;; \ - esac; -am__strip_dir = f=`echo $$p | sed -e 's|^.*/||'`; -am__install_max = 40 -am__nobase_strip_setup = \ - 
srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*|]/\\\\&/g'` -am__nobase_strip = \ - for p in $$list; do echo "$$p"; done | sed -e "s|$$srcdirstrip/||" -am__nobase_list = $(am__nobase_strip_setup); \ - for p in $$list; do echo "$$p $$p"; done | \ - sed "s| $$srcdirstrip/| |;"' / .*\//!s/ .*/ ./; s,\( .*\)/[^/]*$$,\1,' | \ - $(AWK) 'BEGIN { files["."] = "" } { files[$$2] = files[$$2] " " $$1; \ - if (++n[$$2] == $(am__install_max)) \ - { print $$2, files[$$2]; n[$$2] = 0; files[$$2] = "" } } \ - END { for (dir in files) print dir, files[dir] }' -am__base_list = \ - sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \ - sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g' -am__installdirs = "$(DESTDIR)$(includedir)" -HEADERS = $(include_HEADERS) -ETAGS = etags -CTAGS = ctags -DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) -ACLOCAL = @ACLOCAL@ -AMTAR = @AMTAR@ -AR = @AR@ -AUTOCONF = @AUTOCONF@ -AUTOHEADER = @AUTOHEADER@ -AUTOMAKE = @AUTOMAKE@ -AWK = @AWK@ -CC = @CC@ -CCDEPMODE = @CCDEPMODE@ -CFLAGS = @CFLAGS@ -CPP = @CPP@ -CPPFLAGS = @CPPFLAGS@ -CXX = @CXX@ -CXXCPP = @CXXCPP@ -CXXDEPMODE = @CXXDEPMODE@ -CXXFLAGS = @CXXFLAGS@ -CYGPATH_W = @CYGPATH_W@ -DEFS = @DEFS@ -DEPDIR = @DEPDIR@ -DSYMUTIL = @DSYMUTIL@ -DUMPBIN = @DUMPBIN@ -ECHO_C = @ECHO_C@ -ECHO_N = @ECHO_N@ -ECHO_T = @ECHO_T@ -EGREP = @EGREP@ -EXEEXT = @EXEEXT@ -FGREP = @FGREP@ -GENERIC_API_VERSION = @GENERIC_API_VERSION@ -GENERIC_LIBRARY_NAME = @GENERIC_LIBRARY_NAME@ -GENERIC_LIBRARY_VERSION = @GENERIC_LIBRARY_VERSION@ -GENERIC_MAJOR_VERSION = @GENERIC_MAJOR_VERSION@ -GENERIC_RELEASE = @GENERIC_RELEASE@ -GENERIC_VERSION = @GENERIC_VERSION@ -GREP = @GREP@ -INSTALL = @INSTALL@ -INSTALL_DATA = @INSTALL_DATA@ -INSTALL_PROGRAM = @INSTALL_PROGRAM@ -INSTALL_SCRIPT = @INSTALL_SCRIPT@ -INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ -LD = @LD@ -LDFLAGS = @LDFLAGS@ -LIBOBJS = @LIBOBJS@ -LIBS = @LIBS@ -LIBTIFF_CFLAGS = @LIBTIFF_CFLAGS@ -LIBTIFF_LIBS = @LIBTIFF_LIBS@ -LIBTOOL = @LIBTOOL@ -LIPO = @LIPO@ -LN_S = @LN_S@ 
-LTLIBOBJS = @LTLIBOBJS@ -MAINT = @MAINT@ -MAKEINFO = @MAKEINFO@ -MKDIR_P = @MKDIR_P@ -NM = @NM@ -NMEDIT = @NMEDIT@ -OBJDUMP = @OBJDUMP@ -OBJEXT = @OBJEXT@ -OTOOL = @OTOOL@ -OTOOL64 = @OTOOL64@ -PACKAGE = @PACKAGE@ -PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ -PACKAGE_DATE = @PACKAGE_DATE@ -PACKAGE_NAME = @PACKAGE_NAME@ -PACKAGE_STRING = @PACKAGE_STRING@ -PACKAGE_TARNAME = @PACKAGE_TARNAME@ -PACKAGE_URL = @PACKAGE_URL@ -PACKAGE_VERSION = @PACKAGE_VERSION@ -PACKAGE_YEAR = @PACKAGE_YEAR@ -PATH_SEPARATOR = @PATH_SEPARATOR@ -RANLIB = @RANLIB@ -SED = @SED@ -SET_MAKE = @SET_MAKE@ -SHELL = @SHELL@ -STRIP = @STRIP@ -VERSION = @VERSION@ -abs_builddir = @abs_builddir@ -abs_srcdir = @abs_srcdir@ -abs_top_builddir = @abs_top_builddir@ -abs_top_srcdir = @abs_top_srcdir@ -ac_ct_CC = @ac_ct_CC@ -ac_ct_CXX = @ac_ct_CXX@ -ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ -am__include = @am__include@ -am__leading_dot = @am__leading_dot@ -am__quote = @am__quote@ -am__tar = @am__tar@ -am__untar = @am__untar@ -bindir = @bindir@ -build = @build@ -build_alias = @build_alias@ -build_cpu = @build_cpu@ -build_os = @build_os@ -build_vendor = @build_vendor@ -builddir = @builddir@ -datadir = @datadir@ -datarootdir = @datarootdir@ -docdir = @docdir@ -dvidir = @dvidir@ -exec_prefix = @exec_prefix@ -host = @host@ -host_alias = @host_alias@ -host_cpu = @host_cpu@ -host_os = @host_os@ -host_vendor = @host_vendor@ -htmldir = @htmldir@ -includedir = @includedir@ -infodir = @infodir@ -install_sh = @install_sh@ -libdir = @libdir@ -libexecdir = @libexecdir@ -localedir = @localedir@ -localstatedir = @localstatedir@ -lt_ECHO = @lt_ECHO@ -mandir = @mandir@ -mkdir_p = @mkdir_p@ -oldincludedir = @oldincludedir@ -pdfdir = @pdfdir@ -prefix = @prefix@ -program_transform_name = @program_transform_name@ -psdir = @psdir@ -sbindir = @sbindir@ -sharedstatedir = @sharedstatedir@ -srcdir = @srcdir@ -sysconfdir = @sysconfdir@ -target_alias = @target_alias@ -top_build_prefix = @top_build_prefix@ -top_builddir = @top_builddir@ -top_srcdir = 
@top_srcdir@ -include_HEADERS = dlltest.cpp dlltest.dsp dlltest.vcproj -all: all-am - -.SUFFIXES: -$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(am__configure_deps) - @for dep in $?; do \ - case '$(am__configure_deps)' in \ - *$$dep*) \ - ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ - && { if test -f $@; then exit 0; else break; fi; }; \ - exit 1;; \ - esac; \ - done; \ - echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu vs2008/dlltest/Makefile'; \ - $(am__cd) $(top_srcdir) && \ - $(AUTOMAKE) --gnu vs2008/dlltest/Makefile -.PRECIOUS: Makefile -Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status - @case '$?' in \ - *config.status*) \ - cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ - *) \ - echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \ - cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \ - esac; - -$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) - cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh - -$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps) - cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh -$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps) - cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh -$(am__aclocal_m4_deps): - -mostlyclean-libtool: - -rm -f *.lo - -clean-libtool: - -rm -rf .libs _libs -install-includeHEADERS: $(include_HEADERS) - @$(NORMAL_INSTALL) - test -z "$(includedir)" || $(MKDIR_P) "$(DESTDIR)$(includedir)" - @list='$(include_HEADERS)'; test -n "$(includedir)" || list=; \ - for p in $$list; do \ - if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \ - echo "$$d$$p"; \ - done | $(am__base_list) | \ - while read files; do \ - echo " $(INSTALL_HEADER) $$files '$(DESTDIR)$(includedir)'"; \ - $(INSTALL_HEADER) $$files "$(DESTDIR)$(includedir)" || exit $$?; \ - done - -uninstall-includeHEADERS: - 
@$(NORMAL_UNINSTALL) - @list='$(include_HEADERS)'; test -n "$(includedir)" || list=; \ - files=`for p in $$list; do echo $$p; done | sed -e 's|^.*/||'`; \ - test -n "$$files" || exit 0; \ - echo " ( cd '$(DESTDIR)$(includedir)' && rm -f" $$files ")"; \ - cd "$(DESTDIR)$(includedir)" && rm -f $$files - -ID: $(HEADERS) $(SOURCES) $(LISP) $(TAGS_FILES) - list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \ - unique=`for i in $$list; do \ - if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ - done | \ - $(AWK) '{ files[$$0] = 1; nonempty = 1; } \ - END { if (nonempty) { for (i in files) print i; }; }'`; \ - mkid -fID $$unique -tags: TAGS - -TAGS: $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \ - $(TAGS_FILES) $(LISP) - set x; \ - here=`pwd`; \ - list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \ - unique=`for i in $$list; do \ - if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ - done | \ - $(AWK) '{ files[$$0] = 1; nonempty = 1; } \ - END { if (nonempty) { for (i in files) print i; }; }'`; \ - shift; \ - if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \ - test -n "$$unique" || unique=$$empty_fix; \ - if test $$# -gt 0; then \ - $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ - "$$@" $$unique; \ - else \ - $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ - $$unique; \ - fi; \ - fi -ctags: CTAGS -CTAGS: $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \ - $(TAGS_FILES) $(LISP) - list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \ - unique=`for i in $$list; do \ - if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ - done | \ - $(AWK) '{ files[$$0] = 1; nonempty = 1; } \ - END { if (nonempty) { for (i in files) print i; }; }'`; \ - test -z "$(CTAGS_ARGS)$$unique" \ - || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \ - $$unique - -GTAGS: - here=`$(am__cd) $(top_builddir) && pwd` \ - && $(am__cd) $(top_srcdir) \ - && gtags -i $(GTAGS_ARGS) "$$here" - -distclean-tags: - -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH 
tags - -distdir: $(DISTFILES) - @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ - topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ - list='$(DISTFILES)'; \ - dist_files=`for file in $$list; do echo $$file; done | \ - sed -e "s|^$$srcdirstrip/||;t" \ - -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ - case $$dist_files in \ - */*) $(MKDIR_P) `echo "$$dist_files" | \ - sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ - sort -u` ;; \ - esac; \ - for file in $$dist_files; do \ - if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ - if test -d $$d/$$file; then \ - dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ - if test -d "$(distdir)/$$file"; then \ - find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ - fi; \ - if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ - cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ - find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ - fi; \ - cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ - else \ - test -f "$(distdir)/$$file" \ - || cp -p $$d/$$file "$(distdir)/$$file" \ - || exit 1; \ - fi; \ - done -check-am: all-am -check: check-am -all-am: Makefile $(HEADERS) -installdirs: - for dir in "$(DESTDIR)$(includedir)"; do \ - test -z "$$dir" || $(MKDIR_P) "$$dir"; \ - done -install: install-am -install-exec: install-exec-am -install-data: install-data-am -uninstall: uninstall-am - -install-am: all-am - @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am - -installcheck: installcheck-am -install-strip: - $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ - install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ - `test -z '$(STRIP)' || \ - echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install -mostlyclean-generic: - -clean-generic: - -distclean-generic: - -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) - -test . 
= "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) - -maintainer-clean-generic: - @echo "This command is intended for maintainers to use" - @echo "it deletes files that may require special tools to rebuild." -clean: clean-am - -clean-am: clean-generic clean-libtool mostlyclean-am - -distclean: distclean-am - -rm -f Makefile -distclean-am: clean-am distclean-generic distclean-tags - -dvi: dvi-am - -dvi-am: - -html: html-am - -html-am: - -info: info-am - -info-am: - -install-data-am: install-includeHEADERS - -install-dvi: install-dvi-am - -install-dvi-am: - -install-exec-am: - -install-html: install-html-am - -install-html-am: - -install-info: install-info-am - -install-info-am: - -install-man: - -install-pdf: install-pdf-am - -install-pdf-am: - -install-ps: install-ps-am - -install-ps-am: - -installcheck-am: - -maintainer-clean: maintainer-clean-am - -rm -f Makefile -maintainer-clean-am: distclean-am maintainer-clean-generic - -mostlyclean: mostlyclean-am - -mostlyclean-am: mostlyclean-generic mostlyclean-libtool - -pdf: pdf-am - -pdf-am: - -ps: ps-am - -ps-am: - -uninstall-am: uninstall-includeHEADERS - -.MAKE: install-am install-strip - -.PHONY: CTAGS GTAGS all all-am check check-am clean clean-generic \ - clean-libtool ctags distclean distclean-generic \ - distclean-libtool distclean-tags distdir dvi dvi-am html \ - html-am info info-am install install-am install-data \ - install-data-am install-dvi install-dvi-am install-exec \ - install-exec-am install-html install-html-am \ - install-includeHEADERS install-info install-info-am \ - install-man install-pdf install-pdf-am install-ps \ - install-ps-am install-strip installcheck installcheck-am \ - installdirs maintainer-clean maintainer-clean-generic \ - mostlyclean mostlyclean-generic mostlyclean-libtool pdf pdf-am \ - ps ps-am tags uninstall uninstall-am uninstall-includeHEADERS - - -# Tell versions [3.59,3.63) of GNU make to not export all variables. 
-# Otherwise a system limit (for SysV at least) may be exceeded. -.NOEXPORT: diff --git a/vs2008/dlltest/dlltest.cpp b/vs2008/dlltest/dlltest.cpp deleted file mode 100644 index d6c4178627..0000000000 --- a/vs2008/dlltest/dlltest.cpp +++ /dev/null @@ -1,163 +0,0 @@ -/********************************************************************** - * File: dlltest.cpp - * Description: Main program to test the tessdll interface. - * Author: Ray Smith - * Created: Wed May 16 15:17:46 PDT 2007 - * - * (C) Copyright 2007, Google Inc. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. 
- * - **********************************************************************/ -#define _UNICODE - -#include "stdafx.h" -#include "imgs.h" -#include "unichar.h" -#include "tessdll.h" - -/********************************************************************** - * main() - * - **********************************************************************/ - - - - -static wchar_t *make_unicode_string(const char *utf8) -{ - int size = 0, out_index = 0; - wchar_t *out; - - /* first calculate the size of the target string */ - int used = 0; - int utf8_len = strlen(utf8); - while (used < utf8_len) { - int step = UNICHAR::utf8_step(utf8 + used); - if (step == 0) - break; - used += step; - ++size; - } - - out = (wchar_t *) malloc((size + 1) * sizeof(wchar_t)); - if (out == NULL) - return NULL; - - /* now convert to Unicode */ - used = 0; - while (used < utf8_len) { - int step = UNICHAR::utf8_step(utf8 + used); - if (step == 0) - break; - UNICHAR ch(utf8 + used, step); - out[out_index++] = ch.first_uni(); - used += step; - } - out[out_index] = 0; - - return out; -} - - -int main(int argc, char **argv) { - if (argc < 3 || argc > 4) { - fprintf(stderr, "Usage:%s imagename outputname [lang]\n", argv[0]); - exit(1); - } - - - IMAGE image; - if (image.read_header(argv[1]) < 0) { - fprintf(stderr, "Can't open %s\n", argv[1]); - exit(1); - } - if (image.read(image.get_ysize ()) < 0) { - fprintf(stderr, "Can't read %s\n", argv[1]); - exit(1); - } - - - - TessDllAPI api(argc > 3 ? 
argv[3] : "eng"); - - - - api.BeginPageUpright(image.get_xsize(), image.get_ysize(), image.get_buffer(), - image.get_bpp()); - - ETEXT_DESC* output = api.Recognize_all_Words(); - - - - - FILE* fp = fopen(argv[2],"w"); - if (fp == NULL) { - fprintf(stderr, "Can't create %s\n", argv[2]); - exit(1); - } - - // It should be noted that the format for char_code for version 2.0 and beyond is UTF8 - // which means that ASCII characters will come out as one structure but other characters - // will be returned in two or more instances of this structure with a single byte of the - // UTF8 code in each, but each will have the same bounding box. - // Programs which want to handle languagues with different characters sets will need to - // handle extended characters appropriately, but *all* code needs to be prepared to - // receive UTF8 coded characters for characters such as bullet and fancy quotes. - int j; - for (int i = 0; i < output->count; i = j) { - const EANYCODE_CHAR* ch = &output->text[i]; - unsigned char unistr[UNICHAR_LEN]; - - for (int b = 0; b < ch->blanks; ++b) - fprintf(fp, "\n"); - - for (j = i; j < output->count; j++) - { - const EANYCODE_CHAR* unich = &output->text[j]; - - if (ch->left != unich->left || ch->right != unich->right || - ch->top != unich->top || ch->bottom != unich->bottom) - break; - unistr[j - i] = static_cast(unich->char_code); - } - unistr[j - i] = '\0'; - - wchar_t *utf16ch=make_unicode_string(reinterpret_cast(unistr)); -#ifndef _UNICODE - // If we aren't in _UNICODE mode, print string only if ascii. - if (ch->char_code <= 0x7f) { - fprintf(fp, "%s", unistr); -#else - // %S is a microsoft-special. Attempts to translate the Unicode - // back to the current locale to print in 8 bit - fprintf(fp, "%S", utf16ch); -#endif - // Print the hex codes of the utf8 code. - for (int x = 0; unistr[x] != '\0'; ++x) - fprintf(fp, "[%x]", unistr[x]); - fprintf(fp, "->"); - // Print the hex codes of the unicode. 
- for (int y = 0; utf16ch[y] != 0; ++y) - fprintf(fp, "[%x]", utf16ch[y]); - // Print the coords. - fprintf(fp, "(%d,%d)->(%d,%d)\n", - ch->left, ch->bottom, ch->right, ch->top); - if (ch->formatting & 64) - fprintf(fp, "\n\n"); - if (ch->formatting & 128) - fprintf(fp, "\n\n"); - free(utf16ch); - } - - fclose(fp); - - return 0; -} diff --git a/vs2008/dlltest/dlltest.dsp b/vs2008/dlltest/dlltest.dsp deleted file mode 100644 index 5e80e94523..0000000000 --- a/vs2008/dlltest/dlltest.dsp +++ /dev/null @@ -1,186 +0,0 @@ -# Microsoft Developer Studio Project File - Name="dlltest" - Package Owner=<4> -# Microsoft Developer Studio Generated Build File, Format Version 6.00 -# ** DO NOT EDIT ** - -# TARGTYPE "Win32 (x86) Console Application" 0x0103 - -CFG=dlltest - Win32 Debug -!MESSAGE This is not a valid makefile. To build this project using NMAKE, -!MESSAGE use the Export Makefile command and run -!MESSAGE -!MESSAGE NMAKE /f "dlltest.mak". -!MESSAGE -!MESSAGE You can specify a configuration when running NMAKE -!MESSAGE by defining the macro CFG on the command line. For example: -!MESSAGE -!MESSAGE NMAKE /f "dlltest.mak" CFG="dlltest - Win32 Debug" -!MESSAGE -!MESSAGE Possible choices for configuration are: -!MESSAGE -!MESSAGE "dlltest - Win32 Release" (based on "Win32 (x86) Console Application") -!MESSAGE "dlltest - Win32 Debug" (based on "Win32 (x86) Console Application") -!MESSAGE - -# Begin Project -# PROP AllowPerConfigDependencies 0 -# PROP Scc_ProjName "" -# PROP Scc_LocalPath "" -CPP=cl.exe -RSC=rc.exe - -!IF "$(CFG)" == "dlltest - Win32 Release" - -# PROP BASE Use_MFC 0 -# PROP BASE Use_Debug_Libraries 0 -# PROP BASE Output_Dir "Release" -# PROP BASE Intermediate_Dir "Release" -# PROP BASE Target_Dir "" -# PROP Use_MFC 2 -# PROP Use_Debug_Libraries 0 -# PROP Output_Dir ".." 
-# PROP Intermediate_Dir "Release" -# PROP Ignore_Export_Lib 0 -# PROP Target_Dir "" -# ADD BASE CPP /nologo /W3 /GX /O2 /D "WIN32" /D "NDEBUG" /D "_CONSOLE" /D "_MBCS" /YX /FD /c -# ADD CPP /nologo /MD /W3 /GX /O2 /I ".." /I "../image" /I "../ccutil" /I "../ccmain" /D "NDEBUG" /D "WIN32" /D "_CONSOLE" /D "_MBCS" /D "__MSW32__" /D "_AFXDLL" /YX /FD /c -# ADD BASE RSC /l 0x409 /d "NDEBUG" -# ADD RSC /l 0x409 /d "NDEBUG" /d "_AFXDLL" -BSC32=bscmake.exe -# ADD BASE BSC32 /nologo -# ADD BSC32 /nologo -LINK32=link.exe -# ADD BASE LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:console /machine:I386 -# ADD LINK32 ../tessdll.lib /nologo /subsystem:console /machine:I386 - -!ELSEIF "$(CFG)" == "dlltest - Win32 Debug" - -# PROP BASE Use_MFC 0 -# PROP BASE Use_Debug_Libraries 1 -# PROP BASE Output_Dir "Debug" -# PROP BASE Intermediate_Dir "Debug" -# PROP BASE Target_Dir "" -# PROP Use_MFC 2 -# PROP Use_Debug_Libraries 1 -# PROP Output_Dir "../bin.dbg" -# PROP Intermediate_Dir "Debug" -# PROP Ignore_Export_Lib 0 -# PROP Target_Dir "" -# ADD BASE CPP /nologo /W3 /Gm /GX /ZI /Od /D "WIN32" /D "_DEBUG" /D "_CONSOLE" /D "_MBCS" /YX /FD /GZ /c -# ADD CPP /nologo /MDd /W3 /Gm /GX /ZI /Od /I ".." 
/I "../image" /I "../ccutil" /I "../ccmain" /D "_DEBUG" /D "WIN32" /D "_CONSOLE" /D "_MBCS" /D "__MSW32__" /D "_AFXDLL" /YX /FD /GZ /c -# ADD BASE RSC /l 0x409 /d "_DEBUG" -# ADD RSC /l 0x409 /d "_DEBUG" /d "_AFXDLL" -BSC32=bscmake.exe -# ADD BASE BSC32 /nologo -# ADD BSC32 /nologo -LINK32=link.exe -# ADD BASE LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:console /debug /machine:I386 /pdbtype:sept -# ADD LINK32 ../bin.dbg/tessdll.lib /nologo /subsystem:console /debug /machine:I386 /pdbtype:sept - -!ENDIF - -# Begin Target - -# Name "dlltest - Win32 Release" -# Name "dlltest - Win32 Debug" -# Begin Group "Source Files" - -# PROP Default_Filter "cpp;c;cxx;rc;def;r;odl;idl;hpj;bat" -# Begin Group "image" - -# PROP Default_Filter "" -# Begin Source File - -SOURCE=..\image\bitstrm.cpp -# End Source File -# Begin Source File - -SOURCE=..\image\imgbmp.cpp -# End Source File -# Begin Source File - -SOURCE=..\image\imgio.cpp -# End Source File -# Begin Source File - -SOURCE=..\image\imgs.cpp -# End Source File -# Begin Source File - -SOURCE=..\image\imgtiff.cpp -# End Source File -# End Group -# Begin Group "ccutil" - -# PROP Default_Filter "" -# Begin Source File - -SOURCE=..\ccutil\clst.cpp -# End Source File -# Begin Source File - -SOURCE=..\ccutil\debugwin.cpp -# End Source File -# Begin Source File - -SOURCE=..\ccutil\errcode.cpp -# End Source File -# Begin Source File - -SOURCE=..\ccutil\globaloc.cpp -# End Source File -# Begin Source File - -SOURCE=..\ccutil\hashfn.cpp -# End Source File -# Begin Source File - -SOURCE=..\ccutil\memblk.cpp -# End Source File -# Begin Source File - -SOURCE=..\ccutil\memry.cpp -# End Source File -# Begin Source File - -SOURCE=..\ccutil\serialis.cpp -# End Source File -# Begin 
Source File - -SOURCE=..\ccutil\strngs.cpp -# End Source File -# Begin Source File - -SOURCE=..\ccutil\tprintf.cpp -# End Source File -# Begin Source File - -SOURCE=..\ccutil\unichar.cpp -# End Source File -# Begin Source File - -SOURCE=..\ccutil\varable.cpp -# End Source File -# End Group -# Begin Source File - -SOURCE=.\dlltest.cpp -# End Source File -# Begin Source File - -SOURCE=..\StdAfx.cpp -# End Source File -# End Group -# Begin Group "Header Files" - -# PROP Default_Filter "h;hpp;hxx;hm;inl" -# Begin Source File - -SOURCE=.\dlltest.h -# End Source File -# End Group -# Begin Group "Resource Files" - -# PROP Default_Filter "ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe" -# End Group -# End Target -# End Project diff --git a/vs2008/dlltest/dlltest.vcproj b/vs2008/dlltest/dlltest.vcproj deleted file mode 100644 index 34d50ffce5..0000000000 --- a/vs2008/dlltest/dlltest.vcproj +++ /dev/null @@ -1,639 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/vs2008/image.vcproj b/vs2008/image.vcproj index d887d793df..d252df2d4c 100644 --- a/vs2008/image.vcproj +++ b/vs2008/image.vcproj @@ -46,7 +46,7 @@ MinimalRebuild="true" BasicRuntimeChecks="3" RuntimeLibrary="3" - UsePrecompiledHeader="2" + UsePrecompiledHeader="0" WarningLevel="3" DebugInformationFormat="4" /> @@ -110,7 +110,7 @@ PreprocessorDefinitions="WIN32;NDEBUG;_LIB;__MSW32__;_CRT_SECURE_NO_WARNINGS;HAVE_LIBLEPT;LEPTONLIB_IMPORTS" RuntimeLibrary="2" EnableFunctionLevelLinking="true" - UsePrecompiledHeader="2" + UsePrecompiledHeader="0" WarningLevel="3" DebugInformationFormat="3" /> @@ -383,37 +383,6 @@ /> - - - - - 
- - - - - - diff --git a/vs2008/include/Makefile.am b/vs2008/include/Makefile.am index 674a740e89..8983a57bb7 100644 --- a/vs2008/include/Makefile.am +++ b/vs2008/include/Makefile.am @@ -1,2 +1,2 @@ SUBDIRS = leptonica -EXTRA_DIST = stdint.h +EXTRA_DIST = stdint.h diff --git a/vs2008/include/Makefile.in b/vs2008/include/Makefile.in index f16c12ff15..59bb225f7c 100644 --- a/vs2008/include/Makefile.in +++ b/vs2008/include/Makefile.in @@ -198,7 +198,6 @@ libdir = @libdir@ libexecdir = @libexecdir@ localedir = @localedir@ localstatedir = @localstatedir@ -lt_ECHO = @lt_ECHO@ mandir = @mandir@ mkdir_p = @mkdir_p@ oldincludedir = @oldincludedir@ @@ -215,7 +214,7 @@ top_build_prefix = @top_build_prefix@ top_builddir = @top_builddir@ top_srcdir = @top_srcdir@ SUBDIRS = leptonica -EXTRA_DIST = stdint.h +EXTRA_DIST = stdint.h all: all-recursive diff --git a/vs2008/include/leptonica/Makefile.in b/vs2008/include/leptonica/Makefile.in index 8386773dbb..93e1d1b3fe 100644 --- a/vs2008/include/leptonica/Makefile.in +++ b/vs2008/include/leptonica/Makefile.in @@ -158,7 +158,6 @@ libdir = @libdir@ libexecdir = @libexecdir@ localedir = @localedir@ localstatedir = @localstatedir@ -lt_ECHO = @lt_ECHO@ mandir = @mandir@ mkdir_p = @mkdir_p@ oldincludedir = @oldincludedir@ diff --git a/vs2008/libtesseract_tessopt.vcproj b/vs2008/libtesseract_tessopt.vcproj new file mode 100644 index 0000000000..a58aac0163 --- /dev/null +++ b/vs2008/libtesseract_tessopt.vcproj @@ -0,0 +1,242 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/vs2008/libtesseract_training.vcproj b/vs2008/libtesseract_training.vcproj new file mode 100644 index 0000000000..69681dd55b --- /dev/null +++ b/vs2008/libtesseract_training.vcproj @@ -0,0 +1,242 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff 
--git a/vs2008/mftraining.vcproj b/vs2008/mftraining.vcproj index 3473f591fc..9828168393 100644 --- a/vs2008/mftraining.vcproj +++ b/vs2008/mftraining.vcproj @@ -364,37 +364,6 @@ /> - - - - - - - - - - - diff --git a/vs2008/neural_networks.vcproj b/vs2008/neural_networks.vcproj new file mode 100644 index 0000000000..7a9a4beede --- /dev/null +++ b/vs2008/neural_networks.vcproj @@ -0,0 +1,263 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/vs2008/tessdll.cpp b/vs2008/tessdll.cpp deleted file mode 100644 index 422e1e3470..0000000000 --- a/vs2008/tessdll.cpp +++ /dev/null @@ -1,299 +0,0 @@ -/////////////////////////////////////////////////////////////////////// -// File: tessdll.cpp -// Description: Windows dll interface for Tesseract. -// Author: Glen Wernersbach -// Created: Tue May 15 10:30:01 PDT 2007 -// -// (C) Copyright 2007, Jetsoftdev. -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -/////////////////////////////////////////////////////////////////////// -// tessdll.cpp : Defines the entry point for the DLL application. 
-// - -#include "stdafx.h" - - -#include "mfcpch.h" -#include "applybox.h" -#include "control.h" -#include "tessvars.h" -#include "tessedit.h" -#include "pageres.h" -#include "imgs.h" -#include "varabled.h" -#include "tprintf.h" -#include "tesseractmain.h" -#include "stderr.h" -#include "notdll.h" - - - -#include "tessdll.h" - -#ifdef __MSW32__ -extern ESHM_INFO shm; /*info on shm */ -#define TICKS 1000 -#endif - -extern BOOL_VARIABLE tessedit_write_ratings; -extern BOOL_VARIABLE tessedit_write_output; -extern BOOL_VARIABLE tessedit_write_raw_output; -extern BOOL_VARIABLE tessedit_write_txt_map; -extern BOOL_VARIABLE tessedit_resegment_from_boxes; - -//unsigned char membuf[sizeof (ETEXT_DESC)+32000L*sizeof (EANYCODE_CHAR)]; - -BOOL APIENTRY DllMain( HANDLE hModule, - DWORD ul_reason_for_call, - LPVOID lpReserved - ) -{ - switch (ul_reason_for_call) - { - case DLL_PROCESS_ATTACH: - case DLL_THREAD_ATTACH: - case DLL_THREAD_DETACH: - case DLL_PROCESS_DETACH: - break; - } - return TRUE; -} - - -TessDllAPI::TessDllAPI(const char* lang) { - uinT16 oldlang; //language - - ocr_open_shm ("0", "0", "0", "0", "0", "0", &oldlang); - - Init(NULL, lang); - - - if (interactive_mode) { - debug_window_on.set_value (TRUE); - } - - tessedit_write_ratings.set_value (TRUE); - tessedit_write_output.set_value(FALSE); - tessedit_write_raw_output.set_value(FALSE); - tessedit_write_txt_map.set_value(FALSE); - - - membuf = (unsigned char *) new BYTE[(sizeof (ETEXT_DESC)+32000L*sizeof (EANYCODE_CHAR))]; -} - -TessDllAPI::~TessDllAPI() { - EndPage(); - - End(); - - if (membuf) delete []membuf; -} - -int TessDllAPI::BeginPage(uinT32 xsize,uinT32 ysize,unsigned char *buf) -{ - return BeginPage(xsize,ysize,buf,1); -} - -int TessDllAPI::BeginPage(uinT32 xsize,uinT32 ysize,unsigned char *buf,uinT8 bpp) { - SetImage(buf, xsize, ysize, bpp/8, (xsize*bpp + 7)/8); - return ProcessPagePass1(); -} -int TessDllAPI::BeginPageUpright(uinT32 xsize,uinT32 ysize,unsigned char *buf) -{ - - return 
BeginPageUpright(xsize,ysize,buf,1); -} - -int TessDllAPI::BeginPageUpright(uinT32 xsize,uinT32 ysize,unsigned char *buf, uinT8 bpp) { - SetPageSegMode(tesseract::PSM_SINGLE_BLOCK); - SetImage(buf, xsize, ysize, bpp/8, (xsize*bpp + 7)/8); - return ProcessPagePass1(); -} - -int TessDllAPI::ProcessPagePass1() { - if (page_res_ != NULL) - ClearResults(); - if (FindLines() != 0) - return -1; - - page_res_ = new PAGE_RES(block_list_); - - if (page_res_) - tesseract_->recog_all_words(page_res_, global_monitor,0L,1); - - return (page_res_!=0); -} - -void TessDllAPI::EndPage() { - ClearResults(); -} - - -ETEXT_DESC * TessDllAPI::Recognize_all_Words(void) { - return Recognize_a_Block(0,0,0,0); -} - -ETEXT_DESC * TessDllAPI::Recognize_a_Block(uinT32 left,uinT32 right, - uinT32 top,uinT32 bottom) { - TBOX target_word_box(ICOORD (left+400, top+400), ICOORD (right+400, bottom+400)); - int i; - - - shm.shm_size=sizeof (ETEXT_DESC)+32000L*sizeof (EANYCODE_CHAR); - - memset(membuf,0,shm.shm_size); - shm.shm_mem=membuf; - - - global_monitor = ocr_setup_monitor(); - - tesseract_->recog_all_words(page_res_, global_monitor, - (right==0 ? 0L : &target_word_box), 2); -/* Disabled for now - for (i=0;icount;i++) { - global_monitor->text[i].left-=400; - global_monitor->text[i].right-=400; - global_monitor->text[i].bottom-=400; - global_monitor->text[i].top-=400; - } -*/ - - global_monitor = 0L; - - return ((ETEXT_DESC *) membuf); -} - -TessDllAPI *recognize=0L; -char* current_lang = 0L; - -extern "C" -{ - -TESSDLL_API void __cdecl TessDllRelease() { - if (recognize) delete recognize; - recognize=0L; -} - -TESSDLL_API void * __cdecl TessDllInit(const char* lang) { - if (recognize) TessDllRelease(); - - recognize = new TessDllAPI(lang); - if (current_lang != 0L) - free(current_lang); - current_lang = lang ? 
strdup(lang) : 0L; - - return (void*) recognize; -} - -TESSDLL_API int __cdecl TessDllBeginPageBPP(uinT32 xsize,uinT32 ysize, - unsigned char *buf, uinT8 bpp) { - return TessDllBeginPageLangBPP(xsize, ysize, buf, NULL,bpp); -} - -TESSDLL_API int __cdecl TessDllBeginPageLangBPP(uinT32 xsize, uinT32 ysize, - unsigned char *buf, - const char* lang, uinT8 bpp) { - if (recognize==0L || (lang != 0L) != (current_lang != 0L) || - lang != 0L && strcmp(lang, current_lang)) - TessDllInit(lang); - - return recognize->BeginPage(xsize, ysize, buf,bpp); -} - -TESSDLL_API int __cdecl TessDllBeginPageUprightBPP(uinT32 xsize, uinT32 ysize, - unsigned char *buf, - const char* lang, uinT8 bpp) { - if (recognize==0L || (lang != 0L) != (current_lang != 0L) || - lang != 0L && strcmp(lang, current_lang)) - TessDllInit(lang); - - return recognize->BeginPageUpright(xsize, ysize, buf,bpp); -} - -TESSDLL_API int __cdecl TessDllBeginPage(uinT32 xsize,uinT32 ysize, - unsigned char *buf) { - return TessDllBeginPageLangBPP(xsize, ysize, buf, NULL,1); -} - -TESSDLL_API int __cdecl TessDllBeginPageLang(uinT32 xsize, uinT32 ysize, - unsigned char *buf, - const char* lang) { - if (recognize==0L || (lang != 0L) != (current_lang != 0L) || - lang != 0L && strcmp(lang, current_lang)) - TessDllInit(lang); - - return recognize->BeginPage(xsize, ysize, buf,1); -} - -TESSDLL_API int __cdecl TessDllBeginPageUpright(uinT32 xsize, uinT32 ysize, - unsigned char *buf, - const char* lang) { - if (recognize==0L || (lang != 0L) != (current_lang != 0L) || - lang != 0L && strcmp(lang, current_lang)) - TessDllInit(lang); - - return recognize->BeginPageUpright(xsize, ysize, buf); -} - -TESSDLL_API void __cdecl TessDllEndPage(void) { - recognize->EndPage(); -} - -TESSDLL_API ETEXT_DESC * __cdecl TessDllRecognize_a_Block(uinT32 left, - uinT32 right, - uinT32 top, - uinT32 bottom) { - return recognize->Recognize_a_Block(left,right,top,bottom); -} - - -TESSDLL_API ETEXT_DESC * __cdecl TessDllRecognize_all_Words(void) { - 
return recognize->Recognize_all_Words(); -} - - - -//deprecated funtions -TESSDLL_API void __cdecl ReleaseRecognize() -{ - - if (recognize) delete recognize;recognize=0L; - -} - - - - -TESSDLL_API void * __cdecl InitRecognize() -{ -if (recognize) ReleaseRecognize(); - -recognize = new TessDllAPI(); - -return (void*) recognize; -} - -TESSDLL_API int __cdecl CreateRecognize(uinT32 xsize,uinT32 ysize,unsigned char *buf) -{ -InitRecognize(); - -return recognize->BeginPage(xsize,ysize,buf); - -} - -TESSDLL_API ETEXT_DESC * __cdecl reconize_a_word(uinT32 left,uinT32 right,uinT32 top,uinT32 bottom) -{ -return recognize->Recognize_a_Block(left,right,top,bottom); -} - - -} diff --git a/vs2008/tessdll.h b/vs2008/tessdll.h deleted file mode 100644 index 80d104818d..0000000000 --- a/vs2008/tessdll.h +++ /dev/null @@ -1,140 +0,0 @@ -/////////////////////////////////////////////////////////////////////// -// File: tessdll.h -// Description: Windows dll interface for Tesseract. -// Author: Glen Wernersbach -// Created: Tue May 15 10:30:01 PDT 2007 -// -// (C) Copyright 2007, Jetsoftdev. -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
-// -/////////////////////////////////////////////////////////////////////// - - -#ifndef __cplusplus -typedef BOOL bool; -#endif /* __cplusplus */ - -#include "ocrclass.h" - - -#ifdef __cplusplus - -#include "baseapi.h" - - -//This is an exposed C++ -class TESSDLL_API TessDllAPI : public tesseract::TessBaseAPI -{ - public: - //lang is the code of the language for which the data will be loaded. - //(Codes follow ISO 639-3.) If it is NULL, english (eng) will be loaded. - TessDllAPI(const char* lang = NULL) ; - ~TessDllAPI (); - - //xsize should be the width of line in bytes times 8 - //ysize is the height - //pass through a buffer of bytes for a 1 bit per pixel bitmap - //BeginPage assumes the first memory address is the bottom of the image - //BeginPageUpright assumes the first memory address is the top of the image - int BeginPage(uinT32 xsize,uinT32 ysize,unsigned char *buf); - int BeginPageUpright(uinT32 xsize,uinT32 ysize,unsigned char *buf); - - // This could probably be combined with about in a one function bpp=1 - int BeginPage(uinT32 xsize,uinT32 ysize,unsigned char *buf,uinT8 bpp); - int BeginPageUpright(uinT32 xsize,uinT32 ysize,unsigned char *buf, uinT8 bpp); - void EndPage(); - - //This allows you to extract one word or section from the bitmap or - //the whole page - //To extract the whole page just enter zeros for left, right, top, bottom - //Note: getting one word at time is not yet optimized for speed. 
- //limit of 32000 character can be returned - //see ocrclass.h for a decription of the ETEXT_DESC file - ETEXT_DESC *Recognize_a_Block(uinT32 left,uinT32 right, - uinT32 top,uinT32 bottom); - ETEXT_DESC *Recognize_all_Words(void); - - private: - int ProcessPagePass1(); - - unsigned char *membuf; -}; - -#endif - -#ifdef __cplusplus -extern "C" -{ -#endif - -#ifndef TESSDLL_API -#ifdef TESSDLL_EXPORTS -#define TESSDLL_API __declspec(dllexport) -#elif defined(TESSDLL_IMPORTS) -#define TESSDLL_API __declspec(dllimport) -#else -#define TESSDLL_API -#endif -#endif - - -//The functions below provide a c wrapper to a global recognize class object - -//xsize should be the width of line in bytes times 8 -//ysize is the height -//pass through a buffer of bytes for a 1 bit per pixel bitmap -//BeginPage assumes the first memory address is the bottom of the image (MS DIB format) -//BeginPageUpright assumes the first memory address is the top of the image (TIFF format) -//lang is the code of the language for which the data will be loaded. -//(Codes follow ISO 639-3.) If it is NULL, english (eng) will be loaded. 
-TESSDLL_API int __cdecl TessDllBeginPage(uinT32 xsize,uinT32 ysize, - unsigned char *buf); - -TESSDLL_API int __cdecl TessDllBeginPageLang(uinT32 xsize,uinT32 ysize, - unsigned char *buf, - const char* lang); -TESSDLL_API int __cdecl TessDllBeginPageUpright(uinT32 xsize,uinT32 ysize, - unsigned char *buf, - const char* lang); -//Added in version 2.0 to allow users to specify bytes per pixel to do -//1 for binary biptmap -//8 for gray -//24 bit for color RGB -TESSDLL_API int __cdecl TessDllBeginPageBPP(uinT32 xsize,uinT32 ysize, - unsigned char *buf,uinT8 bpp); - -TESSDLL_API int __cdecl TessDllBeginPageLangBPP(uinT32 xsize,uinT32 ysize, - unsigned char *buf, - const char* lang,uinT8 bpp); -TESSDLL_API int __cdecl TessDllBeginPageUprightBPP(uinT32 xsize,uinT32 ysize, - unsigned char *buf, - const char* lang,uinT8 bpp); - -TESSDLL_API void __cdecl TessDllEndPage(void); - -//This allows you to extract one word or section from the bitmap or -//the whole page -//To extract the whole page just enter zeros for left, right, top, bottom -//Note: getting one word at time is not yet optimized for speed. 
-//limit of 32000 character can be returned -//see ocrclass.h for a decription of the ETEXT_DESC file -TESSDLL_API ETEXT_DESC * __cdecl TessDllRecognize_a_Block(uinT32 left, - uinT32 right, - uinT32 top, - uinT32 bottom); -TESSDLL_API ETEXT_DESC * __cdecl TessDllRecognize_all_Words(); - -//This will release any memory associated with the recognize class object -TESSDLL_API void __cdecl TessDllRelease(); - -#ifdef __cplusplus -} -#endif diff --git a/vs2008/tessdll.vcproj b/vs2008/tessdll.vcproj deleted file mode 100644 index 760cd80d0c..0000000000 --- a/vs2008/tessdll.vcproj +++ /dev/null @@ -1,292 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/vs2008/tesseract.sln b/vs2008/tesseract.sln index 99366d1708..80a4bf9787 100644 --- a/vs2008/tesseract.sln +++ b/vs2008/tesseract.sln @@ -3,41 +3,25 @@ Microsoft Visual Studio Solution File, Format Version 10.00 # Visual C++ Express 2008 Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "cntraining", "cntraining.vcproj", "{1AC769A4-E98E-48B9-B87B-90D7837031FA}" ProjectSection(ProjectDependencies) = postProject + {8784A544-7616-4E56-8485-277A2F387ECA} = {8784A544-7616-4E56-8485-277A2F387ECA} {0BDEFC56-679F-436E-A089-81FCAEBA50F3} = {0BDEFC56-679F-436E-A089-81FCAEBA50F3} {2C106C68-62EE-43D2-B372-3BDFF6125AD1} = {2C106C68-62EE-43D2-B372-3BDFF6125AD1} {CE11FF6E-60D3-4E61-AFE2-F8F501100238} = {CE11FF6E-60D3-4E61-AFE2-F8F501100238} {DF2FA86F-A663-4805-AED7-2F81D9EAC796} = {DF2FA86F-A663-4805-AED7-2F81D9EAC796} + {454A6BAE-5F8A-461A-B8C3-5AAA531006F3} = {454A6BAE-5F8A-461A-B8C3-5AAA531006F3} {C7C7FBB5-7082-428C-8F81-8FBA7A37AC85} = {C7C7FBB5-7082-428C-8F81-8FBA7A37AC85} {1D62F8BB-31D7-4BBF-A4DA-619A29675D06} = {1D62F8BB-31D7-4BBF-A4DA-619A29675D06} {D67B17CC-4AE5-4356-91F6-00E265942A56} = {D67B17CC-4AE5-4356-91F6-00E265942A56} EndProjectSection EndProject 
-Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "dlltest", "dlltest\dlltest.vcproj", "{F8B1406B-1643-4402-B07A-8ADD9A52C01E}" - ProjectSection(ProjectDependencies) = postProject - {DF2FA86F-A663-4805-AED7-2F81D9EAC796} = {DF2FA86F-A663-4805-AED7-2F81D9EAC796} - {CB522274-3353-41AE-A700-F365FC79DEDD} = {CB522274-3353-41AE-A700-F365FC79DEDD} - EndProjectSection -EndProject Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "mftraining", "mftraining.vcproj", "{EC8A9E75-C66E-4320-9D91-97F53ED7021B}" ProjectSection(ProjectDependencies) = postProject + {8784A544-7616-4E56-8485-277A2F387ECA} = {8784A544-7616-4E56-8485-277A2F387ECA} {0BDEFC56-679F-436E-A089-81FCAEBA50F3} = {0BDEFC56-679F-436E-A089-81FCAEBA50F3} {2C106C68-62EE-43D2-B372-3BDFF6125AD1} = {2C106C68-62EE-43D2-B372-3BDFF6125AD1} {CE11FF6E-60D3-4E61-AFE2-F8F501100238} = {CE11FF6E-60D3-4E61-AFE2-F8F501100238} {DF2FA86F-A663-4805-AED7-2F81D9EAC796} = {DF2FA86F-A663-4805-AED7-2F81D9EAC796} - {C7C7FBB5-7082-428C-8F81-8FBA7A37AC85} = {C7C7FBB5-7082-428C-8F81-8FBA7A37AC85} - {1D62F8BB-31D7-4BBF-A4DA-619A29675D06} = {1D62F8BB-31D7-4BBF-A4DA-619A29675D06} - {D67B17CC-4AE5-4356-91F6-00E265942A56} = {D67B17CC-4AE5-4356-91F6-00E265942A56} - EndProjectSection -EndProject -Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "tessdll", "tessdll.vcproj", "{CB522274-3353-41AE-A700-F365FC79DEDD}" - ProjectSection(ProjectDependencies) = postProject - {EFE93017-B435-46EA-9199-1C83A869595A} = {EFE93017-B435-46EA-9199-1C83A869595A} - {C2342D39-FD84-4448-A050-7FF3B4A33F3D} = {C2342D39-FD84-4448-A050-7FF3B4A33F3D} - {0BDEFC56-679F-436E-A089-81FCAEBA50F3} = {0BDEFC56-679F-436E-A089-81FCAEBA50F3} - {2C106C68-62EE-43D2-B372-3BDFF6125AD1} = {2C106C68-62EE-43D2-B372-3BDFF6125AD1} - {CE11FF6E-60D3-4E61-AFE2-F8F501100238} = {CE11FF6E-60D3-4E61-AFE2-F8F501100238} - {DF2FA86F-A663-4805-AED7-2F81D9EAC796} = {DF2FA86F-A663-4805-AED7-2F81D9EAC796} - {D5C6C872-4561-452D-BCFA-EF9441F95309} = {D5C6C872-4561-452D-BCFA-EF9441F95309} + 
{454A6BAE-5F8A-461A-B8C3-5AAA531006F3} = {454A6BAE-5F8A-461A-B8C3-5AAA531006F3} {C7C7FBB5-7082-428C-8F81-8FBA7A37AC85} = {C7C7FBB5-7082-428C-8F81-8FBA7A37AC85} {1D62F8BB-31D7-4BBF-A4DA-619A29675D06} = {1D62F8BB-31D7-4BBF-A4DA-619A29675D06} {D67B17CC-4AE5-4356-91F6-00E265942A56} = {D67B17CC-4AE5-4356-91F6-00E265942A56} @@ -48,10 +32,12 @@ Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "tesseract", "tesseract.vcpr {EFE93017-B435-46EA-9199-1C83A869595A} = {EFE93017-B435-46EA-9199-1C83A869595A} {C2342D39-FD84-4448-A050-7FF3B4A33F3D} = {C2342D39-FD84-4448-A050-7FF3B4A33F3D} {0BDEFC56-679F-436E-A089-81FCAEBA50F3} = {0BDEFC56-679F-436E-A089-81FCAEBA50F3} + {CC778B5F-7B9B-4466-9E9A-B8222C6FF16F} = {CC778B5F-7B9B-4466-9E9A-B8222C6FF16F} {2C106C68-62EE-43D2-B372-3BDFF6125AD1} = {2C106C68-62EE-43D2-B372-3BDFF6125AD1} {CE11FF6E-60D3-4E61-AFE2-F8F501100238} = {CE11FF6E-60D3-4E61-AFE2-F8F501100238} {DF2FA86F-A663-4805-AED7-2F81D9EAC796} = {DF2FA86F-A663-4805-AED7-2F81D9EAC796} {D5C6C872-4561-452D-BCFA-EF9441F95309} = {D5C6C872-4561-452D-BCFA-EF9441F95309} + {2EDB497A-E9CF-40E6-A8F2-B001D895A8AE} = {2EDB497A-E9CF-40E6-A8F2-B001D895A8AE} {C7C7FBB5-7082-428C-8F81-8FBA7A37AC85} = {C7C7FBB5-7082-428C-8F81-8FBA7A37AC85} {1D62F8BB-31D7-4BBF-A4DA-619A29675D06} = {1D62F8BB-31D7-4BBF-A4DA-619A29675D06} {D67B17CC-4AE5-4356-91F6-00E265942A56} = {D67B17CC-4AE5-4356-91F6-00E265942A56} @@ -59,6 +45,7 @@ Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "tesseract", "tesseract.vcpr EndProject Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "unicharset_extractor", "unicharset_extractor.vcproj", "{06878883-8785-4C71-89B6-47CEDA3CD300}" ProjectSection(ProjectDependencies) = postProject + {8784A544-7616-4E56-8485-277A2F387ECA} = {8784A544-7616-4E56-8485-277A2F387ECA} {DF2FA86F-A663-4805-AED7-2F81D9EAC796} = {DF2FA86F-A663-4805-AED7-2F81D9EAC796} EndProjectSection EndProject @@ -98,6 +85,14 @@ Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "combine_tessdata", "combine 
{DF2FA86F-A663-4805-AED7-2F81D9EAC796} = {DF2FA86F-A663-4805-AED7-2F81D9EAC796} EndProjectSection EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "neural_networks", "neural_networks.vcproj", "{2EDB497A-E9CF-40E6-A8F2-B001D895A8AE}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "cube", "cube.vcproj", "{CC778B5F-7B9B-4466-9E9A-B8222C6FF16F}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "libtesseract_tessopt", "libtesseract_tessopt.vcproj", "{8784A544-7616-4E56-8485-277A2F387ECA}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "libtesseract_training", "libtesseract_training.vcproj", "{454A6BAE-5F8A-461A-B8C3-5AAA531006F3}" +EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution Debug|Win32 = Debug|Win32 @@ -111,22 +106,12 @@ Global {1AC769A4-E98E-48B9-B87B-90D7837031FA}.Release.dynamic|Win32.Build.0 = Release.dynamic|Win32 {1AC769A4-E98E-48B9-B87B-90D7837031FA}.Release.static|Win32.ActiveCfg = Release|Win32 {1AC769A4-E98E-48B9-B87B-90D7837031FA}.Release.static|Win32.Build.0 = Release|Win32 - {F8B1406B-1643-4402-B07A-8ADD9A52C01E}.Debug|Win32.ActiveCfg = Debug|Win32 - {F8B1406B-1643-4402-B07A-8ADD9A52C01E}.Debug|Win32.Build.0 = Debug|Win32 - {F8B1406B-1643-4402-B07A-8ADD9A52C01E}.Release.dynamic|Win32.ActiveCfg = Release|Win32 - {F8B1406B-1643-4402-B07A-8ADD9A52C01E}.Release.static|Win32.ActiveCfg = Release|Win32 - {F8B1406B-1643-4402-B07A-8ADD9A52C01E}.Release.static|Win32.Build.0 = Release|Win32 {EC8A9E75-C66E-4320-9D91-97F53ED7021B}.Debug|Win32.ActiveCfg = Debug|Win32 {EC8A9E75-C66E-4320-9D91-97F53ED7021B}.Debug|Win32.Build.0 = Debug|Win32 {EC8A9E75-C66E-4320-9D91-97F53ED7021B}.Release.dynamic|Win32.ActiveCfg = Release.dynamic|Win32 {EC8A9E75-C66E-4320-9D91-97F53ED7021B}.Release.dynamic|Win32.Build.0 = Release.dynamic|Win32 {EC8A9E75-C66E-4320-9D91-97F53ED7021B}.Release.static|Win32.ActiveCfg = Release|Win32 
{EC8A9E75-C66E-4320-9D91-97F53ED7021B}.Release.static|Win32.Build.0 = Release|Win32 - {CB522274-3353-41AE-A700-F365FC79DEDD}.Debug|Win32.ActiveCfg = Debug|Win32 - {CB522274-3353-41AE-A700-F365FC79DEDD}.Debug|Win32.Build.0 = Debug|Win32 - {CB522274-3353-41AE-A700-F365FC79DEDD}.Release.dynamic|Win32.ActiveCfg = Release|Win32 - {CB522274-3353-41AE-A700-F365FC79DEDD}.Release.static|Win32.ActiveCfg = Release|Win32 - {CB522274-3353-41AE-A700-F365FC79DEDD}.Release.static|Win32.Build.0 = Release|Win32 {47519557-3296-407A-BE51-8175C77B0868}.Debug|Win32.ActiveCfg = Debug|Win32 {47519557-3296-407A-BE51-8175C77B0868}.Debug|Win32.Build.0 = Debug|Win32 {47519557-3296-407A-BE51-8175C77B0868}.Release.dynamic|Win32.ActiveCfg = Release.dynamic|Win32 @@ -211,6 +196,30 @@ Global {3FB6B4AB-F35C-4E04-8EF7-B68906E50C6D}.Release.dynamic|Win32.Build.0 = Release.dynamic|Win32 {3FB6B4AB-F35C-4E04-8EF7-B68906E50C6D}.Release.static|Win32.ActiveCfg = Release|Win32 {3FB6B4AB-F35C-4E04-8EF7-B68906E50C6D}.Release.static|Win32.Build.0 = Release|Win32 + {2EDB497A-E9CF-40E6-A8F2-B001D895A8AE}.Debug|Win32.ActiveCfg = Debug|Win32 + {2EDB497A-E9CF-40E6-A8F2-B001D895A8AE}.Debug|Win32.Build.0 = Debug|Win32 + {2EDB497A-E9CF-40E6-A8F2-B001D895A8AE}.Release.dynamic|Win32.ActiveCfg = Release.dynamic|Win32 + {2EDB497A-E9CF-40E6-A8F2-B001D895A8AE}.Release.dynamic|Win32.Build.0 = Release.dynamic|Win32 + {2EDB497A-E9CF-40E6-A8F2-B001D895A8AE}.Release.static|Win32.ActiveCfg = Release|Win32 + {2EDB497A-E9CF-40E6-A8F2-B001D895A8AE}.Release.static|Win32.Build.0 = Release|Win32 + {CC778B5F-7B9B-4466-9E9A-B8222C6FF16F}.Debug|Win32.ActiveCfg = Debug|Win32 + {CC778B5F-7B9B-4466-9E9A-B8222C6FF16F}.Debug|Win32.Build.0 = Debug|Win32 + {CC778B5F-7B9B-4466-9E9A-B8222C6FF16F}.Release.dynamic|Win32.ActiveCfg = Release.dynamic|Win32 + {CC778B5F-7B9B-4466-9E9A-B8222C6FF16F}.Release.dynamic|Win32.Build.0 = Release.dynamic|Win32 + {CC778B5F-7B9B-4466-9E9A-B8222C6FF16F}.Release.static|Win32.ActiveCfg = Release|Win32 + 
{CC778B5F-7B9B-4466-9E9A-B8222C6FF16F}.Release.static|Win32.Build.0 = Release|Win32 + {8784A544-7616-4E56-8485-277A2F387ECA}.Debug|Win32.ActiveCfg = Debug|Win32 + {8784A544-7616-4E56-8485-277A2F387ECA}.Debug|Win32.Build.0 = Debug|Win32 + {8784A544-7616-4E56-8485-277A2F387ECA}.Release.dynamic|Win32.ActiveCfg = Release.dynamic|Win32 + {8784A544-7616-4E56-8485-277A2F387ECA}.Release.dynamic|Win32.Build.0 = Release.dynamic|Win32 + {8784A544-7616-4E56-8485-277A2F387ECA}.Release.static|Win32.ActiveCfg = Release|Win32 + {8784A544-7616-4E56-8485-277A2F387ECA}.Release.static|Win32.Build.0 = Release|Win32 + {454A6BAE-5F8A-461A-B8C3-5AAA531006F3}.Debug|Win32.ActiveCfg = Debug|Win32 + {454A6BAE-5F8A-461A-B8C3-5AAA531006F3}.Debug|Win32.Build.0 = Debug|Win32 + {454A6BAE-5F8A-461A-B8C3-5AAA531006F3}.Release.dynamic|Win32.ActiveCfg = Release.dynamic|Win32 + {454A6BAE-5F8A-461A-B8C3-5AAA531006F3}.Release.dynamic|Win32.Build.0 = Release.dynamic|Win32 + {454A6BAE-5F8A-461A-B8C3-5AAA531006F3}.Release.static|Win32.ActiveCfg = Release|Win32 + {454A6BAE-5F8A-461A-B8C3-5AAA531006F3}.Release.static|Win32.Build.0 = Release|Win32 EndGlobalSection GlobalSection(SolutionProperties) = preSolution HideSolutionNode = FALSE diff --git a/vs2008/tesseract.vcproj b/vs2008/tesseract.vcproj index 8a39401473..9babe3383b 100644 --- a/vs2008/tesseract.vcproj +++ b/vs2008/tesseract.vcproj @@ -54,14 +54,14 @@ AdditionalOptions="/wd4244 /wd4018 /wd4305 /wd4800 /wd4996" Optimization="2" InlineFunctionExpansion="1" - AdditionalIncludeDirectories="..\api;..\ccmain;..\ccutil;..\ccstruct;..\classify;..\cutil;..\dict;..\display;..\image;..\textord;..\viewer;..\wordrec;..\pageseg;.;include;include\leptonica" + AdditionalIncludeDirectories="..\api;..\ccmain;..\ccutil;..\ccstruct;..\classify;..\cutil;..\dict;..\display;..\image;..\textord;..\viewer;..\wordrec;..\pageseg;.;include;include\leptonica;..\vs2008\port" PreprocessorDefinitions="NDEBUG;WIN32;_WINDOWS;__MSW32__;_CRT_SECURE_NO_WARNINGS;HAVE_LIBLEPT" 
StringPooling="true" RuntimeLibrary="2" EnableFunctionLevelLinking="true" UsePrecompiledHeader="0" AssemblerListingLocation=".\tesseract.Release/" - ObjectFile=".\tess.Release/" + ObjectFile=".\tesseract.Release/" ProgramDataBaseFileName=".\tesseract.Release/" WarningLevel="3" SuppressStartupBanner="true" @@ -82,7 +82,7 @@ AdditionalOptions="/NODEFAULTLIB:library /LTCG" AdditionalDependencies="ws2_32.lib user32.lib zlib-static-mtdll.lib libpng-static-mtdll.lib libjpeg-static-mtdll.lib libtiff-static-mtdll.lib giflib-static-mtdll.lib leptonlib-static-mtdll.lib" OutputFile=".\bin\$(ProjectName).exe" - Version="3.00" + Version="3.01" LinkIncremental="1" SuppressStartupBanner="true" AdditionalLibraryDirectories=".\lib" @@ -119,7 +119,7 @@ - - - + + + + + + + + diff --git a/vs2008/textord.vcproj b/vs2008/textord.vcproj index f9d9650f26..a948ad09e7 100644 --- a/vs2008/textord.vcproj +++ b/vs2008/textord.vcproj @@ -262,6 +262,10 @@ RelativePath="..\textord\colpartition.cpp" > + + @@ -553,10 +557,6 @@ /> - - @@ -693,10 +693,18 @@ RelativePath="..\textord\tablefind.cpp" > + + + + @@ -917,6 +925,10 @@ RelativePath="..\textord\colpartition.h" > + + @@ -989,12 +1001,16 @@ RelativePath="..\textord\tablefind.h" > + + - - diff --git a/vs2008/wordrec.vcproj b/vs2008/wordrec.vcproj index 41fa4eba3a..308c4ec9d7 100644 --- a/vs2008/wordrec.vcproj +++ b/vs2008/wordrec.vcproj @@ -219,10 +219,6 @@ RelativePath="..\wordrec\associate.cpp" > - - @@ -256,23 +252,15 @@ > - - - - - - - - @@ -337,10 +317,6 @@ RelativePath="..\wordrec\associate.h" > - - @@ -370,7 +346,7 @@ > - - - - - - @@ -421,26 +385,10 @@ RelativePath="..\wordrec\render.h" > - - - - - - - - diff --git a/wordrec/Makefile.am b/wordrec/Makefile.am index 1525a88bb4..72d0b75dd6 100644 --- a/wordrec/Makefile.am +++ b/wordrec/Makefile.am @@ -5,22 +5,20 @@ AM_CPPFLAGS = \ -I$(top_srcdir)/image -I$(top_srcdir)/dict \ -I$(top_srcdir)/viewer -EXTRA_DIST = wordrec.vcproj - include_HEADERS = \ - associate.h badwords.h bestfirst.h chop.h 
\ + associate.h bestfirst.h chop.h \ chopper.h closed.h drawfx.h findseam.h gradechop.h \ - heuristic.h makechop.h matchtab.h matrix.h measure.h metrics.h \ - mfvars.h olutil.h outlines.h pieces.h plotedges.h \ - plotseg.h render.h seam.h split.h tally.h tessinit.h tface.h \ + language_model.h makechop.h matchtab.h measure.h \ + olutil.h outlines.h pieces.h plotedges.h \ + plotseg.h render.h tally.h \ wordclass.h wordrec.h lib_LTLIBRARIES = libtesseract_wordrec.la libtesseract_wordrec_la_SOURCES = \ - associate.cpp badwords.cpp bestfirst.cpp chop.cpp chopper.cpp \ + associate.cpp bestfirst.cpp chop.cpp chopper.cpp \ closed.cpp drawfx.cpp findseam.cpp gradechop.cpp \ - heuristic.cpp makechop.cpp matchtab.cpp matrix.cpp metrics.cpp \ - mfvars.cpp olutil.cpp outlines.cpp pieces.cpp \ - plotedges.cpp plotseg.cpp render.cpp seam.cpp split.cpp \ - tally.cpp tessinit.cpp tface.cpp wordclass.cpp wordrec.cpp + heuristic.cpp language_model.cpp makechop.cpp matchtab.cpp \ + olutil.cpp outlines.cpp pieces.cpp \ + plotedges.cpp plotseg.cpp render.cpp segsearch.cpp \ + tally.cpp tface.cpp wordclass.cpp wordrec.cpp libtesseract_wordrec_la_LDFLAGS = -version-info $(GENERIC_LIBRARY_VERSION) diff --git a/wordrec/Makefile.in b/wordrec/Makefile.in index 8cb0c14a5c..a09db2513d 100644 --- a/wordrec/Makefile.in +++ b/wordrec/Makefile.in @@ -71,12 +71,12 @@ am__base_list = \ am__installdirs = "$(DESTDIR)$(libdir)" "$(DESTDIR)$(includedir)" LTLIBRARIES = $(lib_LTLIBRARIES) libtesseract_wordrec_la_LIBADD = -am_libtesseract_wordrec_la_OBJECTS = associate.lo badwords.lo \ - bestfirst.lo chop.lo chopper.lo closed.lo drawfx.lo \ - findseam.lo gradechop.lo heuristic.lo makechop.lo matchtab.lo \ - matrix.lo metrics.lo mfvars.lo olutil.lo outlines.lo pieces.lo \ - plotedges.lo plotseg.lo render.lo seam.lo split.lo tally.lo \ - tessinit.lo tface.lo wordclass.lo wordrec.lo +am_libtesseract_wordrec_la_OBJECTS = associate.lo bestfirst.lo chop.lo \ + chopper.lo closed.lo drawfx.lo findseam.lo 
gradechop.lo \ + heuristic.lo language_model.lo makechop.lo matchtab.lo \ + olutil.lo outlines.lo pieces.lo plotedges.lo plotseg.lo \ + render.lo segsearch.lo tally.lo tface.lo wordclass.lo \ + wordrec.lo libtesseract_wordrec_la_OBJECTS = \ $(am_libtesseract_wordrec_la_OBJECTS) libtesseract_wordrec_la_LINK = $(LIBTOOL) --tag=CXX $(AM_LIBTOOLFLAGS) \ @@ -251,7 +251,6 @@ libdir = @libdir@ libexecdir = @libexecdir@ localedir = @localedir@ localstatedir = @localstatedir@ -lt_ECHO = @lt_ECHO@ mandir = @mandir@ mkdir_p = @mkdir_p@ oldincludedir = @oldincludedir@ @@ -274,23 +273,22 @@ AM_CPPFLAGS = \ -I$(top_srcdir)/image -I$(top_srcdir)/dict \ -I$(top_srcdir)/viewer -EXTRA_DIST = wordrec.vcproj include_HEADERS = \ - associate.h badwords.h bestfirst.h chop.h \ + associate.h bestfirst.h chop.h \ chopper.h closed.h drawfx.h findseam.h gradechop.h \ - heuristic.h makechop.h matchtab.h matrix.h measure.h metrics.h \ - mfvars.h olutil.h outlines.h pieces.h plotedges.h \ - plotseg.h render.h seam.h split.h tally.h tessinit.h tface.h \ + language_model.h makechop.h matchtab.h measure.h \ + olutil.h outlines.h pieces.h plotedges.h \ + plotseg.h render.h tally.h \ wordclass.h wordrec.h lib_LTLIBRARIES = libtesseract_wordrec.la libtesseract_wordrec_la_SOURCES = \ - associate.cpp badwords.cpp bestfirst.cpp chop.cpp chopper.cpp \ + associate.cpp bestfirst.cpp chop.cpp chopper.cpp \ closed.cpp drawfx.cpp findseam.cpp gradechop.cpp \ - heuristic.cpp makechop.cpp matchtab.cpp matrix.cpp metrics.cpp \ - mfvars.cpp olutil.cpp outlines.cpp pieces.cpp \ - plotedges.cpp plotseg.cpp render.cpp seam.cpp split.cpp \ - tally.cpp tessinit.cpp tface.cpp wordclass.cpp wordrec.cpp + heuristic.cpp language_model.cpp makechop.cpp matchtab.cpp \ + olutil.cpp outlines.cpp pieces.cpp \ + plotedges.cpp plotseg.cpp render.cpp segsearch.cpp \ + tally.cpp tface.cpp wordclass.cpp wordrec.cpp libtesseract_wordrec_la_LDFLAGS = -version-info $(GENERIC_LIBRARY_VERSION) all: all-recursive @@ -368,7 +366,6 @@ 
distclean-compile: -rm -f *.tab.c @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/associate.Plo@am__quote@ -@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/badwords.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/bestfirst.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/chop.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/chopper.Plo@am__quote@ @@ -377,21 +374,17 @@ distclean-compile: @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/findseam.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gradechop.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/heuristic.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/language_model.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/makechop.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/matchtab.Plo@am__quote@ -@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/matrix.Plo@am__quote@ -@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/metrics.Plo@am__quote@ -@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mfvars.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/olutil.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/outlines.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/pieces.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/plotedges.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/plotseg.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/render.Plo@am__quote@ -@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/seam.Plo@am__quote@ -@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/split.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/segsearch.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/tally.Plo@am__quote@ -@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/tessinit.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/tface.Plo@am__quote@ @AMDEP_TRUE@@am__include@ 
@am__quote@./$(DEPDIR)/wordclass.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/wordrec.Plo@am__quote@ diff --git a/wordrec/associate.cpp b/wordrec/associate.cpp index ba15fc3148..ccc7062da6 100644 --- a/wordrec/associate.cpp +++ b/wordrec/associate.cpp @@ -1,63 +1,164 @@ -/* -*-C-*- - ******************************************************************************** - * - * File: associate.c (Formerly associate.c) - * Description: Associate the outlines and classify them - * Author: Mark Seaman, OCR Technology - * Created: Tue Jan 30 14:03:25 1990 - * Modified: Mon Jul 22 10:48:01 1991 (Mark Seaman) marks@hpgrlt - * Language: C - * Package: N/A - * Status: Experimental (Do Not Distribute) - * - * (c) Copyright 1990, Hewlett-Packard Company. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. - * - ******************************************************************************** - */ - -/*---------------------------------------------------------------------- - I n c l u d e s -----------------------------------------------------------------------*/ +/////////////////////////////////////////////////////////////////////// +// File: associate.cpp +// Description: Functions for scoring segmentation paths according to +// their character widths, gap widths and seam cuts. +// Author: Daria Antonova +// Created: Mon Mar 8 11:26:43 PDT 2010 +// +// (C) Copyright 2010, Google Inc. 
+// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +/////////////////////////////////////////////////////////////////////// + + #include #ifdef __UNIX__ #include #endif +#include #include "associate.h" -#include "callcpp.h" +#include "baseline.h" + +namespace tesseract { + +const float AssociateUtils::kMaxFixedPitchCharAspectRatio = 2.0f; +const float AssociateUtils::kMinGap = 0.03f; + +void AssociateUtils::ComputeStats(int col, int row, + const AssociateStats *parent_stats, + int parent_path_length, + bool fixed_pitch, + float max_char_wh_ratio, + const DENORM *denorm, + CHUNKS_RECORD *chunks_record, + int debug_level, + AssociateStats *stats) { + stats->Clear(); + + if (debug_level > 0) { + tprintf("AssociateUtils::ComputeStats() for col=%d, row=%d%s\n", + col, row, fixed_pitch ? " (fixed pitch)" : ""); + } + float normalizing_height = BASELINE_SCALE; + // TODO(rays/daria) Can unicharset.script_has_xheight be useful here? + if (fixed_pitch && denorm != NULL && denorm->row() != NULL) { + // For fixed pitch language like CJK, we use the full text height as the + // normalizing factor so we are not dependent on xheight calculation. + // In the normalized coord. xheight * scale == BASELINE_SCALE(128), + // so add proportionally scaled ascender zone to get full text height. 
+ normalizing_height = denorm->scale() * + (denorm->row()->x_height() + denorm->row()->ascenders()); + if (debug_level > 0) { + tprintf("normalizing height = %g (scale %g xheight %g ascenders %g)\n", + normalizing_height, denorm->scale(), denorm->row()->x_height(), + denorm->row()->ascenders()); + } + } + float wh_ratio = + GetChunksWidth(chunks_record->chunk_widths, col, row) / normalizing_height; + if (debug_level) tprintf("wh_ratio %g\n", wh_ratio); + if (!fixed_pitch) { + if (wh_ratio > max_char_wh_ratio) stats->bad_shape = true; + } else { + bool end_row = (row == (chunks_record->ratings->dimension() - 1)); -extern TBLOB *newblob(); + // Ensure that the blob has gaps on the left and the right sides + // (except for beginning and ending punctuation) and that there is + // no cutting through ink at the blob boundaries. + if (col > 0) { + float left_gap = + GetChunksGap(chunks_record->chunk_widths, col-1) / normalizing_height; + SEAM *left_seam = + static_cast(array_value(chunks_record->splits, col-1)); + if (debug_level) { + tprintf("left_gap %g, left_seam %g\n", left_gap, left_seam->priority); + } + if ((!end_row && left_gap < kMinGap) || left_seam->priority > 0.0f) { + stats->bad_shape = true; + } + } + float right_gap = 0.0f; + if (!end_row) { + right_gap = + GetChunksGap(chunks_record->chunk_widths, row) / normalizing_height; + SEAM *right_seam = + static_cast(array_value(chunks_record->splits, row)); + if (debug_level) { + tprintf("right_gap %g right_seam %g\n", + right_gap, right_seam->priority); + } + if (right_gap < kMinGap || right_seam->priority > 0.0f) { + stats->bad_shape = true; + if (right_gap < kMinGap) stats->bad_fixed_pitch_right_gap = true; + } + } -/*---------------------------------------------------------------------- - V a r i a b l e s -----------------------------------------------------------------------*/ + // Impose additional segmentation penalties if blob widths or gaps + // distribution don't fit a fixed-pitch model. 
+ // Since we only know the widths and gaps of the path explored so far, + // the means and variances are computed for the path so far (not + // considering characters to the right of the last character on the path). + stats->full_wh_ratio = wh_ratio + right_gap; + if (parent_stats != NULL) { + stats->full_wh_ratio_total = + (parent_stats->full_wh_ratio_total + stats->full_wh_ratio); + float mean = + stats->full_wh_ratio_total / static_cast(parent_path_length+1); + stats->full_wh_ratio_var = + parent_stats->full_wh_ratio_var + pow(mean-stats->full_wh_ratio, 2); + } else { + stats->full_wh_ratio_total = stats->full_wh_ratio; + } + if (debug_level) { + tprintf("full_wh_ratio %g full_wh_ratio_total %g full_wh_ratio_var %g\n", + stats->full_wh_ratio, stats->full_wh_ratio_total, + stats->full_wh_ratio_var); + } -BOOL_VAR(wordrec_enable_assoc, 1, "Associator Enable"); -BOOL_VAR(force_word_assoc, FALSE, - "always force associator to run, independent of what enable_assoc is." - "This is used for CJK where component grouping is necessary."); + stats->shape_cost = + FixedPitchWidthCost(wh_ratio, right_gap, end_row, max_char_wh_ratio); -EVALUATION_ARRAY last_segmentation; + // For some reason Tesseract prefers to treat the whole CJ words + // as one blob when the initial segmentation is particularly bad. + // This hack is to avoid favoring such states. 
+ if (col == 0 && end_row && wh_ratio > max_char_wh_ratio) { + stats->shape_cost += 10; + } + stats->shape_cost += stats->full_wh_ratio_var; + if (debug_level) tprintf("shape_cost %g\n", stats->shape_cost); + } +} -/*---------------------------------------------------------------------- - F u n c t i o n s -----------------------------------------------------------------------*/ +int AssociateUtils::GetChunksWidth(WIDTH_RECORD *width_record, + int start_blob, int last_blob) { + int result = 0; + for (int x = start_blob * 2; x <= last_blob * 2; x++) + result += width_record->widths[x]; + return result; +} -/********************************************************************** - * print_eval_record - **********************************************************************/ -void print_eval_record(const char *label, EVALUATION_RECORD *rec) { - if (rec) { - cprintf ("%s: r%.2f c%.2f width=%d gap=%d\n", label, - rec->match, rec->certainty, rec->width, rec->gap); +float AssociateUtils::FixedPitchWidthCost(float norm_width, + float right_gap, + bool end_pos, + float max_char_wh_ratio) { + float cost = 0.0f; + if (norm_width > max_char_wh_ratio) cost += norm_width; + if (norm_width > kMaxFixedPitchCharAspectRatio) + cost += norm_width * norm_width; // extra penalty for merging CJK chars + // Penalize skinny blobs, except for punctuation in the last position. 
+ if (norm_width+right_gap < 0.5f && !end_pos) { + cost += 1.0f - (norm_width + right_gap); } + return cost; } + +} // namespace tesseract diff --git a/wordrec/associate.h b/wordrec/associate.h index 17a354f63f..4987ec3667 100644 --- a/wordrec/associate.h +++ b/wordrec/associate.h @@ -1,101 +1,147 @@ -/* -*-C-*- - ******************************************************************************** - * - * File: associate.h (Formerly associate.h) - * Description: Associate the outlines and classify them - * Author: Mark Seaman, OCR Technology - * Created: Mon Feb 5 11:42:51 1990 - * Modified: Tue May 21 15:34:56 1991 (Mark Seaman) marks@hpgrlt - * Language: C - * Package: N/A - * Status: Experimental (Do Not Distribute) - * - * (c) Copyright 1990, Hewlett-Packard Company. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. - * - ******************************************************************************** - */ +/////////////////////////////////////////////////////////////////////// +// File: associate.h +// Description: Structs, classes, typedefs useful for the segmentation +// search. Functions for scoring segmentation paths according +// to their character widths, gap widths and seam cuts. +// Author: Daria Antonova +// Created: Mon Mar 8 11:26:43 PDT 2010 +// +// (C) Copyright 2010, Google Inc. +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +/////////////////////////////////////////////////////////////////////// #ifndef ASSOCIATE_H #define ASSOCIATE_H -/* ----------------------------------------------------------------------- - I n c l u d e s ----------------------------------------------------------------------- -*/ - -#include "matrix.h" -#include "states.h" #include "blobs.h" -#include "split.h" +#include "elst.h" +#include "matrix.h" #include "seam.h" - -/* ----------------------------------------------------------------------- - T y p e s ----------------------------------------------------------------------- -*/ - -/** List of (BLOB*) */ -typedef LIST BLOBS; - -/** List of (TESSLINE*) */ -typedef LIST OUTLINES; - -/** List of (EDGEPT*) */ -typedef LIST EDGEPTS; +#include "split.h" +#include "states.h" typedef inT16 BLOB_WEIGHTS[MAX_NUM_CHUNKS]; -/** Each char evaluated */ -typedef struct -{ +// Each unichar evaluated. +struct EVALUATION_RECORD { float match; float certainty; char character; int width; int gap; -} EVALUATION_RECORD; +}; + +typedef EVALUATION_RECORD EVALUATION_ARRAY[MAX_NUM_CHUNKS]; -/** Classification info for chunks */ -struct CHUNKS_RECORD -{ +// Classification info for chunks. +// +// TODO(daria): move to tesseract namespace when obsolete code using +// this struct that is not in tesseract namespace is deprecated. 
+struct CHUNKS_RECORD { MATRIX *ratings; TBLOB *chunks; SEAMS splits; - TEXTROW *row; - int fx; int x_height; WIDTH_RECORD *chunk_widths; WIDTH_RECORD *char_widths; inT16 *weights; }; -/** Each segmentation */ -typedef EVALUATION_RECORD EVALUATION_ARRAY[MAX_NUM_CHUNKS]; +namespace tesseract { + +// Statisitcs about character widths, gaps and seams. +struct AssociateStats { + AssociateStats() { Clear(); } + + void Clear() { + shape_cost = 0.0f; + bad_shape = false; + full_wh_ratio = 0.0f; + full_wh_ratio_total = 0.0f; + full_wh_ratio_var = 0.0f; + bad_fixed_pitch_right_gap = false; + bad_fixed_pitch_wh_ratio = false; + } + + void Print() { + tprintf("AssociateStats: w(%g %d) s(%g %d)\n", shape_cost, bad_shape); + } + + float shape_cost; // cost of blob shape + bool bad_shape; // true if the shape of the blob is unacceptable + float full_wh_ratio; // width-to-hight ratio + gap on the right + float full_wh_ratio_total; // sum of width-to-hight ratios + // on the path terminating at this blob + float full_wh_ratio_var; // variance of full_wh_ratios on the path + bool bad_fixed_pitch_right_gap; // true if there is no gap before + // the blob on the right + bool bad_fixed_pitch_wh_ratio; // true if the blobs has width-to-hight + // ratio > kMaxFixedPitchCharAspectRatio +}; + +// Utility functions for scoring segmentation paths according to their +// character widths, gap widths, seam characteristics. +class AssociateUtils { + public: + static const float kMaxFixedPitchCharAspectRatio; + static const float kMinGap; + + // Computes character widths, gaps and seams stats given the + // AssociateStats of the path so far, col, row of the blob that + // is being added to the path, and CHUNKS_RECORD containing information + // about character widths, gaps and seams. 
+ // Fills associate_cost with the combined shape, gap and seam cost + // of adding a unichar from (col, row) to the path (note that since + // this function could be used to compute the prioritization for + // pain points, (col, row) entry might not be classified yet; thus + // information in the (col, row) entry of the ratings matrix is not used). + // + // Note: the function assumes that chunks_record, stats and + // associate_cost pointers are not NULL. + static void ComputeStats(int col, int row, + const AssociateStats *parent_stats, + int parent_path_length, + bool fixed_pitch, + float max_char_wh_ratio, + const DENORM *denorm, + CHUNKS_RECORD *chunks_record, + int debug_level, + AssociateStats *stats); + + // Returns the width of a chunk which is a composed of several blobs + // blobs[start_blob..last_blob] inclusively. + // Widths/gaps records are in the form: + // width_record->num_char = n + // width_record->widths[2*n-1] = w0,g0,w1,g1..w(n-1),g(n-1) + static int GetChunksWidth(WIDTH_RECORD *width_record, + int start_blob, int last_blob); + + // Returns the width of a gap between the specified chunk and the next one. + static inline int GetChunksGap(WIDTH_RECORD *width_record, int last_chunk) { + return (last_chunk >= 0 && last_chunk < width_record->num_chars - 1) ? + width_record->widths[last_chunk * 2 + 1] : 0; + } + + // Returns the width cost for fixed-pitch text. + static float FixedPitchWidthCost(float norm_width, float right_gap, + bool end_pos, float max_char_wh_ratio); + + // Returns the gap cost for fixed-pitch text (penalizes vertically + // overlapping components). + static inline float FixedPitchGapCost(float norm_gap, bool end_pos) { + return (norm_gap < 0.05 && !end_pos) ? 
5.0f : 0.0f; + } +}; + +} // namespace tesseract -/*---------------------------------------------------------------------- - V a r i a b l e s -----------------------------------------------------------------------*/ -extern EVALUATION_ARRAY last_segmentation; -extern WIDTH_RECORD *char_widths; -extern BOOL_VAR_H(wordrec_enable_assoc, 1, "Associator Enable"); -extern BOOL_VAR_H(force_word_assoc, FALSE, - "always force associator to run, independent of what enable_assoc is." - "This is used for CJK where component grouping is necessary."); - -/*---------------------------------------------------------------------- - F u n c t i o n s -----------------------------------------------------------------------*/ -void init_associate_vars(); -void print_eval_record(const char* label, EVALUATION_RECORD *eval_rec); #endif diff --git a/wordrec/badwords.cpp b/wordrec/badwords.cpp deleted file mode 100644 index 90d5e15ec7..0000000000 --- a/wordrec/badwords.cpp +++ /dev/null @@ -1,108 +0,0 @@ -/****************************************************************************** - ** Filename: badwords.c - ** Purpose: Routines to keep the bad words in sorted order. - ** Author: Dan Johnson - ** History: Thu Apr 25 08:40:19 1991, DSJ, Created. - ** - ** (c) Copyright Hewlett-Packard Company, 1988. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. 
- ******************************************************************************/ - -/*----------------------------------------------------------------------------- - Include Files and Type Defines ------------------------------------------------------------------------------*/ -#include "general.h" -#include "oldheap.h" -#include "callcpp.h" - -#include -#include -#ifdef __UNIX__ -#include -#endif - -#define MAX_NUM_BAD_WERDS 1000 - -/*---------------------------------------------------------------------------- - Global Data Definitions and Declarations ------------------------------------------------------------------------------*/ -/** heap that bad words are stored in */ -static HEAP *BadWords = NULL; -BOOL_VAR (tessedit_save_stats, FALSE, "Save final recognition statistics"); - -/*---------------------------------------------------------------------------- - Public Code ------------------------------------------------------------------------------*/ -/*---------------------------------------------------------------------------*/ -/** - * This routine prints the bad words stored in BadWords - * to file ordered by certainty (worst certainty first). - * - * @param File open text file to print bad words to - * - * Globals: - * - BadWords heap that bad words are stored in - * - * @note Exceptions: none - * @note History: Thu Apr 25 08:57:08 1991, DSJ, Created. 
- */ -void PrintBadWords(FILE *File) { - HEAPENTRY NextWord; - - if (BadWords == NULL) - return; - - fprintf (File, "\n"); - fprintf (File, "Bad Word Certainty\n"); - fprintf (File, "---------------- ---------\n"); - while (GetTopOfHeap (BadWords, &NextWord) != EMPTY) { - fprintf (File, "%16s %6.2f\n", (const char *) NextWord.Data, - NextWord.Key); - free_string ((char *) NextWord.Data); - } - fprintf (File, "\n"); - -} /* PrintBadWords */ - - -/*---------------------------------------------------------------------------*/ -/** - * This routine saves all words flagged as bad in a heap - * with the worst word on the top of the heap. The contents - * of this heap can be printed to a file by calling - * PrintBadWords (File). - * - * @param Word bad word to be saved - * @param Certainty certainty of word - * - * Globals: - * - BadWords heap to keep bad words in - * - * History: Thu Apr 25 08:41:00 1991, DSJ, Created. - */ -void SaveBadWord(const char *Word, FLOAT32 Certainty) { - HEAPENTRY NewWord; - - assert (Word != NULL); - - if (BadWords == NULL) { - BadWords = MakeHeap (MAX_NUM_BAD_WERDS); - InitHeap(BadWords); - } else if (HeapFull(BadWords)) { - return; - } - - NewWord.Key = Certainty; - NewWord.Data = alloc_string (strlen (Word) + 1); - strcpy ((char *) NewWord.Data, Word); - HeapStore(BadWords, &NewWord); - -} /* SaveBadWord */ diff --git a/wordrec/badwords.h b/wordrec/badwords.h deleted file mode 100644 index 93f60e451e..0000000000 --- a/wordrec/badwords.h +++ /dev/null @@ -1,51 +0,0 @@ -/****************************************************************************** - ** Filename: badwords.h - ** Purpose: Routines to keep the bad words in sorted order. - ** Author: Dan Johnson - ** History: Thu Apr 25 09:06:48 1991, DSJ, Created. - ** - ** (c) Copyright Hewlett-Packard Company, 1988. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. 
- ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. - ******************************************************************************/ -#ifndef __BADWERDS__ -#define __BADWERDS__ - -/**---------------------------------------------------------------------------- - Include Files and Type Defines -----------------------------------------------------------------------------**/ -#include - -/**---------------------------------------------------------------------------- - Public Function Prototypes -----------------------------------------------------------------------------**/ -void PrintBadWords(FILE *File); - -void SaveBadWord(const char *Word, FLOAT32 Certainty); -extern BOOL_VAR_H (tessedit_save_stats, FALSE, "Save final recognition statistics"); - -/* -#if defined(__STDC__) || defined(__cplusplus) -# define _ARGS(s) s -#else -# define _ARGS(s) () -#endif*/ - -/* badwords.c -void PrintBadWords - _ARGS((FILE *File)); - -void SaveBadWord - _ARGS((char *Word, - FLOAT32 Certainty)); - -#undef _ARGS -*/ -#endif diff --git a/wordrec/bestfirst.cpp b/wordrec/bestfirst.cpp index 740cde57ec..136fbc5dd3 100644 --- a/wordrec/bestfirst.cpp +++ b/wordrec/bestfirst.cpp @@ -29,15 +29,14 @@ #include +#include "associate.h" #include "bestfirst.h" #include "baseline.h" #include "bitvec.h" -#include "callback.h" #include "dict.h" #include "freelist.h" #include "globals.h" -#include "heuristic.h" -#include "metrics.h" +#include "pageres.h" #include "permute.h" #include "pieces.h" #include "plotseg.h" @@ -45,7 +44,6 @@ #include "states.h" #include "stopper.h" #include "structures.h" -#include "tordvars.h" #include 
"unicharset.h" #include "wordclass.h" #include "wordrec.h" @@ -57,17 +55,6 @@ void call_caller(); -/*---------------------------------------------------------------------- - V a r i a b l e s ----------------------------------------------------------------------*/ -int num_joints; /* Number of chunks - 1 */ -int num_pushed = 0; -int num_popped = 0; - -INT_VAR(wordrec_num_seg_states, 30, "Segmentation states"); - -double_VAR(wordrec_worst_state, 1, "Worst segmentation state"); - /**/ /*---------------------------------------------------------------------- F u n c t i o n s @@ -81,8 +68,8 @@ namespace tesseract { * solution space. */ void Wordrec::best_first_search(CHUNKS_RECORD *chunks_record, - WERD_CHOICE *best_choice, - WERD_CHOICE *raw_choice, + BLOB_CHOICE_LIST_VECTOR *best_char_choices, + WERD_RES *word, STATE *state, DANGERR *fixpt, STATE *best_state) { @@ -90,29 +77,25 @@ void Wordrec::best_first_search(CHUNKS_RECORD *chunks_record, inT16 keep_going; STATE guided_state; // not used - num_joints = chunks_record->ratings->dimension() - 1; - the_search = new_search(chunks_record, num_joints, - best_choice, raw_choice, state); + int num_joints = chunks_record->ratings->dimension() - 1; + the_search = new_search(chunks_record, num_joints, best_char_choices, + word->best_choice, word->raw_choice, state); // The default state is initialized as the best choice. In order to apply // segmentation adjustment, or any other contextual processing in permute, // we give the best choice a poor rating to force the processed raw choice // to be promoted to best choice. 
- the_search->best_choice->set_rating(100000.0); + the_search->best_choice->set_rating(WERD_CHOICE::kBadRating); evaluate_state(chunks_record, the_search, fixpt); - if (permute_debug) { + if (wordrec_debug_level) { tprintf("\n\n\n =========== BestFirstSearch ==============\n"); - best_choice->print("**Initial BestChoice**"); + word->best_choice->print("**Initial BestChoice**"); } -#ifndef GRAPHICS_DISABLED - save_best_state(chunks_record); -#endif - start_recording(); FLOAT32 worst_priority = 2.0f * prioritize_state(chunks_record, the_search); if (worst_priority < wordrec_worst_state) worst_priority = wordrec_worst_state; - if (segment_debug) { + if (wordrec_debug_level) { print_state("BestFirstSearch", best_state, num_joints); } @@ -120,20 +103,13 @@ void Wordrec::best_first_search(CHUNKS_RECORD *chunks_record, do { /* Look for answer */ if (!hash_lookup (the_search->closed_states, the_search->this_state)) { - - if (tord_blob_skip) { - free_state (the_search->this_state); - break; - } - guided_state = *(the_search->this_state); keep_going = evaluate_state(chunks_record, the_search, fixpt); hash_add (the_search->closed_states, the_search->this_state); if (!keep_going || - (the_search->num_states > wordrec_num_seg_states) || - (tord_blob_skip)) { - if (segment_debug) + (the_search->num_states > wordrec_num_seg_states)) { + if (wordrec_debug_level) tprintf("Breaking best_first_search on keep_going %s numstates %d\n", ((keep_going) ? 
"T" :"F"), the_search->num_states); free_state (the_search->this_state); @@ -143,7 +119,7 @@ void Wordrec::best_first_search(CHUNKS_RECORD *chunks_record, FLOAT32 new_worst_priority = 2.0f * prioritize_state(chunks_record, the_search); if (new_worst_priority < worst_priority) { - if (segment_debug) + if (wordrec_debug_level) tprintf("Lowering WorstPriority %f --> %f\n", worst_priority, new_worst_priority); // Tighten the threshold for admitting new paths as better search @@ -157,58 +133,28 @@ void Wordrec::best_first_search(CHUNKS_RECORD *chunks_record, free_state (the_search->this_state); num_popped++; the_search->this_state = pop_queue (the_search->open_states); - if (segment_debug && !the_search->this_state) + if (wordrec_debug_level && !the_search->this_state) tprintf("No more states to evalaute after %d evals", num_popped); } while (the_search->this_state); state->part1 = the_search->best_state->part1; state->part2 = the_search->best_state->part2; - stop_recording(); - if (permute_debug) { + if (wordrec_debug_level) { tprintf("\n\n\n =========== BestFirstSearch ==============\n"); // best_choice->debug_string(getDict().getUnicharset()).string()); - best_choice->print("**Final BestChoice**"); + word->best_choice->print("**Final BestChoice**"); } // save the best_state stats delete_search(the_search); } -} // namespace tesseract - - -/** - * @name chunks_width - * - * Return the width of a chunk which is a composed of several blobs - * blobs[start_blob..last_blob] inclusively, - * whose individual widths and gaps are record in width_record in the form - * width_record->num_char = n - * width_record->widths[2*n-1] = w0,g0,w1,g1..w(n-1),g(n-1) - */ -int chunks_width(WIDTH_RECORD *width_record, int start_blob, int last_blob) { - int result = 0; - for (int x = start_blob * 2; x <= last_blob * 2; x++) - result += width_record->widths[x]; - return (result); -} - -/** - * @name chunks_gap - * - * Return the width of between the specified chunk and next. 
- */ -int chunks_gap(WIDTH_RECORD *width_record, int last_chunk) { - return (last_chunk < width_record->num_chars - 1) ? - width_record->widths[last_chunk * 2 + 1] : 0; -} - /** * delete_search * * Terminate the current search and free all the memory involved. */ -void delete_search(SEARCH_RECORD *the_search) { +void Wordrec::delete_search(SEARCH_RECORD *the_search) { float closeness; closeness = (the_search->num_joints ? @@ -216,20 +162,16 @@ void delete_search(SEARCH_RECORD *the_search) { reinterpret_cast(the_search->best_state), 2) / (float) the_search->num_joints) : 0.0f); - record_search_status (the_search->num_states, - the_search->before_best, closeness); - free_state (the_search->first_state); free_state (the_search->best_state); - free_hash_table (the_search->closed_states); + free_hash_table(the_search->closed_states); FreeHeapData (the_search->open_states, (void_dest) free_state); memfree(the_search); } -namespace tesseract { /** * evaluate_chunks * @@ -245,18 +187,16 @@ BLOB_CHOICE_LIST_VECTOR *Wordrec::evaluate_chunks(CHUNKS_RECORD *chunks_record, int x = 0; int y; - /* Iterate sub-paths */ + // Iterate sub-paths. for (i = 1; i <= search_state[0] + 1; i++) { if (i > search_state[0]) y = count_blobs (chunks_record->chunks) - 1; else y = x + search_state[i]; - if (tord_blob_skip) { - delete char_choices; - return (NULL); - } /* Process one square */ - /* Classify if needed */ + // Process one square. + + // Classify if needed. blob_choices = get_piece_rating(chunks_record->ratings, chunks_record->chunks, chunks_record->splits, @@ -267,15 +207,15 @@ BLOB_CHOICE_LIST_VECTOR *Wordrec::evaluate_chunks(CHUNKS_RECORD *chunks_record, return (NULL); } - /* Add permuted ratings */ + // Add permuted ratings. 
blob_choice_it.set_to_list(blob_choices); last_segmentation[i - 1].certainty = blob_choice_it.data()->certainty(); last_segmentation[i - 1].match = blob_choice_it.data()->rating(); last_segmentation[i - 1].width = - chunks_width (chunks_record->chunk_widths, x, y); + AssociateUtils::GetChunksWidth(chunks_record->chunk_widths, x, y); last_segmentation[i - 1].gap = - chunks_gap (chunks_record->chunk_widths, y); + AssociateUtils::GetChunksGap(chunks_record->chunk_widths, y); *char_choices += blob_choices; x = y + 1; @@ -295,7 +235,7 @@ inT16 Wordrec::evaluate_state(CHUNKS_RECORD *chunks_record, BLOB_CHOICE_LIST_VECTOR *char_choices; SEARCH_STATE chunk_groups; float rating_limit = the_search->best_choice->rating(); - inT16 keep_going = TRUE; + bool keep_going = true; PIECES_STATE widths; the_search->num_states++; @@ -305,24 +245,28 @@ inT16 Wordrec::evaluate_state(CHUNKS_RECORD *chunks_record, getDict().LogNewSegmentation(widths); char_choices = evaluate_chunks(chunks_record, chunk_groups); - wordseg_rating_adjust_factor = -1.0f; + getDict().SetWordsegRatingAdjustFactor(-1.0f); + bool updated_best_choice = false; if (char_choices != NULL && char_choices->length() > 0) { // Compute the segmentation cost and include the cost in word rating. // TODO(dsl): We should change the SEARCH_RECORD to store this cost // from state evaluation and avoid recomputing it here. 
prioritize_state(chunks_record, the_search); - wordseg_rating_adjust_factor = the_search->segcost_bias; - getDict().permute_characters(*char_choices, rating_limit, - the_search->best_choice, - the_search->raw_choice); + getDict().SetWordsegRatingAdjustFactor(the_search->segcost_bias); + updated_best_choice = + getDict().permute_characters(*char_choices, + the_search->best_choice, + the_search->raw_choice); bool replaced = false; - if (getDict().AcceptableChoice(char_choices, the_search->best_choice, - *(the_search->raw_choice), fixpt, - ASSOCIATOR_CALLER, &replaced)) { - keep_going = FALSE; + if (updated_best_choice) { + if (getDict().AcceptableChoice(char_choices, the_search->best_choice, + NULL, ASSOCIATOR_CALLER, &replaced)) { + keep_going = false; + } + CopyCharChoices(*char_choices, the_search->best_char_choices); } } - wordseg_rating_adjust_factor = -1.0f; + getDict().SetWordsegRatingAdjustFactor(-1.0f); #ifndef GRAPHICS_DISABLED if (wordrec_display_segmentations) { @@ -333,13 +277,15 @@ inT16 Wordrec::evaluate_state(CHUNKS_RECORD *chunks_record, #endif if (rating_limit != the_search->best_choice->rating()) { + ASSERT_HOST(updated_best_choice); the_search->before_best = the_search->num_states; the_search->best_state->part1 = the_search->this_state->part1; the_search->best_state->part2 = the_search->this_state->part2; replace_char_widths(chunks_record, chunk_groups); + } else { + ASSERT_HOST(!updated_best_choice); + if (char_choices != NULL) fixpt->clear(); } - else if (char_choices != NULL) - fixpt->index = -1; if (char_choices != NULL) delete char_choices; memfree(chunk_groups); @@ -351,34 +297,36 @@ inT16 Wordrec::evaluate_state(CHUNKS_RECORD *chunks_record, /** * rebuild_current_state * - * Evaluate the segmentation that is represented by this state in the - * best first search. Add this state to the "states_seen" list. 
+ * Transfers the given state to the word's output fields: rebuild_word, + * best_state, box_word, and returns the corresponding blob choices. */ BLOB_CHOICE_LIST_VECTOR *Wordrec::rebuild_current_state( - TBLOB *blobs, - SEAMS seam_list, + WERD_RES *word, STATE *state, BLOB_CHOICE_LIST_VECTOR *old_choices, - int fx, - bool force_rebuild, - const WERD_CHOICE &best_choice, - const MATRIX *ratings) { + MATRIX *ratings) { // Initialize search_state, num_joints, x, y. - int num_joints = array_count(seam_list); + int num_joints = array_count(word->seam_array); #ifndef GRAPHICS_DISABLED if (wordrec_display_segmentations) { - print_state("Rebuiling state", state, num_joints); + print_state("Rebuilding state", state, num_joints); } #endif + // Setup the rebuild_word ready for the output blobs. + if (word->rebuild_word != NULL) + delete word->rebuild_word; + word->rebuild_word = new TWERD; + // Setup the best_state. + word->best_state.clear(); SEARCH_STATE search_state = bin_to_chunks(state, num_joints); + // See which index is which below for information on x and y. int x = 0; int y; - int i; - for (i = 1; i <= search_state[0]; i++) { + for (int i = 1; i <= search_state[0]; i++) { y = x + search_state[i]; x = y + 1; } - y = count_blobs (blobs) - 1; + y = count_blobs(word->chopped_word->blobs) - 1; // Initialize char_choices, expanded_fragment_lengths: // e.g. 
if fragment_lengths = {1 1 2 3 1}, @@ -388,12 +336,13 @@ BLOB_CHOICE_LIST_VECTOR *Wordrec::rebuild_current_state( bool state_has_fragments = false; const char *fragment_lengths = NULL; - if (best_choice.length() > 0) { - fragment_lengths = best_choice.fragment_lengths(); + if (word->best_choice->length() > 0) { + fragment_lengths = word->best_choice->fragment_lengths(); } if (fragment_lengths) { - for (int i = 0; i < best_choice.length(); ++i) { + for (int i = 0; i < word->best_choice->length(); ++i) { *char_choices += NULL; + word->best_state.push_back(0); if (fragment_lengths[i] > 1) { state_has_fragments = true; } @@ -402,40 +351,29 @@ BLOB_CHOICE_LIST_VECTOR *Wordrec::rebuild_current_state( } } } else { - for (i = 0; i <= search_state[0]; ++i) { + for (int i = 0; i <= search_state[0]; ++i) { expanded_fragment_lengths_str += (char)1; *char_choices += NULL; + word->best_state.push_back(0); } } - // Finish early if force_rebuld is false and there are no fragments to merge. - if (!force_rebuild && !state_has_fragments) { - delete char_choices; - memfree(search_state); - return old_choices; - } - // Set up variables for concatenating fragments. const char *word_lengths_ptr = NULL; const char *word_ptr = NULL; if (state_has_fragments) { // Make word_lengths_ptr point to the last element in // best_choice->unichar_lengths(). - word_lengths_ptr = best_choice.unichar_lengths().string(); + word_lengths_ptr = word->best_choice->unichar_lengths().string(); word_lengths_ptr += (strlen(word_lengths_ptr)-1); // Make word_str point to the beginning of the last // unichar in best_choice->unichar_string(). 
- word_ptr = best_choice.unichar_string().string(); + word_ptr = word->best_choice->unichar_string().string(); word_ptr += (strlen(word_ptr)-*word_lengths_ptr); } const char *expanded_fragment_lengths = expanded_fragment_lengths_str.string(); - bool merging_fragment = false; - int true_y = -1; char unichar[UNICHAR_LEN + 1]; - int fragment_pieces = -1; - float rating = 0.0; - float certainty = -MAX_FLOAT32; // Populate char_choices list such that it corresponds to search_state. // @@ -447,103 +385,80 @@ BLOB_CHOICE_LIST_VECTOR *Wordrec::rebuild_current_state( // added to the new choices list for each character composed from // fragments (even if the choice for the corresponding character appears // in the re-classified choices list of for the newly merged blob). - BLOB_CHOICE_IT temp_it; - int char_choices_index = char_choices->length() - 1; - for (i = search_state[0]; i >= 0; i--) { + int ss_index = search_state[0]; + // Which index is which? + // char_choices_index refers to the finished product: there is one for each + // blob/unicharset entry in the final word. + // ss_index refers to the search_state, and indexes a group (chunk) of blobs + // that were classified together for the best state. + // old_choice_index is a copy of ss_index, and accesses the old_choices, + // which correspond to chunks in the best state. old_choice_index gets + // set to -1 on a fragment set, as there is no corresponding chunk in + // the best state. + // x and y refer to the underlying blobs and are the first and last blob + // indices in a chunk. + for (int char_choices_index = char_choices->length() - 1; + char_choices_index >= 0; + --char_choices_index) { + // The start and end of the blob to rebuild. + int true_x = x; + int true_y = y; + // The fake merged fragment choice. + BLOB_CHOICE* merged_choice = NULL; + // Test for and combine fragments first. 
+ int fragment_pieces = expanded_fragment_lengths[ss_index]; + int old_choice_index = ss_index; + + if (fragment_pieces > 1) { + strncpy(unichar, word_ptr, *word_lengths_ptr); + unichar[*word_lengths_ptr] = '\0'; + merged_choice = rebuild_fragments(unichar, expanded_fragment_lengths, + old_choice_index, old_choices); + old_choice_index = -1; + } + while (fragment_pieces > 0) { + true_x = x; + // Move left to the previous blob. + y = x - 1; + x = y - search_state[ss_index--]; + --fragment_pieces; + } + word->best_state[char_choices_index] = true_y + 1 - true_x; BLOB_CHOICE_LIST *current_choices = join_blobs_and_classify( - blobs, seam_list, x, y, fx, ratings, old_choices); - // Combine character fragments. - if (expanded_fragment_lengths[i] > 1) { - // Start merging character fragments. - if (!merging_fragment) { - merging_fragment = true; - true_y = y; - fragment_pieces = expanded_fragment_lengths[i]; - rating = 0.0; - certainty = -MAX_FLOAT32; - strncpy(unichar, word_ptr, *word_lengths_ptr); - unichar[*word_lengths_ptr] = '\0'; - } - // Take into account the fact that we could have joined pieces - // since we first recorded the ending point of a fragment (true_y). - true_y -= y - x; - // Populate fragment with updated values and look for the - // fragment with the same values in current_choices. - // Update rating and certainty of the character being composed. 
- fragment_pieces--; - CHAR_FRAGMENT fragment; - fragment.set_all(unichar, fragment_pieces, - expanded_fragment_lengths[i]); - temp_it.set_to_list(current_choices); - for (temp_it.mark_cycle_pt(); !temp_it.cycled_list(); - temp_it.forward()) { - const CHAR_FRAGMENT *current_fragment = - getDict().getUnicharset().get_fragment(temp_it.data()->unichar_id()); - if (current_fragment && fragment.equals(current_fragment)) { - rating += temp_it.data()->rating(); - if (temp_it.data()->certainty() > certainty) { - certainty = temp_it.data()->certainty(); - } - break; - } - } - assert(!temp_it.cycled_list()); // make sure we found the fragment - // Free current_choices for the fragmented character. - delete current_choices; - - // Finish composing character from fragments. - if (fragment_pieces == 0) { - // Populate current_choices with the classification of - // the blob merged from blobs of each character fragment. - current_choices = join_blobs_and_classify(blobs, seam_list, x, - true_y, fx, ratings, NULL); - BLOB_CHOICE *merged_choice = - new BLOB_CHOICE(getDict().getUnicharset().unichar_to_id(unichar), - rating, certainty, 0, NO_PERM); - - // Insert merged_blob into current_choices, such that current_choices - // are still sorted in non-descending order by rating. - ASSERT_HOST(!current_choices->empty()); - temp_it.set_to_list(current_choices); - for (temp_it.mark_cycle_pt(); - !temp_it.cycled_list() && - merged_choice->rating() > temp_it.data()->rating(); - temp_it.forward()); - temp_it.add_before_stay_put(merged_choice); - - // Done merging this fragmented character. - merging_fragment = false; - } + word, true_x, true_y, old_choice_index, ratings, old_choices); + if (merged_choice != NULL) { + // Insert merged_blob into current_choices, such that current_choices + // are still sorted in non-descending order by rating. 
+ ASSERT_HOST(!current_choices->empty()); + BLOB_CHOICE_IT choice_it(current_choices); + for (choice_it.mark_cycle_pt(); !choice_it.cycled_list() && + merged_choice->rating() > choice_it.data()->rating(); + choice_it.forward()); + choice_it.add_before_stay_put(merged_choice); } - if (!merging_fragment) { - // Get rid of fragments in current_choices. - temp_it.set_to_list(current_choices); - for (temp_it.mark_cycle_pt(); !temp_it.cycled_list(); - temp_it.forward()) { - if (getDict().getUnicharset().get_fragment( - temp_it.data()->unichar_id())) { - delete temp_it.extract(); - } + // Get rid of fragments in current_choices. + BLOB_CHOICE_IT choice_it(current_choices); + for (choice_it.mark_cycle_pt(); !choice_it.cycled_list(); + choice_it.forward()) { + if (getDict().getUnicharset().get_fragment( + choice_it.data()->unichar_id())) { + delete choice_it.extract(); } - char_choices->set(current_choices, char_choices_index); - char_choices_index--; + } + char_choices->set(current_choices, char_choices_index); - // Update word_ptr and word_lengths_ptr. - if (word_lengths_ptr != NULL && word_ptr != NULL) { - word_lengths_ptr--; - word_ptr -= (*word_lengths_ptr); - } + // Update word_ptr and word_lengths_ptr. + if (word_lengths_ptr != NULL && word_ptr != NULL) { + word_lengths_ptr--; + word_ptr -= (*word_lengths_ptr); } - y = x - 1; - x = y - search_state[i]; } old_choices->delete_data_pointers(); delete old_choices; memfree(search_state); - return (char_choices); + return char_choices; } -} // namespace tesseract /** @@ -553,7 +468,6 @@ BLOB_CHOICE_LIST_VECTOR *Wordrec::rebuild_current_state( * each one has not already been visited. If not add it to the priority * queue. 
*/ -namespace tesseract { void Wordrec::expand_node(FLOAT32 worst_priority, CHUNKS_RECORD *chunks_record, SEARCH_RECORD *the_search) { @@ -576,13 +490,13 @@ void Wordrec::expand_node(FLOAT32 worst_priority, the_search->this_state->part1 = mask ^ old_state.part1; if (!hash_lookup (the_search->closed_states, the_search->this_state)) { FLOAT32 new_merit = prioritize_state(chunks_record, the_search); - if (segment_debug && permute_debug) { + if (wordrec_debug_level) { cprintf ("....checking state: %8.3f ", new_merit); print_state ("", the_search->this_state, num_joints); } if (new_merit < worst_priority) { - push_queue (the_search->open_states, the_search->this_state, - worst_priority, new_merit); + push_queue(the_search->open_states, the_search->this_state, + worst_priority, new_merit, wordrec_debug_level > 0); nodes_added++; } } @@ -601,38 +515,37 @@ void Wordrec::expand_node(FLOAT32 worst_priority, the_search->this_state->part2 = mask ^ old_state.part2; if (!hash_lookup (the_search->closed_states, the_search->this_state)) { FLOAT32 new_merit = prioritize_state(chunks_record, the_search); - if (segment_debug && permute_debug) { + if (wordrec_debug_level) { cprintf ("....checking state: %8.3f ", new_merit); print_state ("", the_search->this_state, num_joints); } if (new_merit < worst_priority) { push_queue(the_search->open_states, the_search->this_state, - worst_priority, new_merit); + worst_priority, new_merit, wordrec_debug_level > 0); nodes_added++; } } mask >>= 1; } } -} // namespace tesseract - /** * @name new_search * * Create and initialize a new search record. 
*/ -SEARCH_RECORD *new_search(CHUNKS_RECORD *chunks_record, - int num_joints, - WERD_CHOICE *best_choice, - WERD_CHOICE *raw_choice, - STATE *state) { +SEARCH_RECORD *Wordrec::new_search(CHUNKS_RECORD *chunks_record, + int num_joints, + BLOB_CHOICE_LIST_VECTOR *best_char_choices, + WERD_CHOICE *best_choice, + WERD_CHOICE *raw_choice, + STATE *state) { SEARCH_RECORD *this_search; this_search = (SEARCH_RECORD *) memalloc (sizeof (SEARCH_RECORD)); this_search->open_states = MakeHeap (wordrec_num_seg_states * 20); - this_search->closed_states = new_hash_table (); + this_search->closed_states = new_hash_table(); if (state) this_search->this_state = new_state (state); @@ -644,6 +557,7 @@ SEARCH_RECORD *new_search(CHUNKS_RECORD *chunks_record, this_search->best_choice = best_choice; this_search->raw_choice = raw_choice; + this_search->best_char_choices = best_char_choices; this_search->num_joints = num_joints; this_search->num_states = 0; @@ -653,17 +567,16 @@ SEARCH_RECORD *new_search(CHUNKS_RECORD *chunks_record, return (this_search); } - /** * @name pop_queue * * Get this state from the priority queue. It should be the state that * has the greatest urgency to be evaluated. */ -STATE *pop_queue(HEAP *queue) { +STATE *Wordrec::pop_queue(HEAP *queue) { HEAPENTRY entry; - if (GetTopOfHeap (queue, &entry) == OK) { + if (GetTopOfHeap (queue, &entry) == TESS_HEAP_OK) { #ifndef GRAPHICS_DISABLED if (wordrec_display_segmentations) { cprintf ("eval state: %8.3f ", entry.Key); @@ -677,23 +590,21 @@ STATE *pop_queue(HEAP *queue) { } } - /** * @name push_queue * * Add this state into the priority queue. 
*/ -void push_queue(HEAP *queue, STATE *state, FLOAT32 worst_priority, - FLOAT32 priority) { +void Wordrec::push_queue(HEAP *queue, STATE *state, FLOAT32 worst_priority, + FLOAT32 priority, bool debug) { HEAPENTRY entry; if (priority < worst_priority) { if (SizeOfHeap (queue) >= MaxSizeOfHeap(queue)) { - if (segment_debug) tprintf("Heap is Full\n"); + if (debug) tprintf("Heap is Full\n"); return; } - if (segment_debug) - tprintf("\tpushing %d node %f\n", num_pushed, priority); + if (debug) tprintf("\tpushing %d node %f\n", num_pushed, priority); entry.Data = (char *) new_state (state); num_pushed++; entry.Key = priority; @@ -701,14 +612,14 @@ void push_queue(HEAP *queue, STATE *state, FLOAT32 worst_priority, } } - /** * @name replace_char_widths * * Replace the value of the char_width field in the chunks_record with * the updated width measurements from the last_segmentation. */ -void replace_char_widths(CHUNKS_RECORD *chunks_record, SEARCH_STATE state) { +void Wordrec::replace_char_widths(CHUNKS_RECORD *chunks_record, + SEARCH_STATE state) { WIDTH_RECORD *width_record; int num_blobs; int i; @@ -729,52 +640,97 @@ void replace_char_widths(CHUNKS_RECORD *chunks_record, SEARCH_STATE state) { chunks_record->char_widths = width_record; } -namespace tesseract { +// Creates a fake blob choice from the combination of the given fragments. +// unichar is the class to be made from the combination, +// expanded_fragment_lengths[choice_index] is the number of fragments to use. +// old_choices[choice_index] has the classifier output for each fragment. +// choice index initially indexes the last fragment and should be decremented +// expanded_fragment_lengths[choice_index] times to get the earlier fragments. +// Guarantees to return something non-null, or abort! 
+BLOB_CHOICE* Wordrec::rebuild_fragments( + const char* unichar, + const char* expanded_fragment_lengths, + int choice_index, + BLOB_CHOICE_LIST_VECTOR *old_choices) { + float rating = 0.0f; + float certainty = 0.0f; + for (int fragment_pieces = expanded_fragment_lengths[choice_index] - 1; + fragment_pieces >= 0; --fragment_pieces, --choice_index) { + // Get a pointer to the classifier results from the old_choices. + BLOB_CHOICE_LIST *current_choices = old_choices->get(choice_index); + // Populate fragment with updated values and look for the + // fragment with the same values in current_choices. + // Update rating and certainty of the character being composed. + CHAR_FRAGMENT fragment; + fragment.set_all(unichar, fragment_pieces, + expanded_fragment_lengths[choice_index]); + BLOB_CHOICE_IT choice_it(current_choices); + for (choice_it.mark_cycle_pt(); !choice_it.cycled_list(); + choice_it.forward()) { + const CHAR_FRAGMENT *current_fragment = + getDict().getUnicharset().get_fragment( + choice_it.data()->unichar_id()); + if (current_fragment && fragment.equals(current_fragment)) { + rating += choice_it.data()->rating(); + if (choice_it.data()->certainty() < certainty) { + certainty = choice_it.data()->certainty(); + } + break; + } + } + if (choice_it.cycled_list()) { + print_ratings_list("Failure", current_choices, unicharset); + tprintf("Failed to find fragment %s at index=%d\n", + fragment.to_string().string(), choice_index); + } + ASSERT_HOST(!choice_it.cycled_list()); // Be sure we found the fragment. + } + return new BLOB_CHOICE(getDict().getUnicharset().unichar_to_id(unichar), + rating, certainty, -1, -1, 0); +} + +// Creates a joined copy of the blobs between x and y (inclusive) and +// inserts as the first blob at word->rebuild_word->blobs. +// Returns a deep copy of the classifier results for the blob. 
BLOB_CHOICE_LIST *Wordrec::join_blobs_and_classify( - TBLOB *blobs, SEAMS seam_list, - int x, int y, int fx, const MATRIX *ratings, + WERD_RES* word, int x, int y, int choice_index, MATRIX *ratings, BLOB_CHOICE_LIST_VECTOR *old_choices) { + // Join parts to make the blob if needed. + if (x != y) + join_pieces(word->chopped_word->blobs, word->seam_array, x, y); + TBLOB *blob = word->chopped_word->blobs; + for (int i = 0; i < x; i++) { + blob = blob->next; + } + // Deep copy this blob into the output word. + TBLOB* copy_blob = new TBLOB(*blob); + copy_blob->next = word->rebuild_word->blobs; + word->rebuild_word->blobs = copy_blob; + BLOB_CHOICE_LIST *choices = NULL; // First check to see if we can look up the classificaiton // in old_choices (if there is no need to merge blobs). - if (x == y && old_choices != NULL && ratings == NULL) { - choices = old_choices->get(x); - old_choices->set(NULL, x); - return choices; + if (choice_index >= 0 && old_choices != NULL) { + choices = old_choices->get(choice_index); + old_choices->set(NULL, choice_index); } - // The ratings matrix filled in by the associator will contain the most + // The ratings matrix filled in by the associator will contain the next most // up-to-date classification info. Thus we look up the classification there - // first, and only call classify_blob() if the classification is not found. - if (ratings != NULL) { - BLOB_CHOICE_LIST *choices_ptr = ratings->get(x, y); - if (choices_ptr != NOT_CLASSIFIED) { - choices = new BLOB_CHOICE_LIST(); - choices->deep_copy(choices_ptr, &BLOB_CHOICE::deep_copy); + // next, and only call classify_blob() if the classification is not found. 
+ if (choices == NULL && ratings != NULL) { + choices = ratings->get(x, y); + if (choices != NOT_CLASSIFIED) { + ratings->put(x, y, NULL); } } - if (x != y) { - join_pieces(blobs, seam_list, x, y); - - int blobindex; // current blob - TBLOB *p_blob; - TBLOB *blob; - TBLOB *next_blob; - for (blob = blobs, blobindex = 0, p_blob = NULL; - blobindex < x; blobindex++) { - p_blob = blob; - blob = blob->next; - } - while (blobindex < y) { - next_blob = blob->next; - blob->next = next_blob->next; - oldblob(next_blob); // junk dead blobs - blobindex++; - } - if (choices == NULL) { - choices = classify_blob(p_blob, blob, blob->next, - NULL, "rebuild", Orange); - } + // Get the choices for the blob by classification if necessary. + if (choices == NULL) { + choices = classify_blob(blob, "rebuild", Orange); } + // Undo join_pieces to restore the chopped word to its fully chopped state. + if (x != y) + break_pieces(blob, word->seam_array, x, y); return choices; } + } // namespace tesseract diff --git a/wordrec/bestfirst.h b/wordrec/bestfirst.h index 2fdad72f1b..7750958d86 100644 --- a/wordrec/bestfirst.h +++ b/wordrec/bestfirst.h @@ -38,7 +38,6 @@ #include "seam.h" #include "states.h" #include "stopper.h" -#include "tessclas.h" /*---------------------------------------------------------------------- T y p e s @@ -56,39 +55,16 @@ struct SEARCH_RECORD float segcost_bias; WERD_CHOICE *best_choice; WERD_CHOICE *raw_choice; + BLOB_CHOICE_LIST_VECTOR *best_char_choices; }; -/*---------------------------------------------------------------------- - V a r i a b l e s ----------------------------------------------------------------------*/ -extern INT_VAR_H(wordrec_num_seg_states, 30, "Segmentation states"); - -extern double_VAR_H(wordrec_worst_state, 1, "Worst segmentation state"); - /*---------------------------------------------------------------------- F u n c t i o n s ----------------------------------------------------------------------*/ int chunks_width(WIDTH_RECORD 
*width_record, int start_chunk, int last_chunk); int chunks_gap(WIDTH_RECORD *width_record, int last_chunk); - -void delete_search(SEARCH_RECORD *the_search); - -SEARCH_RECORD *new_search(CHUNKS_RECORD *chunks_record, - int num_joints, - WERD_CHOICE *best_choice, - WERD_CHOICE *raw_choice, - STATE *state); - STATE *pop_queue(HEAP *queue); - -void push_queue(HEAP *queue, STATE *state, - FLOAT32 worst_priority, FLOAT32 priority); - void replace_char_widths(CHUNKS_RECORD *chunks_record, SEARCH_STATE state); -// Joins blobs between index x and y, hides corresponding seams and -// returns classification of the resulting merged blob. -BLOB_CHOICE_LIST *join_blobs_and_classify(TBLOB *blobs, SEAMS seam_list, - int x, int y, int fx); #endif diff --git a/wordrec/chop.cpp b/wordrec/chop.cpp index 180762348b..bb9c3a8f31 100644 --- a/wordrec/chop.cpp +++ b/wordrec/chop.cpp @@ -30,10 +30,10 @@ #include "chop.h" #include "outlines.h" #include "olutil.h" -#include "tordvars.h" #include "callcpp.h" #include "plotedges.h" #include "const.h" +#include "wordrec.h" #include @@ -42,55 +42,7 @@ #include "config_auto.h" #endif -/*---------------------------------------------------------------------- - V a r i a b l e s -----------------------------------------------------------------------*/ -INT_VAR(chop_debug, 0, "Chop debug"); - -BOOL_VAR(chop_enable, 1, "Chop enable"); - -BOOL_VAR(chop_vertical_creep, 0, "Vertical creep"); - -INT_VAR(chop_split_length, 10000, "Split Length"); - -INT_VAR(chop_same_distance, 2, "Same distance"); - -INT_VAR(chop_min_outline_points, 6, "Min Number of Points on Outline"); - -INT_VAR(chop_inside_angle, -50, "Min Inside Angle Bend"); - -INT_VAR(chop_min_outline_area, 2000, "Min Outline Area"); - -double_VAR(chop_split_dist_knob, 0.5, "Split length adjustment"); - -double_VAR(chop_overlap_knob, 0.9, "Split overlap adjustment"); - -double_VAR(chop_center_knob, 0.15, "Split center adjustment"); - -double_VAR(chop_sharpness_knob, 0.06, "Split sharpness 
adjustment"); - -double_VAR(chop_width_change_knob, 5.0, "Width change adjustment"); - -double_VAR(chop_ok_split, 100.0, "OK split limit"); - -double_VAR(chop_good_split, 50.0, "Good split limit"); - -INT_VAR(chop_x_y_weight, 3, "X / Y length weight"); - -/*---------------------------------------------------------------------- - M a c r o s -----------------------------------------------------------------------*/ -/** - * @name length_product - * - * Compute the product of the length of two vectors. The - * vectors must be of type POINT. This product is used in computing - * angles. - */ -#define length_product(p1,p2) \ -(sqrt ((((float) (p1).x * (p1).x + (float) (p1).y * (p1).y) * \ - ((float) (p2).x * (p2).x + (float) (p2).y * (p2).y)))) - +namespace tesseract { /*---------------------------------------------------------------------- F u n c t i o n s ----------------------------------------------------------------------*/ @@ -100,8 +52,8 @@ INT_VAR(chop_x_y_weight, 3, "X / Y length weight"); * Assign a priority to and edge point that might be used as part of a * split. The argument should be of type EDGEPT. */ -PRIORITY point_priority(EDGEPT *point) { - return ((PRIORITY) point_bend_angle (point)); +PRIORITY Wordrec::point_priority(EDGEPT *point) { + return (PRIORITY)angle_change(point->prev, point, point->next); } @@ -110,7 +62,7 @@ PRIORITY point_priority(EDGEPT *point) { * * Add an edge point to a POINT_GROUP containg a list of other points. */ -void add_point_to_list(POINT_GROUP point_list, EDGEPT *point) { +void Wordrec::add_point_to_list(POINT_GROUP point_list, EDGEPT *point) { HEAPENTRY data; if (SizeOfHeap (point_list) < MAX_NUM_POINTS - 2) { @@ -132,7 +84,7 @@ void add_point_to_list(POINT_GROUP point_list, EDGEPT *point) { * Return the change in angle (degrees) of the line segments between * points one and two, and two and three. 
*/ -int angle_change(EDGEPT *point1, EDGEPT *point2, EDGEPT *point3) { +int Wordrec::angle_change(EDGEPT *point1, EDGEPT *point2, EDGEPT *point3) { VECTOR vector1; VECTOR vector2; @@ -145,7 +97,7 @@ int angle_change(EDGEPT *point1, EDGEPT *point2, EDGEPT *point3) { vector2.x = point3->pos.x - point2->pos.x; vector2.y = point3->pos.y - point2->pos.y; /* Use cross product */ - length = length_product (vector1, vector2); + length = (float)sqrt((float)LENGTH(vector1) * LENGTH(vector2)); if ((int) length == 0) return (0); angle = static_cast(floor(asin(CROSS (vector1, vector2) / @@ -168,7 +120,7 @@ int angle_change(EDGEPT *point1, EDGEPT *point2, EDGEPT *point3) { * Return TRUE if one of the pieces resulting from this split would * less than some number of edge points. */ -int is_little_chunk(EDGEPT *point1, EDGEPT *point2) { +int Wordrec::is_little_chunk(EDGEPT *point1, EDGEPT *point2) { EDGEPT *p = point1; /* Iterator */ int counter = 0; @@ -203,7 +155,7 @@ int is_little_chunk(EDGEPT *point1, EDGEPT *point2) { * * Test the area defined by a split accross this outline. */ -int is_small_area(EDGEPT *point1, EDGEPT *point2) { +int Wordrec::is_small_area(EDGEPT *point1, EDGEPT *point2) { EDGEPT *p = point1->next; /* Iterator */ int area = 0; TPOINT origin; @@ -227,9 +179,9 @@ int is_small_area(EDGEPT *point1, EDGEPT *point2) { * Choose the edge point that is closest to the critical point. This * point may not be exactly vertical from the critical point. */ -EDGEPT *pick_close_point(EDGEPT *critical_point, - EDGEPT *vertical_point, - int *best_dist) { +EDGEPT *Wordrec::pick_close_point(EDGEPT *critical_point, + EDGEPT *vertical_point, + int *best_dist) { EDGEPT *best_point = NULL; int this_distance; int found_better; @@ -265,7 +217,7 @@ EDGEPT *pick_close_point(EDGEPT *critical_point, * each of these points assign a priority. Sort these points using a * heap structure so that they can be visited in order. 
*/ -void prioritize_points(TESSLINE *outline, POINT_GROUP points) { +void Wordrec::prioritize_points(TESSLINE *outline, POINT_GROUP points) { EDGEPT *this_point; EDGEPT *local_min = NULL; EDGEPT *local_max = NULL; @@ -274,13 +226,6 @@ void prioritize_points(TESSLINE *outline, POINT_GROUP points) { local_min = this_point; local_max = this_point; do { - if (tord_debug_5) - cprintf ("(%3d,%3d) min=%3d, max=%3d, dir=%2d, ang=%2.0f\n", - this_point->pos.x, this_point->pos.y, - (local_min ? local_min->pos.y : 999), - (local_max ? local_max->pos.y : 999), - direction (this_point), point_priority (this_point)); - if (this_point->vec.y < 0) { /* Look for minima */ if (local_max != NULL) @@ -331,7 +276,7 @@ void prioritize_points(TESSLINE *outline, POINT_GROUP points) { * Return the new value for the local minimum. If a point is saved then * the local minimum is reset to NULL. */ -void new_min_point(EDGEPT *local_min, POINT_GROUP points) { +void Wordrec::new_min_point(EDGEPT *local_min, POINT_GROUP points) { inT16 dir; dir = direction (local_min); @@ -355,7 +300,7 @@ void new_min_point(EDGEPT *local_min, POINT_GROUP points) { * Return the new value for the local minimum. If a point is saved then * the local minimum is reset to NULL. */ -void new_max_point(EDGEPT *local_max, POINT_GROUP points) { +void Wordrec::new_max_point(EDGEPT *local_max, POINT_GROUP points) { inT16 dir; dir = direction (local_max); @@ -382,8 +327,8 @@ void new_max_point(EDGEPT *local_max, POINT_GROUP points) { * the split point. Ensure that the point being returned is not right * next to the split point. Return the edge point as a result. 
*/ -void vertical_projection_point(EDGEPT *split_point, EDGEPT *target_point, - EDGEPT** best_point) { +void Wordrec::vertical_projection_point(EDGEPT *split_point, EDGEPT *target_point, + EDGEPT** best_point) { EDGEPT *p; /* Iterator */ EDGEPT *this_edgept; /* Iterator */ int x = split_point->pos.x; /* X value of vertical */ @@ -416,3 +361,5 @@ void vertical_projection_point(EDGEPT *split_point, EDGEPT *target_point, } while (p != target_point); } + +} // namespace tesseract diff --git a/wordrec/chop.h b/wordrec/chop.h index d7447b32aa..7c159acf89 100644 --- a/wordrec/chop.h +++ b/wordrec/chop.h @@ -39,78 +39,4 @@ typedef HEAP *POINT_GROUP; typedef HEAP *SPLIT_GROUP; -/*---------------------------------------------------------------------- - V a r i a b l e s -----------------------------------------------------------------------*/ -extern INT_VAR_H(chop_debug, 0, "Chop debug"); - -extern BOOL_VAR_H(chop_enable, 1, "Chop enable"); - -extern BOOL_VAR_H(chop_vertical_creep, 0, "Vertical creep"); - -extern INT_VAR_H(chop_split_length, 10000, "Split Length"); - -extern INT_VAR_H(chop_same_distance, 2, "Same distance"); - -extern INT_VAR_H(chop_min_outline_points, 6, - "Min Number of Points on Outline"); - -extern INT_VAR_H(chop_inside_angle, -50, "Min Inside Angle Bend"); - -extern INT_VAR_H(chop_min_outline_area, 2000, "Min Outline Area"); - -extern double_VAR_H(chop_split_dist_knob, 0.5, "Split length adjustment"); - -extern double_VAR_H(chop_overlap_knob, 0.9, "Split overlap adjustment"); - -extern double_VAR_H(chop_center_knob, 0.15, "Split center adjustment"); - -extern double_VAR_H(chop_sharpness_knob, 0.06, "Split sharpness adjustment"); - -extern double_VAR_H(chop_width_change_knob, 5.0, "Width change adjustment"); - -extern double_VAR_H(chop_ok_split, 100.0, "OK split limit"); - -extern double_VAR_H(chop_good_split, 50.0, "Good split limit"); - -extern INT_VAR_H(chop_x_y_weight, 3, "X / Y length weight"); - 
-/*---------------------------------------------------------------------- - M a c r o s -----------------------------------------------------------------------*/ -/** - * @name point_bend_angle - * - * Measure the angle of bend at this edge point. The argument should - * be of type EDGEPT. - */ -#define point_bend_angle(point) \ -(angle_change ((point)->prev, (point), (point)->next)) - -/*---------------------------------------------------------------------- - F u n c t i o n s -----------------------------------------------------------------------*/ -PRIORITY point_priority(EDGEPT *point); - -void add_point_to_list(POINT_GROUP point_list, EDGEPT *point); - -int angle_change(EDGEPT *point1, EDGEPT *point2, EDGEPT *point3); - -int is_little_chunk(EDGEPT *point1, EDGEPT *point2); - -int is_small_area(EDGEPT *point1, EDGEPT *point2); - -EDGEPT *pick_close_point(EDGEPT *critical_point, - EDGEPT *vertical_point, - int *best_dist); - -void prioritize_points(TESSLINE *outline, POINT_GROUP points); - -void new_min_point(EDGEPT *local_min, POINT_GROUP points); - -void new_max_point(EDGEPT *local_max, POINT_GROUP points); - -void vertical_projection_point(EDGEPT *split_point, EDGEPT *target_point, - EDGEPT** best_point); - #endif diff --git a/wordrec/chopper.cpp b/wordrec/chopper.cpp index 7ece7bf037..5d27e55ddc 100644 --- a/wordrec/chopper.cpp +++ b/wordrec/chopper.cpp @@ -34,20 +34,18 @@ #include "assert.h" #include "associate.h" #include "callcpp.h" -#include "choices.h" #include "const.h" #include "findseam.h" #include "freelist.h" #include "globals.h" #include "makechop.h" -#include "metrics.h" #include "render.h" +#include "pageres.h" #include "permute.h" #include "pieces.h" #include "seam.h" #include "stopper.h" #include "structures.h" -#include "tordvars.h" #include "unicharset.h" #include "wordclass.h" #include "wordrec.h" @@ -57,14 +55,6 @@ #include "config_auto.h" #endif -INT_VAR (repair_unchopped_blobs, 1, "Fix blobs that aren't chopped"); - -//?extern int 
tessedit_dangambigs_chop; -double_VAR(tessedit_certainty_threshold, -2.25, "Good blob limit"); - -BOOL_VAR(fragments_guide_chopper, FALSE, - "Use information from fragments to guide chopping process"); - /*---------------------------------------------------------------------- M a c r o s ----------------------------------------------------------------------*/ @@ -110,8 +100,6 @@ void preserve_outline_tree(TESSLINE *srcline) { for (outline = srcline; outline != NULL; outline = outline->next) { preserve_outline (outline->loop); } - if (srcline != NULL && srcline->child != NULL) - preserve_outline_tree (srcline->child); } @@ -143,7 +131,7 @@ EDGEPT *restore_outline(EDGEPT *start) { deadpt->prev->next = srcpt; deadpt->prev->vec.x = srcpt->pos.x - deadpt->prev->pos.x; deadpt->prev->vec.y = srcpt->pos.y - deadpt->prev->pos.y; - oldedgept(deadpt); + delete deadpt; } else srcpt = srcpt->next; @@ -161,8 +149,6 @@ void restore_outline_tree(TESSLINE *srcline) { outline->loop = restore_outline (outline->loop); outline->start = outline->loop->pos; } - if (srcline != NULL && srcline->child != NULL) - restore_outline_tree (srcline->child); } @@ -172,7 +158,9 @@ void restore_outline_tree(TESSLINE *srcline) { * Try to split the this blob after this one. Check to make sure that * it was successful. 
*/ -SEAM *attempt_blob_chop(TWERD *word, inT32 blob_number, SEAMS seam_list) { +namespace tesseract { +SEAM *Wordrec::attempt_blob_chop(TWERD *word, inT32 blob_number, + bool italic_blob, SEAMS seam_list) { TBLOB *blob; TBLOB *other_blob; SEAM *seam; @@ -180,11 +168,6 @@ SEAM *attempt_blob_chop(TWERD *word, inT32 blob_number, SEAMS seam_list) { TBLOB *next_blob; inT16 x; - if (first_pass) - chops_attempted1++; - else - chops_attempted2++; - last_blob = NULL; blob = word->blobs; for (x = 0; x < blob_number; x++) { @@ -195,12 +178,19 @@ SEAM *attempt_blob_chop(TWERD *word, inT32 blob_number, SEAMS seam_list) { if (repair_unchopped_blobs) preserve_outline_tree (blob->outlines); - other_blob = newblob (); /* Make new blob */ + other_blob = new TBLOB; /* Make new blob */ other_blob->next = blob->next; other_blob->outlines = NULL; blob->next = other_blob; - seam = pick_good_seam (blob); + seam = pick_good_seam(blob); + if (seam == NULL && word->latin_script) { + // If the blob can simply be divided into outlines, then do that. 
+ TPOINT location; + if (divisible_blob(blob, italic_blob, &location)) { + seam = new_seam(0.0f, location, NULL, NULL, NULL); + } + } if (chop_debug) { if (seam != NULL) { print_seam ("Good seam picked=", seam); @@ -209,7 +199,7 @@ SEAM *attempt_blob_chop(TWERD *word, inT32 blob_number, SEAMS seam_list) { cprintf ("\n** no seam picked *** \n"); } if (seam) { - apply_seam(blob, other_blob, seam); + apply_seam(blob, other_blob, italic_blob, seam); } if ((seam == NULL) || @@ -233,9 +223,8 @@ SEAM *attempt_blob_chop(TWERD *word, inT32 blob_number, SEAMS seam_list) { cprintf ("\n** seam being removed ** \n"); } #endif - } - else { - oldblob(other_blob); + } else { + delete other_blob; } if (repair_unchopped_blobs) @@ -244,6 +233,7 @@ SEAM *attempt_blob_chop(TWERD *word, inT32 blob_number, SEAMS seam_list) { } return (seam); } +} // namespace tesseract /** @@ -296,12 +286,10 @@ namespace tesseract { */ bool Wordrec::improve_one_blob(TWERD *word, BLOB_CHOICE_LIST_VECTOR *char_choices, - int fx, inT32 *blob_number, SEAMS *seam_list, DANGERR *fixpt, bool split_next_to_fragment) { - TBLOB *pblob; TBLOB *blob; inT16 x = 0; float rating_ceiling = MAX_FLOAT32; @@ -317,7 +305,8 @@ bool Wordrec::improve_one_blob(TWERD *word, if (*blob_number == -1) return false; - seam = attempt_blob_chop (word, *blob_number, *seam_list); + // TODO(rays) it may eventually help to allow italic_blob to be true, + seam = attempt_blob_chop (word, *blob_number, false, *seam_list); if (seam != NULL) break; /* Must split null blobs */ @@ -326,10 +315,9 @@ bool Wordrec::improve_one_blob(TWERD *word, return false; answer_it.set_to_list(answer); rating_ceiling = answer_it.data()->rating(); // try a different blob - } while (!tord_blob_skip); + } while (true); /* Split OK */ - for (blob = word->blobs, pblob = NULL; x < *blob_number; x++) { - pblob = blob; + for (blob = word->blobs; x < *blob_number; x++) { blob = blob->next; } @@ -338,11 +326,10 @@ bool Wordrec::improve_one_blob(TWERD *word, delete 
char_choices->get(*blob_number); - answer = classify_blob(pblob, blob, blob->next, NULL, "improve 1:", Red); + answer = classify_blob(blob, "improve 1:", Red); char_choices->insert(answer, *blob_number); - answer = classify_blob(blob, blob->next, blob->next->next, NULL, - "improve 2:", Yellow); + answer = classify_blob(blob->next, "improve 2:", Yellow); char_choices->set(answer, *blob_number + 1); return true; @@ -365,13 +352,17 @@ void Wordrec::modify_blob_choice(BLOB_CHOICE_LIST *answer, chop_index_string[1] = '\0'; } UNICHAR_ID unichar_id = unicharset.unichar_to_id(chop_index_string); - ASSERT_HOST(unichar_id!=INVALID_UNICHAR_ID); + if (unichar_id == INVALID_UNICHAR_ID) { + // If the word is very long, we might exhaust the possibilities. + unichar_id = 1; + } BLOB_CHOICE_IT answer_it(answer); BLOB_CHOICE *modified_blob = new BLOB_CHOICE(unichar_id, - answer_it.data()->rating(), - answer_it.data()->certainty(), - answer_it.data()->config(), - answer_it.data()->script_id()); + answer_it.data()->rating(), + answer_it.data()->certainty(), + answer_it.data()->config(), + answer_it.data()->config2(), + answer_it.data()->script_id()); answer->clear(); answer_it.set_to_list(answer); answer_it.add_after_then_move(modified_blob); @@ -389,7 +380,6 @@ bool Wordrec::chop_one_blob(TWERD *word, inT32 *blob_number, SEAMS *seam_list, int *right_chop_index) { - TBLOB *pblob; TBLOB *blob; inT16 x = 0; float rating_ceiling = MAX_FLOAT32; @@ -400,13 +390,12 @@ bool Wordrec::chop_one_blob(TWERD *word, int left_chop_index = 0; do { - *blob_number = select_blob_to_split(*char_choices, rating_ceiling, - false); + *blob_number = select_blob_to_split(*char_choices, rating_ceiling, false); if (chop_debug) cprintf("blob_number = %d\n", *blob_number); if (*blob_number == -1) return false; - seam = attempt_blob_chop(word, *blob_number, *seam_list); + seam = attempt_blob_chop(word, *blob_number, true, *seam_list); if (seam != NULL) break; /* Must split null blobs */ @@ -415,28 +404,32 @@ bool 
Wordrec::chop_one_blob(TWERD *word, return false; answer_it.set_to_list(answer); rating_ceiling = answer_it.data()->rating(); // try a different blob - } while (!tord_blob_skip); + } while (true); /* Split OK */ - for (blob = word->blobs, pblob = NULL; x < *blob_number; x++) { - pblob = blob; + for (blob = word->blobs; x < *blob_number; x++) { blob = blob->next; } - *seam_list = - insert_seam(*seam_list, *blob_number, seam, blob, word->blobs); + if (chop_debug) { + tprintf("Chop made blob1:"); + blob->bounding_box().print(); + tprintf("and blob2:"); + blob->next->bounding_box().print(); + } + *seam_list = insert_seam(*seam_list, *blob_number, seam, blob, word->blobs); answer = char_choices->get(*blob_number); answer_it.set_to_list(answer); unichar_id = answer_it.data()->unichar_id(); + float rating = answer_it.data()->rating() / exp(1.0); left_chop_index = atoi(unicharset.id_to_unichar(unichar_id)); delete char_choices->get(*blob_number); // combine confidence w/ serial # - answer = classify_blob(pblob, blob, blob->next, NULL, "improve 1:", Red); + answer = fake_classify_blob(0, rating, -rating); modify_blob_choice(answer, left_chop_index); char_choices->insert(answer, *blob_number); - answer = classify_blob(blob, blob->next, blob->next->next, NULL, - "improve 2:", Yellow); + answer = fake_classify_blob(0, rating - 0.125f, -rating); modify_blob_choice(answer, ++*right_chop_index); char_choices->set(answer, *blob_number + 1); return true; @@ -495,112 +488,82 @@ namespace tesseract { * a good answer has been found or all the blobs have been chopped up * enough. Return the word level ratings. 
*/ -BLOB_CHOICE_LIST_VECTOR *Wordrec::chop_word_main(register TWERD *word, - int fx, - WERD_CHOICE *best_choice, - WERD_CHOICE *raw_choice, - BOOL8 tester, - BOOL8 trainer) { - TBLOB *pblob; +BLOB_CHOICE_LIST_VECTOR *Wordrec::chop_word_main(WERD_RES *word) { TBLOB *blob; int index; int did_chopping; - float rating_limit = 1000.0; STATE state; - SEAMS seam_list = start_seam_list(word->blobs); BLOB_CHOICE_LIST *match_result; MATRIX *ratings = NULL; DANGERR fixpt; /*dangerous ambig */ - inT32 state_count; //no of states inT32 bit_count; //no of bits - static STATE best_state; - static STATE chop_states[64]; //in between states - state_count = 0; - best_choice->make_bad(); - raw_choice->make_bad(); + set_denorm(&word->denorm); BLOB_CHOICE_LIST_VECTOR *char_choices = new BLOB_CHOICE_LIST_VECTOR(); + BLOB_CHOICE_LIST_VECTOR *best_char_choices = new BLOB_CHOICE_LIST_VECTOR(); did_chopping = 0; - for (blob = word->blobs, pblob = NULL, index = 0; + for (blob = word->chopped_word->blobs, index = 0; blob != NULL; blob = blob->next, index++) { - match_result = classify_blob(pblob, blob, blob->next, NULL, - "chop_word:", Green); + match_result = classify_blob(blob, "chop_word:", Green); if (match_result == NULL) cprintf("Null classifier output!\n"); *char_choices += match_result; - pblob = blob; } bit_count = index - 1; - getDict().permute_characters(*char_choices, rating_limit, - best_choice, raw_choice); set_n_ones(&state, char_choices->length() - 1); - if (matcher_fp != NULL) { - bits_in_states = bit_count; - chop_states[state_count] = state; - state_count++; - } + bool acceptable = false; bool replaced = false; - if (!getDict().AcceptableChoice(char_choices, best_choice, *raw_choice, - &fixpt, CHOPPER_CALLER, &replaced) || - ((tester || trainer) && - strcmp(word->correct, best_choice->unichar_string().string()))) { - if (replaced) update_blob_classifications(word, *char_choices); + bool best_choice_updated = + getDict().permute_characters(*char_choices, word->best_choice, + 
word->raw_choice); + if (best_choice_updated && + getDict().AcceptableChoice(char_choices, word->best_choice, &fixpt, + CHOPPER_CALLER, &replaced)) { + acceptable = true; + } + if (replaced) + update_blob_classifications(word->chopped_word, *char_choices); + CopyCharChoices(*char_choices, best_char_choices); + if (!acceptable) { // do more work to find a better choice did_chopping = 1; - if (first_pass) - words_chopped1++; - else - words_chopped2++; + bool best_choice_acceptable = false; if (chop_enable) improve_by_chopping(word, char_choices, - fx, &state, - best_choice, - raw_choice, - &seam_list, + best_char_choices, &fixpt, - chop_states, - &state_count); + &best_choice_acceptable); if (chop_debug) - print_seams ("Final seam list:", seam_list); + print_seams ("Final seam list:", word->seam_array); // The force_word_assoc is almost redundant to enable_assoc. However, // it is not conditioned on the dict behavior. For CJK, we need to force // the associator to be invoked. When we figure out the exact behavior // of dict on CJK, we can remove the flag if it turns out to be redundant. 
- if ((wordrec_enable_assoc && - !getDict().AcceptableChoice(char_choices, best_choice, *raw_choice, - NULL, CHOPPER_CALLER, &replaced)) || - force_word_assoc || - ((tester || trainer) && - strcmp(word->correct, best_choice->unichar_string().string()))) { - ratings = word_associator (word->blobs, seam_list, &state, fx, - best_choice, raw_choice, word->correct, - /*0, */ &fixpt, &best_state); + if ((wordrec_enable_assoc && !best_choice_acceptable) || force_word_assoc) { + ratings = word_associator(word, &state, best_char_choices, + &fixpt, &state); } - bits_in_states = bit_count + state_count - 1; } - if (replaced) update_blob_classifications(word, *char_choices); - - char_choices = - rebuild_current_state(word->blobs, seam_list, &state, char_choices, fx, - (did_chopping || tester || trainer), *best_choice, - ratings); - + best_char_choices = rebuild_current_state(word, &state, best_char_choices, + ratings); if (ratings != NULL) { + if (wordrec_debug_level > 0) { + tprintf("Final Ratings Matrix:\n"); + ratings->print(getDict().getUnicharset()); + } ratings->delete_matrix_pointers(); delete ratings; } - if (seam_list != NULL) - free_seam_list(seam_list); - if (matcher_fp != NULL) { - best_state = state; - } getDict().FilterWordChoices(); - return char_choices; + char_choices->delete_data_pointers(); + delete char_choices; + + return best_char_choices; } @@ -614,36 +577,30 @@ BLOB_CHOICE_LIST_VECTOR *Wordrec::chop_word_main(register TWERD *word, * all the splitting has been accomplished all the ratings memory is * reclaimed. 
*/ -void Wordrec::improve_by_chopping(register TWERD *word, +void Wordrec::improve_by_chopping(WERD_RES *word, BLOB_CHOICE_LIST_VECTOR *char_choices, - int fx, STATE *best_state, - WERD_CHOICE *best_choice, - WERD_CHOICE *raw_choice, - SEAMS *seam_list, + BLOB_CHOICE_LIST_VECTOR *best_char_choices, DANGERR *fixpt, - STATE *chop_states, - inT32 *state_count) { + bool *best_choice_acceptable) { inT32 blob_number; - inT32 index; //to states float old_best; int fixpt_valid = 1; - static inT32 old_count; //from pass1 - bool replaced = false; + bool updated_best_choice = false; - do { // improvement loop - if (replaced) update_blob_classifications(word, *char_choices); - if (!fixpt_valid) - fixpt->index = -1; - old_best = best_choice->rating(); - if (improve_one_blob(word, char_choices, fx, &blob_number, seam_list, + while (1) { // improvement loop + if (!fixpt_valid) fixpt->clear(); + old_best = word->best_choice->rating(); + if (improve_one_blob(word->chopped_word, char_choices, + &blob_number, &word->seam_array, fixpt, (fragments_guide_chopper && - best_choice->fragment_mark()))) { + word->best_choice->fragment_mark()))) { getDict().LogNewSplit(blob_number); - getDict().permute_characters(*char_choices, best_choice->rating(), - best_choice, raw_choice); + updated_best_choice = + getDict().permute_characters(*char_choices, word->best_choice, + word->raw_choice); - if (old_best > best_choice->rating()) { + if (old_best > word->best_choice->rating()) { set_n_ones(best_state, char_choices->length() - 1); fixpt_valid = 1; } @@ -651,32 +608,30 @@ void Wordrec::improve_by_chopping(register TWERD *word, insert_new_chunk(best_state, blob_number, char_choices->length() - 2); fixpt_valid = 0; } - if (*state_count > 0) { - for (index = 0; index < *state_count; index++) { - insert_new_chunk(&chop_states[index], blob_number, - char_choices->length() - 2); - } - set_n_ones(&chop_states[index], char_choices->length() - 1); - (*state_count)++; - } if (chop_debug) - print_state ("best 
state = ", - best_state, count_blobs (word->blobs) - 1); - if (first_pass) - chops_performed1++; - else - chops_performed2++; + print_state("best state = ", + best_state, count_blobs(word->chopped_word->blobs) - 1); } else { break; } - } while (!getDict().AcceptableChoice(char_choices, best_choice, *raw_choice, - fixpt, CHOPPER_CALLER, &replaced) && - !tord_blob_skip && char_choices->length() < MAX_NUM_CHUNKS); - if (replaced) update_blob_classifications(word, *char_choices); - old_count = *state_count; - if (!fixpt_valid) - fixpt->index = -1; + + // Check if we should break from the loop. + bool done = false; + bool replaced = false; + if ((updated_best_choice && + (*best_choice_acceptable = + getDict().AcceptableChoice(char_choices, word->best_choice, + fixpt, CHOPPER_CALLER, &replaced))) || + char_choices->length() >= MAX_NUM_CHUNKS) { + done = true; + } + if (replaced) update_blob_classifications(word->chopped_word, + *char_choices); + if (updated_best_choice) CopyCharChoices(*char_choices, best_char_choices); + if (done) break; + } + if (!fixpt_valid) fixpt->clear(); } @@ -778,37 +733,6 @@ inT16 Wordrec::select_blob_to_split(const BLOB_CHOICE_LIST_VECTOR &char_choices, } // namespace tesseract -/********************************************************************** - * start_seam_list - * - * Initialize a list of seams that match the original number of blobs - * present in the starting segmentation. Each of the seams created - * by this routine have location information only. 
- **********************************************************************/ -SEAMS start_seam_list(TBLOB *blobs) { - TBLOB *blob; - SEAMS seam_list; - TPOINT topleft; - TPOINT botright; - int location; - /* Seam slot per char */ - seam_list = new_seam_list (); - - for (blob = blobs; blob->next != NULL; blob = blob->next) { - - blob_bounding_box(blob, &topleft, &botright); - location = botright.x; - blob_bounding_box (blob->next, &topleft, &botright); - location += topleft.x; - location /= 2; - - seam_list = add_seam (seam_list, - new_seam (0.0, location, NULL, NULL, NULL)); - } - - return (seam_list); -} - /********************************************************************** * total_containment @@ -837,13 +761,9 @@ inT16 total_containment(TBLOB *blob1, TBLOB *blob2) { * until a good answer is found or all the possibilities have been tried. **********************************************************************/ namespace tesseract { -MATRIX *Wordrec::word_associator(TBLOB *blobs, - SEAMS seams, +MATRIX *Wordrec::word_associator(WERD_RES *word, STATE *state, - int fxid, - WERD_CHOICE *best_choice, - WERD_CHOICE *raw_choice, - char *correct, + BLOB_CHOICE_LIST_VECTOR *best_char_choices, DANGERR *fixpt, STATE *best_state) { CHUNKS_RECORD chunks_record; @@ -852,18 +772,18 @@ MATRIX *Wordrec::word_associator(TBLOB *blobs, int num_chunks; BLOB_CHOICE_IT blob_choice_it; - num_chunks = array_count (seams) + 1; + num_chunks = array_count(word->seam_array) + 1; + TBLOB* blobs = word->chopped_word->blobs; chunks_record.chunks = blobs; - chunks_record.splits = seams; + chunks_record.splits = word->seam_array; chunks_record.ratings = record_piece_ratings (blobs); chunks_record.char_widths = blobs_widths (blobs); chunks_record.chunk_widths = blobs_widths (blobs); - chunks_record.fx = fxid; /* Save chunk weights */ for (x = 0; x < num_chunks; x++) { BLOB_CHOICE_LIST* choices = get_piece_rating(chunks_record.ratings, - blobs, seams, x, x); + blobs, word->seam_array, x, x); 
blob_choice_it.set_to_list(choices); //This is done by Jetsoft. Divide by zero is possible. if (blob_choice_it.data()->certainty() == 0) { @@ -879,17 +799,16 @@ MATRIX *Wordrec::word_associator(TBLOB *blobs, if (chop_debug) chunks_record.ratings->print(getDict().getUnicharset()); - best_first_search(&chunks_record, - best_choice, - raw_choice, - state, - fixpt, - best_state); + if (enable_new_segsearch) { + SegSearch(&chunks_record, word->best_choice, + best_char_choices, word->raw_choice, state); + } else { + best_first_search(&chunks_record, best_char_choices, word, + state, fixpt, best_state); + } free_widths (chunks_record.chunk_widths); free_widths (chunks_record.char_widths); return chunks_record.ratings; } } // namespace tesseract - - diff --git a/wordrec/chopper.h b/wordrec/chopper.h index 921ea9eb90..1ff1a13517 100644 --- a/wordrec/chopper.h +++ b/wordrec/chopper.h @@ -26,26 +26,13 @@ #ifndef CHOPPER_H #define CHOPPER_H -/*---------------------------------------------------------------------- - I n c l u d e s -----------------------------------------------------------------------*/ #include "cutil.h" #include "matrix.h" #include "seam.h" #include "states.h" #include "stopper.h" -/*--------------------------------------------------------------------------- - Variables ----------------------------------------------------------------------------*/ -extern BOOL_VAR_H (fragments_guide_chopper, FALSE, - "Use information from fragments to guide chopping process"); - - -/*---------------------------------------------------------------------- - F u n c t i o n s -----------------------------------------------------------------------*/ void preserve_outline(EDGEPT *start); void preserve_outline_tree(TESSLINE *srcline); @@ -54,15 +41,11 @@ EDGEPT *restore_outline(EDGEPT *start); void restore_outline_tree(TESSLINE *srcline); -SEAM *attempt_blob_chop(TWERD *word, inT32 blob_number, SEAMS seam_list); - int any_shared_split_points(SEAMS seam_list, SEAM *seam); int 
check_blob(TBLOB *blob); inT16 check_seam_order(TBLOB *blob, SEAM *seam); -SEAMS start_seam_list(TBLOB *blobs); - inT16 total_containment(TBLOB *blob1, TBLOB *blob2); #endif diff --git a/wordrec/closed.cpp b/wordrec/closed.cpp index 0fe14723b4..86c684d893 100644 --- a/wordrec/closed.cpp +++ b/wordrec/closed.cpp @@ -29,7 +29,6 @@ #include "closed.h" #include "cutil.h" #include "callcpp.h" -//#include #ifdef __UNIX__ #include #endif @@ -38,7 +37,6 @@ V a r i a b l e s ----------------------------------------------------------------------*/ #define TABLE_SIZE 2000 -HASH_TABLE global_hash = NULL; /*---------------------------------------------------------------------- F u n c t i o n s @@ -49,7 +47,7 @@ HASH_TABLE global_hash = NULL; * Look in the hash table for a particular value. If it is not there * then add it. */ -int hash_add(HASH_TABLE state_table, STATE *state) { +int hash_add(HASH_TABLE state_table, STATE *state) { int x; int i = 0; int table_limit = TABLE_SIZE; @@ -73,8 +71,8 @@ int hash_add(HASH_TABLE state_table, STATE *state) { x = 0; } cprintf("warning: hash table is full"); - - abort(); + + abort(); return 0; } @@ -85,7 +83,7 @@ int hash_add(HASH_TABLE state_table, STATE *state) { * Look in the hash table for a particular value. If the value is there * then return TRUE, FALSE otherwise. */ -int hash_lookup(HASH_TABLE state_table, STATE *state) { +int hash_lookup(HASH_TABLE state_table, STATE *state) { int x; int i = 0; int table_limit = TABLE_SIZE; @@ -109,7 +107,7 @@ int hash_lookup(HASH_TABLE state_table, STATE *state) { } cprintf ("warning: fell off end of hash table (%x) %x\n", state->part2, state->part2 % table_limit); - abort(); + abort(); return 0; } @@ -119,15 +117,11 @@ int hash_lookup(HASH_TABLE state_table, STATE *state) { * * Create and initialize a hash table. 
*/ -HASH_TABLE new_hash_table() { +HASH_TABLE new_hash_table() { HASH_TABLE ht; int x; - if (global_hash == NULL) - ht = (HASH_TABLE) memalloc (TABLE_SIZE * sizeof (STATE)); - else - ht = global_hash; - + ht = (HASH_TABLE) memalloc (TABLE_SIZE * sizeof (STATE)); for (x = 0; x < TABLE_SIZE; x++) { ht[x].part1 = NO_STATE; ht[x].part2 = NO_STATE; diff --git a/wordrec/closed.h b/wordrec/closed.h index d5ed68d972..97851dc2af 100644 --- a/wordrec/closed.h +++ b/wordrec/closed.h @@ -26,40 +26,18 @@ #ifndef CLOSED_H #define CLOSED_H -/*---------------------------------------------------------------------- - I n c l u d e s -----------------------------------------------------------------------*/ #include #include "states.h" -/*---------------------------------------------------------------------- - T y p e s -----------------------------------------------------------------------*/ typedef STATE *HASH_TABLE; #define NO_STATE ~0 -/*---------------------------------------------------------------------- - V a r i a b l e s -----------------------------------------------------------------------*/ -extern HASH_TABLE global_hash; +#define free_hash_table(table) memfree(table) -/*---------------------------------------------------------------------- - M a c r o s -----------------------------------------------------------------------*/ -/** - * free_hash_table - * - * Free the memory taken by a state variable. 
- */ -#define free_hash_table(table) \ - global_hash = table +int hash_add(HASH_TABLE state_table, STATE *state); -/*--------------------------------------------------------------------- - F u n c t i o n s -----------------------------------------------------------------------*/ -int hash_add(HASH_TABLE state_table, STATE *state); +int hash_lookup(HASH_TABLE state_table, STATE *state); -int hash_lookup(HASH_TABLE state_table, STATE *state); +HASH_TABLE new_hash_table(); -HASH_TABLE new_hash_table(); #endif diff --git a/wordrec/drawfx.cpp b/wordrec/drawfx.cpp index 97933aa9b3..18d595c36f 100644 --- a/wordrec/drawfx.cpp +++ b/wordrec/drawfx.cpp @@ -35,7 +35,7 @@ #define FXDEMOYSIZE 256 #define BLN_MAX 512 //max coord for bln #define WERDWIDTH (BLN_MAX*20) -#define DECENT_WERD_WIDTH (5*bln_x_height) +#define DECENT_WERD_WIDTH (5*kBlnXHeight) //title of window #define DEBUG_WIN_NAME "FXDebug" #define DEBUG_XPOS 0 @@ -75,8 +75,9 @@ void create_fx_win() { //make features win void clear_fx_win() { //make features win fx_win->Clear(); fx_win->Pen(64,64,64); - fx_win->Line(-WERDWIDTH, bln_baseline_offset, WERDWIDTH, bln_baseline_offset); - fx_win->Line(-WERDWIDTH, bln_x_height + bln_baseline_offset, WERDWIDTH, bln_x_height + bln_baseline_offset); + fx_win->Line(-WERDWIDTH, kBlnBaselineOffset, WERDWIDTH, kBlnBaselineOffset); + fx_win->Line(-WERDWIDTH, kBlnXHeight + kBlnBaselineOffset, WERDWIDTH, + kBlnXHeight + kBlnBaselineOffset); } #endif // GRAPHICS_DISABLED diff --git a/wordrec/drawfx.h b/wordrec/drawfx.h index e9c727c407..d35537c482 100644 --- a/wordrec/drawfx.h +++ b/wordrec/drawfx.h @@ -20,7 +20,7 @@ #ifndef DRAWFX_H #define DRAWFX_H -#include "varable.h" +#include "params.h" #include "notdll.h" #include "scrollview.h" diff --git a/wordrec/findseam.cpp b/wordrec/findseam.cpp index abbdaa3eff..d691c66558 100644 --- a/wordrec/findseam.cpp +++ b/wordrec/findseam.cpp @@ -32,6 +32,7 @@ #include "outlines.h" #include "freelist.h" #include "seam.h" +#include "wordrec.h" // 
Include automatically generated configuration file if running autoconf. #ifdef HAVE_CONFIG_H @@ -119,7 +120,7 @@ if (seam)\ **********************************************************************/ #define pop_next_seam(seams,seam,priority) \ -(HeapPop (seams,&priority,&seam) == OK) \ +(HeapPop (seams,&priority,&seam) == TESS_HEAP_OK) \ /********************************************************************** @@ -137,12 +138,15 @@ if (seam)\ /*---------------------------------------------------------------------- F u n c t i o n s ----------------------------------------------------------------------*/ +namespace tesseract { + /********************************************************************** * junk_worst_seam * * Delete the worst seam from the queue because it is full. **********************************************************************/ -void junk_worst_seam(SEAM_QUEUE seams, SEAM *new_seam, float new_priority) { +void Wordrec::junk_worst_seam(SEAM_QUEUE seams, SEAM *new_seam, + float new_priority) { SEAM *seam; float priority; @@ -171,12 +175,12 @@ void junk_worst_seam(SEAM_QUEUE seams, SEAM *new_seam, float new_priority) { * a split of NULL, then no further splits can be supplied by the * caller. 
**********************************************************************/ -void choose_best_seam(SEAM_QUEUE seam_queue, - SEAM_PILE *seam_pile, - SPLIT *split, - PRIORITY priority, - SEAM **seam_result, - TBLOB *blob) { +void Wordrec::choose_best_seam(SEAM_QUEUE seam_queue, + SEAM_PILE *seam_pile, + SPLIT *split, + PRIORITY priority, + SEAM **seam_result, + TBLOB *blob) { SEAM *seam; TPOINT topleft; TPOINT botright; @@ -185,9 +189,10 @@ void choose_best_seam(SEAM_QUEUE seam_queue, /* Add seam of split */ my_priority = priority; if (split != NULL) { - seam = new_seam (my_priority, - (split->point1->pos.x + split->point1->pos.x) / 2, - split, NULL, NULL); + TPOINT split_point = split->point1->pos; + split_point += split->point2->pos; + split_point /= 2; + seam = new_seam(my_priority, split_point, split, NULL, NULL); if (chop_debug > 1) print_seam ("Partial priority ", seam); add_seam_to_queue (seam_queue, seam, (float) my_priority); @@ -253,7 +258,8 @@ void choose_best_seam(SEAM_QUEUE seam_queue, * from this union should be added to the seam queue. The return value * tells whether or not any additional seams were added to the queue. **********************************************************************/ -void combine_seam(SEAM_QUEUE seam_queue, SEAM_PILE seam_pile, SEAM *seam) { +void Wordrec::combine_seam(SEAM_QUEUE seam_queue, SEAM_PILE seam_pile, + SEAM *seam) { register inT16 x; register inT16 dist; inT16 bottom1, top1; @@ -284,7 +290,7 @@ void combine_seam(SEAM_QUEUE seam_queue, SEAM_PILE seam_pile, SEAM *seam) { } array_loop(seam_pile, x) { this_one = (SEAM *) array_value (seam_pile, x); - dist = seam->location - this_one->location; + dist = seam->location.x - this_one->location.x; if (-SPLIT_CLOSENESS < dist && dist < SPLIT_CLOSENESS && seam->priority + this_one->priority < chop_ok_split) { @@ -336,7 +342,7 @@ void combine_seam(SEAM_QUEUE seam_queue, SEAM_PILE seam_pile, SEAM *seam) { * Constrain this split to obey certain rules. 
It must not cross any * inner outline. It must not cut off a small chunk of the outline. **********************************************************************/ -inT16 constrained_split(SPLIT *split, TBLOB *blob) { +inT16 Wordrec::constrained_split(SPLIT *split, TBLOB *blob) { TESSLINE *outline; if (is_little_chunk (split->point1, split->point2)) @@ -358,7 +364,7 @@ inT16 constrained_split(SPLIT *split, TBLOB *blob) { * Delete the seams that are held in the seam pile. Destroy the splits * that are referenced by these seams. **********************************************************************/ -void delete_seam_pile(SEAM_PILE seam_pile) { +void Wordrec::delete_seam_pile(SEAM_PILE seam_pile) { inT16 x; array_loop(seam_pile, x) { @@ -367,14 +373,13 @@ void delete_seam_pile(SEAM_PILE seam_pile) { array_free(seam_pile); } - /********************************************************************** * pick_good_seam * * Find and return a good seam that will split this blob into two pieces. * Work from the outlines provided. **********************************************************************/ -SEAM *pick_good_seam(TBLOB *blob) { +SEAM *Wordrec::pick_good_seam(TBLOB *blob) { SEAM_QUEUE seam_queue; SEAM_PILE seam_pile; POINT_GROUP point_heap; @@ -396,7 +401,7 @@ SEAM *pick_good_seam(TBLOB *blob) { for (outline = blob->outlines; outline; outline = outline->next) prioritize_points(outline, point_heap); - while (HeapPop (point_heap, &priority, &edge) == OK) { + while (HeapPop (point_heap, &priority, &edge) == TESS_HEAP_OK) { if (num_points < MAX_NUM_POINTS) points[num_points++] = (EDGEPT *) edge; } @@ -453,7 +458,7 @@ SEAM *pick_good_seam(TBLOB *blob) { * * Assign a full priority value to the seam. 
**********************************************************************/ -PRIORITY seam_priority(SEAM *seam, inT16 xmin, inT16 xmax) { +PRIORITY Wordrec::seam_priority(SEAM *seam, inT16 xmin, inT16 xmax) { PRIORITY priority; if (seam->split1 == NULL) @@ -491,11 +496,12 @@ PRIORITY seam_priority(SEAM *seam, inT16 xmin, inT16 xmax) { * together. See if any of them are suitable for use. Use a seam * queue and seam pile that have already been initialized and used. **********************************************************************/ -void -try_point_pairs (EDGEPT * points[MAX_NUM_POINTS], -inT16 num_points, -SEAM_QUEUE seam_queue, -SEAM_PILE * seam_pile, SEAM ** seam, TBLOB * blob) { +void Wordrec::try_point_pairs (EDGEPT * points[MAX_NUM_POINTS], + inT16 num_points, + SEAM_QUEUE seam_queue, + SEAM_PILE * seam_pile, + SEAM ** seam, + TBLOB * blob) { inT16 x; inT16 y; SPLIT *split; @@ -532,11 +538,12 @@ SEAM_PILE * seam_pile, SEAM ** seam, TBLOB * blob) { * if any of them are suitable for use. Use a seam queue and seam pile * that have already been initialized and used. 
**********************************************************************/ -void -try_vertical_splits (EDGEPT * points[MAX_NUM_POINTS], -inT16 num_points, -SEAM_QUEUE seam_queue, -SEAM_PILE * seam_pile, SEAM ** seam, TBLOB * blob) { +void Wordrec::try_vertical_splits (EDGEPT * points[MAX_NUM_POINTS], + inT16 num_points, + SEAM_QUEUE seam_queue, + SEAM_PILE * seam_pile, + SEAM ** seam, + TBLOB * blob) { EDGEPT *vertical_point = NULL; SPLIT *split; inT16 x; @@ -567,3 +574,5 @@ SEAM_PILE * seam_pile, SEAM ** seam, TBLOB * blob) { } } } + +} diff --git a/wordrec/findseam.h b/wordrec/findseam.h index a4af64a272..d92121de0e 100644 --- a/wordrec/findseam.h +++ b/wordrec/findseam.h @@ -35,35 +35,5 @@ typedef HEAP *SEAM_QUEUE; typedef ARRAY SEAM_PILE; -/*---------------------------------------------------------------------- - F u n c t i o n s -----------------------------------------------------------------------*/ -void junk_worst_seam(SEAM_QUEUE seams, SEAM *new_seam, float new_priority); - -void choose_best_seam(SEAM_QUEUE seam_queue, - SEAM_PILE *seam_pile, - SPLIT *split, - PRIORITY priority, - SEAM **seam_result, - TBLOB *blob); - -void combine_seam(SEAM_QUEUE seam_queue, SEAM_PILE seam_pile, SEAM *seam); - -inT16 constrained_split(SPLIT *split, TBLOB *blob); - -void delete_seam_pile(SEAM_PILE seam_pile); - -SEAM *pick_good_seam(TBLOB *blob); - -PRIORITY seam_priority(SEAM *seam, inT16 xmin, inT16 xmax); - -void try_point_pairs (EDGEPT * points[MAX_NUM_POINTS], -inT16 num_points, -SEAM_QUEUE seam_queue, -SEAM_PILE * seam_pile, SEAM ** seam, TBLOB * blob); -void try_vertical_splits (EDGEPT * points[MAX_NUM_POINTS], -inT16 num_points, -SEAM_QUEUE seam_queue, -SEAM_PILE * seam_pile, SEAM ** seam, TBLOB * blob); #endif diff --git a/wordrec/gradechop.cpp b/wordrec/gradechop.cpp index 3cc659139a..e3410b4b2b 100644 --- a/wordrec/gradechop.cpp +++ b/wordrec/gradechop.cpp @@ -26,6 +26,7 @@ I n c l u d e s ----------------------------------------------------------------------*/ 
#include "gradechop.h" +#include "wordrec.h" #include "olutil.h" #include "chop.h" #include "ndminx.h" @@ -58,6 +59,8 @@ while (this_point != point2 && this_point != point1) \ +namespace tesseract { + /*---------------------------------------------------------------------- F u n c t i o n s ----------------------------------------------------------------------*/ @@ -68,7 +71,7 @@ * Part of the priority has already been calculated so just return the * additional amount for the bounding box type information. **********************************************************************/ -PRIORITY full_split_priority(SPLIT *split, inT16 xmin, inT16 xmax) { +PRIORITY Wordrec::full_split_priority(SPLIT *split, inT16 xmin, inT16 xmax) { BOUNDS_RECT rect; set_outline_bounds (split->point1, split->point2, rect); @@ -89,7 +92,7 @@ PRIORITY full_split_priority(SPLIT *split, inT16 xmin, inT16 xmax) { * 0 = "perfect" * 100 = "no way jay" **********************************************************************/ -PRIORITY grade_center_of_blob(register BOUNDS_RECT rect) { +PRIORITY Wordrec::grade_center_of_blob(register BOUNDS_RECT rect) { register PRIORITY grade; grade = (rect[1] - rect[0]) - (rect[3] - rect[2]); @@ -109,7 +112,7 @@ PRIORITY grade_center_of_blob(register BOUNDS_RECT rect) { * 0 = "perfect" * 100 = "no way jay" **********************************************************************/ -PRIORITY grade_overlap(register BOUNDS_RECT rect) { +PRIORITY Wordrec::grade_overlap(register BOUNDS_RECT rect) { register PRIORITY grade; register inT16 width1; register inT16 width2; @@ -139,7 +142,7 @@ PRIORITY grade_overlap(register BOUNDS_RECT rect) { * 0 = "perfect" * 100 = "no way jay" **********************************************************************/ -PRIORITY grade_split_length(register SPLIT *split) { +PRIORITY Wordrec::grade_split_length(register SPLIT *split) { register PRIORITY grade; register float split_length; @@ -162,7 +165,7 @@ PRIORITY grade_split_length(register SPLIT 
*split) { * 0 = "perfect" * 100 = "no way jay" **********************************************************************/ -PRIORITY grade_sharpness(register SPLIT *split) { +PRIORITY Wordrec::grade_sharpness(register SPLIT *split) { register PRIORITY grade; grade = point_priority (split->point1) + point_priority (split->point2); @@ -185,7 +188,7 @@ PRIORITY grade_sharpness(register SPLIT *split) { * 0 = "perfect" * 100 = "no way jay" **********************************************************************/ -PRIORITY grade_width_change(register BOUNDS_RECT rect) { +PRIORITY Wordrec::grade_width_change(register BOUNDS_RECT rect) { register PRIORITY grade; register inT32 width1; register inT32 width2; @@ -207,9 +210,9 @@ PRIORITY grade_width_change(register BOUNDS_RECT rect) { * * Set up the limits for the x coordinate of the outline. **********************************************************************/ -void set_outline_bounds(register EDGEPT *point1, - register EDGEPT *point2, - BOUNDS_RECT rect) { +void Wordrec::set_outline_bounds(register EDGEPT *point1, + register EDGEPT *point2, + BOUNDS_RECT rect) { register EDGEPT *this_point; register inT16 x_min; register inT16 x_max; @@ -224,3 +227,5 @@ void set_outline_bounds(register EDGEPT *point1, rect[2] = x_min; rect[3] = x_max; } + +} // namespace tesseract diff --git a/wordrec/gradechop.h b/wordrec/gradechop.h index faec2daa64..469a140b3d 100644 --- a/wordrec/gradechop.h +++ b/wordrec/gradechop.h @@ -66,22 +66,4 @@ typedef inT16 BOUNDS_RECT[4]; outline->botright.y <= MAX (split->point1->pos.y,split->point2->pos.y) && \ outline->topleft.y >= MIN (split->point1->pos.y,split->point2->pos.y)) -/*---------------------------------------------------------------------- - F u n c t i o n s -----------------------------------------------------------------------*/ -PRIORITY full_split_priority(SPLIT *split, inT16 xmin, inT16 xmax); - -PRIORITY grade_center_of_blob(register BOUNDS_RECT rect); - -PRIORITY grade_overlap(register 
BOUNDS_RECT rect); - -PRIORITY grade_split_length(register SPLIT *split); - -PRIORITY grade_sharpness(register SPLIT *split); - -PRIORITY grade_width_change(register BOUNDS_RECT rect); - -void set_outline_bounds(register EDGEPT *point1, - register EDGEPT *point2, - BOUNDS_RECT rect); #endif diff --git a/wordrec/heuristic.cpp b/wordrec/heuristic.cpp index 72b43d337e..ef7eff99ed 100644 --- a/wordrec/heuristic.cpp +++ b/wordrec/heuristic.cpp @@ -27,11 +27,12 @@ ----------------------------------------------------------------------*/ #include -#include "heuristic.h" - +// Note: "heuristic.h" is an empty file and deleted +#include "associate.h" +#include "bestfirst.h" +#include "seam.h" #include "baseline.h" #include "freelist.h" -#include "metrics.h" #include "measure.h" #include "ratngs.h" #include "wordrec.h" @@ -39,27 +40,8 @@ /*---------------------------------------------------------------------- M a c r o s ----------------------------------------------------------------------*/ -#define MAX_SQUAT 2.0 /* Width ratio */ #define BAD_RATING 1000.0 /* No valid blob */ -INT_VAR(segment_adjust_debug, 0, - "Segmentation adjustment debug"); -BOOL_VAR(assume_fixed_pitch_char_segment, 0, - "include fixed-pitch heuristics in char segmentation"); -BOOL_VAR(use_new_state_cost, 0, - "use new state cost heuristics for segmentation state evaluation"); -double_VAR(heuristic_segcost_rating_base, 1.25, - "base factor for adding segmentation cost into word rating." 
- "It's a multiplying factor, the larger the value above 1, " - "the bigger the effect of segmentation cost."); -double_VAR(heuristic_weight_rating, 1, - "weight associated with char rating in combined cost of state"); -double_VAR(heuristic_weight_width, 0, - "weight associated with width evidence in combined cost of state"); -double_VAR(heuristic_weight_seamcut, 0, - "weight associated with seam cut in combined cost of state"); -double_VAR(heuristic_max_char_wh_ratio, MAX_SQUAT, - "max char width-to-height ratio allowed in segmentation"); namespace tesseract { @@ -73,9 +55,9 @@ namespace tesseract { // This is the same as the original function, only cosmetic changes, // except instead of passing chunks back to be freed, it deallocates // internally. -WIDTH_RECORD *state_char_widths(WIDTH_RECORD *chunk_widths, - STATE *state, - int num_joints) { +WIDTH_RECORD *Wordrec::state_char_widths(WIDTH_RECORD *chunk_widths, + STATE *state, + int num_joints) { SEARCH_STATE chunks = bin_to_chunks(state, num_joints); int num_chars = chunks[0] + 1; @@ -89,10 +71,12 @@ WIDTH_RECORD *state_char_widths(WIDTH_RECORD *chunk_widths, for (int i = 1; i <= num_chars; i++) { last_blob = (i > chunks[0]) ? num_joints : first_blob + chunks[i]; - char_widths->widths[2*i-2] = chunks_width(chunk_widths, - first_blob, last_blob); - if (i <= chunks[0]) - char_widths->widths[2*i-1] = chunks_gap(chunk_widths, last_blob); + char_widths->widths[2*i-2] = + AssociateUtils::GetChunksWidth(chunk_widths, first_blob, last_blob); + if (i <= chunks[0]) { + char_widths->widths[2*i-1] = + AssociateUtils::GetChunksGap(chunk_widths, last_blob); + } if (segment_adjust_debug > 3) tprintf("width_record[%d]s%d--s%d(%d) %d %d:%d\n", @@ -109,7 +93,7 @@ WIDTH_RECORD *state_char_widths(WIDTH_RECORD *chunk_widths, // Computes the variance of the char widths normalized to the given height // TODO(dsl): Do this in a later stage and use char choice info to skip // punctuations. 
-FLOAT32 get_width_variance(WIDTH_RECORD *wrec, float norm_height) { +FLOAT32 Wordrec::get_width_variance(WIDTH_RECORD *wrec, float norm_height) { MEASUREMENT ws; new_measurement(ws); for (int x = 0; x < wrec->num_chars; x++) { @@ -124,7 +108,7 @@ FLOAT32 get_width_variance(WIDTH_RECORD *wrec, float norm_height) { } // Computes the variance of char positioning (width + spacing) wrt norm_height -FLOAT32 get_gap_variance(WIDTH_RECORD *wrec, float norm_height) { +FLOAT32 Wordrec::get_gap_variance(WIDTH_RECORD *wrec, float norm_height) { MEASUREMENT ws; new_measurement(ws); for (int x = 0; x < wrec->num_chars - 1; x++) { @@ -223,48 +207,6 @@ FLOAT32 Wordrec::rating_priority(CHUNKS_RECORD *chunks_record, return rating_cost; } -// Returns the cost, eg. -log(p), of a given value in char width distribution. -FLOAT32 fp_width_cost(float norm_width, bool end_pos) { - bool use_old_hack = true; - if (use_old_hack) { - float cost = 0; - if (norm_width > heuristic_max_char_wh_ratio) - cost += norm_width; - if (norm_width > MAX_SQUAT) // extra penalty for merging two CJK chars - cost += norm_width * norm_width; - // penalize skinny blobs, except for punctuation in the last position - if (norm_width < 0.5 && !end_pos) - cost += 1 - norm_width; - return cost; - } - - // otherwise, approximate with our not-so-normal distribution - float s = fabs((norm_width - 0.85) / 0.35); - if (s < 1) // clip penalty to zero for anything within 1 std - return 0.0f; - // Allow smaller chars at begin or end position for punctuations - if (end_pos && norm_width < 0.3) - return 0.0f; - if (segment_adjust_debug > 2) - tprintf("fp_width_cost(%f) = %f**2 = %f\n", norm_width, s, s*s); - return s*s; -} - -FLOAT32 fp_gap_cost(float norm_gap, bool end_pos) { - bool use_old_hack = true; - if (use_old_hack) { - if (norm_gap < 0.05 && !end_pos) - return 5; // penalize vertically overlapping components - else - return 0; - } - float s = fabs((norm_gap - 0.1) / 0.02); - if (s > -1) return 0.0f; // no penalty for 
wider gaps - if (segment_adjust_debug > 2) - tprintf("fp_gap_cost(%f) = %f**2 = %f\n", norm_gap, s, s*s); - return s*s; -} - /********************************************************************** * width_priority * @@ -286,18 +228,16 @@ FLOAT32 Wordrec::width_priority(CHUNKS_RECORD *chunks_record, // When baseline_enable==True, which is the current default for Tesseract, // a fixed value of 128 (BASELINE_SCALE) is always used. FLOAT32 normalizing_height = BASELINE_SCALE; - if (!classify_baseline_normalized) // this doesn't work and is never invoked - normalizing_height = chunks_record->row->lineheight; if (assume_fixed_pitch_char_segment) { // For fixed pitch language like CJK, we use the full text height as the // normalizing factor so we are not dependent on xheight calculation. // In the normalized coord. xheight * scale == BASELINE_SCALE(128), // so add proportionally scaled ascender zone to get full text height. - normalizing_height = tess_denorm->scale() * - (tess_denorm->row()->x_height() + tess_denorm->row()->ascenders()); + normalizing_height = denorm_.scale() * + (denorm_.row()->x_height() + denorm_.row()->ascenders()); if (segment_adjust_debug > 1) tprintf("WidthPriority: %f %f normalizing height = %f\n", - tess_denorm->row()->x_height(), tess_denorm->row()->ascenders(), + denorm_.row()->x_height(), denorm_.row()->ascenders(), normalizing_height); // Impose additional segmentation penalties if blob widths or gaps // distribution don't fit a fixed-pitch model. 
@@ -313,12 +253,18 @@ FLOAT32 Wordrec::width_priority(CHUNKS_RECORD *chunks_record, squat /= normalizing_height; gap /= normalizing_height; if (assume_fixed_pitch_char_segment) { - penalty += fp_width_cost(squat, x == 0 || x == width_rec->num_chars -1); - penalty += fp_gap_cost(gap, x == width_rec->num_chars - 1); - if (width_rec->num_chars == 1 && squat > MAX_SQUAT) + penalty += AssociateUtils::FixedPitchWidthCost( + squat, 0.0f, x == 0 || x == width_rec->num_chars -1, + heuristic_max_char_wh_ratio); + penalty += AssociateUtils::FixedPitchGapCost( + gap, x == width_rec->num_chars - 1); + if (width_rec->num_chars == 1 && + squat > AssociateUtils::kMaxFixedPitchCharAspectRatio) { penalty += 10; + } } else { - // original equation when heuristic_max_char_ratio == MAX_SQUAT + // Original equation when + // heuristic_max_char_ratio == AssociateUtils::kMaxSquat if (squat > heuristic_max_char_wh_ratio) penalty += squat - heuristic_max_char_wh_ratio; } @@ -362,7 +308,6 @@ FLOAT32 Wordrec::prioritize_state(CHUNKS_RECORD *chunks_record, seam_cost = seamcut_priority(chunks_record->splits, the_search->this_state, the_search->num_joints); - record_priorities(the_search, shape_cost, width_cost); // TODO(dsl): how do we normalize the scores for these separate evidence? // FLOAT32 total_cost = shape_cost + width_cost * 0.01 + seam_cost * 0.001; @@ -393,104 +338,4 @@ FLOAT32 Wordrec::prioritize_state(CHUNKS_RECORD *chunks_record, return total_cost; } - -/*---------------------------------------------------------------------- - F u n c t i o n s - - Below are the original state prioritization functions for reference. - Since they work well for Latin, we need to keep them around until the - new path is verified to do no worse than before. - -// Assign a segmentation priority based on the ratings of the blobs -// (in that segmentation) that have been classified. The average -// "goodness" (i.e. rating / weight) for each blob is used to indicate -// the segmentation priority. 
-FLOAT32 rating_priority(CHUNKS_RECORD *chunks_record, - STATE *state, - STATE *old_state, - int num_joints) { - PIECES_STATE blob_chunks; - inT16 x; - inT16 y; - BLOB_CHOICE_LIST *blob_choices; - BLOB_CHOICE_IT blob_choice_it; - inT16 first_chunk = 0; - inT16 last_chunk; - inT16 ratings = 0; - inT16 weights = 0; - - bin_to_pieces(state, num_joints, blob_chunks); - - for (x = 0; blob_chunks[x]; x++) { - // Iterate each blob - last_chunk = first_chunk + blob_chunks[x] - 1; - - blob_choices = chunks_record->ratings->get(first_chunk, last_chunk); - - if (blob_choices != NOT_CLASSIFIED) { - blob_choice_it.set_to_list(blob_choices); - ratings += (inT16) blob_choice_it.data()->rating(); - for (y = first_chunk; y <= last_chunk; y++) { - weights += (inT16) (chunks_record->weights[y]); - } - } - first_chunk += blob_chunks[x]; - } - if (weights <= 0) - weights = 1; - return ((FLOAT32) ratings / weights); -} - -// Return a priority value for this word segmentation based on the -// character widths present in the new segmentation. -FLOAT32 width_priority(CHUNKS_RECORD *chunks_record, - STATE *state, - int num_joints) { - FLOAT32 result = 0.0; - WIDTH_RECORD *width_record; - FLOAT32 squat; - int x; - - width_record = state_char_widths (chunks_record->chunk_widths, - state, num_joints); - for (x = 0; x < width_record->num_chars; x++) { - - squat = width_record->widths[2 * x]; - if (!classify_baseline_normalized) { - squat /= chunks_record->row->lineheight; - } - else { - squat /= BASELINE_SCALE; - } - - if (squat > MAX_SQUAT) - result += squat - MAX_SQUAT; - - } - - free_widths(width_record); - - return (result); -} - -// Create a priority for this state. It represents the urgency of -// checking this state. 
-FLOAT32 prioritize_state(CHUNKS_RECORD *chunks_record, - SEARCH_RECORD *the_search, - STATE *old_state) { - FLOAT32 width_pri; - FLOAT32 match_pri; - - match_pri = rating_priority (chunks_record, the_search->this_state, - old_state, the_search->num_joints); - - width_pri = width_priority (chunks_record, the_search->this_state, - the_search->num_joints) * 1000.0; - - record_priorities(the_search, old_state, match_pri, width_pri); - - return (width_pri + match_pri); -} ------------------- Original Rating Functions -----------------*/ - } // namespace tesseract diff --git a/wordrec/heuristic.h b/wordrec/heuristic.h deleted file mode 100644 index 1108b0bd49..0000000000 --- a/wordrec/heuristic.h +++ /dev/null @@ -1,54 +0,0 @@ -/* -*-C-*- - ******************************************************************************** - * - * File: heuristic.h (Formerly heuristic.h) - * Description: - * Author: Mark Seaman, SW Productivity - * Created: Fri Oct 16 14:37:00 1987 - * Modified: Tue Jul 9 17:14:44 1991 (Mark Seaman) marks@hpgrlt - * Language: C - * Package: N/A - * Status: Reusable Software Component - * - * (c) Copyright 1987, Hewlett-Packard Company. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. 
- * - *********************************************************************************/ -#ifndef HEURISTIC_H -#define HEURISTIC_H - -/*---------------------------------------------------------------------- - I n c l u d e s -----------------------------------------------------------------------*/ -#include "associate.h" -#include "bestfirst.h" -#include "seam.h" - -extern INT_VAR_H(segment_adjust_debug, 0, - "Segmentation adjustment debug"); -extern BOOL_VAR_H(assume_fixed_pitch_char_segment, 0, - "include fixed-pitch heuristics in char segmentation"); -extern BOOL_VAR_H(use_new_state_cost, 0, - "use new state cost heuristics for segmentation evaluation"); -extern double_VAR_H(heuristic_segcost_rating_base, 1.25, - "base factor for adding segmentation cost into word rating." - "It's a multiplying factor, the larger the value above 1, " - "the bigger the effect of segmentation cost."); -extern double_VAR_H(heuristic_weight_rating, 1, - "weight associated with char rating in combined cost of state"); -extern double_VAR_H(heuristic_weight_width, 0, - "weight associated with width evidence in combined cost of state"); -extern double_VAR_H(heuristic_weight_seamcut, 0, - "weight associated with seam cut in combined cost of state"); -extern double_VAR_H(heuristic_max_char_wh_ratio, MAX_SQUAT, - "max char width-to-height ratio allowed in segmentation"); - -#endif diff --git a/wordrec/language_model.cpp b/wordrec/language_model.cpp new file mode 100644 index 0000000000..649e79d49c --- /dev/null +++ b/wordrec/language_model.cpp @@ -0,0 +1,1737 @@ +/////////////////////////////////////////////////////////////////////// +// File: language_model.cpp +// Description: Functions that utilize the knowledge about the properties, +// structure and statistics of the language to help recognition. +// Author: Daria Antonova +// Created: Mon Nov 11 11:26:43 PST 2009 +// +// (C) Copyright 2009, Google Inc. 
+// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +/////////////////////////////////////////////////////////////////////// + +#include + +#include "language_model.h" + +#include "dawg.h" +#include "matrix.h" +#include "params.h" + +namespace tesseract { + +ELISTIZE(ViterbiStateEntry); + +const float LanguageModel::kInitialPainPointPriorityAdjustment = 5.0f; +const float LanguageModel::kDefaultPainPointPriorityAdjustment = 2.0f; +const float LanguageModel::kBestChoicePainPointPriorityAdjustment = 0.5f; +const float LanguageModel::kCriticalPainPointPriorityAdjustment = 0.1f; +const float LanguageModel::kMaxAvgNgramCost = 25.0f; +const int LanguageModel::kMinFixedLengthDawgLength = 2; +const float LanguageModel::kLooseMaxCharWhRatio = 2.5f; + +LanguageModel::LanguageModel(Dict *dict, + WERD_CHOICE **prev_word_best_choice_ptr) + : INT_MEMBER(language_model_debug_level, 0, "Language model debug level", + dict->getImage()->getCCUtil()->params()), + BOOL_INIT_MEMBER(language_model_ngram_on, false, + "Turn on/off the use of character ngram model", + dict->getImage()->getCCUtil()->params()), + INT_INIT_MEMBER(language_model_ngram_order, 8, + "Maximum order of the character ngram model", + dict->getImage()->getCCUtil()->params()), + INT_INIT_MEMBER(language_model_max_viterbi_list_size, 10, + "Maximum size of viterbi lists recorded in BLOB_CHOICEs" + "(excluding entries that represent dictionary word paths)", + dict->getImage()->getCCUtil()->params()), + 
double_INIT_MEMBER(language_model_ngram_small_prob, 0.000001, + "To avoid overly small denominators use this as the " + "floor of the probability returned by the ngram model.", + dict->getImage()->getCCUtil()->params()), + double_INIT_MEMBER(language_model_ngram_nonmatch_score, -40.0, + "Average classifier score of a non-matching unichar.", + dict->getImage()->getCCUtil()->params()), + BOOL_INIT_MEMBER(language_model_ngram_use_only_first_uft8_step, false, + "Use only the first UTF8 step of the given string" + " when computing log probabilities.", + dict->getImage()->getCCUtil()->params()), + double_INIT_MEMBER(language_model_ngram_scale_factor, 0.03, + "Strength of the character ngram model relative to the" + " character classifier ", + dict->getImage()->getCCUtil()->params()), + INT_INIT_MEMBER(language_model_min_compound_length, 3, + "Minimum length of compound words", + dict->getImage()->getCCUtil()->params()), + INT_INIT_MEMBER(language_model_fixed_length_choices_depth, 3, + "Depth of blob choice lists to explore" + " when fixed length dawgs are on", + dict->getImage()->getCCUtil()->params()), + double_INIT_MEMBER(language_model_penalty_non_freq_dict_word, 0.1, + "Penalty for words not in the frequent word dictionary", + dict->getImage()->getCCUtil()->params()), + double_INIT_MEMBER(language_model_penalty_non_dict_word, 0.15, + "Penalty for non-dictionary words", + dict->getImage()->getCCUtil()->params()), + double_INIT_MEMBER(language_model_penalty_punc, 0.2, + "Penalty for inconsistent punctuation", + dict->getImage()->getCCUtil()->params()), + double_INIT_MEMBER(language_model_penalty_case, 0.1, + "Penalty for inconsistent case", + dict->getImage()->getCCUtil()->params()), + double_INIT_MEMBER(language_model_penalty_script, 0.5, + "Penalty for inconsistent script", + dict->getImage()->getCCUtil()->params()), + double_INIT_MEMBER(language_model_penalty_chartype, 0.3, + "Penalty for inconsistent character type", + dict->getImage()->getCCUtil()->params()), + 
double_INIT_MEMBER(language_model_penalty_increment, 0.01, + "Penalty increment", + dict->getImage()->getCCUtil()->params()), + dict_(dict), denorm_(NULL), fixed_pitch_(false), + max_char_wh_ratio_(0.0), acceptable_choice_found_(false) { + ASSERT_HOST(dict_ != NULL); + dawg_args_ = new DawgArgs(NULL, NULL, new DawgInfoVector(), + new DawgInfoVector(), + 0.0, NO_PERM, kAnyWordLength, -1); + beginning_active_dawgs_ = new DawgInfoVector(); + beginning_constraints_ = new DawgInfoVector(); + fixed_length_beginning_active_dawgs_ = new DawgInfoVector(); + empty_dawg_info_vec_ = new DawgInfoVector(); +} + +LanguageModel::~LanguageModel() { + delete beginning_active_dawgs_; + delete beginning_constraints_; + delete fixed_length_beginning_active_dawgs_; + delete empty_dawg_info_vec_; + delete dawg_args_->updated_active_dawgs; + delete dawg_args_->updated_constraints; + delete dawg_args_; +} + +void LanguageModel::InitForWord( + const WERD_CHOICE *prev_word, const DENORM *denorm, + bool fixed_pitch, float best_choice_cert, float max_char_wh_ratio, + HEAP *pain_points, CHUNKS_RECORD *chunks_record) { + denorm_ = denorm; + fixed_pitch_ = fixed_pitch; + max_char_wh_ratio_ = max_char_wh_ratio; + acceptable_choice_found_ = false; + + // For each cell, generate a "pain point" if the cell is not classified + // and has a left or right neighbor that was classified. 
+ MATRIX *ratings = chunks_record->ratings; + for (int col = 0; col < ratings->dimension(); ++col) { + for (int row = col+1; row < ratings->dimension(); ++row) { + if ((row > 0 && ratings->get(col, row-1) != NOT_CLASSIFIED) || + (col+1 < ratings->dimension() && + ratings->get(col+1, row) != NOT_CLASSIFIED)) { + float worst_piece_cert; + bool fragmented; + GetWorstPieceCertainty(col, row, chunks_record->ratings, + &worst_piece_cert, &fragmented); + GeneratePainPoint(col, row, true, kInitialPainPointPriorityAdjustment, + worst_piece_cert, fragmented, best_choice_cert, + max_char_wh_ratio_, NULL, NULL, + chunks_record, pain_points); + } + } + } + + // Initialize vectors with beginning DawgInfos. + beginning_active_dawgs_->clear(); + dict_->init_active_dawgs(kAnyWordLength, beginning_active_dawgs_, false); + beginning_constraints_->clear(); + dict_->init_constraints(beginning_constraints_); + if (dict_->GetMaxFixedLengthDawgIndex() >= 0) { + fixed_length_beginning_active_dawgs_->clear(); + for (int i = 0; i < beginning_active_dawgs_->size(); ++i) { + int dawg_index = (*beginning_active_dawgs_)[i].dawg_index; + if (dawg_index <= dict_->GetMaxFixedLengthDawgIndex() && + dawg_index >= kMinFixedLengthDawgLength) { + *fixed_length_beginning_active_dawgs_ += (*beginning_active_dawgs_)[i]; + } + } + } + + max_penalty_adjust_ = (dict_->segment_penalty_dict_nonword - + dict_->segment_penalty_dict_case_ok); + + // The rest of the function contains ngram-model specific initialization. + if (!language_model_ngram_on) return; + + // Fill prev_word_str_ with the last language_model_ngram_order + // unichars from prev_word. + // Assume that populate_unichars() has been called on a valid + // prev_word_best_choice_, which is the case, since it points + // to the final result of the classification of the previous word. 
+ if (prev_word != NULL && prev_word->unichar_string() != NULL) { + prev_word_str_ = prev_word->unichar_string(); + } + prev_word_str_ += ' '; + const char *str_ptr = prev_word_str_.string(); + const char *str_end = str_ptr + prev_word_str_.length(); + int step; + prev_word_unichar_step_len_ = 0; + while (str_ptr != str_end && (step = UNICHAR::utf8_step(str_ptr))) { + str_ptr += step; + ++prev_word_unichar_step_len_; + } + ASSERT_HOST(str_ptr == str_end); +} + +void LanguageModel::CleanUp() { + for (int i = 0; i < updated_flags_.size(); ++i) *(updated_flags_[i]) = false; + updated_flags_.clear(); +} + +void LanguageModel::DeleteState(BLOB_CHOICE_LIST *choices) { + BLOB_CHOICE_IT b_it(choices); + for (b_it.mark_cycle_pt(); !b_it.cycled_list(); b_it.forward()) { + if (b_it.data()->language_model_state() != NULL) { + LanguageModelState *state = reinterpret_cast( + b_it.data()->language_model_state()); + delete state; + b_it.data()->set_language_model_state(NULL); + } + } +} + +LanguageModelFlagsType LanguageModel::UpdateState( + LanguageModelFlagsType changed, + int curr_col, int curr_row, + BLOB_CHOICE_LIST *curr_list, + BLOB_CHOICE_LIST *parent_list, + HEAP *pain_points, + BestPathByColumn *best_path_by_column[], + CHUNKS_RECORD *chunks_record, + BestChoiceBundle *best_choice_bundle) { + if (language_model_debug_level > 0) { + tprintf("\nUpdateState: col=%d row=%d (changed=0x%x parent=%p)\n", + curr_col, curr_row, changed, parent_list); + } + // Initialize helper variables. + bool word_end = (curr_row+1 >= chunks_record->ratings->dimension()); + bool just_classified = (changed & kJustClassifiedFlag); + LanguageModelFlagsType new_changed = 0x0; + float denom = (language_model_ngram_on) ? ComputeDenom(curr_list) : 1.0f; + + // Call AddViterbiStateEntry() for each parent+child ViterbiStateEntry. 
+ ViterbiStateEntry_IT vit; + BLOB_CHOICE_IT c_it(curr_list); + int c_it_counter = 0; + bool first_iteration = true; + BLOB_CHOICE *first_lower = NULL; + BLOB_CHOICE *first_upper = NULL; + GetTopChoiceLowerUpper(changed, curr_list, &first_lower, &first_upper); + for (c_it.mark_cycle_pt(); !c_it.cycled_list(); c_it.forward()) { + if (dict_->GetMaxFixedLengthDawgIndex() >= 0 && + c_it_counter++ >= language_model_fixed_length_choices_depth) { + break; + } + // Skip NULL unichars unless it is the only choice. + if (!curr_list->singleton() && c_it.data()->unichar_id() == 0) continue; + if (dict_->getUnicharset().get_fragment(c_it.data()->unichar_id())) { + continue; // skip fragments + } + // Set top choice flags. + LanguageModelFlagsType top_choice_flags = 0x0; + if (first_iteration && (changed | kSmallestRatingFlag)) { + top_choice_flags |= kSmallestRatingFlag; + } + if (first_lower == c_it.data()) top_choice_flags |= kLowerCaseFlag; + if (first_upper == c_it.data()) top_choice_flags |= kUpperCaseFlag; + + if (parent_list == NULL) { // process the beginning of a word + new_changed |= AddViterbiStateEntry( + top_choice_flags, denom, word_end, curr_col, curr_row, c_it.data(), + NULL, NULL, pain_points, best_path_by_column, + chunks_record, best_choice_bundle); + } else { // get viterbi entries from each of the parent BLOB_CHOICEs + BLOB_CHOICE_IT p_it(parent_list); + for (p_it.mark_cycle_pt(); !p_it.cycled_list(); p_it.forward()) { + LanguageModelState *parent_lms = + reinterpret_cast( + p_it.data()->language_model_state()); + if (parent_lms == NULL || parent_lms->viterbi_state_entries.empty()) { + continue; + } + vit.set_to_list(&(parent_lms->viterbi_state_entries)); + int vit_counter = 0; + for (vit.mark_cycle_pt(); !vit.cycled_list(); vit.forward()) { + // Skip pruned entries and do not look at prunable entries if we have + // already examined language_model_max_viterbi_list_size of them. 
+ if (PrunablePath(vit.data()->top_choice_flags, + vit.data()->dawg_info) && + (++vit_counter > language_model_max_viterbi_list_size || + (language_model_ngram_on && vit.data()->ngram_info->pruned))) { + continue; + } + // Only consider the parent if it has been updated or + // if the current ratings cell has just been classified. + if (!just_classified && !vit.data()->updated) continue; + // Create a new ViterbiStateEntry if BLOB_CHOICE in c_it.data() + // looks good according to the Dawgs or character ngram model. + new_changed |= AddViterbiStateEntry( + top_choice_flags, denom, word_end, curr_col, curr_row, + c_it.data(), p_it.data(), vit.data(), pain_points, + best_path_by_column, chunks_record, best_choice_bundle); + } + } // done looking at parents for this c_it.data() + } + first_iteration = false; + } + return new_changed; +} + +bool LanguageModel::ProblematicPath(const ViterbiStateEntry &vse, + UNICHAR_ID unichar_id, bool word_end) { + // The path is problematic if it is inconsistent and has a parent that + // is consistent (or a NULL parent). + if (!vse.Consistent() && (vse.parent_vse == NULL || + vse.parent_vse->Consistent())) { + if (language_model_debug_level > 0) { + tprintf("ProblematicPath: inconsistent\n"); + } + return true; + } + // The path is problematic if it does not represent a dictionary word, + // while its parent does. + if (vse.dawg_info == NULL && + (vse.parent_vse == NULL || vse.parent_vse->dawg_info != NULL)) { + if (language_model_debug_level > 0) { + tprintf("ProblematicPath: dict word terminated\n"); + } + return true; + } + // The path is problematic if ngram info indicates that this path is + // an unlikely sequence of characters, while its parent does not have + // extreme dips in ngram probabilities. 
+ if (vse.ngram_info != NULL && vse.ngram_info->pruned && + (vse.parent_vse == NULL || !vse.parent_vse->ngram_info->pruned)) { + if (language_model_debug_level > 0) { + tprintf("ProblematicPath: small ngram prob\n"); + } + return true; + } + // The path is problematic if there is a non-alpha character in the + // middle of the path (unless it is a digit in the middle of a path + // that represents a number). + if ((vse.parent_vse != NULL) && !word_end && // is middle + !(dict_->getUnicharset().get_isalpha(unichar_id) || // alpha + (dict_->getUnicharset().get_isdigit(unichar_id) && // ok digit + vse.dawg_info != NULL && vse.dawg_info->permuter == NUMBER_PERM))) { + if (language_model_debug_level > 0) { + tprintf("ProblematicPath: non-alpha middle\n"); + } + return true; + } + return false; +} + +void LanguageModel::GetTopChoiceLowerUpper(LanguageModelFlagsType changed, + BLOB_CHOICE_LIST *curr_list, + BLOB_CHOICE **first_lower, + BLOB_CHOICE **first_upper) { + if (!(changed & kLowerCaseFlag || changed & kUpperCaseFlag)) return; + BLOB_CHOICE_IT c_it(curr_list); + const UNICHARSET &unicharset = dict_->getUnicharset(); + BLOB_CHOICE *first_unichar = NULL; + for (c_it.mark_cycle_pt(); !c_it.cycled_list(); c_it.forward()) { + UNICHAR_ID unichar_id = c_it.data()->unichar_id(); + if (unicharset.get_fragment(unichar_id)) continue; // skip fragments + if (first_unichar == NULL) first_unichar = c_it.data(); + if (*first_lower == NULL && unicharset.get_islower(unichar_id)) { + *first_lower = c_it.data(); + } + if (*first_upper == NULL && unicharset.get_isupper(unichar_id)) { + *first_upper = c_it.data(); + } + } + ASSERT_HOST(first_unichar != NULL); + if (*first_lower == NULL) *first_lower = first_unichar; + if (*first_upper == NULL) *first_upper = first_unichar; +} + +LanguageModelFlagsType LanguageModel::AddViterbiStateEntry( + LanguageModelFlagsType top_choice_flags, + float denom, + bool word_end, + int curr_col, int curr_row, + BLOB_CHOICE *b, + BLOB_CHOICE *parent_b, + 
ViterbiStateEntry *parent_vse, + HEAP *pain_points, + BestPathByColumn *best_path_by_column[], + CHUNKS_RECORD *chunks_record, + BestChoiceBundle *best_choice_bundle) { + ViterbiStateEntry_IT vit; + if (language_model_debug_level > 0) { + tprintf("\nAddViterbiStateEntry for unichar %s rating=%.4f" + " certainty=%.4f top_choice_flags=0x%x parent_vse=%p\n", + dict_->getUnicharset().id_to_unichar(b->unichar_id()), + b->rating(), b->certainty(), top_choice_flags, parent_vse); + if (language_model_debug_level > 3 && b->language_model_state() != NULL) { + tprintf("Existing viterbi list:\n"); + vit.set_to_list(&(reinterpret_cast( + b->language_model_state())->viterbi_state_entries)); + for (vit.mark_cycle_pt(); !vit.cycled_list(); vit.forward()) { + PrintViterbiStateEntry("", vit.data(), b, chunks_record); + } + } + } + LanguageModelFlagsType changed = 0x0; + float optimistic_cost = 0.0f; + if (!language_model_ngram_on) optimistic_cost += b->rating(); + if (parent_vse != NULL) optimistic_cost += parent_vse->cost; + // Discard this entry if it will not beat best choice. + if (optimistic_cost >= best_choice_bundle->best_choice->rating()) { + if (language_model_debug_level > 1) { + tprintf("Discarded ViterbiEntry with high cost %.4f" + " best_choice->rating()=%.4f\n", optimistic_cost, + best_choice_bundle->best_choice->rating()); + } + return 0x0; + } + + // Check consistency of the path and set the relevant consistency_info. + LanguageModelConsistencyInfo consistency_info; + FillConsistencyInfo(word_end, b->unichar_id(), parent_vse, parent_b, + &consistency_info); + + // Invoke Dawg language model component. 
+ LanguageModelDawgInfo *dawg_info = + GenerateDawgInfo(word_end, consistency_info.script_id, + curr_col, curr_row, *b, parent_vse, &changed); + + // Invoke TopChoice language model component + float ratings_sum = b->rating(); + if (parent_vse != NULL) ratings_sum += parent_vse->ratings_sum; + GenerateTopChoiceInfo(ratings_sum, dawg_info, consistency_info, + parent_vse, b, &top_choice_flags, &changed); + + // Invoke Ngram language model component. + LanguageModelNgramInfo *ngram_info = NULL; + if (language_model_ngram_on) { + ngram_info = GenerateNgramInfo( + dict_->getUnicharset().id_to_unichar(b->unichar_id()), b->certainty(), + denom, curr_col, curr_row, parent_vse, parent_b, &changed); + ASSERT_HOST(ngram_info != NULL); + } + + // Prune non-top choice paths with inconsistent scripts. + if (consistency_info.inconsistent_script) { + if (!(top_choice_flags & kSmallestRatingFlag)) changed = 0x0; + if (ngram_info != NULL) ngram_info->pruned = true; + } + + // If language model components did not like this unichar - return + if (!changed) { + if (language_model_debug_level > 0) { + tprintf("Language model components did not like this entry\n"); + } + delete dawg_info; + delete ngram_info; + return 0x0; + } + + // Compute cost of associating the blobs that represent the current unichar. + AssociateStats associate_stats; + ComputeAssociateStats(curr_col, curr_row, max_char_wh_ratio_, + parent_vse, chunks_record, &associate_stats); + if (parent_vse != NULL) { + associate_stats.shape_cost += parent_vse->associate_stats.shape_cost; + associate_stats.bad_shape |= parent_vse->associate_stats.bad_shape; + } + + // Compute the aggregate cost (adjusted ratings sum). + float cost = ComputeAdjustedPathCost( + ratings_sum, + (parent_vse == NULL) ? 1 : (parent_vse->length+1), + (dawg_info == NULL) ? 
0.0f : 1.0f, + dawg_info, ngram_info, consistency_info, associate_stats, parent_vse); + + if (b->language_model_state() == NULL) { + b->set_language_model_state(new LanguageModelState(curr_col, curr_row)); + } + LanguageModelState *lms = + reinterpret_cast(b->language_model_state()); + + // Discard this entry if it represents a prunable path and + // language_model_max_viterbi_list_size such entries with a lower + // cost have already been recorded. + if (PrunablePath(top_choice_flags, dawg_info) && + (lms->viterbi_state_entries_prunable_length >= + language_model_max_viterbi_list_size) && + cost >= lms->viterbi_state_entries_prunable_max_cost) { + if (language_model_debug_level > 1) { + tprintf("Discarded ViterbiEntry with high cost %g max cost %g\n", + cost, lms->viterbi_state_entries_prunable_max_cost); + } + delete dawg_info; + delete ngram_info; + return 0x0; + } + + // Create the new ViterbiStateEntry and add it to lms->viterbi_state_entries + ViterbiStateEntry *new_vse = new ViterbiStateEntry( + parent_b, parent_vse, b, cost, consistency_info, + associate_stats, top_choice_flags, dawg_info, ngram_info); + updated_flags_.push_back(&(new_vse->updated)); + lms->viterbi_state_entries.add_sorted(ViterbiStateEntry::Compare, + false, new_vse); + if (PrunablePath(top_choice_flags, dawg_info)) { + lms->viterbi_state_entries_prunable_length++; + } + + // Update lms->viterbi_state_entries_prunable_max_cost and clear + // top_choice_flags of entries with ratings_sum higher than new_vse->ratings_sum. 
+ if ((lms->viterbi_state_entries_prunable_length >= + language_model_max_viterbi_list_size) || top_choice_flags) { + ASSERT_HOST(!lms->viterbi_state_entries.empty()); + int prunable_counter = language_model_max_viterbi_list_size; + vit.set_to_list(&(lms->viterbi_state_entries)); + for (vit.mark_cycle_pt(); !vit.cycled_list(); vit.forward()) { + ViterbiStateEntry *curr_vse = vit.data(); + // Clear the appropriate top choice flags of the entries in the + // list that have ratings_sum higher than new_entry->ratings_sum + // (since they will not be top choices any more). + if (curr_vse->top_choice_flags && curr_vse != new_vse && + ComputeConsistencyAdjustedRatingsSum( + curr_vse->ratings_sum, curr_vse->dawg_info, + curr_vse->consistency_info) > + ComputeConsistencyAdjustedRatingsSum( + new_vse->ratings_sum, new_vse->dawg_info, + new_vse->consistency_info)) { + curr_vse->top_choice_flags &= ~(top_choice_flags); + } + if (prunable_counter > 0 && + PrunablePath(curr_vse->top_choice_flags, curr_vse->dawg_info)) { + --prunable_counter; + } + // Update lms->viterbi_state_entries_prunable_max_cost. + if (prunable_counter == 0) { + lms->viterbi_state_entries_prunable_max_cost = vit.data()->cost; + if (language_model_debug_level > 1) { + tprintf("Set viterbi_state_entries_prunable_max_cost to %.4f\n", + lms->viterbi_state_entries_prunable_max_cost); + } + prunable_counter = -1; // stop counting + } + } + } + + // Print the newly created ViterbiStateEntry. + if (language_model_debug_level > 2) { + PrintViterbiStateEntry("New", new_vse, b, chunks_record); + if (language_model_debug_level > 3) { + tprintf("Updated viterbi list (max cost %g):\n", + lms->viterbi_state_entries_prunable_max_cost); + vit.set_to_list(&(lms->viterbi_state_entries)); + for (vit.mark_cycle_pt(); !vit.cycled_list(); vit.forward()) { + PrintViterbiStateEntry("", vit.data(), b, chunks_record); + } + } + } + + // Update best choice if needed. 
+ if (word_end) { + UpdateBestChoice(b, new_vse, pain_points, + chunks_record, best_choice_bundle); + } + + // Update stats in best_path_by_column. + if (new_vse->Consistent() || new_vse->dawg_info != NULL || + (new_vse->ngram_info != NULL && !new_vse->ngram_info->pruned)) { + float avg_cost = new_vse->cost / static_cast(curr_row+1); + for (int c = curr_col; c <= curr_row; ++c) { + if (avg_cost < (*best_path_by_column)[c].avg_cost) { + (*best_path_by_column)[c].avg_cost = avg_cost; + (*best_path_by_column)[c].best_vse = new_vse; + (*best_path_by_column)[c].best_b = b; + if (language_model_debug_level > 0) { + tprintf("Set best_path_by_column[%d]=(%g %p)\n", + c, avg_cost, new_vse); + } + } + } + } + return changed; +} + +void LanguageModel::PrintViterbiStateEntry( + const char *msg, ViterbiStateEntry *vse, + BLOB_CHOICE *b, CHUNKS_RECORD *chunks_record) { + tprintf("%s ViterbiStateEntry %p with ratings_sum=%.4f length=%d cost=%.4f", + msg, vse, vse->ratings_sum, vse->length, vse->cost); + if (vse->top_choice_flags) { + tprintf(" top_choice_flags=0x%x", vse->top_choice_flags); + } + if (!vse->Consistent()) { + tprintf(" inconsistent=(punc %d case %d chartype %d script %d)\n", + vse->consistency_info.NumInconsistentPunc(), + vse->consistency_info.NumInconsistentCase(), + vse->consistency_info.NumInconsistentChartype(), + vse->consistency_info.inconsistent_script); + } + if (vse->dawg_info) tprintf(" permuter=%d", vse->dawg_info->permuter); + if (vse->ngram_info) { + tprintf(" ngram_cost=%g context=%s ngram pruned=%d", + vse->ngram_info->ngram_cost, + vse->ngram_info->context.string(), + vse->ngram_info->pruned); + } + if (vse->associate_stats.shape_cost > 0.0f) { + tprintf(" shape_cost=%g", vse->associate_stats.shape_cost); + } + if (language_model_debug_level > 3) { + STRING wd_str; + WERD_CHOICE *wd = ConstructWord(b, vse, chunks_record, + NULL, NULL, NULL, NULL); + wd->string_and_lengths(dict_->getUnicharset(), &wd_str, NULL); + delete wd; + tprintf(" str=%s", 
wd_str.string()); + } + tprintf("\n"); +} + +void LanguageModel::GenerateTopChoiceInfo( + float ratings_sum, + const LanguageModelDawgInfo *dawg_info, + const LanguageModelConsistencyInfo &consistency_info, + const ViterbiStateEntry *parent_vse, + BLOB_CHOICE *b, + LanguageModelFlagsType *top_choice_flags, + LanguageModelFlagsType *changed) { + ratings_sum = ComputeConsistencyAdjustedRatingsSum( + ratings_sum, dawg_info, consistency_info); + // Clear flags that do not agree with parent_vse->top_choice_flags. + if (parent_vse != NULL) *top_choice_flags &= parent_vse->top_choice_flags; + if (consistency_info.Consistent()) *top_choice_flags |= kConsistentFlag; + if (*top_choice_flags == 0x0) return; + LanguageModelState *lms = + reinterpret_cast(b->language_model_state()); + if (lms != NULL && !lms->viterbi_state_entries.empty()) { + ViterbiStateEntry_IT vit(&(lms->viterbi_state_entries)); + for (vit.mark_cycle_pt(); !vit.cycled_list(); vit.forward()) { + if (ratings_sum >= ComputeConsistencyAdjustedRatingsSum( + vit.data()->ratings_sum, vit.data()->dawg_info, + vit.data()->consistency_info)) { + // Clear the appropriate flags if the list already contains + // a top choice entry with a lower cost. + *top_choice_flags &= ~(vit.data()->top_choice_flags); + } + } + } + if (language_model_debug_level > 0) { + tprintf("GenerateTopChoiceInfo: top_choice_flags=0x%x\n", + *top_choice_flags); + } + *changed |= *top_choice_flags; +} + +LanguageModelDawgInfo *LanguageModel::GenerateDawgInfo( + bool word_end, int script_id, + int curr_col, int curr_row, + const BLOB_CHOICE &b, + const ViterbiStateEntry *parent_vse, + LanguageModelFlagsType *changed) { + // Initialize active_dawgs and constraints from parent_vse if it is not NULL, + // otherwise use beginning_active_dawgs_ and beginning_constraints_. 
+ if (parent_vse == NULL) { + dawg_args_->active_dawgs = beginning_active_dawgs_; + dawg_args_->constraints = beginning_constraints_; + dawg_args_->permuter = NO_PERM; + } else { + if (parent_vse->dawg_info == NULL) return NULL; // not a dict word path + dawg_args_->active_dawgs = parent_vse->dawg_info->active_dawgs; + dawg_args_->constraints = parent_vse->dawg_info->constraints; + dawg_args_->permuter = parent_vse->dawg_info->permuter; + } + bool use_fixed_length_dawgs = UseFixedLengthDawgs(script_id); + + // Deal with hyphenated words. + if (!use_fixed_length_dawgs && word_end && + dict_->has_hyphen_end(b.unichar_id(), curr_col == 0)) { + if (language_model_debug_level > 0) tprintf("Hyphenated word found\n"); + *changed |= kDawgFlag; + return new LanguageModelDawgInfo(dawg_args_->active_dawgs, + dawg_args_->constraints, + COMPOUND_PERM); + } + + // Deal with compound words. + if (!use_fixed_length_dawgs && dict_->compound_marker(b.unichar_id()) && + (parent_vse == NULL || parent_vse->dawg_info->permuter != NUMBER_PERM)) { + if (language_model_debug_level > 0) tprintf("Found compound marker"); + // Do not allow compound operators at the beginning and end of the word. + // Do not allow more than one compound operator per word. + // Do not allow compounding of words with lengths shorter than + // language_model_min_compound_length + if (parent_vse == NULL || word_end || + dawg_args_->permuter == COMPOUND_PERM || + parent_vse->length < language_model_min_compound_length) return NULL; + + int i; + // Check that the path terminated before the current character is a word. 
+ bool has_word_ending = false; + for (i = 0; i < parent_vse->dawg_info->active_dawgs->size(); ++i) { + const DawgInfo &info = (*parent_vse->dawg_info->active_dawgs)[i]; + const Dawg *pdawg = dict_->GetDawg(info.dawg_index); + assert(pdawg != NULL); + if (pdawg->type() == DAWG_TYPE_WORD && info.ref != NO_EDGE && + pdawg->end_of_word(info.ref)) { + has_word_ending = true; + break; + } + } + if (!has_word_ending) return NULL; + + // Return LanguageModelDawgInfo with active_dawgs set to + // the beginning dawgs of type DAWG_TYPE_WORD. + if (language_model_debug_level > 0) tprintf("Compound word found\n"); + DawgInfoVector beginning_word_dawgs; + for (i = 0; i < beginning_active_dawgs_->size(); ++i) { + const Dawg *bdawg = + dict_->GetDawg((*beginning_active_dawgs_)[i].dawg_index); + if (bdawg->type() == DAWG_TYPE_WORD) { + beginning_word_dawgs += (*beginning_active_dawgs_)[i]; + } + } + *changed |= kDawgFlag; + return new LanguageModelDawgInfo(&(beginning_word_dawgs), + dawg_args_->constraints, + COMPOUND_PERM); + } // done dealing with compound words + + LanguageModelDawgInfo *dawg_info = NULL; + + // Call LetterIsOkay(). + dict_->LetterIsOkay(dawg_args_, b.unichar_id(), word_end); + if (dawg_args_->permuter != NO_PERM) { + *changed |= kDawgFlag; + dawg_info = new LanguageModelDawgInfo(dawg_args_->updated_active_dawgs, + dawg_args_->updated_constraints, + dawg_args_->permuter); + } + + // For non-space delimited languages: since every letter could be + // a valid word, a new word could start at every unichar. Thus append + // fixed_length_beginning_active_dawgs_ to dawg_info->active_dawgs. 
+ if (use_fixed_length_dawgs) { + if (dawg_info == NULL) { + *changed |= kDawgFlag; + dawg_info = new LanguageModelDawgInfo( + fixed_length_beginning_active_dawgs_, + empty_dawg_info_vec_, SYSTEM_DAWG_PERM); + } else { + *(dawg_info->active_dawgs) += *(fixed_length_beginning_active_dawgs_); + } + } // done dealing with fixed-length dawgs + + return dawg_info; +} + +LanguageModelNgramInfo *LanguageModel::GenerateNgramInfo( + const char *unichar, float certainty, float denom, + int curr_col, int curr_row, + const ViterbiStateEntry *parent_vse, + BLOB_CHOICE *parent_b, + LanguageModelFlagsType *changed) { + // Initialize parent context. + const char *pcontext_ptr = ""; + int pcontext_unichar_step_len = 0; + if (parent_vse == NULL) { + pcontext_ptr = prev_word_str_.string(); + pcontext_unichar_step_len = prev_word_unichar_step_len_; + } else { + pcontext_ptr = parent_vse->ngram_info->context.string(); + pcontext_unichar_step_len = + parent_vse->ngram_info->context_unichar_step_len; + } + // Compute p(unichar | parent context). + int unichar_step_len = 0; + bool pruned = false; + float ngram_cost = ComputeNgramCost(unichar, certainty, denom, + pcontext_ptr, &unichar_step_len, + &pruned); + // First attempt to normalize ngram_cost for strings of different + // lengths - we multiply ngram_cost by P(char | context) as many times + // as the number of chunks occupied by char. This makes the ngram costs + // of all the paths ending at the current BLOB_CHOICE comparable. + // TODO(daria): it would be worth looking at different ways of normalization. + if (curr_row > curr_col) ngram_cost += (curr_row - curr_col) * ngram_cost; + // Add the ngram_cost of the parent. + if (parent_vse != NULL) ngram_cost += parent_vse->ngram_info->ngram_cost; + + // Shorten parent context string by unichar_step_len unichars. 
+ int num_remove = (unichar_step_len + pcontext_unichar_step_len - + language_model_ngram_order); + if (num_remove > 0) pcontext_unichar_step_len -= num_remove; + while (num_remove > 0 && *pcontext_ptr != '\0') { + pcontext_ptr += UNICHAR::utf8_step(pcontext_ptr); + --num_remove; + } + + // Decide whether to prune this ngram path and update changed accordingly. + if (parent_vse != NULL && parent_vse->ngram_info->pruned) pruned = true; + if (!pruned) *changed |= kNgramFlag; + + // Construct and return the new LanguageModelNgramInfo. + LanguageModelNgramInfo *ngram_info = new LanguageModelNgramInfo( + pcontext_ptr, pcontext_unichar_step_len, pruned, ngram_cost); + ngram_info->context += unichar; + ngram_info->context_unichar_step_len += unichar_step_len; + assert(ngram_info->context_unichar_step_len <= language_model_ngram_order); + return ngram_info; +} + +float LanguageModel::ComputeNgramCost(const char *unichar, + float certainty, + float denom, + const char *context, + int *unichar_step_len, + bool *found_small_prob) { + const char *context_ptr = context; + char *modified_context = NULL; + char *modified_context_end = NULL; + const char *unichar_ptr = unichar; + const char *unichar_end = unichar_ptr + strlen(unichar_ptr); + float prob = 0.0f; + int step = 0; + while (unichar_ptr < unichar_end && + (step = UNICHAR::utf8_step(unichar_ptr)) > 0) { + if (language_model_debug_level > 1) { + tprintf("prob(%s | %s)=%g\n", unichar_ptr, context_ptr, + dict_->ProbabilityInContext(context_ptr, -1, unichar_ptr, step)); + } + prob += dict_->ProbabilityInContext(context_ptr, -1, unichar_ptr, step); + ++(*unichar_step_len); + if (language_model_ngram_use_only_first_uft8_step) break; + unichar_ptr += step; + // If there are multiple UTF8 characters present in unichar, context is + // updated to include the previously examined characters from str, + // unless use_only_first_uft8_step is true. 
+ if (unichar_ptr < unichar_end) { + if (modified_context == NULL) { + int context_len = strlen(context); + modified_context = + new char[context_len + strlen(unichar_ptr) + step + 1]; + strncpy(modified_context, context, context_len); + modified_context_end = modified_context + context_len; + context_ptr = modified_context; + } + strncpy(modified_context_end, unichar_ptr - step, step); + modified_context_end += step; + *modified_context_end = '\0'; + } + } + if (prob < language_model_ngram_small_prob) { + if (language_model_debug_level > 0) tprintf("Found small prob %g\n", prob); + *found_small_prob = true; + if (prob < MIN_FLOAT32) prob = MIN_FLOAT32; + } + float cost = -(log(CertaintyScore(certainty) / denom) + + language_model_ngram_scale_factor * + log(prob / static_cast(*unichar_step_len))); + if (language_model_debug_level > 1) { + tprintf("-log [ p(%s) * p(%s | %s) ] = -log(%g*%g) = %g\n", unichar, + unichar, context_ptr, CertaintyScore(certainty)/denom, + prob/static_cast(*unichar_step_len), cost); + } + if (modified_context != NULL) delete[] modified_context; + return cost; +} + +float LanguageModel::ComputeDenom(BLOB_CHOICE_LIST *curr_list) { + if (curr_list->empty()) return 1.0f; + float denom = 0.0f; + int len = 0; + BLOB_CHOICE_IT c_it(curr_list); + for (c_it.mark_cycle_pt(); !c_it.cycled_list(); c_it.forward()) { + ASSERT_HOST(c_it.data() != NULL); + ++len; + denom += CertaintyScore(c_it.data()->certainty()); + } + assert(len != 0); + // The ideal situation would be to have the classifier scores for + // classifying each position as each of the characters in the unicharset. + // Since we can not do this because of speed, we add a very crude estimate + // of what these scores for the "missing" classifications would sum up to. 
+ denom += (dict_->getUnicharset().size() - len) * + CertaintyScore(language_model_ngram_nonmatch_score); + + return denom; +} + +void LanguageModel::FillConsistencyInfo( + bool word_end, + UNICHAR_ID unichar_id, + ViterbiStateEntry *parent_vse, + BLOB_CHOICE *parent_b, + LanguageModelConsistencyInfo *consistency_info) { + const UNICHARSET &unicharset = dict_->getUnicharset(); + if (parent_vse != NULL) *consistency_info = parent_vse->consistency_info; + + // Check punctuation validity. + if (unicharset.get_ispunctuation(unichar_id)) consistency_info->num_punc++; + if (dict_->GetPuncDawg() != NULL && !consistency_info->invalid_punc) { + if (dict_->compound_marker(unichar_id) && parent_b != NULL && + (unicharset.get_isalpha(parent_b->unichar_id()) || + unicharset.get_isdigit(parent_b->unichar_id()))) { + // reset punc_ref for compound words + consistency_info->punc_ref = NO_EDGE; + } else { + UNICHAR_ID pattern_unichar_id = + (unicharset.get_isalpha(unichar_id) || + unicharset.get_isdigit(unichar_id)) ? + Dawg::kPatternUnicharID : unichar_id; + if (consistency_info->punc_ref == NO_EDGE || + pattern_unichar_id != Dawg::kPatternUnicharID || + dict_->GetPuncDawg()->edge_letter(consistency_info->punc_ref) != + Dawg::kPatternUnicharID) { + NODE_REF node = Dict::GetStartingNode(dict_->GetPuncDawg(), + consistency_info->punc_ref); + consistency_info->punc_ref = + (node != NO_EDGE) ? dict_->GetPuncDawg()->edge_char_of( + node, pattern_unichar_id, word_end) : NO_EDGE; + if (consistency_info->punc_ref == NO_EDGE) { + consistency_info->invalid_punc = true; + } + } + } + } + + // Update case related counters. + if (parent_vse != NULL && !word_end && dict_->compound_marker(unichar_id)) { + // Reset counters if we are dealing with a compound word. 
+ consistency_info->num_lower = 0; + consistency_info->num_non_first_upper = 0; + } + else if (unicharset.get_islower(unichar_id)) { + consistency_info->num_lower++; + } else if ((parent_b != NULL) && unicharset.get_isupper(unichar_id)) { + if (unicharset.get_isupper(parent_b->unichar_id()) || + consistency_info->num_lower > 0 || + consistency_info->num_non_first_upper > 0) { + consistency_info->num_non_first_upper++; + } + } + + // Initialize consistency_info->script_id (use script of unichar_id + // if it is not Common, use script id recorded by the parent otherwise). + // Set inconsistent_script to true if the script of the current unichar + // is not consistent with that of the parent. + consistency_info->script_id = unicharset.get_script(unichar_id); + // Hiragana and Katakana can mix with Han. + if (dict_->getUnicharset().han_sid() != dict_->getUnicharset().null_sid()) { + if ((unicharset.hiragana_sid() != unicharset.null_sid() && + consistency_info->script_id == unicharset.hiragana_sid()) || + (unicharset.katakana_sid() != unicharset.null_sid() && + consistency_info->script_id == unicharset.katakana_sid())) { + consistency_info->script_id = dict_->getUnicharset().han_sid(); + } + } + + if (parent_vse != NULL && + (parent_vse->consistency_info.script_id != + dict_->getUnicharset().common_sid())) { + int parent_script_id = parent_vse->consistency_info.script_id; + // If we are dealing with Han script, unichars from Common + // script are only ok at the beginning and the end of words. + bool han_inconsistent = + (IsHan(parent_script_id) && !word_end && + (consistency_info->script_id == dict_->getUnicharset().common_sid())); + // If script_id is Common, use script id of the parent instead. 
+ if (consistency_info->script_id == dict_->getUnicharset().common_sid()) { + consistency_info->script_id = parent_script_id; + } + if (consistency_info->script_id != parent_script_id || han_inconsistent) { + consistency_info->inconsistent_script = true; + } + } + + // Update chartype related counters. + if (unicharset.get_isalpha(unichar_id)) { + consistency_info->num_alphas++; + } else if (unicharset.get_isdigit(unichar_id)) { + consistency_info->num_digits++; + } else if (!unicharset.get_ispunctuation(unichar_id)) { + consistency_info->num_other++; + } +} + +float LanguageModel::ComputeAdjustedPathCost( + float ratings_sum, int length, float dawg_score, + const LanguageModelDawgInfo *dawg_info, + const LanguageModelNgramInfo *ngram_info, + const LanguageModelConsistencyInfo &consistency_info, + const AssociateStats &associate_stats, + ViterbiStateEntry *parent_vse) { + float adjustment = 1.0f; + if (dawg_info == NULL || dawg_info->permuter != FREQ_DAWG_PERM) { + adjustment += language_model_penalty_non_freq_dict_word; + } + if (dawg_score == 0.0f) { + adjustment += language_model_penalty_non_dict_word; + if (length > language_model_min_compound_length) { + adjustment += ((length - language_model_min_compound_length) * + language_model_penalty_increment); + } + } else if (dawg_score < 1.0f) { + adjustment += (1.0f - dawg_score) * language_model_penalty_non_dict_word; + } + if (associate_stats.shape_cost > 0) { + adjustment += associate_stats.shape_cost / static_cast(length); + } + if (language_model_ngram_on) { + ASSERT_HOST(ngram_info != NULL); + return ngram_info->ngram_cost * adjustment; + } else { + adjustment += ComputeConsistencyAdjustment(dawg_info, consistency_info); + return ratings_sum * adjustment; + } +} + +void LanguageModel::UpdateBestChoice( + BLOB_CHOICE *b, + ViterbiStateEntry *vse, + HEAP *pain_points, + CHUNKS_RECORD *chunks_record, + BestChoiceBundle *best_choice_bundle) { + int i; + BLOB_CHOICE_LIST_VECTOR temp_best_char_choices(vse->length); 
+ for (i = 0; i < vse->length; ++i) { + temp_best_char_choices.push_back(NULL); + } + float *certainties = new float[vse->length]; + STATE temp_state; + // The fraction of letters in the path that are "covered" by dawgs. + // For space delimited languages this number will be 0.0 for nonwords + // and 1.0 for dictionary words. For non-space delimited languages + // this number will be in the [0.0, 1.0] range. + float dawg_score; + WERD_CHOICE *word = ConstructWord(b, vse, chunks_record, + &temp_best_char_choices, certainties, + &dawg_score, &temp_state); + ASSERT_HOST(word != NULL); + + // Log new segmentation (for dict_->LogNewChoice()). + PIECES_STATE pieces_widths; + bin_to_pieces(&temp_state, chunks_record->ratings->dimension() - 1, + pieces_widths); + dict_->LogNewSegmentation(pieces_widths); + + if (language_model_debug_level > 0) { + STRING word_str; + word->string_and_lengths(dict_->getUnicharset(), &word_str, NULL); + tprintf("UpdateBestChoice() constructed word %s\n", word_str.string()); + if (language_model_debug_level > 2) word->print(); + }; + + // Update raw_choice if needed. + if ((vse->top_choice_flags & kSmallestRatingFlag) && + word->rating() < best_choice_bundle->raw_choice->rating()) { + dict_->LogNewChoice(1.0, certainties, true, word); + *(best_choice_bundle->raw_choice) = *word; + best_choice_bundle->raw_choice->set_permuter(TOP_CHOICE_PERM); + best_choice_bundle->raw_choice->populate_unichars(dict_->getUnicharset()); + if (language_model_debug_level > 0) tprintf("Updated raw choice\n"); + } + + // When working with non-space delimited languages we re-adjust the cost + // taking into account the final dawg_score and a more precise shape cost. + // While constructing the paths we assume that they are all dictionary words + // (since a single character would be a valid dictionary word). At the end + // we compute dawg_score which reflects how many characters on the path are + // "covered" by dictionary words of length > 1. 
+ if (vse->associate_stats.full_wh_ratio_var != 0.0f || + (dict_->GetMaxFixedLengthDawgIndex() >= 0 && dawg_score < 1.0f)) { + vse->cost = ComputeAdjustedPathCost( + vse->ratings_sum, vse->length, dawg_score, vse->dawg_info, + vse->ngram_info, vse->consistency_info, vse->associate_stats, + vse->parent_vse); + if (language_model_debug_level > 0) { + tprintf("Updated vse cost to %g (dawg_score %g full_wh_ratio_var %g)\n", + vse->cost, dawg_score, vse->associate_stats.full_wh_ratio_var); + } + } + + // Update best choice and best char choices if needed. + // TODO(daria): re-write AcceptableChoice() and NoDangerousAmbig() + // to fit better into the new segmentation search. + word->set_rating(vse->cost); + if (word->rating() < best_choice_bundle->best_choice->rating()) { + dict_->LogNewChoice(vse->cost / (language_model_ngram_on ? + vse->ngram_info->ngram_cost : + vse->ratings_sum), + certainties, false, word); + // Since the rating of the word could have been modified by + // Dict::LogNewChoice() - check again. + if (word->rating() < best_choice_bundle->best_choice->rating()) { + bool modified_blobs; // not used + DANGERR fixpt; + if (dict_->AcceptableChoice(&temp_best_char_choices, word, &fixpt, + ASSOCIATOR_CALLER, &modified_blobs) && + AcceptablePath(*vse)) { + acceptable_choice_found_ = true; + } + // Update best_choice_bundle. 
+ *(best_choice_bundle->best_choice) = *word; + best_choice_bundle->best_choice->populate_unichars( + dict_->getUnicharset()); + best_choice_bundle->updated = true; + best_choice_bundle->best_char_choices->delete_data_pointers(); + best_choice_bundle->best_char_choices->clear(); + for (i = 0; i < temp_best_char_choices.size(); ++i) { + BLOB_CHOICE_LIST *cc_list = new BLOB_CHOICE_LIST(); + cc_list->deep_copy(temp_best_char_choices[i], &BLOB_CHOICE::deep_copy); + best_choice_bundle->best_char_choices->push_back(cc_list); + } + best_choice_bundle->best_state->part2 = temp_state.part2; + best_choice_bundle->best_state->part1 = temp_state.part1; + if (language_model_debug_level > 0) { + tprintf("Updated best choice\n"); + print_state("New state ", best_choice_bundle->best_state, + chunks_record->ratings->dimension()-1); + } + // Update hyphen state if we are dealing with a dictionary word. + if (vse->dawg_info != NULL && dict_->GetMaxFixedLengthDawgIndex() < 0) { + if (dict_->has_hyphen_end(*word)) { + dict_->set_hyphen_word(*word, *(dawg_args_->active_dawgs), + *(dawg_args_->constraints)); + } else { + dict_->reset_hyphen_vars(true); + } + } + best_choice_bundle->best_vse = vse; + best_choice_bundle->best_b = b; + best_choice_bundle->fixpt = fixpt; + } + } + + // Clean up. + delete[] certainties; + delete word; +} + +WERD_CHOICE *LanguageModel::ConstructWord( + BLOB_CHOICE *b, + ViterbiStateEntry *vse, + CHUNKS_RECORD *chunks_record, + BLOB_CHOICE_LIST_VECTOR *best_char_choices, + float certainties[], + float *dawg_score, + STATE *state) { + uinT64 state_uint = 0x0; + // Construct a WERD_CHOICE by tracing parent pointers. 
+  WERD_CHOICE *word = new WERD_CHOICE(vse->length);
+  word->set_length(vse->length);
+  const uinT64 kHighestBitOn = 0x8000000000000000LL;
+  BLOB_CHOICE *curr_b = b;
+  LanguageModelState *curr_lms =
+      reinterpret_cast<LanguageModelState *>(b->language_model_state());
+  ViterbiStateEntry *curr_vse = vse;
+
+  int i;
+  bool compound = dict_->hyphenated();  // treat hyphenated words as compound
+  bool dawg_score_done = true;
+  if (dawg_score != NULL) {
+    *dawg_score = 0.0f;
+    // For space-delimited languages the presence of dawg_info in the last
+    // ViterbyStateEntry on the path means that the whole path represents
+    // a valid dictionary word.
+    if (dict_->GetMaxFixedLengthDawgIndex() < 0) {
+      if (vse->dawg_info != NULL) *dawg_score = 1.0f;
+    } else if (vse->length == 1) {
+      *dawg_score = 1.0f;       // each one-letter word is legal
+      dawg_score_done = true;   // in non-space delimited languages
+    } else {
+      dawg_score_done = false;  // do more work to compute dawg_score
+    }
+  }
+  // For non-space delimited languages we compute the fraction of letters
+  // "covered" by fixed length dawgs (i.e. words of length > 1 on the path).
+  int covered_by_fixed_length_dawgs = 0;
+  // The number of unichars remaining that should be skipped because
+  // they are covered by the previous word from fixed length dawgs.
+  int fixed_length_num_unichars_to_skip = 0;
+
+  // Re-compute the variance of the width-to-hight ratios (since we now
+  // can compute the mean over the whole word).
+  float full_wh_ratio_mean = 0.0f;
+  if (vse->associate_stats.full_wh_ratio_var != 0.0f) {
+    vse->associate_stats.shape_cost -= vse->associate_stats.full_wh_ratio_var;
+    full_wh_ratio_mean = (vse->associate_stats.full_wh_ratio_total /
+                          static_cast<float>(vse->length));
+    vse->associate_stats.full_wh_ratio_var = 0.0f;
+  }
+
+  for (i = (vse->length-1); i >= 0; --i) {
+    word->set_unichar_id(curr_b->unichar_id(), i);
+    word->set_fragment_length(1, i);
+    if (certainties != NULL) certainties[i] = curr_b->certainty();
+    if (best_char_choices != NULL) {
+      best_char_choices->set(chunks_record->ratings->get(
+          curr_lms->contained_in_col, curr_lms->contained_in_row), i);
+    }
+    if (state != NULL) {
+      // Record row minus col zeroes in the reverse state to mark the number
+      // of joins done by using a blob from this cell in the ratings matrix.
+      state_uint >>= (curr_lms->contained_in_row - curr_lms->contained_in_col);
+      // Record a one in the reverse state to indicate the split before
+      // the blob from the next cell in the ratings matrix (unless we are
+      // at the first cell, in which case there is no next blob).
+      if (i != 0) {
+        state_uint >>= 1;
+        state_uint |= kHighestBitOn;
+      }
+    }
+    // For non-space delimited languages: find word endings recorded while
+    // trying to separate the current path into words (for words found in
+    // fixed length dawgs.
+    if (!dawg_score_done && curr_vse->dawg_info != NULL) {
+      UpdateCoveredByFixedLengthDawgs(*(curr_vse->dawg_info->active_dawgs),
+                                      i, vse->length,
+                                      &fixed_length_num_unichars_to_skip,
+                                      &covered_by_fixed_length_dawgs,
+                                      dawg_score, &dawg_score_done);
+    }
+    // Update the width-to-height ratio variance. Useful non-space delimited
+    // languages to ensure that the blobs are of uniform width.
+    // Skip leading and trailing punctuation when computing the variance.
+    if ((full_wh_ratio_mean != 0.0f &&
+         ((curr_vse != vse && curr_vse->parent_vse != NULL) ||
+          !dict_->getUnicharset().get_ispunctuation(curr_b->unichar_id())))) {
+      vse->associate_stats.full_wh_ratio_var +=
+          pow(full_wh_ratio_mean - curr_vse->associate_stats.full_wh_ratio, 2);
+      if (language_model_debug_level > 2) {
+        tprintf("full_wh_ratio_var += (%g-%g)^2\n",
+                full_wh_ratio_mean, curr_vse->associate_stats.full_wh_ratio);
+      }
+    }
+
+    // Mark the word as compound if compound permuter was set for any of
+    // the unichars on the path (usually this will happen for unichars
+    // that are compounding operators, like "-" and "/").
+    if (!compound && curr_vse->dawg_info &&
+        curr_vse->dawg_info->permuter == COMPOUND_PERM) compound = true;
+
+    // Update curr_* pointers.
+    if (curr_vse->parent_b == NULL) break;
+    curr_b = curr_vse->parent_b;
+    curr_lms =
+        reinterpret_cast<LanguageModelState *>(curr_b->language_model_state());
+    curr_vse = curr_vse->parent_vse;
+  }
+  ASSERT_HOST(i == 0);  // check that we recorded all the unichar ids
+  // Re-adjust shape cost to include the updated width-to-hight variance.
+  if (full_wh_ratio_mean != 0.0f) {
+    vse->associate_stats.shape_cost += vse->associate_stats.full_wh_ratio_var;
+  }
+
+  if (state != NULL) {
+    state_uint >>= (64 - (chunks_record->ratings->dimension()-1));
+    state->part2 = state_uint;
+    state_uint >>= 32;
+    state->part1 = state_uint;
+  }
+  word->set_rating(vse->ratings_sum);
+  word->set_certainty(vse->min_certainty);
+  if (vse->dawg_info != NULL && dict_->GetMaxFixedLengthDawgIndex() < 0) {
+    word->set_permuter(compound ?
COMPOUND_PERM : vse->dawg_info->permuter); + } else if (language_model_ngram_on && !vse->ngram_info->pruned) { + word->set_permuter(NGRAM_PERM); + } else if (vse->top_choice_flags) { + word->set_permuter(TOP_CHOICE_PERM); + } else { + word->set_permuter(NO_PERM); + } + return word; +} + +void LanguageModel::UpdateCoveredByFixedLengthDawgs( + const DawgInfoVector &active_dawgs, int word_index, int word_length, + int *skip, int *covered, float *dawg_score, bool *dawg_score_done) { + if (language_model_debug_level > 3) { + tprintf("UpdateCoveredByFixedLengthDawgs for index %d skip=%d\n", + word_index, *skip, word_length); + } + + if (*skip > 0) { + --(*skip); + } else { + int best_index = -1; + for (int d = 0; d < active_dawgs.size(); ++d) { + int dawg_index = (active_dawgs[d]).dawg_index; + if (dawg_index > dict_->GetMaxFixedLengthDawgIndex()) { + // If active_dawgs of the last ViterbiStateEntry on the path + // contain a non-fixed length dawg, this means that the whole + // path represents a word from a non-fixed length word dawg. 
+        if (word_index == (word_length - 1)) {
+          *dawg_score = 1.0f;
+          *dawg_score_done = true;
+          return;
+        }
+      } else if (dawg_index >= kMinFixedLengthDawgLength) {
+        const Dawg *curr_dawg = dict_->GetDawg(dawg_index);
+        ASSERT_HOST(curr_dawg != NULL);
+        if ((active_dawgs[d]).ref != NO_EDGE &&
+            curr_dawg->end_of_word((active_dawgs[d]).ref) &&
+            dawg_index > best_index) {
+          best_index = dawg_index;
+        }
+
+        if (language_model_debug_level > 3) {
+          tprintf("dawg_index %d, ref %d, eow %d\n", dawg_index,
+                  (active_dawgs[d]).ref,
+                  ((active_dawgs[d]).ref != NO_EDGE &&
+                   curr_dawg->end_of_word((active_dawgs[d]).ref)));
+        }
+      }
+    }  // end for
+    if (best_index != -1) {
+      *skip = best_index - 1;
+      *covered += best_index;
+    }
+  }  // end if/else skip
+
+  if (word_index == 0) {
+    ASSERT_HOST(*covered <= word_length);
+    *dawg_score = (static_cast<float>(*covered) /
+                   static_cast<float>(word_length));
+    *dawg_score_done = true;
+  }
+}
+
+void LanguageModel::GeneratePainPointsFromColumn(
+    int col,
+    const GenericVector<int> &non_empty_rows,
+    float best_choice_cert,
+    HEAP *pain_points,
+    BestPathByColumn *best_path_by_column[],
+    CHUNKS_RECORD *chunks_record) {
+  for (int i = 0; i < non_empty_rows.length(); ++i) {
+    int row = non_empty_rows[i];
+    if (language_model_debug_level > 0) {
+      tprintf("\nLooking for pain points in col=%d row=%d\n", col, row);
+    }
+    if (language_model_ngram_on) {
+      GenerateNgramModelPainPointsFromColumn(
+          col, row, pain_points, chunks_record);
+    } else {
+      GenerateProblematicPathPainPointsFromColumn(
+          col, row, best_choice_cert, pain_points,
+          best_path_by_column, chunks_record);
+    }
+  }
+}
+
+void LanguageModel::GenerateNgramModelPainPointsFromColumn(
+    int col, int row, HEAP *pain_points, CHUNKS_RECORD *chunks_record) {
+  // Find the first top choice path recorded for this cell.
+  // If this path is prunned - generate a pain point.
+  ASSERT_HOST(chunks_record->ratings->get(col, row) != NULL);
+  BLOB_CHOICE_IT bit(chunks_record->ratings->get(col, row));
+  bool fragmented = false;
+  for (bit.mark_cycle_pt(); !bit.cycled_list(); bit.forward()) {
+    if (dict_->getUnicharset().get_fragment(bit.data()->unichar_id())) {
+      fragmented = true;
+      continue;
+    }
+    LanguageModelState *lms = reinterpret_cast<LanguageModelState *>(
+        bit.data()->language_model_state());
+    if (lms == NULL) continue;
+    ViterbiStateEntry_IT vit(&(lms->viterbi_state_entries));
+    for (vit.mark_cycle_pt(); !vit.cycled_list(); vit.forward()) {
+      const ViterbiStateEntry *vse = vit.data();
+      if (!vse->top_choice_flags) continue;
+      ASSERT_HOST(vse->ngram_info != NULL);
+      if (vse->ngram_info->pruned && (vse->parent_vse == NULL ||
+                                      !vse->parent_vse->ngram_info->pruned)) {
+        if (vse->parent_vse != NULL) {
+          LanguageModelState *pp_lms = reinterpret_cast<LanguageModelState *>(
+              vse->parent_b->language_model_state());
+          GeneratePainPoint(pp_lms->contained_in_col, row, false,
+                            kDefaultPainPointPriorityAdjustment,
+                            -1.0f, fragmented, -1.0f,
+                            max_char_wh_ratio_,
+                            vse->parent_vse->parent_b,
+                            vse->parent_vse->parent_vse,
+                            chunks_record, pain_points);
+        }
+        if (vse->parent_vse != NULL &&
+            vse->parent_vse->parent_vse != NULL &&
+            dict_->getUnicharset().get_ispunctuation(
+                vse->parent_b->unichar_id())) {
+          // If the dip in the ngram probability is due to punctuation in the
+          // middle of the word - go two unichars back to combine this
+          // puntuation mark with the previous character on the path.
+          LanguageModelState *pp_lms = reinterpret_cast<LanguageModelState *>(
+              vse->parent_vse->parent_b->language_model_state());
+          GeneratePainPoint(pp_lms->contained_in_col, col-1, false,
+                            kDefaultPainPointPriorityAdjustment,
+                            -1.0f, fragmented, -1.0f,
+                            max_char_wh_ratio_,
+                            vse->parent_vse->parent_vse->parent_b,
+                            vse->parent_vse->parent_vse->parent_vse,
+                            chunks_record, pain_points);
+        } else if (row+1 < chunks_record->ratings->dimension()) {
+          GeneratePainPoint(col, row+1, true,
+                            kDefaultPainPointPriorityAdjustment,
+                            -1.0f, fragmented, -1.0f,
+                            max_char_wh_ratio_,
+                            vse->parent_b,
+                            vse->parent_vse,
+                            chunks_record, pain_points);
+        }
+      }
+      return;  // examined the lowest cost top choice path
+    }
+  }
+}
+
+void LanguageModel::GenerateProblematicPathPainPointsFromColumn(
+    int col, int row, float best_choice_cert,
+    HEAP *pain_points, BestPathByColumn *best_path_by_column[],
+    CHUNKS_RECORD *chunks_record) {
+  MATRIX *ratings = chunks_record->ratings;
+
+  // Get the best path from this matrix cell.
+  BLOB_CHOICE_LIST *blist = ratings->get(col, row);
+  ASSERT_HOST(blist != NULL);
+  if (blist->empty()) return;
+  BLOB_CHOICE_IT bit(blist);
+  bool fragment = false;
+  while (dict_->getUnicharset().get_fragment(bit.data()->unichar_id()) &&
+         !bit.at_last()) {  // skip fragments
+    fragment = true;
+    bit.forward();
+  }
+  if (bit.data()->language_model_state() == NULL) return;
+  ViterbiStateEntry_IT vit(&(reinterpret_cast<LanguageModelState *>(
+      bit.data()->language_model_state())->viterbi_state_entries));
+  if (vit.empty()) return;
+  ViterbiStateEntry *vse = vit.data();
+  // Check whether this path is promising.
+  bool path_is_promising = true;
+  if (vse->parent_vse != NULL) {
+    float potential_avg_cost =
+        ((vse->parent_vse->cost + bit.data()->rating()*0.5f) /
+         static_cast<float>(row+1));
+    if (language_model_debug_level > 0) {
+      tprintf("potential_avg_cost %g best cost %g\n",
+              potential_avg_cost, (*best_path_by_column)[col].avg_cost);
+    }
+    if (potential_avg_cost >= (*best_path_by_column)[col].avg_cost) {
+      path_is_promising = false;
+    }
+  }
+  // Set best_parent_vse to the best parent recorded in best_path_by_column.
+  ViterbiStateEntry *best_parent_vse = vse->parent_vse;
+  BLOB_CHOICE *best_parent_b = vse->parent_b;
+  if (col > 0 && (*best_path_by_column)[col-1].best_vse != NULL) {
+    ASSERT_HOST((*best_path_by_column)[col-1].best_b != NULL);
+    LanguageModelState *best_lms = reinterpret_cast<LanguageModelState *>(
+        ((*best_path_by_column)[col-1].best_b)->language_model_state());
+    if (best_lms->contained_in_row == col-1) {
+      best_parent_vse = (*best_path_by_column)[col-1].best_vse;
+      best_parent_b = (*best_path_by_column)[col-1].best_b;
+      if (language_model_debug_level > 0) {
+        tprintf("Setting best_parent_vse to %p\n", best_parent_vse);
+      }
+    }
+  }
+  // Check whether this entry terminates the best parent path
+  // recorded in best_path_by_column.
+  bool best_not_prolonged = (best_parent_vse != vse->parent_vse);
+
+  // If this path is problematic because of the last unichar - generate
+  // a pain point to combine it with its left and right neighbor.
+ BLOB_CHOICE_IT tmp_bit; + if (best_not_prolonged || + (path_is_promising && + ProblematicPath(*vse, bit.data()->unichar_id(), + row+1 == ratings->dimension()))) { + float worst_piece_cert; + bool fragmented; + if (col-1 > 0) { + GetWorstPieceCertainty(col-1, row, chunks_record->ratings, + &worst_piece_cert, &fragmented); + GeneratePainPoint(col-1, row, false, + kDefaultPainPointPriorityAdjustment, + worst_piece_cert, fragmented, best_choice_cert, + max_char_wh_ratio_, best_parent_b, best_parent_vse, + chunks_record, pain_points); + } + if (row+1 < ratings->dimension()) { + GetWorstPieceCertainty(col, row+1, chunks_record->ratings, + &worst_piece_cert, &fragmented); + GeneratePainPoint(col, row+1, true, kDefaultPainPointPriorityAdjustment, + worst_piece_cert, fragmented, best_choice_cert, + max_char_wh_ratio_, best_parent_b, best_parent_vse, + chunks_record, pain_points); + } + } // for ProblematicPath +} + +void LanguageModel::GeneratePainPointsFromBestChoice( + HEAP *pain_points, + CHUNKS_RECORD *chunks_record, + BestChoiceBundle *best_choice_bundle) { + // Variables to backtrack best_vse path; + ViterbiStateEntry *curr_vse = best_choice_bundle->best_vse; + BLOB_CHOICE *curr_b = best_choice_bundle->best_b; + + // Begins and ends in DANGERR vector record the positions in the blob choice + // list of the best choice. We need to translate these endpoints into the + // beginning column and ending row for the pain points. We maintain + // danger_begin and danger_end arrays indexed by the position in + // best_choice_bundle->best_char_choices (which is equal to the position + // on the best_choice_bundle->best_vse path). + // danger_end[d] stores the DANGERR_INFO structs with end==d and is + // initialized at the beginning of this function. + // danger_begin[d] stores the DANGERR_INFO struct with begin==d and + // has end set to the row of the end of this ambiguity. 
+  // The translation from end in terms of the best choice index to the end row
+  // is done while following the parents of best_choice_bundle->best_vse.
+  assert(best_choice_bundle->best_char_choices->length() ==
+         best_choice_bundle->best_vse->length);
+  DANGERR *danger_begin = NULL;
+  DANGERR *danger_end = NULL;
+  int d;
+  if (!best_choice_bundle->fixpt.empty()) {
+    danger_begin = new DANGERR[best_choice_bundle->best_vse->length];
+    danger_end = new DANGERR[best_choice_bundle->best_vse->length];
+    for (d = 0; d < best_choice_bundle->fixpt.size(); ++d) {
+      const DANGERR_INFO &danger = best_choice_bundle->fixpt[d];
+      // Only use n->1 ambiguities.
+      if (danger.end > danger.begin && !danger.correct_is_ngram &&
+          (!language_model_ngram_on || danger.dangerous)) {
+        danger_end[danger.end].push_back(danger);
+      }
+    }
+  }
+
+  // Variables to keep track of punctuation/number streaks.
+  int punc_streak_end_row = -1;
+  int punc_streak_length = 0;
+  float punc_streak_min_cert = 0.0f;
+
+  if (language_model_debug_level > 0) {
+    tprintf("\nGenerating pain points for best path=%p\n", curr_vse);
+  }
+
+  int word_index = best_choice_bundle->best_vse->length;
+  while (curr_vse != NULL) {
+    word_index--;
+    ASSERT_HOST(word_index >= 0);
+    ASSERT_HOST(curr_b != NULL);
+    if (language_model_debug_level > 0) {
+      tprintf("Looking at unichar %s\n",
+              dict_->getUnicharset().id_to_unichar(curr_b->unichar_id()));
+    }
+
+    int pp_col = reinterpret_cast<LanguageModelState *>(
+        curr_b->language_model_state())->contained_in_col;
+    int pp_row = reinterpret_cast<LanguageModelState *>(
+        curr_b->language_model_state())->contained_in_row;
+
+    // Generate pain points for ambiguities found by NoDangerousAmbig().
+    if (danger_end != NULL) {
+      // Translate end index of an ambiguity to an end row.
+ for (d = 0; d < danger_end[word_index].size(); ++d) { + danger_end[word_index][d].end = pp_row; + danger_begin[danger_end[word_index][d].begin].push_back( + danger_end[word_index][d]); + } + // Generate a pain point to combine unchars in the "wrong" part + // of the ambiguity. + for (d = 0; d < danger_begin[word_index].size(); ++d) { + if (language_model_debug_level > 0) { + tprintf("Generating pain point from %sambiguity\n", + danger_begin[word_index][d].dangerous ? "dangerous " : ""); + } + GeneratePainPoint(pp_col, danger_begin[word_index][d].end, false, + danger_begin[word_index][d].dangerous ? + kCriticalPainPointPriorityAdjustment : + kBestChoicePainPointPriorityAdjustment, + best_choice_bundle->best_choice->certainty(), true, + best_choice_bundle->best_choice->certainty(), + kLooseMaxCharWhRatio, + curr_vse->parent_b, curr_vse->parent_vse, + chunks_record, pain_points); + } + } + + if (!language_model_ngram_on) { // no need to use further heuristics if we + // are guided by the character ngram model + // Generate pain points for problematic sub-paths. 
+ if (ProblematicPath(*curr_vse, curr_b->unichar_id(), + pp_row+1 == chunks_record->ratings->dimension())) { + if (language_model_debug_level > 0) { + tprintf("Generating pain point from a problematic sub-path\n"); + } + float worst_piece_cert; + bool fragment; + if (pp_col > 0) { + GetWorstPieceCertainty(pp_col-1, pp_row, chunks_record->ratings, + &worst_piece_cert, &fragment); + GeneratePainPoint(pp_col-1, pp_row, false, + kBestChoicePainPointPriorityAdjustment, + worst_piece_cert, true, + best_choice_bundle->best_choice->certainty(), + max_char_wh_ratio_, NULL, NULL, + chunks_record, pain_points); + } + if (pp_row+1 < chunks_record->ratings->dimension()) { + GetWorstPieceCertainty(pp_col, pp_row+1, chunks_record->ratings, + &worst_piece_cert, &fragment); + GeneratePainPoint(pp_col, pp_row+1, true, + kBestChoicePainPointPriorityAdjustment, + worst_piece_cert, true, + best_choice_bundle->best_choice->certainty(), + max_char_wh_ratio_, NULL, NULL, + chunks_record, pain_points); + } + } + + // Generate a pain point if we encountered a punctuation/number streak to + // combine all punctuation marks into one blob and try to classify it. 
+ bool is_alpha = dict_->getUnicharset().get_isalpha(curr_b->unichar_id()); + if (!is_alpha) { + if (punc_streak_end_row == -1) punc_streak_end_row = pp_row; + punc_streak_length++; + if (curr_b->certainty() < punc_streak_min_cert) + punc_streak_min_cert = curr_b->certainty(); + } + if (is_alpha || curr_vse->parent_vse == NULL) { + if (punc_streak_end_row != -1 && punc_streak_length > 1) { + if (language_model_debug_level > 0) { + tprintf("Generating pain point from a punctuation streak\n"); + } + if (is_alpha || + (curr_vse->parent_vse == NULL && punc_streak_length > 2)) { + GeneratePainPoint(pp_row+1, punc_streak_end_row, false, + kBestChoicePainPointPriorityAdjustment, + punc_streak_min_cert, true, + best_choice_bundle->best_choice->certainty(), + max_char_wh_ratio_, curr_b, curr_vse, + chunks_record, pain_points); + } + // Handle punctuation/number streaks starting from the first unichar. + if (curr_vse->parent_vse == NULL) { + GeneratePainPoint(0, punc_streak_end_row, false, + kBestChoicePainPointPriorityAdjustment, + punc_streak_min_cert, true, + best_choice_bundle->best_choice->certainty(), + max_char_wh_ratio_, NULL, NULL, + chunks_record, pain_points); + } + } + punc_streak_end_row = -1; + punc_streak_length = 0; + punc_streak_min_cert = 0.0f; + } // end handling punctuation streaks + } + + curr_b = curr_vse->parent_b; + curr_vse = curr_vse->parent_vse; + } // end looking at best choice subpaths + + // Clean up. 
+ if (danger_end != NULL) { + delete[] danger_begin; + delete[] danger_end; + } +} + +void LanguageModel::GeneratePainPoint( + int col, int row, bool ok_to_extend, float priority, + float worst_piece_cert, bool fragmented, float best_choice_cert, + float max_char_wh_ratio, + BLOB_CHOICE *parent_b, ViterbiStateEntry *parent_vse, + CHUNKS_RECORD *chunks_record, HEAP *pain_points) { + if (col < 0 || row >= chunks_record->ratings->dimension() || + chunks_record->ratings->get(col, row) != NOT_CLASSIFIED) { + return; + } + if (language_model_debug_level > 3) { + tprintf("\nGenerating pain point for col=%d row=%d priority=%g parent=", + col, row, priority); + if (parent_vse != NULL) { + PrintViterbiStateEntry("", parent_vse, parent_b, chunks_record); + } else { + tprintf("NULL"); + } + tprintf("\n"); + } + + AssociateStats associate_stats; + ComputeAssociateStats(col, row, max_char_wh_ratio, parent_vse, + chunks_record, &associate_stats); + // For fixed-pitch fonts/languages: if the current combined blob overlaps + // the next blob on the right and it is ok to extend the blob, try expending + // the blob untill there is no overlap with the next blob on the right or + // until the width-to-hight ratio becomes too large. + if (ok_to_extend) { + while (associate_stats.bad_fixed_pitch_right_gap && + row+1 < chunks_record->ratings->dimension() && + !associate_stats.bad_fixed_pitch_wh_ratio) { + ComputeAssociateStats(col, ++row, max_char_wh_ratio, parent_vse, + chunks_record, &associate_stats); + } + } + + if (associate_stats.bad_shape) { + if (language_model_debug_level > 3) { + tprintf("Discarded pain point with a bad shape\n"); + } + return; + } + + // Compute pain point priority. 
+  if (associate_stats.shape_cost > 0) {
+    priority *= associate_stats.shape_cost;
+  }
+  if (worst_piece_cert < best_choice_cert) {
+    worst_piece_cert = best_choice_cert;
+  }
+  priority *= CertaintyScore(worst_piece_cert);
+  if (fragmented) priority /= kDefaultPainPointPriorityAdjustment;
+
+  if (language_model_debug_level > 3) {
+    tprintf("worst_piece_cert=%g fragmented=%d\n",
+            worst_piece_cert, fragmented);
+  }
+
+  if (parent_vse != NULL) {
+    priority *= sqrt(parent_vse->cost / static_cast<float>(col));
+    if (parent_vse->dawg_info != NULL) {
+      priority /= kDefaultPainPointPriorityAdjustment;
+      if (parent_vse->length > language_model_min_compound_length) {
+        priority /= sqrt(static_cast<float>(parent_vse->length));
+      }
+    }
+  }
+
+  MATRIX_COORD *pain_point = new MATRIX_COORD(col, row);
+  if (HeapPushCheckSize(pain_points, priority, pain_point)) {
+    if (language_model_debug_level) {
+      tprintf("Added pain point with priority %g\n", priority);
+    }
+  } else {
+    delete pain_point;
+    if (language_model_debug_level) tprintf("Pain points heap is full\n");
+  }
+}
+
+}  // namespace tesseract
diff --git a/wordrec/language_model.h b/wordrec/language_model.h
new file mode 100644
index 0000000000..3d80235c89
--- /dev/null
+++ b/wordrec/language_model.h
@@ -0,0 +1,789 @@
+///////////////////////////////////////////////////////////////////////
+// File: language_model.h
+// Description: Functions that utilize the knowledge about the properties,
+// structure and statistics of the language to help recognition.
+// Author: Daria Antonova
+// Created: Mon Nov 11 11:26:43 PST 2009
+//
+// (C) Copyright 2009, Google Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +/////////////////////////////////////////////////////////////////////// + +#ifndef TESSERACT_WORDREC_LANGUAGE_MODEL_H_ +#define TESSERACT_WORDREC_LANGUAGE_MODEL_H_ + +#include "associate.h" +#include "dawg.h" +#include "dict.h" +#include "matrix.h" +#include "oldheap.h" +#include "params.h" + +namespace tesseract { + +// Used for expressing various language model flags. +typedef unsigned char LanguageModelFlagsType; + +// Struct for keeping track of the consistency of the path. +struct LanguageModelConsistencyInfo { + LanguageModelConsistencyInfo() + : punc_ref(NO_EDGE), num_punc(0), invalid_punc(false), + num_non_first_upper(0), num_lower(0), + script_id(0), inconsistent_script(false), + num_alphas(0), num_digits(0), num_other(0) {} + inline int NumInconsistentPunc() const { + return invalid_punc ? num_punc : 0; + } + inline int NumInconsistentCase() const { + return (num_non_first_upper > num_lower) ? num_lower : num_non_first_upper; + } + inline int NumInconsistentChartype() const { + return (NumInconsistentPunc() + num_other + + ((num_alphas > num_digits) ? 
num_digits : num_alphas)); + } + inline bool Consistent() const { + return (NumInconsistentPunc() == 0 && NumInconsistentCase() == 0 && + NumInconsistentChartype() == 0 && !inconsistent_script); + } + + EDGE_REF punc_ref; + int num_punc; + bool invalid_punc; + int num_non_first_upper; + int num_lower; + int script_id; + bool inconsistent_script; + int num_alphas; + int num_digits; + int num_other; +}; + + +// The following structs are used for storing the state of the language model +// in the segmentation search graph. In this graph the nodes are BLOB_CHOICEs +// and the links are the replationships between the underlying blobs (see +// segsearch.h for a more detailed description). +// Each of the BLOB_CHOICEs contains LanguageModelState struct, which has +// a list of N best paths (list of ViterbiStateEntry) explored by the Viterbi +// search leading up to and including this BLOB_CHOICE. +// Each ViterbiStateEntry contains information from various components of the +// language model: dawgs in which the path is found, character ngram model +// probability of the path, script/chartype/font consistency info, state for +// language-specific heuristics (e.g. hyphenated and compund words, lower/upper +// case preferences, etc). +// Each ViterbiStateEntry also contains the parent pointer, so that the path +// that it represents (WERD_CHOICE) can be constructed by following these +// parent pointers. + +// Struct for storing additional information used by Dawg language model +// component. It stores the set of active dawgs in which the sequence of +// letters on a path can be found and the constraints that have to be +// satisfied at the end of the word (e.g. beginning/ending punctuation). 
+struct LanguageModelDawgInfo {
+  LanguageModelDawgInfo(DawgInfoVector *a, DawgInfoVector *c,
+                        PermuterType pt) : permuter(pt) {
+    active_dawgs = new DawgInfoVector(*a);
+    constraints = new DawgInfoVector(*c);
+  }
+  ~LanguageModelDawgInfo() {
+    delete active_dawgs;
+    delete constraints;
+  }
+  DawgInfoVector *active_dawgs;
+  DawgInfoVector *constraints;
+  PermuterType permuter;
+};
+
+// Struct for storing additional information used by Ngram language model
+// component.
+struct LanguageModelNgramInfo {
+  LanguageModelNgramInfo(const char *c, int l, bool p, float nc)
+      : context(c), context_unichar_step_len(l), pruned(p), ngram_cost(nc) {}
+  STRING context;  // context string
+  // Length of the context measured by advancing using UNICHAR::utf8_step()
+  // (should be at most the order of the character ngram model used).
+  int context_unichar_step_len;
+  // The paths with pruned set are pruned out from the perspective of the
+  // character ngram model. They are explored further because they represent
+  // a dictionary match or a top choice. Thus ngram_info is still computed
+  // for them in order to calculate the combined cost.
+  bool pruned;
+  // -[ ln(P_classifier(path)) + scale_factor * ln(P_ngram_model(path)) ]
+  float ngram_cost;
+};
+
+// Struct for storing the information about a path in the segmentation graph
+// explored by Viterbi search.
+struct ViterbiStateEntry : public ELIST_LINK { + ViterbiStateEntry(BLOB_CHOICE *pb, ViterbiStateEntry *pe, + BLOB_CHOICE *b, float c, + const LanguageModelConsistencyInfo &ci, + const AssociateStats &as, + LanguageModelFlagsType tcf, + LanguageModelDawgInfo *d, LanguageModelNgramInfo *n) + : cost(c), parent_b(pb), parent_vse(pe), ratings_sum(b->rating()), + min_certainty(b->certainty()), length(1), consistency_info(ci), + associate_stats(as), top_choice_flags(tcf), dawg_info(d), ngram_info(n), + updated(true) { + if (pe != NULL) { + ratings_sum += pe->ratings_sum; + if (pe->min_certainty < min_certainty) { + min_certainty = pe->min_certainty; + } + length += pe->length; + } + } + ~ViterbiStateEntry() { + delete dawg_info; + delete ngram_info; + } + // Comparator function for sorting ViterbiStateEntry_LISTs in + // non-increasing order of costs. + static int Compare(const void *e1, const void *e2) { + const ViterbiStateEntry *ve1 = + *reinterpret_cast(e1); + const ViterbiStateEntry *ve2 = + *reinterpret_cast(e2); + return (ve1->cost < ve2->cost) ? -1 : 1; + } + inline bool Consistent() const { + if (dawg_info != NULL && consistency_info.NumInconsistentCase() == 0) { + return true; + } + return consistency_info.Consistent(); + } + + // The cost is an adjusted ratings sum, that is adjusted by all the language + // model components that use Viterbi search. + float cost; + + // Pointers to parent BLOB_CHOICE and ViterbiStateEntry (not owned by this). + BLOB_CHOICE *parent_b; + ViterbiStateEntry *parent_vse; + + // Various information about the characters on the path represented + // by this ViterbiStateEntry. 
+ float ratings_sum; // sum of ratings of character on the path + float min_certainty; // minimum certainty on the path + int length; // number of characters on the path + LanguageModelConsistencyInfo consistency_info; // path consistency info + AssociateStats associate_stats; // character widths/gaps/seams + + // Flags for marking the entry as a top choice path with + // the smallest rating or lower/upper case letters). + LanguageModelFlagsType top_choice_flags; + + // Extra information maintained by Dawg laguage model component + // (owned by ViterbiStateEntry). + LanguageModelDawgInfo *dawg_info; + + // Extra information maintained by Ngram laguage model component + // (owned by ViterbiStateEntry). + LanguageModelNgramInfo *ngram_info; + + bool updated; // set to true if the entry has just been created/updated +}; + +ELISTIZEH(ViterbiStateEntry); + +// Struct to store information maintained by various language model components. +struct LanguageModelState { + LanguageModelState(int col, int row) : contained_in_col(col), + contained_in_row(row), viterbi_state_entries_prunable_length(0), + viterbi_state_entries_prunable_max_cost(MAX_FLOAT32) {} + ~LanguageModelState() {} + + // Ratings matrix cell that holds this LanguageModelState + // (needed to construct best STATE for rebuild_current_state() + // and best BLOB_CHOICE_LIST_VECTOR for AcceptableChoice()). + int contained_in_col; + int contained_in_row; + + // Storage for the Viterbi state. + ViterbiStateEntry_LIST viterbi_state_entries; + // Number and max cost of prunable paths in viterbi_state_entries. + int viterbi_state_entries_prunable_length; + float viterbi_state_entries_prunable_max_cost; + + // TODO(daria): add font consistency checking. +}; + +// Bundle together all the things pertaining to the best choice/state. 
+struct BestChoiceBundle { + BestChoiceBundle(STATE *s, WERD_CHOICE *bc, WERD_CHOICE *rc, + BLOB_CHOICE_LIST_VECTOR *bcc) + : best_state(s), best_choice(bc), raw_choice(rc), + best_char_choices(bcc), updated(false), best_vse(NULL), best_b(NULL) {} + + STATE *best_state; + WERD_CHOICE *best_choice; + WERD_CHOICE *raw_choice; + BLOB_CHOICE_LIST_VECTOR *best_char_choices; + bool updated; + DANGERR fixpt; + ViterbiStateEntry *best_vse; // best ViterbiStateEntry and BLOB_CHOICE + BLOB_CHOICE *best_b; // at the end of the best choice path +}; + +struct BestPathByColumn { + float avg_cost; + ViterbiStateEntry *best_vse; + BLOB_CHOICE *best_b; +}; + +// This class that contains the data structures and functions necessary +// to represent and use the knowledge about the language. +class LanguageModel { + public: + // Adjustments to pain point priority. + static const float kInitialPainPointPriorityAdjustment; + static const float kDefaultPainPointPriorityAdjustment; + static const float kBestChoicePainPointPriorityAdjustment; + static const float kCriticalPainPointPriorityAdjustment; + + // Denominator for normalizing per-letter ngram cost when deriving + // penalty adjustments. + static const float kMaxAvgNgramCost; + // Minimum word length for fixed length dawgs. + // TODO(daria): check in the new chi/jpn.traineddata without the + // fixed length dawg of length 1 and delete this variable. + static const int kMinFixedLengthDawgLength; + // If there is a significant drop in character ngram probability or a + // dangerous ambiguity make the thresholds on what blob combinations + // can be classified looser. + static const float kLooseMaxCharWhRatio; + + // Masks for interpreting which language model components + // were changed by the call to UpdateState(). 
+ static const LanguageModelFlagsType kSmallestRatingFlag = 0x1; + static const LanguageModelFlagsType kLowerCaseFlag = 0x2; + static const LanguageModelFlagsType kUpperCaseFlag = 0x4; + static const LanguageModelFlagsType kConsistentFlag = 0x8; + static const LanguageModelFlagsType kDawgFlag = 0x10; + static const LanguageModelFlagsType kNgramFlag = 0x20; + static const LanguageModelFlagsType kJustClassifiedFlag = 0x80; + static const LanguageModelFlagsType kAllChangedFlag = 0xff; + + LanguageModel(Dict *dict, WERD_CHOICE **prev_word_best_choice); + ~LanguageModel(); + + // Updates data structures that are used for the duration of the segmentation + // search on the current word; + void InitForWord(const WERD_CHOICE *prev_word, const DENORM *denorm, + bool fixed_pitch, float best_choice_cert, + float max_char_wh_ratio, + HEAP *pain_points, CHUNKS_RECORD *chunks_record); + // Resets all the "updated" flags used by the Viterbi search that were + // "registered" during the update of the ratings matrix. + void CleanUp(); + // Deletes and sets to NULL language model states of each of the + // BLOB_CHOICEs in the given BLOB_CHOICE_LIST. + void DeleteState(BLOB_CHOICE_LIST *choices); + + // Updates language model state of the given BLOB_CHOICE_LIST (from + // the ratings matrix) a its parent. Updates pain_points if new + // problematic points are found in the segmentation graph. + // + // At most language_model_max_viterbi_list_size are kept in each + // LanguageModelState.viterbi_state_entries list. + // The entries that represent dictionary word paths are kept at the + // front of the list and do not count towards the size limit. + // The list ordered by cost that is computed collectively by several + // language model components (currently dawg and ngram components). + // + // best_path_by_column records the lowest cost path found so far for each + // column of the chunks_record->ratings matrix over all the rows. 
This + // array is updated if a lower cost ViterbiStateEntry is created in curr_col. + LanguageModelFlagsType UpdateState( + LanguageModelFlagsType changed, + int curr_col, int curr_row, + BLOB_CHOICE_LIST *curr_list, + BLOB_CHOICE_LIST *parent_list, + HEAP *pain_points, + BestPathByColumn *best_path_by_column[], + CHUNKS_RECORD *chunks_record, + BestChoiceBundle *best_choice_bundle); + + // Generates pain points from the problematic top choice paths when the + // segmentation search is guided by the character ngram model. + // It is necessary to consider problematic the top choice paths instead of + // the problematic lowest cost paths because the character ngram model + // might assign a very high cost to very improbably paths. For example, + // "liot" might have a much lower cost than "llot", and the character ngram + // model might detect a dip in probability for p(t|lio) at the end of the + // word, but not at the beginning (p(i|l) would be ok). However, looking at + // the dips in character ngram probability of the top choices we would be + // able to stop the problematic points (p(l| l) would be low). + void GenerateNgramModelPainPointsFromColumn(int col, int row, + HEAP *pain_points, + CHUNKS_RECORD *chunks_record); + + // Generates pain points from the problematic lowest cost paths that are + // "promising" (i.e. would have the cost lower than the one recorded in + // best_path_by_column if the problematic ending of the path is removed + // and after being combined with another blob the certainty of the last + // blob is improved). + void GenerateProblematicPathPainPointsFromColumn( + int col, int row, float best_choice_cert, + HEAP *pain_points, BestPathByColumn *best_path_by_column[], + CHUNKS_RECORD *chunks_record); + + // This function can be called after processing column col of the + // chunks_record->ratings matrix in order to find the promising paths + // that were terminated or made inconsistent by the character choices + // in column col. 
If such paths are identified, this function generates + // pain points to combine the problematic cells of the matrix. + void GeneratePainPointsFromColumn( + int col, + const GenericVector &non_empty_rows, + float best_choice_cert, + HEAP *pain_points, + BestPathByColumn *best_path_by_column[], + CHUNKS_RECORD *chunks_record); + + // Generates a pain point for each problematic point on the best choice + // path. Such problematic points could be a termination of a dicionary + // word, dip in ngram probability, invalid punctuation, inconsistent + // case/chartype/script or punctuation in the middle of a word. + void GeneratePainPointsFromBestChoice( + HEAP *pain_points, + CHUNKS_RECORD *chunks_record, + BestChoiceBundle *best_choice_bundle); + + // Adds a pain point to the given pain_points queue that will cause + // the entry at chunks_record->ratings(col, row) to be classified. + // The priority of the pain point is set to be: + // + // priority_adjustment * sqrt(avg_parent_cost) + // ---------------------------------------------------- + // sqrt(dict_parent_path_length) * |worst_piece_cert| + // + // The priority is further lowered if fragmented is true. + // + void GeneratePainPoint(int col, int row, bool ok_to_extend, + float priority_adjustment, + float worst_piece_cert, + bool fragmented, + float best_choice_cert, + float max_char_wh_ratio, + BLOB_CHOICE *parent_b, + ViterbiStateEntry *parent_vse, + CHUNKS_RECORD *chunks_record, + HEAP *pain_points); + + // Returns true if an acceptable best choice was discovered. + inline bool AcceptableChoiceFound() { return acceptable_choice_found_; } + + // Fills cert with the worst certainty of the top non-fragmented choice + // of the left and right neighbor of the given col,row. + // Sets fragmented if any of the neighbors have a fragmented character + // as the top choice. 
+ inline void GetWorstPieceCertainty(int col, int row, MATRIX *ratings, + float *cert, bool *fragmented) { + *cert = 0.0f; + *fragmented = false; + if (row > 0) { + GetPieceCertainty(ratings->get(col, row-1), cert, fragmented); + } + if (col+1 < ratings->dimension()) { + GetPieceCertainty(ratings->get(col+1, row), cert, fragmented); + } + ASSERT_HOST(*cert < 0.0f); + } + + protected: + + inline static float CertaintyScore(float cert) { return (-1.0f / cert); } + + inline bool NonAlphaOrDigitMiddle(int col, int row, int dimension, + UNICHAR_ID unichar_id) { + return (!dict_->getUnicharset().get_isalpha(unichar_id) && + !dict_->getUnicharset().get_isdigit(unichar_id) && + col > 0 && row+1 < dimension); + } + + inline bool IsFragment(BLOB_CHOICE *b) { + return dict_->getUnicharset().get_fragment(b->unichar_id()); + } + + inline bool IsHan(int script_id) { + return ((dict_->getUnicharset().han_sid() != + dict_->getUnicharset().null_sid()) && + (script_id == dict_->getUnicharset().han_sid())); + } + + // Finds the first non-fragmented character in the given BLOB_CHOICE_LIST + // and update cert if its certainty is less than the one recorded in cert. + // Sets fragmented if the first choice in BLOB_CHOICE_LIST is a fragment. + inline void GetPieceCertainty(BLOB_CHOICE_LIST *blist, + float *cert, bool *fragmented) { + if (blist == NOT_CLASSIFIED || blist->empty()) return; + BLOB_CHOICE_IT bit(blist); + while (!bit.at_last() && IsFragment(bit.data())) { + *fragmented = true; + bit.forward(); // skip fragments + } + // Each classification must have at least one non-fragmented choice. 
+ ASSERT_HOST(!IsFragment(bit.data())); + if (bit.data()->certainty() < *cert) *cert = bit.data()->certainty(); + } + + inline float ComputeAdjustment(int num_problems, float penalty) { + if (num_problems == 0) return 0.0f; + if (num_problems == 1) return penalty; + return (penalty + (language_model_penalty_increment * + static_cast(num_problems-1))); + } + + // Computes the adjustment to the ratings sum based on the given + // consistency_info. The paths with invalid punctuation, inconsistent + // case and character type are penalized proportionally to the number + // of inconsistencies on the path. + inline float ComputeConsistencyAdjustment( + const LanguageModelDawgInfo *dawg_info, + const LanguageModelConsistencyInfo &consistency_info) { + if (dawg_info != NULL) { + return ComputeAdjustment(consistency_info.NumInconsistentCase(), + language_model_penalty_case); + } + return (ComputeAdjustment(consistency_info.NumInconsistentPunc(), + language_model_penalty_punc) + + ComputeAdjustment(consistency_info.NumInconsistentCase(), + language_model_penalty_case) + + ComputeAdjustment(consistency_info.NumInconsistentChartype(), + language_model_penalty_chartype) + + (consistency_info.inconsistent_script ? + language_model_penalty_script : 0.0f)); + } + + // Returns an andjusted ratings sum that includes inconsistency penalties. + inline float ComputeConsistencyAdjustedRatingsSum( + float ratings_sum, + const LanguageModelDawgInfo *dawg_info, + const LanguageModelConsistencyInfo &consistency_info) { + return (ratings_sum * (1.0f + ComputeConsistencyAdjustment( + dawg_info, consistency_info))); + } + + // Returns an adjusted ratings sum that includes inconsistency penalties, + // penalties for non-dicionary paths and paths with dips in ngram + // probability. 
+ float ComputeAdjustedPathCost( + float ratings_sum, int length, float dawg_score, + const LanguageModelDawgInfo *dawg_info, + const LanguageModelNgramInfo *ngram_info, + const LanguageModelConsistencyInfo &consistency_info, + const AssociateStats &associate_stats, + ViterbiStateEntry *parent_vse); + + // Returns true if the given ViterbiStateEntry represents a problematic + // path. A path is considered problematic if the last unichar makes it + // inconsistent, introduces a dip in ngram probability or transforms a + // dictionary path into a non-dictionary one. + bool ProblematicPath(const ViterbiStateEntry &vse, + UNICHAR_ID unichar_id, bool word_end); + + // Finds the first lower and upper case character in curr_list. + // If none found, choses the first character in the list. + void GetTopChoiceLowerUpper(LanguageModelFlagsType changed, + BLOB_CHOICE_LIST *curr_list, + BLOB_CHOICE **first_lower, + BLOB_CHOICE **first_upper); + + // Helper function that computes the cost of the path composed of the + // path in the given parent ViterbiStateEntry and the given BLOB_CHOICE. + // Adds a new ViterbiStateEntry to the list of viterbi entries + // in the given BLOB_CHOICE if the new path looks good enough. + // Returns LanguageModelFlagsType that indicates which language + // model components were involved in creating the new entry. + LanguageModelFlagsType AddViterbiStateEntry( + LanguageModelFlagsType top_choice_flags, + float denom, + bool word_end, + int curr_col, int curr_row, + BLOB_CHOICE *b, + BLOB_CHOICE *parent_b, + ViterbiStateEntry *parent_vse, + HEAP *pain_points, + BestPathByColumn *best_path_by_column[], + CHUNKS_RECORD *chunks_record, + BestChoiceBundle *best_choice_bundle); + + // Pretty print information in the given ViterbiStateEntry. 
+ void PrintViterbiStateEntry(const char *msg, + ViterbiStateEntry *vse, + BLOB_CHOICE *b, + CHUNKS_RECORD *chunks_record); + + // Determines whether a potential entry is a true top choice and + // updates changed accordingly. + // + // Note: The function assumes that b, top_choice_flags and changed + // are not NULL. + void GenerateTopChoiceInfo( + float ratings_sum, + const LanguageModelDawgInfo *dawg_info, + const LanguageModelConsistencyInfo &consistency_info, + const ViterbiStateEntry *parent_vse, + BLOB_CHOICE *b, + LanguageModelFlagsType *top_choice_flags, + LanguageModelFlagsType *changed); + + // Calls dict_->LetterIsOk() with DawgArgs initialized from parent_vse and + // unichar from b.unichar_id(). Constructs and returns LanguageModelDawgInfo + // with updated active dawgs, constraints and permuter. + // + // Note: the caller is responsible for deleting the returned pointer. + LanguageModelDawgInfo *GenerateDawgInfo(bool word_end, int script_id, + int curr_col, int curr_row, + const BLOB_CHOICE &b, + const ViterbiStateEntry *parent_vse, + LanguageModelFlagsType *changed); + + // Computes p(unichar | parent context) and records it in ngram_cost. + // If b.unichar_id() is an unlikely continuation of the parent context + // sets found_small_prob to true and returns NULL. + // Otherwise creates a new LanguageModelNgramInfo entry containing the + // updated context (that includes b.unichar_id() at the end) and returns it. + // + // Note: the caller is responsible for deleting the returned pointer. + LanguageModelNgramInfo *GenerateNgramInfo(const char *unichar, + float certainty, float denom, + int curr_col, int curr_row, + const ViterbiStateEntry *parent_vse, + BLOB_CHOICE *parent_b, + LanguageModelFlagsType *changed); + + // Computes -(log(prob(classifier)) + log(prob(ngram model))) + // for the given unichar in the given context. If there are multiple + // unichars at one position - takes the average of their probabilities. 
+ // UNICHAR::utf8_step() is used to separate out individual UTF8 characters, + // since probability_in_context() can only handle one at a time (while + // unicharset might contain ngrams and glyphs composed from multiple UTF8 + // characters). + float ComputeNgramCost(const char *unichar, float certainty, float denom, + const char *context, + int *unichar_step_len, bool *found_small_prob); + + // Computes the normalization factors for the classifier confidences + // (used by ComputeNgramCost()). + float ComputeDenom(BLOB_CHOICE_LIST *curr_list); + + // Fills the given consistenty_info based on parent_vse.consistency_info + // and on the consistency of the given unichar_id with parent_vse. + void FillConsistencyInfo( + bool word_end, UNICHAR_ID unichar_id, + ViterbiStateEntry *parent_vse, BLOB_CHOICE *parent_b, + LanguageModelConsistencyInfo *consistency_info); + + // Constructs WERD_CHOICE by recording unichar_ids of the BLOB_CHOICEs + // on the path represented by the given BLOB_CHOICE and language model + // state entries (lmse, dse). The path is re-constructed by following + // the parent pointers in the the lang model state entries). If the + // constructed WERD_CHOICE is better than the best/raw choice recorded + // in the best_choice_bundle, this function updates the corresponding + // fields and sets best_choice_bunldle->updated to true. + void UpdateBestChoice(BLOB_CHOICE *b, + ViterbiStateEntry *vse, + HEAP *pain_points, + CHUNKS_RECORD *chunks_record, + BestChoiceBundle *best_choice_bundle); + + // Constructs a WERD_CHOICE by tracing parent pointers starting with + // the given LanguageModelStateEntry. Returns the constructed word. + // Updates best_char_choices, certainties and state if they are not + // NULL (best_char_choices and certainties are assumed to have the + // length equal to lmse->length). + // The caller is resposible for freeing memory associated with the + // returned WERD_CHOICE. 
+ WERD_CHOICE *ConstructWord(BLOB_CHOICE *b, + ViterbiStateEntry *vse, + CHUNKS_RECORD *chunks_record, + BLOB_CHOICE_LIST_VECTOR *best_char_choices, + float certainties[], + float *dawg_score, + STATE *state); + + // This function is used for non-space delimited languages when looking + // for word endings recorded while trying to separate the path into words. + // + // The function increments covered if a valid word ending is found in + // active_dawgs (if covered is incremented, skip is set to the number + // of unichars that should be skipped because they are covered by the + // word whose ending was just discovered). + // + // dawg_score and dawg_score_done are updated if: + // -- at the end of the path we discover a valid word ending from a + // non-fixed length dawg (this means that the whole word is a + // valid word, so dawg_score is set to 1.0f + // -- word_start is true (dawg_score is set to covered / word length) + // + // Note: this function assumes that skip, covered, dawg_score and + // dawg_score_done are not NULL. + void UpdateCoveredByFixedLengthDawgs(const DawgInfoVector &active_dawgs, + int word_index, int word_length, + int *skip, int *covered, + float *dawg_score, + bool *dawg_score_done); + + // Wrapper around AssociateUtils::ComputeStats(). + inline void ComputeAssociateStats(int col, int row, + float max_char_wh_ratio, + ViterbiStateEntry *parent_vse, + CHUNKS_RECORD *chunks_record, + AssociateStats *associate_stats) { + AssociateUtils::ComputeStats( + col, row, + (parent_vse != NULL) ? &(parent_vse->associate_stats) : NULL, + (parent_vse != NULL) ? parent_vse->length : 0, + fixed_pitch_, max_char_wh_ratio, denorm_, + chunks_record, language_model_debug_level, associate_stats); + } + + // Returns true if the path with such top_choice_flags and dawg_info + // could be pruned out (i.e. is neither a dictionary nor a top choice path). + // In non-space delimited languages all paths can be "somewhat" dictionary + // words. 
In such languages we can not do dictionary-driven path prunning, + // so paths with non-empty dawg_info are considered prunable. + inline bool PrunablePath(LanguageModelFlagsType top_choice_flags, + const LanguageModelDawgInfo *dawg_info) { + if (top_choice_flags) return false; + if (dawg_info != NULL && + dict_->GetMaxFixedLengthDawgIndex() < 0) return false; + return true; + } + + // Returns true if the given script id indicates a path that might consist + // of non-space delimited words (e.g. when dealing with Chinese and Japanese + // languages), and fixed length dawgs were loaded. + // + // TODO(daria): generate fixed length dawgs for Thai. + inline bool UseFixedLengthDawgs(int script_id) { + if (dict_->GetMaxFixedLengthDawgIndex() < 0) return false; + if ((dict_->getUnicharset().han_sid() != + dict_->getUnicharset().null_sid()) && + script_id == dict_->getUnicharset().han_sid()) return true; + if ((dict_->getUnicharset().hiragana_sid() != + dict_->getUnicharset().null_sid()) && + script_id == dict_->getUnicharset().hiragana_sid()) return true; + if ((dict_->getUnicharset().katakana_sid() != + dict_->getUnicharset().null_sid()) && + script_id == dict_->getUnicharset().katakana_sid()) return true; + return false; + } + + // Returns true if the given ViterbiStateEntry represents an acceptable path. + inline bool AcceptablePath(const ViterbiStateEntry &vse) { + return (vse.dawg_info != NULL || vse.Consistent() || + (vse.ngram_info != NULL && !vse.ngram_info->pruned)); + } + + public: + // Parameters. 
+ INT_VAR_H(language_model_debug_level, 0, "Language model debug level"); + BOOL_VAR_H(language_model_ngram_on, false, + "Turn on/off the use of character ngram model"); + INT_VAR_H(language_model_ngram_order, 8, + "Maximum order of the character ngram model"); + INT_VAR_H(language_model_max_viterbi_list_size, 10, + "Maximum size of viterbi lists recorded in BLOB_CHOICEs" + "(excluding entries that represent dictionary word paths)"); + double_VAR_H(language_model_ngram_small_prob, 0.000001, + "To avoid overly small denominators use this as the floor" + " of the probability returned by the ngram model"); + double_VAR_H(language_model_ngram_nonmatch_score, -40.0, + "Average classifier score of a non-matching unichar."); + BOOL_VAR_H(language_model_ngram_use_only_first_uft8_step, false, + "Use only the first UTF8 step of the given string" + " when computing log probabilities"); + double_VAR_H(language_model_ngram_scale_factor, 0.03, + "Strength of the character ngram model relative to the" + " character classifier "); + INT_VAR_H(language_model_min_compound_length, 3, + "Minimum length of compound words"); + INT_VAR_H(language_model_fixed_length_choices_depth, 3, + "Depth of blob choice lists to explore" + " when fixed length dawgs are on"); + // Penalties used for adjusting path costs and final word rating. 
+ double_VAR_H(language_model_penalty_non_freq_dict_word, 0.1, + "Penalty for words not in the frequent word dictionary"); + double_VAR_H(language_model_penalty_non_dict_word, 0.15, + "Penalty for non-dictionary words"); + double_VAR_H(language_model_penalty_punc, 0.2, + "Penalty for inconsistent punctuation"); + double_VAR_H(language_model_penalty_case, 0.1, + "Penalty for inconsistent case"); + double_VAR_H(language_model_penalty_script, 0.5, + "Penalty for inconsistent script"); + double_VAR_H(language_model_penalty_chartype, 0.3, + "Penalty for inconsistent character type"); + double_VAR_H(language_model_penalty_increment, 0.01, "Penalty increment"); + + protected: + // Member Variables. + + // Temporary DawgArgs struct that is re-used across different words to + // avoid dynamic memory re-allocation (should be cleared before each use). + DawgArgs *dawg_args_; + // List of pointers to updated flags used by Viterbi search to mark + // recently updated ViterbiStateEntries. + GenericVector updated_flags_; + + // The following variables are set at construction time. + + // Pointer to Dict class, that is used for querying the dictionaries + // (the pointer is not owned by LanguageModel). + Dict *dict_; + // DENORM computed by Tesseract (not owned by LanguageModel). + const DENORM *denorm_; + // TODO(daria): the following variables should become LanguageModel params + // when the old code in bestfirst.cpp and heuristic.cpp is deprecated. + // + // Set to true if we are dealing with fixed pitch text + // (set to assume_fixed_pitch_char_segment). + bool fixed_pitch_; + // Max char width-to-height ratio allowed + // (set to segsearch_max_char_wh_ratio). + float max_char_wh_ratio_; + + // The following variables are initialized with InitForWord(). + + // String representation of the classificaion of the previous word + // (since this is only used by the character ngram model component, + // only the last language_model_ngram_order of the word are stored). 
+ STRING prev_word_str_; + int prev_word_unichar_step_len_; + // Active dawg and constraints vector. + DawgInfoVector *beginning_active_dawgs_; + DawgInfoVector *beginning_constraints_; + DawgInfoVector *fixed_length_beginning_active_dawgs_; + DawgInfoVector *empty_dawg_info_vec_; + // Maximum adjustment factor for character ngram choices. + float max_penalty_adjust_; + // Set to true if acceptable choice was discovered. + // Note: it would be nice to use this to terminate the search once an + // acceptable choices is found. However we do not do that and once an + // acceptable choice is found we finish looking for alternative choices + // in the current segmentation graph and then exit the search (no more + // classifications are done after an acceptable choice is found). + // This is needed in order to let the search find the words very close to + // the best choice in rating (e.g. what/What, Cat/cat, etc) and log these + // choices. This way the stopper will know that the best choice is not + // ambiguous (i.e. there are best choices in the best choice list that have + // ratings close to the very best one) and will be less likely to mis-adapt. + bool acceptable_choice_found_; + +}; + +} // namespace tesseract + +#endif // TESSERACT_WORDREC_LANGUAGE_MODEL_H_ diff --git a/wordrec/makechop.cpp b/wordrec/makechop.cpp index ad6e6ccfa9..d6795bc3e7 100644 --- a/wordrec/makechop.cpp +++ b/wordrec/makechop.cpp @@ -27,6 +27,7 @@ ----------------------------------------------------------------------*/ #include "makechop.h" +#include "blobs.h" #include "render.h" #include "structures.h" #ifdef __UNIX__ @@ -48,70 +49,19 @@ * Split this blob into two blobs by applying the splits included in * the seam description. 
**********************************************************************/ -void apply_seam(TBLOB *blob, TBLOB *other_blob, SEAM *seam) { - check_outline_mem(); +void apply_seam(TBLOB *blob, TBLOB *other_blob, bool italic_blob, SEAM *seam) { if (seam->split1 == NULL) { - divide_blobs (blob, other_blob, seam->location); + divide_blobs(blob, other_blob, italic_blob, seam->location); } else if (seam->split2 == NULL) { - make_split_blobs(blob, other_blob, seam); + make_split_blobs(blob, other_blob, italic_blob, seam); } else if (seam->split3 == NULL) { - make_double_split(blob, other_blob, seam); + make_double_split(blob, other_blob, italic_blob, seam); } else { - make_triple_split(blob, other_blob, seam); + make_triple_split(blob, other_blob, italic_blob, seam); } - - check_outline_mem(); -} - - -/********************************************************************** - * divide_blobs - * - * Create two blobs by grouping the outlines in the appropriate blob. - * The outlines that are beyond the location point are moved to the - * other blob. The ones whose x location is less than that point are - * retained in the original blob. 
- **********************************************************************/ -void divide_blobs(TBLOB *blob, TBLOB *other_blob, inT32 location) { - TESSLINE *outline; - TESSLINE *outline1 = NULL; - TESSLINE *outline2 = NULL; - - outline = blob->outlines; - blob->outlines = NULL; - - while (outline != NULL) { - if ((outline->topleft.x + outline->botright.x) / 2 < location) { - /* Outline is in 1st blob */ - if (outline1) { - outline1->next = outline; - } - else { - blob->outlines = outline; - } - outline1 = outline; - } - else { - /* Outline is in 2nd blob */ - if (outline2) { - outline2->next = outline; - } - else { - other_blob->outlines = outline; - } - outline2 = outline; - } - - outline = outline->next; - } - - if (outline1) - outline1->next = NULL; - if (outline2) - outline2->next = NULL; } @@ -121,25 +71,16 @@ void divide_blobs(TBLOB *blob, TBLOB *other_blob, inT32 location) { * Group the outlines from the first blob into both of them. Do so * according to the information about the split. **********************************************************************/ -void form_two_blobs(TBLOB *blob, TBLOB *other_blob, inT32 location) { +void form_two_blobs(TBLOB *blob, TBLOB *other_blob, bool italic_blob, + const TPOINT& location) { setup_blob_outlines(blob); - divide_blobs(blob, other_blob, location); + divide_blobs(blob, other_blob, italic_blob, location); eliminate_duplicate_outlines(blob); eliminate_duplicate_outlines(other_blob); correct_blob_order(blob, other_blob); - -#ifndef GRAPHICS_DISABLED - if (chop_debug > 2) { - display_blob(blob, Red); - #ifdef __UNIX__ - sleep (1); - #endif - display_blob(other_blob, Cyan); - } -#endif } @@ -149,10 +90,11 @@ void form_two_blobs(TBLOB *blob, TBLOB *other_blob, inT32 location) { * Create two blobs out of one by splitting the original one in half. * Return the resultant blobs for classification. 
**********************************************************************/ -void make_double_split(TBLOB *blob, TBLOB *other_blob, SEAM *seam) { - make_single_split (blob->outlines, seam->split1); - make_single_split (blob->outlines, seam->split2); - form_two_blobs (blob, other_blob, seam->location); +void make_double_split(TBLOB *blob, TBLOB *other_blob, bool italic_blob, + SEAM *seam) { + make_single_split(blob->outlines, seam->split1); + make_single_split(blob->outlines, seam->split2); + form_two_blobs(blob, other_blob, italic_blob, seam->location); } @@ -170,17 +112,15 @@ void make_single_split(TESSLINE *outlines, SPLIT *split) { while (outlines->next != NULL) outlines = outlines->next; - outlines->next = newoutline (); + outlines->next = new TESSLINE; outlines->next->loop = split->point1; - outlines->next->child = NULL; - setup_outline (outlines->next); + outlines->next->ComputeBoundingBox(); outlines = outlines->next; - outlines->next = newoutline (); + outlines->next = new TESSLINE; outlines->next->loop = split->point2; - outlines->next->child = NULL; - setup_outline (outlines->next); + outlines->next->ComputeBoundingBox(); outlines->next->next = NULL; } @@ -192,10 +132,11 @@ void make_single_split(TESSLINE *outlines, SPLIT *split) { * Create two blobs out of one by splitting the original one in half. * Return the resultant blobs for classification. **********************************************************************/ -void make_split_blobs(TBLOB *blob, TBLOB *other_blob, SEAM *seam) { - make_single_split (blob->outlines, seam->split1); +void make_split_blobs(TBLOB *blob, TBLOB *other_blob, bool italic_blob, + SEAM *seam) { + make_single_split(blob->outlines, seam->split1); - form_two_blobs (blob, other_blob, seam->location); + form_two_blobs (blob, other_blob, italic_blob, seam->location); } @@ -207,12 +148,13 @@ void make_split_blobs(TBLOB *blob, TBLOB *other_blob, SEAM *seam) { * the outlines. 
Three of the starting outlines will produce two ending * outlines. Return the resultant blobs for classification. **********************************************************************/ -void make_triple_split(TBLOB *blob, TBLOB *other_blob, SEAM *seam) { - make_single_split (blob->outlines, seam->split1); - make_single_split (blob->outlines, seam->split2); - make_single_split (blob->outlines, seam->split3); +void make_triple_split(TBLOB *blob, TBLOB *other_blob, bool italic_blob, + SEAM *seam) { + make_single_split(blob->outlines, seam->split1); + make_single_split(blob->outlines, seam->split2); + make_single_split(blob->outlines, seam->split3); - form_two_blobs (blob, other_blob, seam->location); + form_two_blobs(blob, other_blob, italic_blob, seam->location); } @@ -237,7 +179,8 @@ void undo_seam(TBLOB *blob, TBLOB *other_blob, SEAM *seam) { while (outline->next) outline = outline->next; outline->next = other_blob->outlines; - oldblob(other_blob); + other_blob->outlines = NULL; + delete other_blob; if (seam->split1 == NULL) { } @@ -256,8 +199,6 @@ void undo_seam(TBLOB *blob, TBLOB *other_blob, SEAM *seam) { setup_blob_outlines(blob); eliminate_duplicate_outlines(blob); - - check_outline_mem(); } @@ -273,15 +214,13 @@ void undo_single_split(TBLOB *blob, SPLIT *split) { /* Modify edge points */ unsplit_outlines (split->point1, split->point2); - outline1 = newoutline (); + outline1 = new TESSLINE; outline1->next = blob->outlines; blob->outlines = outline1; outline1->loop = split->point1; - outline1->child = NULL; - outline2 = newoutline (); + outline2 = new TESSLINE; outline2->next = blob->outlines; blob->outlines = outline2; outline2->loop = split->point2; - outline2->child = NULL; } diff --git a/wordrec/makechop.h b/wordrec/makechop.h index e946a75d13..1f2639cd48 100644 --- a/wordrec/makechop.h +++ b/wordrec/makechop.h @@ -42,26 +42,28 @@ **********************************************************************/ #define is_split_outline(outline,split) \ 
-(point_in_outline (split->point1, outline) && \ - point_in_outline (split->point2, outline)) \ +(outline->Contains(split->point1->pos) && \ + outline->Contains(split->point2->pos)) \ /*---------------------------------------------------------------------- Public Function Prototypes ----------------------------------------------------------------------*/ -void apply_seam(TBLOB *blob, TBLOB *other_blob, SEAM *seam); +void apply_seam(TBLOB *blob, TBLOB *other_blob, bool italic_blob, SEAM *seam); -void divide_blobs(TBLOB *blob, TBLOB *other_blob, inT32 location); +void form_two_blobs(TBLOB *blob, TBLOB *other_blob, bool italic_blob, + const TPOINT& location); -void form_two_blobs(TBLOB *blob, TBLOB *other_blob, inT32 location); - -void make_double_split(TBLOB *blob, TBLOB *other_blob, SEAM *seam); +void make_double_split(TBLOB *blob, TBLOB *other_blob, bool italic_blob, + SEAM *seam); void make_single_split(TESSLINE *outlines, SPLIT *split); -void make_split_blobs(TBLOB *blob, TBLOB *other_blob, SEAM *seam); +void make_split_blobs(TBLOB *blob, TBLOB *other_blob, bool italic_blob, + SEAM *seam); -void make_triple_split(TBLOB *blob, TBLOB *other_blob, SEAM *seam); +void make_triple_split(TBLOB *blob, TBLOB *other_blob, bool italic_blob, + SEAM *seam); void undo_seam(TBLOB *blob, TBLOB *other_blob, SEAM *seam); diff --git a/wordrec/matchtab.cpp b/wordrec/matchtab.cpp index a79e6fec75..200f15aa55 100644 --- a/wordrec/matchtab.cpp +++ b/wordrec/matchtab.cpp @@ -22,10 +22,6 @@ ** limitations under the License. 
* *********************************************************************************/ -/*---------------------------------------------------------------------- - I n c l u d e s -----------------------------------------------------------------------*/ - #include "matchtab.h" #include "blobs.h" @@ -34,25 +30,6 @@ #include "freelist.h" #include "ratngs.h" -/*---------------------------------------------------------------------- - T y p e s -----------------------------------------------------------------------*/ -typedef struct _MATCH_ -{ - int topleft; - int botright; - BLOB_CHOICE_LIST *rating; -} MATCH; - -/*---------------------------------------------------------------------- - V a r i a b l e s -----------------------------------------------------------------------*/ -MATCH *match_table; -//?int missed_count = 0; - -/*---------------------------------------------------------------------- - M a c r o s -----------------------------------------------------------------------*/ #define NUM_MATCH_ENTRIES 500 /* Entries in match_table */ /********************************************************************** @@ -61,49 +38,55 @@ MATCH *match_table; * Test an element in the blob match table to see if it is blank. * Return a non-zero value if it is blank. **********************************************************************/ - #define blank_entry(match_table,x) \ -(! (match_table[x].topleft | match_table[x].botright)) + (! 
(match_table[x].topleft | match_table[x].botright)) + + +namespace tesseract { + +BlobMatchTable::BlobMatchTable() + : been_initialized_(false), match_table_(NULL) { + init_match_table(); +} + +BlobMatchTable::~BlobMatchTable() { + end_match_table(); +} -/*---------------------------------------------------------------------- - Public Function Code -----------------------------------------------------------------------*/ /********************************************************************** * init_match_table * * Create and clear a match table to be used to speed up the splitter. **********************************************************************/ -static int been_initialized = 0; -void init_match_table() { +void BlobMatchTable::init_match_table() { int x; - if (been_initialized) { + if (been_initialized_) { /* Reclaim old choices */ for (x = 0; x < NUM_MATCH_ENTRIES; x++) { - if ((!blank_entry (match_table, x)) && match_table[x].rating) - match_table[x].rating->clear(); - delete match_table[x].rating; + if ((!blank_entry(match_table_, x)) && match_table_[x].rating) + match_table_[x].rating->clear(); + delete match_table_[x].rating; } - } - else { + } else { /* Allocate memory once */ - been_initialized = 1; - match_table = new MATCH[NUM_MATCH_ENTRIES]; + been_initialized_ = true; + match_table_ = new MATCH[NUM_MATCH_ENTRIES]; } /* Initialize the table */ for (x = 0; x < NUM_MATCH_ENTRIES; x++) { - match_table[x].topleft = 0; - match_table[x].botright = 0; - match_table[x].rating = NULL; + match_table_[x].topleft = 0; + match_table_[x].botright = 0; + match_table_[x].rating = NULL; } } -void end_match_table() { - if (been_initialized) { +void BlobMatchTable::end_match_table() { + if (been_initialized_) { init_match_table(); - delete[] match_table; - match_table = NULL; - been_initialized = 0; + delete[] match_table_; + match_table_ = NULL; + been_initialized_ = false; } } @@ -114,7 +97,7 @@ void end_match_table() { * Put a new blob and its corresponding match 
ratings into the match * table. **********************************************************************/ -void put_match(TBLOB *blob, BLOB_CHOICE_LIST *ratings) { +void BlobMatchTable::put_match(TBLOB *blob, BLOB_CHOICE_LIST *ratings) { unsigned int topleft; unsigned int botright; unsigned int start; @@ -130,19 +113,18 @@ void put_match(TBLOB *blob, BLOB_CHOICE_LIST *ratings) { /* Look for empty */ x = start; do { - if (blank_entry (match_table, x)) { + if (blank_entry (match_table_, x)) { /* Add this entry */ - match_table[x].topleft = topleft; - match_table[x].botright = botright; - // Copy ratings to match_table[x].rating - match_table[x].rating = new BLOB_CHOICE_LIST(); - match_table[x].rating->deep_copy(ratings, &BLOB_CHOICE::deep_copy); + match_table_[x].topleft = topleft; + match_table_[x].botright = botright; + // Copy ratings to match_table_[x].rating + match_table_[x].rating = new BLOB_CHOICE_LIST(); + match_table_[x].rating->deep_copy(ratings, &BLOB_CHOICE::deep_copy); return; } if (++x >= NUM_MATCH_ENTRIES) x = 0; - } - while (x != start); + } while (x != start); cprintf ("error: Match table is full\n"); } @@ -154,7 +136,7 @@ void put_match(TBLOB *blob, BLOB_CHOICE_LIST *ratings) { * Look up this blob in the match table to see if it needs to be * matched. If it is not present then NULL is returned. **********************************************************************/ -BLOB_CHOICE_LIST *get_match(TBLOB *blob) { +BLOB_CHOICE_LIST *BlobMatchTable::get_match(TBLOB *blob) { unsigned int topleft; unsigned int botright; TPOINT tp_topleft; @@ -173,8 +155,8 @@ BLOB_CHOICE_LIST *get_match(TBLOB *blob) { * Look up this blob in the match table to see if it needs to be * matched. If it is not present then NULL is returned. 
**********************************************************************/ -BLOB_CHOICE_LIST *get_match_by_bounds(unsigned int topleft, - unsigned int botright) { +BLOB_CHOICE_LIST *BlobMatchTable::get_match_by_bounds(unsigned int topleft, + unsigned int botright) { unsigned int start; int x; /* Do starting hash */ @@ -183,19 +165,18 @@ BLOB_CHOICE_LIST *get_match_by_bounds(unsigned int topleft, x = start; do { /* Not found when blank */ - if (blank_entry (match_table, x)) + if (blank_entry (match_table_, x)) break; /* Is this the match ? */ - if (match_table[x].topleft == topleft && - match_table[x].botright == botright) { + if (match_table_[x].topleft == topleft && + match_table_[x].botright == botright) { BLOB_CHOICE_LIST *blist = new BLOB_CHOICE_LIST(); - blist->deep_copy(match_table[x].rating, &BLOB_CHOICE::deep_copy); + blist->deep_copy(match_table_[x].rating, &BLOB_CHOICE::deep_copy); return blist; } if (++x >= NUM_MATCH_ENTRIES) x = 0; - } - while (x != start); + } while (x != start); return NULL; } @@ -212,7 +193,7 @@ BLOB_CHOICE_LIST *get_match_by_bounds(unsigned int topleft, * The entries that appear in the new ratings list and not in the * old one are added to the old ratings list in the match_table. **********************************************************************/ -void add_to_match(TBLOB *blob, BLOB_CHOICE_LIST *ratings) { +void BlobMatchTable::add_to_match(TBLOB *blob, BLOB_CHOICE_LIST *ratings) { unsigned int topleft; unsigned int botright; TPOINT tp_topleft; @@ -227,15 +208,15 @@ void add_to_match(TBLOB *blob, BLOB_CHOICE_LIST *ratings) { /* Search for match */ x = start; do { - if (blank_entry(match_table, x)) { + if (blank_entry(match_table_, x)) { fprintf(stderr, "Can not update uninitialized entry in match_table\n"); - ASSERT_HOST(!blank_entry(match_table, x)); + ASSERT_HOST(!blank_entry(match_table_, x)); } - if (match_table[x].topleft == topleft && - match_table[x].botright == botright) { - // Copy new ratings to match_table[x].rating. 
+ if (match_table_[x].topleft == topleft && + match_table_[x].botright == botright) { + // Copy new ratings to match_table_[x].rating. BLOB_CHOICE_IT it; - it.set_to_list(match_table[x].rating); + it.set_to_list(match_table_[x].rating); BLOB_CHOICE_IT new_it; new_it.set_to_list(ratings); assert(it.length() <= new_it.length()); @@ -251,6 +232,7 @@ void add_to_match(TBLOB *blob, BLOB_CHOICE_LIST *ratings) { } if (++x >= NUM_MATCH_ENTRIES) x = 0; - } - while (x != start); + } while (x != start); } + +} // namespace tesseract diff --git a/wordrec/matchtab.h b/wordrec/matchtab.h index 4612258082..b42b82cecc 100644 --- a/wordrec/matchtab.h +++ b/wordrec/matchtab.h @@ -25,25 +25,36 @@ #ifndef MATCHTAB_H #define MATCHTAB_H -/*---------------------------------------------------------------------- - I n c l u d e s -----------------------------------------------------------------------*/ - #include "ratngs.h" -#include "tessclas.h" +#include "blobs.h" + +namespace tesseract { + +typedef struct _MATCH_ +{ + int topleft; + int botright; + BLOB_CHOICE_LIST *rating; +} MATCH; -/*---------------------------------------------------------------------- - F u n c t i o n s -----------------------------------------------------------------------*/ -void init_match_table(); -void end_match_table(); -void put_match(TBLOB *blob, BLOB_CHOICE_LIST *ratings); +class BlobMatchTable { + public: + BlobMatchTable(); + ~BlobMatchTable(); -BLOB_CHOICE_LIST *get_match(TBLOB *blob); + void init_match_table(); + void end_match_table(); + void put_match(TBLOB *blob, BLOB_CHOICE_LIST *ratings); + BLOB_CHOICE_LIST *get_match(TBLOB *blob); + BLOB_CHOICE_LIST *get_match_by_bounds(unsigned int topleft, + unsigned int botright); + void add_to_match(TBLOB *blob, BLOB_CHOICE_LIST *ratings); -BLOB_CHOICE_LIST *get_match_by_bounds(unsigned int topleft, - unsigned int botright); + private: + bool been_initialized_; + MATCH* match_table_; +}; -void add_to_match(TBLOB *blob, BLOB_CHOICE_LIST *ratings); +} #endif 
diff --git a/wordrec/metrics.cpp b/wordrec/metrics.cpp deleted file mode 100644 index 1c3611ef9e..0000000000 --- a/wordrec/metrics.cpp +++ /dev/null @@ -1,369 +0,0 @@ -/* -*-C-*- - ******************************************************************************** - * - * File: metrics.c (Formerly metrics.c) - * Description: - * Author: Mark Seaman, OCR Technology - * Created: Fri Oct 16 14:37:00 1987 - * Modified: Tue Jul 30 17:02:07 1991 (Mark Seaman) marks@hpgrlt - * Language: C - * Package: N/A - * Status: Reusable Software Component - * - * (c) Copyright 1987, Hewlett-Packard Company. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. - * - *********************************************************************************/ -/*---------------------------------------------------------------------- - I n c l u d e s -----------------------------------------------------------------------*/ -#include "metrics.h" -#include "bestfirst.h" -#include "associate.h" -#include "tally.h" -#include "plotseg.h" -#include "globals.h" -#include "wordclass.h" -#include "intmatcher.h" -#include "freelist.h" -#include "callcpp.h" -#include "ndminx.h" -#include "wordrec.h" - -// Include automatically generated configuration file if running autoconf. 
-#ifdef HAVE_CONFIG_H -#include "config_auto.h" -#endif - -/*---------------------------------------------------------------------- - V a r i a b l e s -----------------------------------------------------------------------*/ -static int states_timed_out1; /* Counters */ -static int states_timed_out2; -static int words_segmented1; -static int words_segmented2; -static int segmentation_states1; -static int segmentation_states2; -static int save_priorities; - -int words_chopped1; -int words_chopped2; -int chops_attempted1; -int chops_performed1; -int chops_attempted2; -int chops_performed2; - -int character_count; -int word_count; -int chars_classified; - -MEASUREMENT num_pieces; -MEASUREMENT width_measure; - -MEASUREMENT width_priority_range;/* Help to normalize */ -MEASUREMENT match_priority_range; - -TALLY states_before_best; -TALLY best_certainties[2]; -TALLY character_widths; /* Width histogram */ - -FILE *priority_file_1; /* Output to cluster */ -FILE *priority_file_2; -FILE *priority_file_3; - -STATE *known_best_state = NULL; /* The right answer */ - -/*---------------------------------------------------------------------- - M a c r o s -----------------------------------------------------------------------*/ -#define CERTAINTY_BUCKET_SIZE -0.5 -#define CERTAINTY_BUCKETS 40 - -/*---------------------------------------------------------------------- - F u n c t i o n s -----------------------------------------------------------------------*/ -/********************************************************************** - * init_metrics - * - * Set up the appropriate variables to record information about the - * OCR process. Later calls will log the data and save a summary. 
- **********************************************************************/ -void init_metrics() { - words_chopped1 = 0; - words_chopped2 = 0; - chops_performed1 = 0; - chops_performed2 = 0; - chops_attempted1 = 0; - chops_attempted2 = 0; - - words_segmented1 = 0; - words_segmented2 = 0; - states_timed_out1 = 0; - states_timed_out2 = 0; - segmentation_states1 = 0; - segmentation_states2 = 0; - - save_priorities = 0; - - character_count = 0; - word_count = 0; - chars_classified = 0; - permutation_count = 0; - - end_metrics(); - - states_before_best = new_tally (MIN (100, wordrec_num_seg_states)); - - best_certainties[0] = new_tally (CERTAINTY_BUCKETS); - best_certainties[1] = new_tally (CERTAINTY_BUCKETS); - reset_width_tally(); -} - -void end_metrics() { - if (states_before_best != NULL) { - memfree(states_before_best); - memfree(best_certainties[0]); - memfree(best_certainties[1]); - memfree(character_widths); - states_before_best = NULL; - best_certainties[0] = NULL; - best_certainties[1] = NULL; - character_widths = NULL; - } -} - - -/********************************************************************** - * record_certainty - * - * Maintain a record of the best certainty values achieved on each - * word recognition. - **********************************************************************/ -void record_certainty(float certainty, int pass) { - int bucket; - - if (certainty / CERTAINTY_BUCKET_SIZE < MAX_INT32) - bucket = (int) (certainty / CERTAINTY_BUCKET_SIZE); - else - bucket = MAX_INT32; - - inc_tally_bucket (best_certainties[pass - 1], bucket); -} - - -/********************************************************************** - * record_search_status - * - * Record information about each iteration of the search. This data - * is kept in global memory and accumulated over multiple segmenter - * searches. 
- **********************************************************************/ -void record_search_status(int num_states, int before_best, float closeness) { - inc_tally_bucket(states_before_best, before_best); - - if (first_pass) { - if (num_states == wordrec_num_seg_states + 1) - states_timed_out1++; - segmentation_states1 += num_states; - words_segmented1++; - } - else { - if (num_states == wordrec_num_seg_states + 1) - states_timed_out2++; - segmentation_states2 += num_states; - words_segmented2++; - } -} - - -/********************************************************************** - * save_summary - * - * Save the summary information into the file "file.sta". - **********************************************************************/ -namespace tesseract { -void Wordrec::save_summary(inT32 elapsed_time) { - #ifndef SECURE_NAMES - STRING outfilename; - FILE *f; - int x; - int total; - - outfilename = imagefile + ".sta"; - f = open_file (outfilename.string(), "w"); - - fprintf (f, INT32FORMAT " seconds elapsed\n", elapsed_time); - fprintf (f, "\n"); - - fprintf (f, "%d characters\n", character_count); - fprintf (f, "%d words\n", word_count); - fprintf (f, "\n"); - - fprintf (f, "%d permutations performed\n", permutation_count); - fprintf (f, "%d characters classified\n", chars_classified); - fprintf (f, "%4.0f%% classification overhead\n", - (float) chars_classified / character_count * 100.0 - 100.0); - fprintf (f, "\n"); - - fprintf (f, "%d words chopped (pass 1) ", words_chopped1); - fprintf (f, " (%0.0f%%)\n", (float) words_chopped1 / word_count * 100); - fprintf (f, "%d chops performed\n", chops_performed1); - fprintf (f, "%d chops attempted\n", chops_attempted1); - fprintf (f, "\n"); - - fprintf (f, "%d words joined (pass 1)", words_segmented1); - fprintf (f, " (%0.0f%%)\n", (float) words_segmented1 / word_count * 100); - fprintf (f, "%d segmentation states\n", segmentation_states1); - fprintf (f, "%d segmentations timed out\n", states_timed_out1); - fprintf (f, 
"\n"); - - fprintf (f, "%d words chopped (pass 2) ", words_chopped2); - fprintf (f, " (%0.0f%%)\n", (float) words_chopped2 / word_count * 100); - fprintf (f, "%d chops performed\n", chops_performed2); - fprintf (f, "%d chops attempted\n", chops_attempted2); - fprintf (f, "\n"); - - fprintf (f, "%d words joined (pass 2)", words_segmented2); - fprintf (f, " (%0.0f%%)\n", (float) words_segmented2 / word_count * 100); - fprintf (f, "%d segmentation states\n", segmentation_states2); - fprintf (f, "%d segmentations timed out\n", states_timed_out2); - fprintf (f, "\n"); - - total = 0; - iterate_tally (states_before_best, x) - total += (tally_entry (states_before_best, x) * x); - fprintf (f, "segmentations (before best) = %d\n", total); - if (total != 0.0) - fprintf (f, "%4.0f%% segmentation overhead\n", - (float) (segmentation_states1 + segmentation_states2) / - total * 100.0 - 100.0); - fprintf (f, "\n"); - - print_tally (f, "segmentations (before best)", states_before_best); - - iterate_tally (best_certainties[0], x) - cprintf ("best certainty of %8.4f = %4d %4d\n", - x * CERTAINTY_BUCKET_SIZE, - tally_entry (best_certainties[0], x), - tally_entry (best_certainties[1], x)); - - PrintIntMatcherStats(f); - dj_statistics(f); - fclose(f); - #endif -} -} // namespace tesseract - - -/********************************************************************** - * record_priorities - * - * If the record mode is set then record the priorities returned by - * each of the priority voters. Save them in a file that is set up for - * doing clustering. - **********************************************************************/ -void record_priorities(SEARCH_RECORD *the_search, - FLOAT32 priority_1, - FLOAT32 priority_2) { - record_samples(priority_1, priority_2); -} - - -/********************************************************************** - * record_samples - * - * Remember the priority samples to summarize them later. 
- **********************************************************************/ -void record_samples(FLOAT32 match_pri, FLOAT32 width_pri) { - ADD_SAMPLE(match_priority_range, match_pri); - ADD_SAMPLE(width_priority_range, width_pri); -} - - -/********************************************************************** - * reset_width_tally - * - * Create a tally record and initialize it. - **********************************************************************/ -void reset_width_tally() { - character_widths = new_tally (20); - new_measurement(width_measure); - width_measure.num_samples = 158; - width_measure.sum_of_samples = 125.0; - width_measure.sum_of_squares = 118.0; -} - - -#ifndef GRAPHICS_DISABLED -/********************************************************************** - * save_best_state - * - * Save this state away to be compared later. - **********************************************************************/ -void save_best_state(CHUNKS_RECORD *chunks_record) { - STATE state; - SEARCH_STATE chunk_groups; - int num_joints; - - if (save_priorities) { - num_joints = chunks_record->ratings->dimension() - 1; - - state.part1 = 0xffffffff; - state.part2 = 0xffffffff; - - chunk_groups = bin_to_chunks (&state, num_joints); - display_segmentation (chunks_record->chunks, chunk_groups); - memfree(chunk_groups); - - cprintf ("Enter the correct segmentation > "); - fflush(stdout); - state.part1 = 0; - scanf ("%x", &state.part2); - - chunk_groups = bin_to_chunks (&state, num_joints); - display_segmentation (chunks_record->chunks, chunk_groups); - memfree(chunk_groups); - window_wait(segm_window); /* == 'n') */ - - if (known_best_state) - free_state(known_best_state); - known_best_state = new_state (&state); - } -} -#endif - - -/********************************************************************** - * start_record - * - * Set up everything needed to record the priority voters. 
- **********************************************************************/ -void start_recording() { - if (save_priorities) { - priority_file_1 = open_file ("Priorities1", "w"); - priority_file_2 = open_file ("Priorities2", "w"); - priority_file_3 = open_file ("Priorities3", "w"); - } -} - - -/********************************************************************** - * stop_recording - * - * Put an end to the priority recording mechanism. - **********************************************************************/ -void stop_recording() { - if (save_priorities) { - fclose(priority_file_1); - fclose(priority_file_2); - fclose(priority_file_3); - } -} diff --git a/wordrec/metrics.h b/wordrec/metrics.h deleted file mode 100644 index 066d75d11f..0000000000 --- a/wordrec/metrics.h +++ /dev/null @@ -1,125 +0,0 @@ -/* -*-C-*- - ******************************************************************************** - * - * File: metrics.h (Formerly metrics.h) - * Description: - * Author: Mark Seaman, SW Productivity - * Created: Fri Oct 16 14:37:00 1987 - * Modified: Tue Jul 30 17:02:48 1991 (Mark Seaman) marks@hpgrlt - * Language: C - * Package: N/A - * Status: Reusable Software Component - * - * (c) Copyright 1987, Hewlett-Packard Company. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. 
- * - *********************************************************************************/ -#ifndef METRICS_H -#define METRICS_H - -/*---------------------------------------------------------------------- - I n c l u d e s -----------------------------------------------------------------------*/ -#include "measure.h" -#include "bestfirst.h" -#include "states.h" - -/*---------------------------------------------------------------------- - V a r i a b l e s -----------------------------------------------------------------------*/ -extern int words_chopped1; -extern int words_chopped2; -extern int chops_attempted1; -extern int chops_performed1; -extern int chops_attempted2; -extern int chops_performed2; -extern int permutation_count; - -extern int character_count; -extern int word_count; -extern int chars_classified; - -extern MEASUREMENT width_measure; -extern MEASUREMENT width_priority_range; -extern MEASUREMENT match_priority_range; - -/*---------------------------------------------------------------------- - F u n c t i o n s -----------------------------------------------------------------------*/ -void init_metrics(); -void end_metrics(); - -void record_certainty(float certainty, int pass); - -void record_search_status(int num_states, int before_best, float closeness); - -void record_priorities(SEARCH_RECORD *the_search, - FLOAT32 priority_1, - FLOAT32 priority_2); - -void record_samples(FLOAT32 match_pri, FLOAT32 width_pri); - -void reset_width_tally(); - -void save_best_state(CHUNKS_RECORD *chunks_record); - -void start_recording(); - -void stop_recording(); - -/* -#if defined(__STDC__) || defined(__cplusplus) || MAC_OR_DOS -# define _ARGS(s) s -#else -# define _ARGS(s) () -#endif*/ - -/* metrics.c -void init_metrics - _ARGS((void)); - -void record_certainty - _ARGS((float certainty, - int pass)); - -void record_search_status - _ARGS((int num_states, - int before_best, - float closeness)); - -void save_summary - _ARGS((inT32 elapsed_time)); - -void 
record_priorities - _ARGS((SEARCH_RECORD *the_search, - STATE *old_state, - FLOAT32 priority_1, - FLOAT32 priority_2)); - -void record_samples - _ARGS((FLOAT32 match_pri, - FLOAT32 width_pri)); - -void reset_width_tally - _ARGS((void)); - -void save_best_state - _ARGS((CHUNKS_RECORD *chunks_record)); - -void start_recording - _ARGS((void)); - -void stop_recording - _ARGS((void)); - -#undef _ARGS -*/ -#endif diff --git a/wordrec/mfvars.cpp b/wordrec/mfvars.cpp deleted file mode 100644 index f5d80655f9..0000000000 --- a/wordrec/mfvars.cpp +++ /dev/null @@ -1,31 +0,0 @@ -/****************************************************************************** - ** Filename: mfvars.c - ** Purpose: Hooks global microfeature variables into the wo system. - ** Author: Dan Johnson - ** History: Fri Jan 12 12:47:20 1990, DSJ, Created. - ** - ** (c) Copyright Hewlett-Packard Company, 1988. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. 
- ******************************************************************************/ -#include "blobclass.h" -#include "extract.h" -#include "adaptmatch.h" -#include "wordrec.h" - -/**---------------------------------------------------------------------------- - Public Code -----------------------------------------------------------------------------**/ -namespace tesseract { -void Wordrec::mfeature_init() { - SetupExtractors(); - InitAdaptiveClassifier(); -} -} diff --git a/wordrec/mfvars.h b/wordrec/mfvars.h deleted file mode 100644 index 42ce11b9c0..0000000000 --- a/wordrec/mfvars.h +++ /dev/null @@ -1,24 +0,0 @@ -/****************************************************************************** - ** Filename: mfvars.h - ** Purpose: Routines to install global variables into wo system. - ** Author: Dan Johnson - ** History: Fri Jan 12 13:23:46 1990, DSJ, Created. - ** - ** (c) Copyright Hewlett-Packard Company, 1988. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. 
- ******************************************************************************/ -#ifndef MFVARS_H -#define MFVARS_H - -/**---------------------------------------------------------------------------- - Public Function Prototypes -----------------------------------------------------------------------------**/ -#endif diff --git a/wordrec/olutil.cpp b/wordrec/olutil.cpp index 78cff8d0bc..5beadd839e 100644 --- a/wordrec/olutil.cpp +++ b/wordrec/olutil.cpp @@ -37,26 +37,6 @@ /*---------------------------------------------------------------------- F u n c t i o n s ----------------------------------------------------------------------*/ -/********************************************************************** - * check_outline_mem - * - * Check the memory allocator for outlines. - **********************************************************************/ -void check_outline_mem() { - TESSLINE *outline; - - outline = newoutline (); - outline->next = newoutline (); - oldoutline (outline->next); - oldoutline(outline); - - outline = newoutline (); - outline->next = newoutline (); - oldoutline (outline->next); - oldoutline(outline); -} - - /********************************************************************** * correct_blob_order * @@ -97,48 +77,15 @@ void eliminate_duplicate_outlines(TBLOB *blob) { if (same_outline_bounds (outline, other_outline)) { last_outline->next = other_outline->next; - oldoutline(other_outline); + // This doesn't leak - the outlines share the EDGEPTs. 
+ other_outline->loop = NULL; + delete other_outline; other_outline = last_outline; } } } } - -/********************************************************************** - * setup_outline - * - * Create a new outline structure from this - **********************************************************************/ -void setup_outline(TESSLINE *outline) { - register EDGEPT *this_edge; - register int minx = MAX_INT32; - register int miny = MAX_INT32; - register int maxx = -MAX_INT32; - register int maxy = -MAX_INT32; - - /* Find boundaries */ - this_edge = outline->loop; - do { - if (this_edge->pos.x < minx) - minx = this_edge->pos.x; - if (this_edge->pos.y < miny) - miny = this_edge->pos.y; - if (this_edge->pos.x > maxx) - maxx = this_edge->pos.x; - if (this_edge->pos.y > maxy) - maxy = this_edge->pos.y; - this_edge = this_edge->next; - } - while (this_edge != outline->loop); - /* Reset bounds */ - outline->topleft.x = minx; - outline->topleft.y = maxy; - outline->botright.x = maxx; - outline->botright.y = miny; -} - - /********************************************************************** * setup_blob_outlines * @@ -148,6 +95,6 @@ void setup_blob_outlines(TBLOB *blob) { TESSLINE *outline; for (outline = blob->outlines; outline; outline = outline->next) { - setup_outline(outline); + outline->ComputeBoundingBox(); } } diff --git a/wordrec/olutil.h b/wordrec/olutil.h index f42a2c4a7d..c7eeecd235 100644 --- a/wordrec/olutil.h +++ b/wordrec/olutil.h @@ -28,8 +28,7 @@ /*---------------------------------------------------------------------- I n c l u d e s ----------------------------------------------------------------------*/ -#include "tessclas.h" -#include "general.h" +#include "blobs.h" /*---------------------------------------------------------------------- M a c r o s @@ -46,21 +45,6 @@ #define is_inside_angle(pt) \ (angle_change ((pt)->prev, (pt), (pt)->next) < chop_inside_angle) -/********************************************************************** - * point_in_outline 
- * - * Check to see if this point falls within the bounding box of this - * outline. Note that this does not totally ensure that the edge - * point falls on this outline. - **********************************************************************/ - -#define point_in_outline(p,o) \ -((p)->pos.x >= (o)->topleft.x && \ - (p)->pos.y <= (o)->topleft.y && \ - (p)->pos.x <= (o)->botright.x && \ - (p)->pos.y >= (o)->botright.y) \ - - /********************************************************************** * same_outline_bounds * @@ -89,14 +73,10 @@ /*---------------------------------------------------------------------- F u n c t i o n s ----------------------------------------------------------------------*/ -void check_outline_mem(); - void correct_blob_order(TBLOB *blob1, TBLOB *blob2); void eliminate_duplicate_outlines(TBLOB *blob); -void setup_outline(TESSLINE *outline); - void setup_blob_outlines(TBLOB *blob); #endif diff --git a/wordrec/outlines.cpp b/wordrec/outlines.cpp index 9148f4686b..134e20c7ec 100644 --- a/wordrec/outlines.cpp +++ b/wordrec/outlines.cpp @@ -29,11 +29,13 @@ I n c l u d e s ----------------------------------------------------------------------*/ #include "outlines.h" +#include "wordrec.h" #ifdef __UNIX__ #include #endif +namespace tesseract { /*---------------------------------------------------------------------- F u n c t i o n s ----------------------------------------------------------------------*/ @@ -43,9 +45,9 @@ * Check to see if this line crosses over this outline. If it does * return TRUE. 
**********************************************************************/ -int crosses_outline(EDGEPT *p0, /* Start of line */ - EDGEPT *p1, /* End of line */ - EDGEPT *outline) { /* Outline to check */ +int Wordrec::crosses_outline(EDGEPT *p0, /* Start of line */ + EDGEPT *p1, /* End of line */ + EDGEPT *outline) { /* Outline to check */ EDGEPT *pt = outline; do { if (is_crossed (p0->pos, p1->pos, pt->pos, pt->next->pos)) @@ -65,7 +67,7 @@ int crosses_outline(EDGEPT *p0, /* Start of line */ * point of intersection lies on both of the line segments. If it does * then these two segments cross. **********************************************************************/ -int is_crossed(TPOINT a0, TPOINT a1, TPOINT b0, TPOINT b1) { +int Wordrec::is_crossed(TPOINT a0, TPOINT a1, TPOINT b0, TPOINT b1) { int b0a1xb0b1, b0b1xb0a0; int a1b1xa1a0, a1a0xa1b0; @@ -99,7 +101,7 @@ int is_crossed(TPOINT a0, TPOINT a1, TPOINT b0, TPOINT b1) { * * Return true if the points are identical. **********************************************************************/ -int is_same_edgept(EDGEPT *p1, EDGEPT *p2) { +int Wordrec::is_same_edgept(EDGEPT *p1, EDGEPT *p2) { return (p1 == p2); } @@ -110,7 +112,8 @@ int is_same_edgept(EDGEPT *p1, EDGEPT *p2) { * Find the point on a line segment that is closest to a point not on * the line segment. Return that point. **********************************************************************/ -EDGEPT *near_point(EDGEPT *point, EDGEPT *line_pt_0, EDGEPT *line_pt_1) { +EDGEPT *Wordrec::near_point(EDGEPT *point, + EDGEPT *line_pt_0, EDGEPT *line_pt_1) { TPOINT p; float slope; @@ -153,7 +156,7 @@ EDGEPT *near_point(EDGEPT *point, EDGEPT *line_pt_0, EDGEPT *line_pt_1) { * counter-clockwise and vice versa. Do this by swapping each of the * next and prev fields of each edge point. 
**********************************************************************/ -void reverse_outline(EDGEPT *outline) { +void Wordrec::reverse_outline(EDGEPT *outline) { EDGEPT *edgept = outline; EDGEPT *temp; @@ -170,3 +173,5 @@ void reverse_outline(EDGEPT *outline) { } while (edgept != outline); } + +} // namespace tesseract diff --git a/wordrec/outlines.h b/wordrec/outlines.h index f063584407..bcb675e51c 100644 --- a/wordrec/outlines.h +++ b/wordrec/outlines.h @@ -131,18 +131,4 @@ #define within_range(x,x0,x1) \ (((x0 <= x) && (x <= x1)) || ((x1 <= x) && (x <= x0))) -/*---------------------------------------------------------------------- - F u n c t i o n s -----------------------------------------------------------------------*/ -int crosses_outline(EDGEPT *p0, /* Start of line */ - EDGEPT *p1, /* End of line */ - EDGEPT *outline); - -int is_crossed(TPOINT a0, TPOINT a1, TPOINT b0, TPOINT b1); - -int is_same_edgept(EDGEPT *p1, EDGEPT *p2); - -EDGEPT *near_point(EDGEPT *point, EDGEPT *line_pt_0, EDGEPT *line_pt_1); - -void reverse_outline(EDGEPT *outline); #endif diff --git a/wordrec/pieces.cpp b/wordrec/pieces.cpp index 4d41d87098..4008c5541a 100644 --- a/wordrec/pieces.cpp +++ b/wordrec/pieces.cpp @@ -29,7 +29,6 @@ #include "blobs.h" #include "freelist.h" -#include "hideedge.h" #include "matchtab.h" #include "ndminx.h" #include "plotseg.h" @@ -70,173 +69,6 @@ /*---------------------------------------------------------------------- F u n c t i o n s ----------------------------------------------------------------------*/ -/********************************************************************** - * break_pieces - * - * Break up the blobs in this chain so that they are all independent. - * This operation should undo the affect of join_pieces. 
- **********************************************************************/ -void break_pieces(TBLOB *blobs, SEAMS seams, inT16 start, inT16 end) { - TESSLINE *outline = blobs->outlines; - TBLOB *next_blob; - inT16 x; - - for (x = start; x < end; x++) - reveal_seam ((SEAM *) array_value (seams, x)); - - next_blob = blobs->next; - - while (outline && next_blob) { - if (outline->next == next_blob->outlines) { - outline->next = NULL; - outline = next_blob->outlines; - next_blob = next_blob->next; - } - else { - outline = outline->next; - } - } -} - - -/********************************************************************** - * join_pieces - * - * Join a group of base level pieces into a single blob that can then - * be classified. - **********************************************************************/ -void join_pieces(TBLOB *piece_blobs, SEAMS seams, inT16 start, inT16 end) { - TBLOB *next_blob; - TBLOB *blob; - inT16 x; - TESSLINE *outline; - SEAM *seam; - - for (x = 0, blob = piece_blobs; x < start; x++) - blob = blob->next; - next_blob = blob->next; - outline = blob->outlines; - if (!outline) - return; - - while (x < end) { - seam = (SEAM *) array_value (seams, x); - if (x - seam->widthn >= start && x + seam->widthp < end) - hide_seam(seam); - while (outline->next) - outline = outline->next; - outline->next = next_blob->outlines; - next_blob = next_blob->next; - - x++; - } -} - - -/********************************************************************** - * hide_seam - * - * Change the edge points that are referenced by this seam to make - * them hidden edges. 
- **********************************************************************/ -void hide_seam(SEAM *seam) { - if (seam == NULL || seam->split1 == NULL) - return; - hide_edge_pair (seam->split1->point1, seam->split1->point2); - - if (seam->split2 == NULL) - return; - hide_edge_pair (seam->split2->point1, seam->split2->point2); - - if (seam->split3 == NULL) - return; - hide_edge_pair (seam->split3->point1, seam->split3->point2); -} - - -/********************************************************************** - * hide_edge_pair - * - * Change the edge points that are referenced by this seam to make - * them hidden edges. - **********************************************************************/ -void hide_edge_pair(EDGEPT *pt1, EDGEPT *pt2) { - EDGEPT *edgept; - - edgept = pt1; - do { - hide_edge(edgept); - edgept = edgept->next; - } - while (!exact_point (edgept, pt2) && edgept != pt1); - if (edgept == pt1) { - /* cprintf("Hid entire outline at (%d,%d)!!\n", - edgept->pos.x,edgept->pos.y); */ - } - edgept = pt2; - do { - hide_edge(edgept); - edgept = edgept->next; - } - while (!exact_point (edgept, pt1) && edgept != pt2); - if (edgept == pt2) { - /* cprintf("Hid entire outline at (%d,%d)!!\n", - edgept->pos.x,edgept->pos.y); */ - } -} - - -/********************************************************************** - * reveal_seam - * - * Change the edge points that are referenced by this seam to make - * them hidden edges. 
- **********************************************************************/ -void reveal_seam(SEAM *seam) { - if (seam == NULL || seam->split1 == NULL) - return; - reveal_edge_pair (seam->split1->point1, seam->split1->point2); - - if (seam->split2 == NULL) - return; - reveal_edge_pair (seam->split2->point1, seam->split2->point2); - - if (seam->split3 == NULL) - return; - reveal_edge_pair (seam->split3->point1, seam->split3->point2); -} - - -/********************************************************************** - * reveal_edge_pair - * - * Change the edge points that are referenced by this seam to make - * them hidden edges. - **********************************************************************/ -void reveal_edge_pair(EDGEPT *pt1, EDGEPT *pt2) { - EDGEPT *edgept; - - edgept = pt1; - do { - reveal_edge(edgept); - edgept = edgept->next; - } - while (!exact_point (edgept, pt2) && edgept != pt1); - if (edgept == pt1) { - /* cprintf("Hid entire outline at (%d,%d)!!\n", - edgept->pos.x,edgept->pos.y); */ - } - edgept = pt2; - do { - reveal_edge(edgept); - edgept = edgept->next; - } - while (!exact_point (edgept, pt1) && edgept != pt2); - if (edgept == pt2) { - /* cprintf("Hid entire outline at (%d,%d)!!\n", - edgept->pos.x,edgept->pos.y); */ - } -} /********************************************************************** @@ -279,29 +111,23 @@ BLOB_CHOICE_LIST *Wordrec::classify_piece(TBLOB *pieces, SEAMS seams, inT16 start, inT16 end) { - STATE current_state; BLOB_CHOICE_LIST *choices; - TBLOB *pblob; TBLOB *blob; - TBLOB *nblob; inT16 x; - SEARCH_STATE chunk_groups; - - set_n_ones (¤t_state, array_count (seams)); join_pieces(pieces, seams, start, end); - for (blob = pieces, pblob = NULL, x = 0; x < start; x++) { - pblob = blob; + for (blob = pieces, x = 0; x < start; x++) { blob = blob->next; } - for (nblob = blob->next; x < end; x++) - nblob = nblob->next; - choices = classify_blob (pblob, blob, nblob, NULL, "pieces:", White); + choices = classify_blob(blob, "pieces:", 
White); break_pieces(blob, seams, start, end); #ifndef GRAPHICS_DISABLED if (wordrec_display_segmentations > 2) { - chunk_groups = bin_to_chunks (¤t_state, array_count (seams)); + STATE current_state; + SEARCH_STATE chunk_groups; + set_n_ones (¤t_state, array_count(seams)); + chunk_groups = bin_to_chunks(¤t_state, array_count(seams)); display_segmentation(pieces, chunk_groups); window_wait(segm_window); memfree(chunk_groups); @@ -332,10 +158,13 @@ BLOB_CHOICE_LIST *Wordrec::get_piece_rating(MATRIX *ratings, start, end); ratings->put(start, end, choices); + if (wordrec_debug_level > 0) { + tprintf("get_piece_rating(): updated ratings matrix\n"); + ratings->print(getDict().getUnicharset()); + } } return (choices); } -} // namespace tesseract /********************************************************************** @@ -344,8 +173,7 @@ BLOB_CHOICE_LIST *Wordrec::get_piece_rating(MATRIX *ratings, * Set up and initialize an array that holds the bounds of a set of * blobs. **********************************************************************/ -BOUNDS_LIST record_blob_bounds(TBLOB *blobs) { - TBLOB *blob; +BOUNDS_LIST Wordrec::record_blob_bounds(TBLOB *blobs) { BOUNDS_LIST bounds; TPOINT topleft; TPOINT botright; @@ -353,7 +181,7 @@ BOUNDS_LIST record_blob_bounds(TBLOB *blobs) { bounds = (BOUNDS_LIST) memalloc (count_blobs (blobs) * sizeof (BOUNDS)); - iterate_blobs(blob, blobs) { + for (TBLOB* blob = blobs; blob != NULL; blob = blob->next) { blob_bounding_box(blob, &topleft, &botright); set_bounds_entry(bounds, x, topleft, botright); x++; @@ -370,7 +198,7 @@ BOUNDS_LIST record_blob_bounds(TBLOB *blobs) { * matrix is created. The indices correspond to the starting and * ending initial piece number. 
**********************************************************************/ -MATRIX *record_piece_ratings(TBLOB *blobs) { +MATRIX *Wordrec::record_piece_ratings(TBLOB *blobs) { BOUNDS_LIST bounds; inT16 num_blobs; inT16 x; @@ -391,7 +219,7 @@ MATRIX *record_piece_ratings(TBLOB *blobs) { bounds_of_piece(bounds, x, y, &tp_topleft, &tp_botright); topleft = *(unsigned int *) &tp_topleft; botright = *(unsigned int *) &tp_botright; - choices = get_match_by_bounds (topleft, botright); + choices = blob_match_table.get_match_by_bounds (topleft, botright); if (choices != NULL) { ratings->put(x, y, choices); } @@ -400,3 +228,5 @@ MATRIX *record_piece_ratings(TBLOB *blobs) { memfree(bounds); return (ratings); } + +} // namespace tesseract diff --git a/wordrec/pieces.h b/wordrec/pieces.h index a3fa4629a8..78252c3b04 100644 --- a/wordrec/pieces.h +++ b/wordrec/pieces.h @@ -46,17 +46,6 @@ typedef BOUNDS *BOUNDS_LIST; /* BOUNDS_LIST */ /*---------------------------------------------------------------------- F u n c t i o n s ----------------------------------------------------------------------*/ -void break_pieces(TBLOB *blobs, SEAMS seams, inT16 start, inT16 end); - -void join_pieces(TBLOB *piece_blobs, SEAMS seams, inT16 start, inT16 end); - -void hide_seam(SEAM *seam); - -void hide_edge_pair(EDGEPT *pt1, EDGEPT *pt2); - -void reveal_seam(SEAM *seam); - -void reveal_edge_pair(EDGEPT *pt1, EDGEPT *pt2); void bounds_of_piece(BOUNDS_LIST bounds, inT16 start, @@ -64,70 +53,4 @@ void bounds_of_piece(BOUNDS_LIST bounds, TPOINT *extreme_tl, TPOINT *extreme_br); -BOUNDS_LIST record_blob_bounds(TBLOB *blobs); - -MATRIX *record_piece_ratings(TBLOB *blobs); - -/* -#if defined(__STDC__) || defined(__cplusplus) -# define _ARGS(s) s -#else -# define _ARGS(s) () -#endif*/ - -/* pieces.c * -void break_pieces - _ARGS((BLOB *blobs, - SEAMS seams)); - -void join_pieces - _ARGS((BLOB *piece_blobs, - SEAMS seams, - inT16 start, - inT16 end)); - -void hide_seam - _ARGS((SEAM *seam)); - -void reveal_seam 
- _ARGS((SEAM *seam)); - -void bounds_of_piece - _ARGS((BOUNDS_LIST bounds, - inT16 start, - inT16 end, - TPOINT *extreme_tl, - TPOINT *extreme_br)); - -CHOICES classify_piece - _ARGS((BLOB *pieces, - SEAMS seams, - inT16 start, - inT16 end, - inT32 fx, - STATE* this_state, - STATE* best_state, - inT32 pass, - inT32 blob_index)); - -CHOICES get_piece_rating - _ARGS((MATRIX ratings, - BLOB *blobs, - SEAMS seams, - inT16 start, - inT16 end, - inT32 fx, - STATE* this_state, - STATE* best_state, - inT32 pass, - inT32 blob_index)); - -BOUNDS_LIST record_blob_bounds - _ARGS((BLOB *blobs)); - -MATRIX record_piece_ratings - _ARGS((BLOB *blobs)); - -#undef _ARGS -*/ #endif diff --git a/wordrec/plotedges.cpp b/wordrec/plotedges.cpp index 3aacd42651..0aa02c37fb 100644 --- a/wordrec/plotedges.cpp +++ b/wordrec/plotedges.cpp @@ -76,7 +76,7 @@ void display_edgepts(LIST outlines) { **********************************************************************/ void draw_blob_edges(TBLOB *blob) { TESSLINE *ol; - LIST edge_list = NIL; + LIST edge_list = NIL_LIST; if (wordrec_display_splits) { for (ol = blob->outlines; ol != NULL; ol = ol->next) diff --git a/wordrec/plotedges.h b/wordrec/plotedges.h index 2125fc1be8..d0ca40be77 100644 --- a/wordrec/plotedges.h +++ b/wordrec/plotedges.h @@ -27,7 +27,7 @@ #include "callcpp.h" #include "oldlist.h" -#include "tessclas.h" +#include "blobs.h" #include "split.h" /*---------------------------------------------------------------------- diff --git a/wordrec/plotseg.cpp b/wordrec/plotseg.cpp index ebc83b4697..a873f6a0bc 100644 --- a/wordrec/plotseg.cpp +++ b/wordrec/plotseg.cpp @@ -28,7 +28,6 @@ #include "plotseg.h" #include "callcpp.h" #include "scrollview.h" -#include "tessclas.h" #include "blobs.h" #include "const.h" #include @@ -90,7 +89,7 @@ void render_segmentation(ScrollView *window, // Find bounding box. 
blobs_bounding_box(chunks, &topleft, &botright); - iterate_blobs(blob, chunks) { + for (blob = chunks; blob != NULL; blob = blob->next) { if (chunks_left-- == 0) { color = color_list[++char_num % NUM_COLORS]; diff --git a/wordrec/render.cpp b/wordrec/render.cpp index c40a4c8487..56bb47f87d 100644 --- a/wordrec/render.cpp +++ b/wordrec/render.cpp @@ -132,9 +132,6 @@ void render_outline(void *window, render_edgepts (window, outline->loop, color); /* Add on next outlines */ render_outline (window, outline->next, color); - - /* Add on child outlines */ - render_outline(window, outline->child, Grey); } #endif // GRAPHICS_DISABLED diff --git a/wordrec/render.h b/wordrec/render.h index 0361e8e32f..3faa398d42 100644 --- a/wordrec/render.h +++ b/wordrec/render.h @@ -27,7 +27,7 @@ #include "host.h" #include "callcpp.h" -#include "tessclas.h" +#include "blobs.h" /*---------------------------------------------------------------------- V a r i a b l e s diff --git a/wordrec/segsearch.cpp b/wordrec/segsearch.cpp new file mode 100644 index 0000000000..95b229e0fd --- /dev/null +++ b/wordrec/segsearch.cpp @@ -0,0 +1,286 @@ +/////////////////////////////////////////////////////////////////////// +// File: segsearch.h +// Description: Segmentation search functions. +// Author: Daria Antonova +// Created: Mon Jun 23 11:26:43 PDT 2008 +// +// (C) Copyright 2009, Google Inc. +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +/////////////////////////////////////////////////////////////////////// + +#include "wordrec.h" + +#include "associate.h" +#include "baseline.h" +#include "language_model.h" +#include "matrix.h" +#include "oldheap.h" +#include "params.h" +#include "ratngs.h" +#include "states.h" + +ELISTIZE(SEG_SEARCH_PENDING); + +namespace tesseract { + +void Wordrec::SegSearch(CHUNKS_RECORD *chunks_record, + WERD_CHOICE *best_choice, + BLOB_CHOICE_LIST_VECTOR *best_char_choices, + WERD_CHOICE *raw_choice, + STATE *output_best_state) { + int row, col = 0; + if (segsearch_debug_level > 0) { + tprintf("Starting SegSearch on ratings matrix:\n"); + chunks_record->ratings->print(getDict().getUnicharset()); + } + // Start with a fresh best_choice since rating adjustments + // used by the chopper and the new segmentation search are not compatible. + best_choice->set_rating(WERD_CHOICE::kBadRating); + // Clear best choice accumulator (that is used for adaption), so that + // choices adjusted by chopper do not interfere with the results from the + // segmentation search. + getDict().ClearBestChoiceAccum(); + + MATRIX *ratings = chunks_record->ratings; + // Priority queue containing pain points generated by the language model + // The priority is set by the language model components, adjustments like + // seam cost and width priority are factored into the priority. + HEAP *pain_points = MakeHeap(segsearch_max_pain_points); + + // best_path_by_column records the lowest cost path found so far for each + // column of the chunks_record->ratings matrix over all the rows. 
+ BestPathByColumn *best_path_by_column = + new BestPathByColumn[ratings->dimension()]; + for (col = 0; col < ratings->dimension(); ++col) { + best_path_by_column[col].avg_cost = WERD_CHOICE::kBadRating; + best_path_by_column[col].best_vse = NULL; + } + + language_model_->InitForWord(prev_word_best_choice_, &denorm_, + assume_fixed_pitch_char_segment, + best_choice->certainty(), + segsearch_max_char_wh_ratio, + pain_points, chunks_record); + + MATRIX_COORD *pain_point; + float pain_point_priority; + BestChoiceBundle best_choice_bundle( + output_best_state, best_choice, raw_choice, best_char_choices); + + // pending[i] stores a list of the parent/child pair of BLOB_CHOICE_LISTs, + // where i is the column of the child. Initially all the classified entries + // in the ratings matrix from column 0 (with parent NULL) are inserted into + // pending[0]. As the language model state is updated, new child/parent + // pairs are inserted into the lists. Next, the entries in pending[1] are + // considered, and so on. It is important that during the update the + // children are considered in the non-decreasing order of their column, since + // this guarantess that all the parents would be up to date before an update + // of a child is done. + SEG_SEARCH_PENDING_LIST *pending = + new SEG_SEARCH_PENDING_LIST[ratings->dimension()]; + + // Search for the ratings matrix for the initial best path. + for (row = 0; row < ratings->dimension(); ++row) { + if (ratings->get(0, row) != NOT_CLASSIFIED) { + pending[0].add_sorted( + SEG_SEARCH_PENDING::compare, true, + new SEG_SEARCH_PENDING(row, NULL, LanguageModel::kAllChangedFlag)); + } + } + UpdateSegSearchNodes(0, &pending, &best_path_by_column, chunks_record, + pain_points, &best_choice_bundle); + + // Keep trying to find a better path by fixing the "pain points". 
+ int num_futile_classifications = 0; + while (!(language_model_->AcceptableChoiceFound() || + num_futile_classifications >= + segsearch_max_futile_classifications)) { + // Get the next valid "pain point". + int pop; + while (true) { + pop = HeapPop(pain_points, &pain_point_priority, &pain_point); + if (pop == EMPTY) break; + if (pain_point->Valid(*ratings) && + ratings->get(pain_point->col, pain_point->row) == NOT_CLASSIFIED) { + break; + } else { + delete pain_point; + } + } + if (pop == EMPTY) { + if (segsearch_debug_level > 0) tprintf("Pain points queue is empty\n"); + break; + } + if (segsearch_debug_level > 0) { + tprintf("Classifying pain point priority=%.4f, col=%d, row=%d\n", + pain_point_priority, pain_point->col, pain_point->row); + } + BLOB_CHOICE_LIST *classified = classify_piece( + chunks_record->chunks, chunks_record->splits, + pain_point->col, pain_point->row); + ratings->put(pain_point->col, pain_point->row, classified); + + if (segsearch_debug_level > 0) { + print_ratings_list("Updated ratings matrix with a new entry:", + ratings->get(pain_point->col, pain_point->row), + getDict().getUnicharset()); + chunks_record->ratings->print(getDict().getUnicharset()); + } + + // Insert initial "pain points" to join the newly classified blob + // with its left and right neighbors. 
+ if (!classified->empty()) { + float worst_piece_cert; + bool fragmented; + if (pain_point->col > 0) { + language_model_->GetWorstPieceCertainty( + pain_point->col-1, pain_point->row, chunks_record->ratings, + &worst_piece_cert, &fragmented); + language_model_->GeneratePainPoint( + pain_point->col-1, pain_point->row, false, + LanguageModel::kInitialPainPointPriorityAdjustment, + worst_piece_cert, fragmented, best_choice->certainty(), + segsearch_max_char_wh_ratio, NULL, NULL, + chunks_record, pain_points); + } + if (pain_point->row+1 < ratings->dimension()) { + language_model_->GetWorstPieceCertainty( + pain_point->col, pain_point->row+1, chunks_record->ratings, + &worst_piece_cert, &fragmented); + language_model_->GeneratePainPoint( + pain_point->col, pain_point->row+1, true, + LanguageModel::kInitialPainPointPriorityAdjustment, + worst_piece_cert, fragmented, best_choice->certainty(), + segsearch_max_char_wh_ratio, NULL, NULL, + chunks_record, pain_points); + } + } + + // Record a pending entry with the pain_point and each of its parents. 
+ int parent_row = pain_point->col - 1; + if (parent_row < 0) { // this node has no parents + pending[pain_point->col].add_sorted( + SEG_SEARCH_PENDING::compare, true, + new SEG_SEARCH_PENDING(pain_point->row, NULL, + LanguageModel::kAllChangedFlag)); + } else { + for (int parent_col = 0; parent_col < pain_point->col; ++parent_col) { + if (ratings->get(parent_col, parent_row) != NOT_CLASSIFIED) { + pending[pain_point->col].add_sorted( + SEG_SEARCH_PENDING::compare, true, + new SEG_SEARCH_PENDING(pain_point->row, + ratings->get(parent_col, parent_row), + LanguageModel::kAllChangedFlag)); + } + } + } + UpdateSegSearchNodes(pain_point->col, &pending, &best_path_by_column, + chunks_record, pain_points, &best_choice_bundle); + if (!best_choice_bundle.updated) ++num_futile_classifications; + + if (segsearch_debug_level > 0) { + tprintf("num_futile_classifications %d\n", num_futile_classifications); + } + + // Clean up + best_choice_bundle.updated = false; + delete pain_point; // done using this pain point + } + + if (segsearch_debug_level > 0) { + tprintf("Done with SegSearch (AcceptableChoiceFound: %d\n", + language_model_->AcceptableChoiceFound()); + } + + // Clean up. 
+ FreeHeapData(pain_points, MATRIX_COORD::Delete); + delete[] best_path_by_column; + delete[] pending; + for (row = 0; row < ratings->dimension(); ++row) { + for (col = 0; col <= row; ++col) { + BLOB_CHOICE_LIST *rating = ratings->get(col, row); + if (rating != NOT_CLASSIFIED) language_model_->DeleteState(rating); + } + } +} + +void Wordrec::UpdateSegSearchNodes( + int starting_col, + SEG_SEARCH_PENDING_LIST *pending[], + BestPathByColumn *best_path_by_column[], + CHUNKS_RECORD *chunks_record, + HEAP *pain_points, + BestChoiceBundle *best_choice_bundle) { + MATRIX *ratings = chunks_record->ratings; + for (int col = starting_col; col < ratings->dimension(); ++col) { + if (segsearch_debug_level > 0) { + tprintf("\n\nUpdateSegSearchNodes: evaluate children in col=%d\n", col); + } + // Iterate over the pending list for this column. + SEG_SEARCH_PENDING_LIST *pending_list = &((*pending)[col]); + SEG_SEARCH_PENDING_IT pending_it(pending_list); + GenericVector non_empty_rows; + while (!pending_it.empty()) { + // Update language model state of this child+parent pair. + SEG_SEARCH_PENDING *p = pending_it.extract(); + if (non_empty_rows.length() == 0 || + non_empty_rows[non_empty_rows.length()-1] != p->child_row) { + non_empty_rows.push_back(p->child_row); + } + BLOB_CHOICE_LIST *current_node = ratings->get(col, p->child_row); + LanguageModelFlagsType new_changed = + language_model_->UpdateState(p->changed, col, p->child_row, + current_node, p->parent, pain_points, + best_path_by_column, + chunks_record, best_choice_bundle); + if (new_changed) { + // Since the language model state of this entry changed, add all the + // pairs with it as a parent and each of its children to pending, so + // that the children are updated as well. 
+ int child_col = p->child_row + 1; + for (int child_row = child_col; + child_row < ratings->dimension(); ++child_row) { + if (ratings->get(child_col, child_row) != NOT_CLASSIFIED) { + SEG_SEARCH_PENDING *new_pending = + new SEG_SEARCH_PENDING(child_row, current_node, 0); + SEG_SEARCH_PENDING *actual_new_pending = + reinterpret_cast( + (*pending)[child_col].add_sorted_and_find( + SEG_SEARCH_PENDING::compare, true, new_pending)); + if (new_pending != actual_new_pending) delete new_pending; + actual_new_pending->changed |= new_changed; + if (segsearch_debug_level > 0) { + tprintf("Added child(col=%d row=%d) parent(col=%d row=%d)" + " changed=0x%x to pending\n", child_col, + actual_new_pending->child_row, + col, p->child_row, actual_new_pending->changed); + } + } + } + } // end if new_changed + delete p; // clean up + pending_it.forward(); + } // end while !pending_it.empty() + language_model_->GeneratePainPointsFromColumn( + col, non_empty_rows, best_choice_bundle->best_choice->certainty(), + pain_points, best_path_by_column, chunks_record); + } // end for col + + if (best_choice_bundle->updated) { + language_model_->GeneratePainPointsFromBestChoice( + pain_points, chunks_record, best_choice_bundle); + } + + language_model_->CleanUp(); +} + +} // namespace tesseract diff --git a/wordrec/tessinit.cpp b/wordrec/tessinit.cpp deleted file mode 100644 index 8cf2ddbc45..0000000000 --- a/wordrec/tessinit.cpp +++ /dev/null @@ -1,96 +0,0 @@ -/********************************************************************** - * File: tessinit.c (Formerly tessinit.c) - * Description: Stuff from recog.c needed by tessedit. - * Author: Ray Smith - * Created: Thu Jan 23 09:33:59 GMT 1992 - * - * (C) Copyright 1992, Hewlett-Packard Ltd. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. 
- ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. - * - **********************************************************************/ -#include "globals.h" - -#include -#include -#include "tessinit.h" -#include "wordrec.h" -#include "varable.h" - -/*---------------------------------------------------------------------- - Variables -----------------------------------------------------------------------*/ - -INT_VAR(wordrec_plots_fx, 0, "plots_fx"); -INT_VAR(wordrec_plots_ocr, 0, "plots_ocr"); -INT_VAR(wordrec_debugs_fx, 0, "debugs_fx"); -INT_VAR(wordrec_debugs_ocr, 0, "debugs_ocr"); -INT_VAR(wordrec_acts_fx, 2048, "acts_fx"); -INT_VAR(wordrec_acts_ocr, 32, "acts_ocr"); -INT_VAR(wordrec_corner_0, 0, "wordrec_corner_0"); -INT_VAR(wordrec_corner_1, 0, "wordrec_corner_1"); -INT_VAR(wordrec_corner_2, 2550, "wordrec_corner_2"); -INT_VAR(wordrec_corner_3, 3508, "wordrec_corner_3"); -INT_VAR(wordrec_resolution, 300, "Resolution"); -STRING_VAR(wordrec_debugfile, "debugfile", "Debug file name"); - -int row_number; - -/*---------------------------------------------------------------------- - F u n c t i o n s -----------------------------------------------------------------------*/ - -namespace tesseract { -/** - * @name program_init - * - * Initialize all the things in the program that need to be initialized. 
- */ -void Wordrec::program_init() { - /* Plots flags */ - plots[OCR] = wordrec_plots_ocr; - debugs[OCR] = wordrec_debugs_ocr; - acts[OCR] = wordrec_acts_ocr; - plots[FX] = wordrec_plots_fx; - debugs[FX] = wordrec_debugs_fx; - acts[FX] = wordrec_acts_fx; - - corners[0] = wordrec_corner_0; - corners[1] = wordrec_corner_1; - corners[2] = wordrec_corner_2; - corners[3] = wordrec_corner_3; -} -} // namespace tesseract - - -/** - * @name matherr - * - * Trap procedure for the Standard Math library - */ -#ifdef __UNIX -int -matherr (error) -struct exception *error; -{ - if (error->type == DOMAIN) { - if (!strcmp (error->name, "sqrt")) { - cprintf ("Sqrt:Domain error!!\n"); - abort(); - } - if (!strcmp (error->name, "atan2")) { - cprintf ("Arc Tangent error: atan2 (%ld, %ld)\n", - error->arg1, error->arg2); - abort(); - } - } - return 0; /*all others default */ -} -#endif diff --git a/wordrec/tessinit.h b/wordrec/tessinit.h deleted file mode 100644 index bde20a9e96..0000000000 --- a/wordrec/tessinit.h +++ /dev/null @@ -1,36 +0,0 @@ -/********************************************************************** - * File: tessinit.h (Formerly tessinit.h) - * Description: Stuff from recog needed by tessedit. - * Author: Ray Smith - * Created: Thu Jan 23 09:36:25 GMT 1992 - * - * (C) Copyright 1992, Hewlett-Packard Ltd. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. 
- * - **********************************************************************/ -#ifndef TESSINIT_H -#define TESSINIT_H - -#include "varable.h" - -#undef _ARGS -extern int row_number; -extern FILE *boxfp; - -/*--------------------------------------------------------------------------- - Variables -----------------------------------------------------------------------------*/ -extern INT_VAR_H(wordrec_corner_0, "corner_0", 0); -extern INT_VAR_H(wordrec_corner_1, "corner_1", 0); -extern INT_VAR_H(wordrec_corner_2, "corner_2", 2550); -extern INT_VAR_H(wordrec_corner_3, "corner_3", 3508); - -#endif diff --git a/wordrec/tface.cpp b/wordrec/tface.cpp index b53c6b3b13..aef2f43324 100644 --- a/wordrec/tface.cpp +++ b/wordrec/tface.cpp @@ -16,65 +16,29 @@ ** limitations under the License. * **********************************************************************/ -#include "tface.h" + +#include "bestfirst.h" +#include "callcpp.h" +#include "chop.h" +#include "chopper.h" #include "danerror.h" +#include "fxdefs.h" #include "globals.h" -#include "tordvars.h" /* Feature stuff */ -#include "fxid.h" -#include "wordclass.h" -#include "bestfirst.h" -#include "context.h" #include "gradechop.h" -/* includes for init */ -#include "tessinit.h" -#include "mfvars.h" -#include "metrics.h" -#include "adaptmatch.h" #include "matchtab.h" -#include "chopper.h" -#include "permdawg.h" +#include "pageres.h" #include "permute.h" -#include "chop.h" -#include "callcpp.h" -#include "badwords.h" +#include "wordclass.h" #include "wordrec.h" +#include "featdefs.h" #include #ifdef __UNIX__ #include #endif -const int kReallyBadCertainty = -20; namespace tesseract { - class Tesseract; -} - -//extern "C" int record_matcher_output; - -/*---------------------------------------------------------------------- - Variables -----------------------------------------------------------------------*/ -static PRIORITY pass2_ok_split; -static int pass2_seg_states; - -BOOL_VAR(wordrec_no_block, false, "Don't output block 
information"); - -/*---------------------------------------------------------------------- - Function Code -----------------------------------------------------------------------*/ -namespace tesseract { -/** - * @name start_recog - * - * Startup recog program ready to recognize words. - */ -int Wordrec::start_recog(const char *textbase) { - - program_editup(textbase, true); - return (0); -} - /** * @name program_editup @@ -83,64 +47,23 @@ int Wordrec::start_recog(const char *textbase) { * init_permute determines whether to initialize the permute functions * and Dawg models. */ -void Wordrec::program_editup(const char *textbase, bool init_permute) { - if (textbase != NULL) { - imagefile = textbase; - /* Read in data files */ - edit_with_ocr(textbase); - } - - /* Initialize subsystems */ - program_init(); - mfeature_init(); // assumes that imagefile is initialized - if (init_permute) - getDict().init_permute(); - setup_cp_maps(); - - init_metrics(); +void Wordrec::program_editup(const char *textbase, + bool init_classifier, + bool init_dict) { + if (textbase != NULL) imagefile = textbase; + InitFeatureDefs(&feature_defs_); + SetupExtractors(&feature_defs_); + InitAdaptiveClassifier(init_classifier); + if (init_dict) getDict().Load(); pass2_ok_split = chop_ok_split; pass2_seg_states = wordrec_num_seg_states; } -} // namespace tesseract - - -/** - * @name edit_with_ocr - * - * Initialize all the things in the program needed before the classifier - * code is called. 
- */ -void edit_with_ocr(const char *imagename) { - char name[FILENAMESIZE]; /*base name of file */ - - if (tord_write_output) { - strcpy(name, imagename); - strcat (name, ".txt"); - //xiaofan - textfile = open_file (name, "w"); - } - if (tord_write_raw_output) { - strcpy(name, imagename); - strcat (name, ".raw"); - rawfile = open_file (name, "w"); - } - if (record_matcher_output) { - strcpy(name, imagename); - strcat (name, ".mlg"); - matcher_fp = open_file (name, "w"); - strcpy(name, imagename); - strcat (name, ".ctx"); - correct_fp = open_file (name, "r"); - } -} - /** * @name end_recog * * Cleanup and exit the recog program. */ -namespace tesseract { int Wordrec::end_recog() { program_editdown (0); @@ -155,36 +78,10 @@ int Wordrec::end_recog() { * program. */ void Wordrec::program_editdown(inT32 elasped_time) { - dj_cleanup(); - if (tord_display_text) - cprintf ("\n"); - if (!wordrec_no_block && tord_write_output) - fprintf (textfile, "\n"); - if (tord_write_raw_output) - fprintf (rawfile, "\n"); - if (tord_write_output) { - #ifdef __UNIX__ - fsync (fileno (textfile)); - #endif - fclose(textfile); - } - if (tord_write_raw_output) { - #ifdef __UNIX__ - fsync (fileno (rawfile)); - #endif - fclose(rawfile); - } - close_choices(); - if (tessedit_save_stats) - save_summary (elasped_time); - end_match_table(); + EndAdaptiveClassifier(); + blob_match_table.end_match_table(); getDict().InitChoiceAccum(); - if (global_hash != NULL) { - free_mem(global_hash); - global_hash = NULL; - } - end_metrics(); - getDict().end_permute(); + getDict().End(); } @@ -194,11 +91,9 @@ void Wordrec::program_editdown(inT32 elasped_time) { * Get ready to do some pass 1 stuff. */ void Wordrec::set_pass1() { - tord_blob_skip.set_value(false); chop_ok_split.set_value(70.0); wordrec_num_seg_states.set_value(15); SettupPass1(); - first_pass = 1; } @@ -208,11 +103,9 @@ void Wordrec::set_pass1() { * Get ready to do some pass 2 stuff. 
*/ void Wordrec::set_pass2() { - tord_blob_skip.set_value(false); chop_ok_split.set_value(pass2_ok_split); wordrec_num_seg_states.set_value(pass2_seg_states); SettupPass2(); - first_pass = 0; } @@ -221,34 +114,12 @@ void Wordrec::set_pass2() { * * Recognize a word. */ -BLOB_CHOICE_LIST_VECTOR *Wordrec::cc_recog(TWERD *tessword, - WERD_CHOICE *best_choice, - WERD_CHOICE *best_raw_choice, - BOOL8 tester, - BOOL8 trainer, - bool last_word_on_line) { - int fx; - BLOB_CHOICE_LIST_VECTOR *results; /*matcher results */ - - if (SetErrorTrap (NULL)) { - cprintf ("Tess copped out!\n"); - ReleaseErrorTrap(); - class_string (best_choice) = NULL; - return NULL; - } +BLOB_CHOICE_LIST_VECTOR *Wordrec::cc_recog(WERD_RES *word) { getDict().InitChoiceAccum(); - getDict().reset_hyphen_vars(last_word_on_line); - init_match_table(); - for (fx = 0; fx < MAX_FX && (acts[OCR] & (FXSELECT << fx)) == 0; fx++); - results = - chop_word_main(tessword, - fx, - best_choice, - best_raw_choice, - tester, - trainer); + getDict().reset_hyphen_vars(word->word->flag(W_EOL)); + blob_match_table.init_match_table(); + BLOB_CHOICE_LIST_VECTOR *results = chop_word_main(word); getDict().DebugWordChoices(); - ReleaseErrorTrap(); return results; } @@ -267,197 +138,49 @@ int Wordrec::dict_word(const WERD_CHOICE &word) { * @name call_matcher * * Called from Tess with a blob in tess form. - * Convert the blob to editor form. - * Call the matcher setup by the segmenter in tess_matcher. - * Convert the output choices back to tess form. + * The blob may need rotating to the correct orientation for classification. 
*/ -BLOB_CHOICE_LIST *Wordrec::call_matcher(TBLOB *ptblob, //< previous blob - TBLOB *tessblob, //< blob to match - TBLOB *ntblob, //< next blob - void *, //< unused parameter - TEXTROW * //< always null anyway - ) { - PBLOB *pblob; //converted blob - PBLOB *blob; //converted blob - PBLOB *nblob; //converted blob - BLOB_CHOICE_LIST *ratings = new BLOB_CHOICE_LIST(); // matcher result - - blob = make_ed_blob (tessblob);//convert blob - if (blob == NULL) { - // Since it is actually possible to get a NULL blob here, due to invalid - // segmentations, fake a really bad classification. - BLOB_CHOICE *choice = - new BLOB_CHOICE(0, static_cast(MAX_NUM_INT_FEATURES), - static_cast(-MAX_FLOAT32), 0, NULL); - BLOB_CHOICE_IT temp_it; - temp_it.set_to_list(ratings); - temp_it.add_after_stay_put(choice); - return ratings; - } - pblob = ptblob != NULL ? make_ed_blob (ptblob) : NULL; - nblob = ntblob != NULL ? make_ed_blob (ntblob) : NULL; - // Because of the typedef for tess_matcher, the object on which it is called - // must be of type Tesseract*. With a Wordrec type it seems it doesn't work. - (reinterpret_cast(this)->*tess_matcher) - (pblob, blob, nblob, tess_word, tess_denorm, ratings, NULL); - - //match it - delete blob; //don't need that now - if (pblob != NULL) - delete pblob; - if (nblob != NULL) - delete nblob; - return ratings; -} - -/** - * @name make_ed_blob - * - * Make an editor format blob from the tess style blob. 
- */ - -PBLOB *make_ed_blob( //construct blob - TBLOB *tessblob //< blob to convert - ) { - TESSLINE *tessol; //tess outline - FRAGMENT_LIST fragments; //list of fragments - OUTLINE *outline; //current outline - OUTLINE_LIST out_list; //list of outlines - OUTLINE_IT out_it = &out_list; //iterator - - for (tessol = tessblob->outlines; tessol != NULL; tessol = tessol->next) { - //stick in list - register_outline(tessol, &fragments); - } - while (!fragments.empty ()) { - outline = make_ed_outline (&fragments); - if (outline != NULL) { - out_it.add_after_then_move (outline); +BLOB_CHOICE_LIST *Wordrec::call_matcher(TBLOB *tessblob) { + TBLOB* rotated_blob = NULL; + // If necessary, copy the blob and rotate it. + if (denorm_.block() != NULL && + denorm_.block()->classify_rotation().y() != 0.0) { + TBOX box = tessblob->bounding_box(); + int src_width = box.width(); + int src_height = box.height(); + src_width = static_cast(src_width / denorm_.scale() + 0.5); + src_height = static_cast(src_height / denorm_.scale() + 0.5); + int x_middle = (box.left() + box.right()) / 2; + int y_middle = (box.top() + box.bottom()) / 2; + rotated_blob = new TBLOB(*tessblob); + rotated_blob->Move(ICOORD(-x_middle, -y_middle)); + rotated_blob->Rotate(denorm_.block()->classify_rotation()); + tessblob = rotated_blob; + ICOORD median_size = denorm_.block()->median_size(); + int tolerance = median_size.x() / 8; + // TODO(dsl/rays) find a better normalization solution. In the mean time + // make it work for CJK by normalizing for Cap height in the same way + // as is applied in compute_block_xheight when the row is presumed to + // be ALLCAPS, i.e. 
the x-height is the fixed fraction + // blob height * CCStruct::kXHeightFraction / + // (CCStruct::kXHeightFraction + CCStruct::kXAscenderFraction) + if (NearlyEqual(src_width, static_cast(median_size.x()), tolerance) && + NearlyEqual(src_height, static_cast(median_size.y()), tolerance)) { + float target_height = kBlnXHeight * + (CCStruct::kXHeightFraction + CCStruct::kAscenderFraction) / + CCStruct::kXHeightFraction; + rotated_blob->Scale(target_height / box.width()); + rotated_blob->Move(ICOORD(0, + kBlnBaselineOffset - + rotated_blob->bounding_box().bottom())); } } - if (out_it.empty()) - return NULL; //couldn't do it - return new PBLOB (&out_list); //turn to blob -} -/** - * @name make_ed_outline - * - * Make an editor format outline from the list of fragments. - */ - -OUTLINE *make_ed_outline( //constructoutline - FRAGMENT_LIST *list //< list of fragments - ) { - FRAGMENT *fragment; //current fragment - EDGEPT *edgept; //current point - ICOORD headpos; //coords of head - ICOORD tailpos; //coords of tail - FCOORD pos; //coords of edgept - FCOORD vec; //empty - POLYPT *polypt; //current point - POLYPT_LIST poly_list; //list of point - POLYPT_IT poly_it = &poly_list;//iterator - FRAGMENT_IT fragment_it = list;//fragment - - headpos = fragment_it.data ()->head; - do { - fragment = fragment_it.data (); - edgept = fragment->headpt; //start of segment - do { - pos = FCOORD (edgept->pos.x, edgept->pos.y); - vec = FCOORD (edgept->vec.x, edgept->vec.y); - polypt = new POLYPT (pos, vec); - //add to list - poly_it.add_after_then_move (polypt); - edgept = edgept->next; - } - while (edgept != fragment->tailpt); - tailpos = ICOORD (edgept->pos.x, edgept->pos.y); - //get rid of it - delete fragment_it.extract (); - if (tailpos != headpos) { - if (fragment_it.empty ()) { - return NULL; - } - fragment_it.forward (); - //find next segment - for (fragment_it.mark_cycle_pt (); !fragment_it.cycled_list () && - fragment_it.data ()->head != tailpos; - fragment_it.forward ()); - if 
(fragment_it.data ()->head != tailpos) { - // It is legitimate for the heads to not all match to tails, - // since not all combinations of seams always make sense. - for (fragment_it.mark_cycle_pt (); - !fragment_it.cycled_list (); fragment_it.forward ()) { - fragment = fragment_it.extract (); - delete fragment; - } - return NULL; //can't do it - } - } - } - while (tailpos != headpos); - return new OUTLINE (&poly_it); //turn to outline -} -/** - * @name register_outline - * - * Add the fragments in the given outline to the list - */ - -void register_outline( //add fragments - TESSLINE *outline, //< tess format - FRAGMENT_LIST *list //< list to add to - ) { - EDGEPT *startpt; //start of outline - EDGEPT *headpt; //start of fragment - EDGEPT *tailpt; //end of fragment - FRAGMENT *fragment; //new fragment - FRAGMENT_IT it = list; //iterator - - startpt = outline->loop; - do { - startpt = startpt->next; - if (startpt == NULL) - return; //illegal! - } - while (startpt->flags[0] == 0 && startpt != outline->loop); - headpt = startpt; - do - startpt = startpt->next; - while (startpt->flags[0] != 0 && startpt != headpt); - if (startpt->flags[0] != 0) - return; //all hidden! - - headpt = startpt; - do { - tailpt = headpt; - do - tailpt = tailpt->next; - while (tailpt->flags[0] == 0 && tailpt != startpt); - fragment = new FRAGMENT (headpt, tailpt); - it.add_after_then_move (fragment); - while (tailpt->flags[0] != 0) - tailpt = tailpt->next; - headpt = tailpt; - } - while (tailpt != startpt); + BLOB_CHOICE_LIST *ratings = new BLOB_CHOICE_LIST(); // matcher result + AdaptiveClassifier(tessblob, ratings, NULL); + if (rotated_blob != NULL) + delete rotated_blob; + return ratings; } -ELISTIZE (FRAGMENT) - -/** - * @name FRAGMENT::FRAGMENT - * - * Constructor for fragments. 
- */ -FRAGMENT::FRAGMENT ( //constructor -EDGEPT * head_pt, //< start point -EDGEPT * tail_pt //< end point -):head (head_pt->pos.x, head_pt->pos.y), tail (tail_pt->pos.x, -tail_pt->pos.y) { - headpt = head_pt; // save ptrs - tailpt = tail_pt; -} } // namespace tesseract diff --git a/wordrec/tface.h b/wordrec/tface.h deleted file mode 100644 index 2363289a5b..0000000000 --- a/wordrec/tface.h +++ /dev/null @@ -1,35 +0,0 @@ -/********************************************************************** - * File: tface.h - * Description: C side of the Tess/tessedit C/C++ interface. - * Author: Ray Smith - * Created: Mon Apr 27 11:57:06 BST 1992 - * - * (C) Copyright 1992, Hewlett-Packard Ltd. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. 
- * - **********************************************************************/ - -#ifndef TFACE_H -#define TFACE_H - -#include "cutil.h" -#include "host.h" -#include "ratngs.h" -#include "tessclas.h" - -extern BOOL_VAR_H(wordrec_no_block, false, "Don't output block information"); - -/*---------------------------------------------------------------------------- - Function Prototypes -----------------------------------------------------------------------------*/ -void edit_with_ocr(const char *imagename); - -#endif diff --git a/wordrec/wordclass.cpp b/wordrec/wordclass.cpp index acfd3089db..e689a1b4d9 100644 --- a/wordrec/wordclass.cpp +++ b/wordrec/wordclass.cpp @@ -31,16 +31,10 @@ #endif #include "wordclass.h" -#include "fxid.h" -#include "tordvars.h" #include "associate.h" #include "render.h" -#include "metrics.h" #include "matchtab.h" -//#include "tfacepp.h" #include "permute.h" -#include "context.h" -#include "badwords.h" #include "callcpp.h" #include #include "wordrec.h" @@ -50,22 +44,6 @@ #include "config_auto.h" #endif -extern TBLOB *newblob(); - -/*---------------------------------------------------------------------- - Variables -----------------------------------------------------------------------*/ -inT16 first_pass; - -/*---------------------------------------------------------------------- - C o n s t a n t s -----------------------------------------------------------------------*/ - -#define BOLD_ON "&dB(s3B" -#define BOLD_OFF "&d@(s0B" -#define UNDERLINE_ON "&dD" -#define UNDERLINE_OFF "&d@" - /*---------------------------------------------------------------------- F u n c t i o n s ----------------------------------------------------------------------*/ @@ -77,35 +55,24 @@ namespace tesseract { * table. Attempt to recognize this blob as a character. The recognition * rating for this blob will be stored as a part of the blob. This value * will also be returned to the caller. 
- * - * @param pblob Previous blob * @param blob Current blob - * @param nlob Next blob - * @param row The row to process * @param string The string to display in ScrollView * @param color The colour to use when displayed with ScrollView */ -BLOB_CHOICE_LIST *Wordrec::classify_blob(TBLOB *pblob, - TBLOB *blob, - TBLOB *nblob, - TEXTROW *row, - const char *string, - C_COL color) { +BLOB_CHOICE_LIST *Wordrec::classify_blob(TBLOB *blob, + const char *string, C_COL color) { BLOB_CHOICE_LIST *choices = NULL; - chars_classified++; /* Global value */ - if (tord_blob_skip) - return (NULL); #ifndef GRAPHICS_DISABLED if (wordrec_display_all_blobs) display_blob(blob, color); #endif - choices = get_match(blob); + choices = blob_match_table.get_match(blob); if (choices == NULL) { - choices = call_matcher(pblob, blob, nblob, NULL, row); - put_match(blob, choices); + choices = call_matcher(blob); + blob_match_table.put_match(blob, choices); } #ifndef GRAPHICS_DISABLED - if (tord_display_ratings && string) + if (classify_debug_level && string) print_ratings_list(string, choices, getDict().getUnicharset()); if (wordrec_blob_pause) @@ -115,6 +82,16 @@ BLOB_CHOICE_LIST *Wordrec::classify_blob(TBLOB *pblob, return (choices); } +// Returns a valid BLOB_CHOICE_LIST representing the given result. 
+BLOB_CHOICE_LIST *Wordrec::fake_classify_blob(UNICHAR_ID class_id, + float rating, float certainty) { + BLOB_CHOICE_LIST *ratings = new BLOB_CHOICE_LIST(); // matcher result + BLOB_CHOICE *choice = new BLOB_CHOICE(class_id, rating, certainty, -1, -1, 0); + BLOB_CHOICE_IT temp_it(ratings); + temp_it.add_after_stay_put(choice); + return ratings; +} + /** * @name update_blob_classifications * @@ -127,54 +104,8 @@ void Wordrec::update_blob_classifications( int index = 0; for (; tblob != NULL && index < choices.length(); tblob = tblob->next, index++) { - add_to_match(tblob, choices.get(index)); + blob_match_table.add_to_match(tblob, choices.get(index)); } } } // namespace tesseract; - - -/** - * @name write_text_files - * - * Write an answer to the output file that is the raw guess (without - * context) directly from the classifier. - */ -void write_text_files(TWERD *word, - char *raw_choice, - int same_row, - int good_word, - int firstpass) { - int x; - /* Raw output */ - if (tord_write_raw_output) { - if (same_row) - fprintf (rawfile, "\n"); - if (raw_choice && strlen (raw_choice)) { - fprintf (rawfile, "%s ", raw_choice); - fflush(rawfile); - } - } - /* Text file output */ - if (tord_write_output) { - if (same_row) - fprintf (textfile, "\n"); - if (word->guess && strlen (word->guess)) { - for (x = 0; x < word->blanks; x++) - fprintf (textfile, " "); - if (!firstpass) - fprintf(textfile, BOLD_ON); - if (!good_word) - fprintf(textfile, UNDERLINE_ON); - fprintf (textfile, "%s", word->guess); - if (!good_word) - fprintf(textfile, UNDERLINE_OFF); - if (!firstpass) - fprintf(textfile, BOLD_OFF); - fflush(textfile); - } - } - /* Global counters */ - character_count += (word->guess ? 
strlen (word->guess) : 0); - word_count++; -} diff --git a/wordrec/wordclass.h b/wordrec/wordclass.h index b9e0c87699..e5fd7e9457 100644 --- a/wordrec/wordclass.h +++ b/wordrec/wordclass.h @@ -25,19 +25,11 @@ #ifndef WERDCLASSH #define WERDCLASSH +#include "blobs.h" #include "callcpp.h" #include "ratngs.h" #include "states.h" -#include "tessclas.h" -/*---------------------------------------------------------------------- - Variables -----------------------------------------------------------------------*/ -extern inT16 first_pass; - -/*---------------------------------------------------------------------- - F u n c t i o n s -----------------------------------------------------------------------*/ void write_text_files(TWERD *word, char *raw_choice, int same_row, diff --git a/wordrec/wordrec.cpp b/wordrec/wordrec.cpp index 0365e29612..297d748a73 100644 --- a/wordrec/wordrec.cpp +++ b/wordrec/wordrec.cpp @@ -18,7 +18,133 @@ #include "wordrec.h" +#include "language_model.h" +#include "params.h" + + namespace tesseract { -Wordrec::Wordrec() {} -Wordrec::~Wordrec() {} +Wordrec::Wordrec() : + // control parameters + BOOL_MEMBER(wordrec_no_block, FALSE, "Don't output block information", + this->params()), + BOOL_MEMBER(wordrec_enable_assoc, TRUE, "Associator Enable", + this->params()), + BOOL_MEMBER(force_word_assoc, FALSE, + "force associator to run regardless of what enable_assoc is." 
+ "This is used for CJK where component grouping is necessary.", + this->params()), + INT_MEMBER(wordrec_num_seg_states, 30, "Segmentation states", + this->params()), + double_MEMBER(wordrec_worst_state, 1.0, "Worst segmentation state", + this->params()), + BOOL_MEMBER(fragments_guide_chopper, FALSE, + "Use information from fragments to guide chopping process", + this->params()), + INT_MEMBER(repair_unchopped_blobs, 1, "Fix blobs that aren't chopped", + this->params()), + double_MEMBER(tessedit_certainty_threshold, -2.25, "Good blob limit", + this->params()), + INT_MEMBER(chop_debug, 0, "Chop debug", + this->params()), + BOOL_MEMBER(chop_enable, 1, "Chop enable", + this->params()), + BOOL_MEMBER(chop_vertical_creep, 0, "Vertical creep", + this->params()), + INT_MEMBER(chop_split_length, 10000, "Split Length", + this->params()), + INT_MEMBER(chop_same_distance, 2, "Same distance", + this->params()), + INT_MEMBER(chop_min_outline_points, 6, "Min Number of Points on Outline", + this->params()), + INT_MEMBER(chop_inside_angle, -50, "Min Inside Angle Bend", + this->params()), + INT_MEMBER(chop_min_outline_area, 2000, "Min Outline Area", + this->params()), + double_MEMBER(chop_split_dist_knob, 0.5, "Split length adjustment", + this->params()), + double_MEMBER(chop_overlap_knob, 0.9, "Split overlap adjustment", + this->params()), + double_MEMBER(chop_center_knob, 0.15, "Split center adjustment", + this->params()), + double_MEMBER(chop_sharpness_knob, 0.06, "Split sharpness adjustment", + this->params()), + double_MEMBER(chop_width_change_knob, 5.0, "Width change adjustment", + this->params()), + double_MEMBER(chop_ok_split, 100.0, "OK split limit", + this->params()), + double_MEMBER(chop_good_split, 50.0, "Good split limit", + this->params()), + INT_MEMBER(chop_x_y_weight, 3, "X / Y length weight", + this->params()), + INT_MEMBER(segment_adjust_debug, 0, "Segmentation adjustment debug", + this->params()), + BOOL_MEMBER(assume_fixed_pitch_char_segment, FALSE, + "include 
fixed-pitch heuristics in char segmentation", + this->params()), + BOOL_MEMBER(use_new_state_cost, FALSE, + "use new state cost heuristics for segmentation state evaluation", + this->params()), + double_MEMBER(heuristic_segcost_rating_base, 1.25, + "base factor for adding segmentation cost into word rating." + "It's a multiplying factor, the larger the value above 1, " + "the bigger the effect of segmentation cost.", + this->params()), + double_MEMBER(heuristic_weight_rating, 1.0, + "weight associated with char rating in combined cost of state", + this->params()), + double_MEMBER(heuristic_weight_width, 1000.0, + "weight associated with width evidence in combined cost of" + " state", this->params()), + double_MEMBER(heuristic_weight_seamcut, 0.0, + "weight associated with seam cut in combined cost of state", + this->params()), + double_MEMBER(heuristic_max_char_wh_ratio, 2.0, + "max char width-to-height ratio allowed in segmentation", + this->params()), + INT_MEMBER(wordrec_debug_level, 0, + "Debug level for wordrec", this->params()), + BOOL_INIT_MEMBER(enable_new_segsearch, false, + "Enable new segmentation search path.", this->params()), + INT_MEMBER(segsearch_debug_level, 0, + "SegSearch debug level", this->params()), + INT_MEMBER(segsearch_max_pain_points, 2000, + "Maximum number of pain points stored in the queue", + this->params()), + INT_MEMBER(segsearch_max_futile_classifications, 10, + "Maximum number of pain point classifications per word that" + "did not result in finding a better word choice.", + this->params()), + double_MEMBER(segsearch_max_char_wh_ratio, 2.0, + "Maximum character width-to-height ratio", this->params()), + double_MEMBER(segsearch_max_fixed_pitch_char_wh_ratio, 2.0, + "Maximum character width-to-height ratio for" + " fixed-pitch fonts", + this->params()) { + states_before_best = NULL; + best_certainties[0] = NULL; + best_certainties[1] = NULL; + character_widths = NULL; + language_model_ = new LanguageModel(&(getDict()), + 
&(prev_word_best_choice_)); + pass2_seg_states = 0; + num_joints = 0; + num_pushed = 0; + num_popped = 0; +} + +Wordrec::~Wordrec() { + delete language_model_; } + +void Wordrec::CopyCharChoices(const BLOB_CHOICE_LIST_VECTOR &from, + BLOB_CHOICE_LIST_VECTOR *to) { + to->delete_data_pointers(); + to->clear(); + for (int i = 0; i < from.size(); ++i) { + BLOB_CHOICE_LIST *cc_list = new BLOB_CHOICE_LIST(); + cc_list->deep_copy(from[i], &BLOB_CHOICE::deep_copy); + to->push_back(cc_list); + } +} + +} // namespace tesseract diff --git a/wordrec/wordrec.h b/wordrec/wordrec.h index eb99857b58..3ca48d016c 100644 --- a/wordrec/wordrec.h +++ b/wordrec/wordrec.h @@ -19,55 +19,169 @@ #ifndef TESSERACT_WORDREC_WORDREC_H__ #define TESSERACT_WORDREC_WORDREC_H__ +#include "associate.h" #include "classify.h" +#include "dict.h" +#include "language_model.h" #include "ratngs.h" #include "matrix.h" +#include "matchtab.h" +#include "oldheap.h" +#include "gradechop.h" #include "seam.h" -#include "callback.h" +#include "findseam.h" +#include "callcpp.h" #include "associate.h" -#include "badwords.h" +#include "pieces.h" +#include "ratngs.h" +#include "tally.h" struct CHUNKS_RECORD; struct SEARCH_RECORD; +class WERD_RES; + +// A struct for storing child/parent pairs of the BLOB_CHOICE_LISTs +// to be processed by the segmentation search. +struct SEG_SEARCH_PENDING : public ELIST_LINK { + SEG_SEARCH_PENDING(int child_row_arg, + BLOB_CHOICE_LIST *parent_arg, + tesseract::LanguageModelFlagsType changed_arg) : + child_row(child_row_arg), parent(parent_arg), changed(changed_arg) {} + + // Comparator function for add_sorted(). + static int compare(const void *p1, const void *p2) { + const SEG_SEARCH_PENDING *e1 = *reinterpret_cast< + const SEG_SEARCH_PENDING * const *>(p1); + const SEG_SEARCH_PENDING *e2 = *reinterpret_cast< + const SEG_SEARCH_PENDING * const *>(p2); + if (e1->child_row == e2->child_row && + e1->parent == e2->parent) return 0; + return (e1->child_row < e2->child_row) ? 
-1 : 1; + } + + int child_row; // row of the child in the ratings matrix + BLOB_CHOICE_LIST *parent; // pointer to the parent BLOB_CHOICE_LIST + // Flags that indicate which language model components are still active + // on the parent path (i.e. recorded some changes to the language model + // state) and need to be invoked for this pending entry. + // This field is used as an argument to LanguageModel::UpdateState() + // in Wordrec::UpdateSegSearchNodes(). + tesseract::LanguageModelFlagsType changed; +}; + +ELISTIZEH(SEG_SEARCH_PENDING); + namespace tesseract { + +/* ccmain/tstruct.cpp *********************************************************/ +class FRAGMENT:public ELIST_LINK +{ + public: + FRAGMENT() { //constructor + } + FRAGMENT(EDGEPT *head_pt, //start + EDGEPT *tail_pt); //end + + ICOORD head; //coords of start + ICOORD tail; //coords of end + EDGEPT *headpt; //start point + EDGEPT *tailpt; //end point + + NEWDELETE2 (FRAGMENT) +}; +ELISTIZEH (FRAGMENT) + + class Wordrec : public Classify { public: + // config parameters ******************************************************* + BOOL_VAR_H(wordrec_no_block, FALSE, "Don't output block information"); + BOOL_VAR_H(wordrec_enable_assoc, TRUE, "Associator Enable"); + BOOL_VAR_H(force_word_assoc, FALSE, + "force associator to run regardless of what enable_assoc is." 
+ "This is used for CJK where component grouping is necessary."); + INT_VAR_H(wordrec_num_seg_states, 30, "Segmentation states"); + double_VAR_H(wordrec_worst_state, 1, "Worst segmentation state"); + BOOL_VAR_H(fragments_guide_chopper, FALSE, + "Use information from fragments to guide chopping process"); + INT_VAR_H(repair_unchopped_blobs, 1, "Fix blobs that aren't chopped"); + double_VAR_H(tessedit_certainty_threshold, -2.25, "Good blob limit"); + INT_VAR_H(chop_debug, 0, "Chop debug"); + BOOL_VAR_H(chop_enable, 1, "Chop enable"); + BOOL_VAR_H(chop_vertical_creep, 0, "Vertical creep"); + INT_VAR_H(chop_split_length, 10000, "Split Length"); + INT_VAR_H(chop_same_distance, 2, "Same distance"); + INT_VAR_H(chop_min_outline_points, 6, "Min Number of Points on Outline"); + INT_VAR_H(chop_inside_angle, -50, "Min Inside Angle Bend"); + INT_VAR_H(chop_min_outline_area, 2000, "Min Outline Area"); + double_VAR_H(chop_split_dist_knob, 0.5, "Split length adjustment"); + double_VAR_H(chop_overlap_knob, 0.9, "Split overlap adjustment"); + double_VAR_H(chop_center_knob, 0.15, "Split center adjustment"); + double_VAR_H(chop_sharpness_knob, 0.06, "Split sharpness adjustment"); + double_VAR_H(chop_width_change_knob, 5.0, "Width change adjustment"); + double_VAR_H(chop_ok_split, 100.0, "OK split limit"); + double_VAR_H(chop_good_split, 50.0, "Good split limit"); + INT_VAR_H(chop_x_y_weight, 3, "X / Y length weight"); + INT_VAR_H(segment_adjust_debug, 0, "Segmentation adjustment debug"); + BOOL_VAR_H(assume_fixed_pitch_char_segment, FALSE, + "include fixed-pitch heuristics in char segmentation"); + BOOL_VAR_H(use_new_state_cost, FALSE, + "use new state cost heuristics for segmentation state evaluation"); + double_VAR_H(heuristic_segcost_rating_base, 1.25, + "base factor for adding segmentation cost into word rating." 
+ "It's a multiplying factor, the larger the value above 1, " + "the bigger the effect of segmentation cost."); + double_VAR_H(heuristic_weight_rating, 1, + "weight associated with char rating in combined cost of state"); + double_VAR_H(heuristic_weight_width, 0, + "weight associated with width evidence in combined cost of state"); + double_VAR_H(heuristic_weight_seamcut, 0, + "weight associated with seam cut in combined cost of state"); + double_VAR_H(heuristic_max_char_wh_ratio, 2.0, + "max char width-to-height ratio allowed in segmentation"); + INT_VAR_H(wordrec_debug_level, 0, "Debug level for wordrec"); + BOOL_VAR_H(enable_new_segsearch, false, + "Enable new segmentation search path."); + INT_VAR_H(segsearch_debug_level, 0, "SegSearch debug level"); + INT_VAR_H(segsearch_max_pain_points, 2000, + "Maximum number of pain points stored in the queue"); + INT_VAR_H(segsearch_max_futile_classifications, 10, + "Maximum number of pain point classifications per word."); + double_VAR_H(segsearch_max_char_wh_ratio, 2.0, + "Maximum character width-to-height ratio"); + double_VAR_H(segsearch_max_fixed_pitch_char_wh_ratio, 2.0, + "Maximum character width-to-height ratio for" + "fixed pitch fonts"); + + // methods from wordrec/*.cpp *********************************************** Wordrec(); - ~Wordrec(); - void save_summary(inT32 elapsed_time); - /* tface.cpp ***************************************************************/ - void program_editup(const char *textbase, bool init_permute); - BLOB_CHOICE_LIST_VECTOR *cc_recog(TWERD *tessword, - WERD_CHOICE *best_choice, - WERD_CHOICE *best_raw_choice, - BOOL8 tester, - BOOL8 trainer, - bool last_word_on_line); + virtual ~Wordrec(); + + void CopyCharChoices(const BLOB_CHOICE_LIST_VECTOR &from, + BLOB_CHOICE_LIST_VECTOR *to); + + // tface.cpp + void program_editup(const char *textbase, + bool init_classifier, + bool init_permute); + BLOB_CHOICE_LIST_VECTOR *cc_recog(WERD_RES *word); void program_editdown(inT32 elasped_time); void 
set_pass1(); void set_pass2(); int end_recog(); - int start_recog(const char *textbase); - BLOB_CHOICE_LIST *call_matcher( //call a matcher - TBLOB *ptblob, //previous - TBLOB *tessblob, //blob to match - TBLOB *ntblob, //next - void *, //unused parameter - TEXTROW * //always null anyway - ); - /* tessinit.cpp ************************************************************/ - void program_init(); - /* wordclass.cpp ***********************************************************/ - BLOB_CHOICE_LIST *classify_blob(TBLOB *pblob, - TBLOB *blob, - TBLOB *nblob, - TEXTROW *row, + BLOB_CHOICE_LIST *call_matcher(TBLOB* blob); + int dict_word(const WERD_CHOICE &word); + // wordclass.cpp + BLOB_CHOICE_LIST *classify_blob(TBLOB *blob, const char *string, C_COL color); + BLOB_CHOICE_LIST *fake_classify_blob(UNICHAR_ID class_id, + float rating, float certainty); void update_blob_classifications(TWERD *word, const BLOB_CHOICE_LIST_VECTOR &choices); - /* bestfirst.cpp ***********************************************************/ + + // bestfirst.cpp BLOB_CHOICE_LIST_VECTOR *evaluate_chunks(CHUNKS_RECORD *chunks_record, SEARCH_STATE search_state); void update_ratings(const BLOB_CHOICE_LIST_VECTOR &new_choices, @@ -76,33 +190,124 @@ class Wordrec : public Classify { inT16 evaluate_state(CHUNKS_RECORD *chunks_record, SEARCH_RECORD *the_search, DANGERR *fixpt); + SEARCH_RECORD *new_search(CHUNKS_RECORD *chunks_record, + int num_joints, + BLOB_CHOICE_LIST_VECTOR *best_char_choices, + WERD_CHOICE *best_choice, + WERD_CHOICE *raw_choice, + STATE *state); void best_first_search(CHUNKS_RECORD *chunks_record, - WERD_CHOICE *best_choice, - WERD_CHOICE *raw_choice, + BLOB_CHOICE_LIST_VECTOR *best_char_choices, + WERD_RES *word, STATE *state, DANGERR *fixpt, STATE *best_state); + void delete_search(SEARCH_RECORD *the_search); void expand_node(FLOAT32 worst_priority, CHUNKS_RECORD *chunks_record, SEARCH_RECORD *the_search); + void replace_char_widths(CHUNKS_RECORD *chunks_record, + SEARCH_STATE 
state); + // Transfers the given state to the word's output fields: rebuild_word, + // best_state, box_word, and returns the corresponding blob choices. BLOB_CHOICE_LIST_VECTOR *rebuild_current_state( - TBLOB *blobs, - SEAMS seam_list, + WERD_RES *word, STATE *state, BLOB_CHOICE_LIST_VECTOR *char_choices, - int fx, - bool force_rebuild, - const WERD_CHOICE &best_choice, - const MATRIX *ratings); + MATRIX *ratings); + // Creates a fake blob choice from the combination of the given fragments. + // unichar is the class to be made from the combination, + // expanded_fragment_lengths[choice_index] is the number of fragments to use. + // old_choices[choice_index] has the classifier output for each fragment. + // choice index initially indexes the last fragment and should be decremented + // expanded_fragment_lengths[choice_index] times to get the earlier fragments. + // Guarantees to return something non-null, or abort! + BLOB_CHOICE* rebuild_fragments( + const char* unichar, + const char* expanded_fragment_lengths, + int choice_index, + BLOB_CHOICE_LIST_VECTOR *old_choices); + // Creates a joined copy of the blobs between x and y (inclusive) and + // insert into the rebuild_word in word. + // Returns a deep copy of the classifier results for the blob. BLOB_CHOICE_LIST *join_blobs_and_classify( - TBLOB *blobs, SEAMS seam_list, - int x, int y, int fx, const MATRIX *ratings, + WERD_RES* word, int x, int y, int choice_index, MATRIX *ratings, BLOB_CHOICE_LIST_VECTOR *old_choices); + STATE *pop_queue(HEAP *queue); + void push_queue(HEAP *queue, STATE *state, FLOAT32 worst_priority, + FLOAT32 priority, bool debug); - /* chopper.cpp *************************************************************/ + // segsearch.cpp + // SegSearch works on the lower diagonal matrix of BLOB_CHOICE_LISTs. + // Each entry in the matrix represents the classification choice + // for a chunk, i.e. 
an entry in row 2, column 1 represents the list + // of ratings for the chunks 1 and 2 classified as a single blob. + // The entries on the diagonal of the matrix are classifier choice lists + // for a single chunk from the maximal segmentation. + // + // The ratings matrix given to SegSearch represents the segmentation + // graph / trellis for the current word. The nodes in the graph are the + // individual BLOB_CHOICEs in each of the BLOB_CHOICE_LISTs in the ratings + // matrix. The children of each node (nodes connected by outgoing links) + // are the entries in the column that is equal to node's row+1. The parents + // (nodes connected by the incoming links) are the entries in the row that + // is equal to the node's column-1. Here is an example ratings matrix: + // + // 0 1 2 3 4 + // ------------------------- + // 0| c,( | + // 1| d l,1 | + // 2| o | + // 3| c,( | + // 4| g,y l,1 | + // ------------------------- + // + // In the example above node "o" has children (outgoing connection to nodes) + // "c","(","g","y" and parents (incoming connections from nodes) "l","1","d". + // + // The objective of the search is to find the least cost path, where the cost + // is determined by the language model components and the properties of the + // cut between the blobs on the path. SegSearch starts by populating the + // matrix with the all the entries that were classified by the chopper and + // finding the initial best path. Based on the classifier ratings, language + // model scores and the properties of each cut, a list of "pain points" is + // constructed - those are the points on the path where the choices do not + // look consistent with the neighboring choices, the cuts look particularly + // problematic, or the certainties of the blobs are low. The most troublesome + // "pain point" is picked from the list and the new entry in the ratings + // matrix corresponding to this "pain point" is filled in. 
Then the language + // model state is updated to reflect the new classification and the new + // "pain points" are added to the list and the next most troublesome + // "pain point" is determined. This continues until either the word choice + // composed from the best paths in the segmentation graph is "good enough" + // (e.g. above a certain certainty threshold, is an unambiguous dictionary + // word, etc) or there are no more "pain points" to explore. + void SegSearch(CHUNKS_RECORD *chunks_record, + WERD_CHOICE *best_choice, + BLOB_CHOICE_LIST_VECTOR *best_char_choices, + WERD_CHOICE *raw_choice, + STATE *output_best_state); + + // chop.cpp + PRIORITY point_priority(EDGEPT *point); + void add_point_to_list(POINT_GROUP point_list, EDGEPT *point); + int angle_change(EDGEPT *point1, EDGEPT *point2, EDGEPT *point3); + int is_little_chunk(EDGEPT *point1, EDGEPT *point2); + int is_small_area(EDGEPT *point1, EDGEPT *point2); + EDGEPT *pick_close_point(EDGEPT *critical_point, + EDGEPT *vertical_point, + int *best_dist); + void prioritize_points(TESSLINE *outline, POINT_GROUP points); + void new_min_point(EDGEPT *local_min, POINT_GROUP points); + void new_max_point(EDGEPT *local_max, POINT_GROUP points); + void vertical_projection_point(EDGEPT *split_point, EDGEPT *target_point, + EDGEPT** best_point); + + // chopper.cpp + SEAM *attempt_blob_chop(TWERD *word, inT32 blob_number, bool italic_blob, + SEAMS seam_list); bool improve_one_blob(TWERD *word, BLOB_CHOICE_LIST_VECTOR *char_choices, - int fx, inT32 *blob_number, SEAMS *seam_list, DANGERR *fixpt, @@ -114,57 +319,81 @@ class Wordrec : public Classify { inT32 *blob_number, SEAMS *seam_list, int *right_chop_index); - BLOB_CHOICE_LIST_VECTOR *chop_word_main(register TWERD *word, - int fx, - WERD_CHOICE *best_choice, - WERD_CHOICE *raw_choice, - BOOL8 tester, - BOOL8 trainer); - void improve_by_chopping(register TWERD *word, + BLOB_CHOICE_LIST_VECTOR *chop_word_main(WERD_RES *word); + void improve_by_chopping(WERD_RES 
*word, BLOB_CHOICE_LIST_VECTOR *char_choices, - int fx, STATE *best_state, - WERD_CHOICE *best_choice, - WERD_CHOICE *raw_choice, - SEAMS *seam_list, + BLOB_CHOICE_LIST_VECTOR *best_char_choices, DANGERR *fixpt, - STATE *chop_states, - inT32 *state_count); - MATRIX *word_associator(TBLOB *blobs, - SEAMS seams, + bool *updated_best_choice); + MATRIX *word_associator(WERD_RES *word, STATE *state, - int fxid, - WERD_CHOICE *best_choice, - WERD_CHOICE *raw_choice, - char *correct, + BLOB_CHOICE_LIST_VECTOR *best_char_choices, DANGERR *fixpt, STATE *best_state); inT16 select_blob_to_split(const BLOB_CHOICE_LIST_VECTOR &char_choices, float rating_ceiling, bool split_next_to_fragment); - /* mfvars.cpp **************************************************************/ - void mfeature_init(); - /* pieces.cpp **************************************************************/ - BLOB_CHOICE_LIST *classify_piece(TBLOB *pieces, - SEAMS seams, - inT16 start, - inT16 end); + + // findseam.cpp + void junk_worst_seam(SEAM_QUEUE seams, SEAM *new_seam, float new_priority); + void choose_best_seam(SEAM_QUEUE seam_queue, + SEAM_PILE *seam_pile, + SPLIT *split, + PRIORITY priority, + SEAM **seam_result, + TBLOB *blob); + void combine_seam(SEAM_QUEUE seam_queue, SEAM_PILE seam_pile, SEAM *seam); + inT16 constrained_split(SPLIT *split, TBLOB *blob); + void delete_seam_pile(SEAM_PILE seam_pile); + SEAM *pick_good_seam(TBLOB *blob); + PRIORITY seam_priority(SEAM *seam, inT16 xmin, inT16 xmax); + void try_point_pairs (EDGEPT * points[MAX_NUM_POINTS], + inT16 num_points, + SEAM_QUEUE seam_queue, + SEAM_PILE * seam_pile, SEAM ** seam, TBLOB * blob); + void try_vertical_splits(EDGEPT * points[MAX_NUM_POINTS], + inT16 num_points, + SEAM_QUEUE seam_queue, + SEAM_PILE * seam_pile, SEAM ** seam, TBLOB * blob); + + // gradechop.cpp + PRIORITY full_split_priority(SPLIT *split, inT16 xmin, inT16 xmax); + PRIORITY grade_center_of_blob(register BOUNDS_RECT rect); + PRIORITY grade_overlap(register BOUNDS_RECT 
rect); + PRIORITY grade_split_length(register SPLIT *split); + PRIORITY grade_sharpness(register SPLIT *split); + PRIORITY grade_width_change(register BOUNDS_RECT rect); + void set_outline_bounds(register EDGEPT *point1, + register EDGEPT *point2, + BOUNDS_RECT rect); + + // outlines.cpp + int crosses_outline(EDGEPT *p0, EDGEPT *p1, EDGEPT *outline); + int is_crossed(TPOINT a0, TPOINT a1, TPOINT b0, TPOINT b1); + int is_same_edgept(EDGEPT *p1, EDGEPT *p2); + EDGEPT *near_point(EDGEPT *point, EDGEPT *line_pt_0, EDGEPT *line_pt_1); + void reverse_outline(EDGEPT *outline); + + // pieces.cpp + virtual BLOB_CHOICE_LIST *classify_piece(TBLOB *pieces, + SEAMS seams, + inT16 start, + inT16 end); BLOB_CHOICE_LIST *get_piece_rating(MATRIX *ratings, TBLOB *blobs, SEAMS seams, inT16 start, inT16 end); - /* djmenus.cpp **************************************************************/ - // Prints out statistics gathered. - void dj_statistics(FILE *File) { - PrintAdaptiveStatistics(File); - PrintBadWords(File); - } - // Does clean up (should be called at the end of the program). 
- void dj_cleanup() { EndAdaptiveClassifier(); } - + BOUNDS_LIST record_blob_bounds(TBLOB *blobs); + MATRIX *record_piece_ratings(TBLOB *blobs); - /* heuristic.cpp ************************************************************/ + // heuristic.cpp + WIDTH_RECORD* state_char_widths(WIDTH_RECORD *chunk_widths, + STATE *state, + int num_joints); + FLOAT32 get_width_variance(WIDTH_RECORD *wrec, float norm_height); + FLOAT32 get_gap_variance(WIDTH_RECORD *wrec, float norm_height); FLOAT32 prioritize_state(CHUNKS_RECORD *chunks_record, SEARCH_RECORD *the_search); FLOAT32 width_priority(CHUNKS_RECORD *chunks_record, @@ -177,46 +406,58 @@ class Wordrec : public Classify { STATE *state, int num_joints); - /* member variables *********************************************************/ - /* tface.cpp ****************************************************************/ - POLY_MATCHER tess_matcher;//current matcher - POLY_TESTER tess_tester; //current tester - POLY_TESTER tess_trainer; //current trainer - DENORM *tess_denorm; //current denorm - WERD *tess_word; //current word - int dict_word(const WERD_CHOICE &word); -}; + // Member variables. + LanguageModel *language_model_; + PRIORITY pass2_ok_split; + int pass2_seg_states; + int num_joints; + int num_pushed; + int num_popped; + TALLY states_before_best; + TALLY best_certainties[2]; + TALLY character_widths; /* Width histogram */ + BlobMatchTable blob_match_table; + EVALUATION_ARRAY last_segmentation; + // Stores the best choice for the previous word in the paragraph. + // This variable is modified by PAGE_RES_IT when iterating over + // words to OCR on the page. 
+ WERD_CHOICE *prev_word_best_choice_; - -/* ccmain/tstruct.cpp *********************************************************/ -class FRAGMENT:public ELIST_LINK -{ - public: - FRAGMENT() { //constructor - } - FRAGMENT(EDGEPT *head_pt, //start - EDGEPT *tail_pt); //end - - ICOORD head; //coords of start - ICOORD tail; //coords of end - EDGEPT *headpt; //start point - EDGEPT *tailpt; //end point - - NEWDELETE2 (FRAGMENT) + protected: + // Updates the language model state recorded for the child entries specified + // in pending[starting_col]. Enqueues the children of the updated entries + // into pending and proceedes to update (and remove from pending) all the + // remaining entries in pending[col] (col >= starting_col). Upon termination + // of this function all the pending[col] lists will be empty. + // + // The arguments: + // + // starting_col: index of the column in chunks_record->ratings from + // which the update should be started + // + // pending: list of entries listing chunks_record->ratings entries + // that should be updated + // + // pain_points: priority heap listing the pain points generated by + // the language model + // + // temp_pain_points: temporary storage for tentative pain points generated + // by the language model after a single call to LanguageModel::UpdateState() + // (the agrument is passed in rather than created before each + // LanguageModel::UpdateState() call to avoid dynamic memory re-allocation) + // + // best_choice_bundle: a collection of variables that should be updated + // if a new best choice is found + // + void UpdateSegSearchNodes(int starting_col, + SEG_SEARCH_PENDING_LIST *pending[], + BestPathByColumn *best_path_by_column[], + CHUNKS_RECORD *chunks_record, + HEAP *pain_points, + BestChoiceBundle *best_choice_bundle); }; -ELISTIZEH (FRAGMENT) -PBLOB *make_ed_blob( //construct blob - TBLOB *tessblob //blob to convert - ); -OUTLINE *make_ed_outline( //constructoutline - FRAGMENT_LIST *list //list of fragments - ); -void 
register_outline( //add fragments - TESSLINE *outline, //tess format - FRAGMENT_LIST *list //list to add to - ); } // namespace tesseract