Skip to content

Commit

Permalink
add test-tesseract-cpp
Browse files Browse the repository at this point in the history
  • Loading branch information
fancyerii committed Mar 12, 2019
1 parent 6f298b1 commit 313c59a
Show file tree
Hide file tree
Showing 10 changed files with 232 additions and 0 deletions.
2 changes: 2 additions & 0 deletions test-tesseract-cpp/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
nbproject
build
20 changes: 20 additions & 0 deletions test-tesseract-cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# Build script for the Tesseract C++ API sample programs.
cmake_minimum_required (VERSION 3.1)
set (CMAKE_CXX_STANDARD 11)
project (test-tesseract)

# Declares one sample executable built from a single source file and links it
# with the `lept` and `tesseract` libraries.
function (add_tesseract_sample sample_target sample_source)
  add_executable (${sample_target} ${sample_source})
  target_link_libraries (${sample_target} lept tesseract)
endfunction ()

add_tesseract_sample (Basic basic.cpp)
add_tesseract_sample (GetComponent getcomp.cpp)
add_tesseract_sample (WordIter worditer.cpp)
add_tesseract_sample (BasicCn basiccn.cpp)
add_tesseract_sample (Osd osd.cpp)
add_tesseract_sample (ClassIter clsiter.cpp)
30 changes: 30 additions & 0 deletions test-tesseract-cpp/basic.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
#include <tesseract/baseapi.h>
#include <leptonica/allheaders.h>

int main()
{
char *outText;

tesseract::TessBaseAPI *api = new tesseract::TessBaseAPI();
// Initialize tesseract-ocr with English, without specifying tessdata path
if (api->Init(NULL, "eng")) {
fprintf(stderr, "Could not initialize tesseract.\n");
exit(1);
}

// Open input image with leptonica library
Pix *image = pixRead("../test.png");
api->SetImage(image);
// Get OCR result
outText = api->GetUTF8Text();
printf("OCR output:\n%s", outText);

// Destroy used object and release memory
api->End();
delete [] outText;
pixDestroy(&image);

return 0;
}


30 changes: 30 additions & 0 deletions test-tesseract-cpp/basiccn.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
#include <tesseract/baseapi.h>
#include <leptonica/allheaders.h>

int main()
{
char *outText;

tesseract::TessBaseAPI *api = new tesseract::TessBaseAPI();
// Initialize tesseract-ocr with English, without specifying tessdata path
if (api->Init(NULL, "chi_sim")) {
fprintf(stderr, "Could not initialize tesseract.\n");
exit(1);
}

// Open input image with leptonica library
Pix *image = pixRead("../testcn.png");
api->SetImage(image);
// Get OCR result
outText = api->GetUTF8Text();
printf("OCR output:\n%s", outText);

// Destroy used object and release memory
api->End();
delete [] outText;
pixDestroy(&image);

return 0;
}


49 changes: 49 additions & 0 deletions test-tesseract-cpp/clsiter.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
#include <tesseract/baseapi.h>
#include <leptonica/allheaders.h>

int main() {

tesseract::TessBaseAPI *api = new tesseract::TessBaseAPI();
// Initialize tesseract-ocr with English, without specifying tessdata path
if (api->Init(NULL, "eng")) {
fprintf(stderr, "Could not initialize tesseract.\n");
exit(1);
}

// Open input image with leptonica library
Pix *image = pixRead("../test.png");
api->SetImage(image);
api->SetVariable("save_blob_choices", "T");
//api->SetRectangle(37, 228, 548, 31);
api->Recognize(NULL);

tesseract::ResultIterator* ri = api->GetIterator();
tesseract::PageIteratorLevel level = tesseract::RIL_SYMBOL;
if (ri != 0) {
do {
const char* symbol = ri->GetUTF8Text(level);
float conf = ri->Confidence(level);
if (symbol != 0) {
printf("symbol %s, conf: %f", symbol, conf);
bool indent = false;
tesseract::ChoiceIterator ci(*ri);
do {
if (indent) printf("\t\t ");
printf("\t- ");
const char* choice = ci.GetUTF8Text();
printf("%s conf: %f\n", choice, ci.Confidence());
indent = true;
} while (ci.Next());
}
printf("---------------------------------------------\n");
delete[] symbol;
} while ((ri->Next(level)));
}
// Destroy used object and release memory
api->End();
pixDestroy(&image);

return 0;
}


36 changes: 36 additions & 0 deletions test-tesseract-cpp/getcomp.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
#include <tesseract/baseapi.h>
#include <leptonica/allheaders.h>

int main() {

tesseract::TessBaseAPI *api = new tesseract::TessBaseAPI();
// Initialize tesseract-ocr with English, without specifying tessdata path
if (api->Init(NULL, "eng")) {
fprintf(stderr, "Could not initialize tesseract.\n");
exit(1);
}

// Open input image with leptonica library
Pix *image = pixRead("../test.png");
api->SetImage(image);

Boxa* boxes = api->GetComponentImages(tesseract::RIL_TEXTLINE, true, NULL, NULL);
printf("Found %d textline image components.\n", boxes->n);
for (int i = 0; i < boxes->n; i++) {
BOX* box = boxaGetBox(boxes, i, L_CLONE);
api->SetRectangle(box->x, box->y, box->w, box->h);
char* ocrResult = api->GetUTF8Text();
int conf = api->MeanTextConf();
fprintf(stdout, "Box[%d]: x=%d, y=%d, w=%d, h=%d, confidence: %d, text: %s",
i, box->x, box->y, box->w, box->h, conf, ocrResult);
}


// Destroy used object and release memory
api->End();
pixDestroy(&image);

return 0;
}


29 changes: 29 additions & 0 deletions test-tesseract-cpp/osd.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
#include <tesseract/baseapi.h>
#include <leptonica/allheaders.h>

int main() {
tesseract::Orientation orientation;
tesseract::WritingDirection direction;
tesseract::TextlineOrder order;
float deskew_angle;

PIX *image = pixRead("../test.png");
tesseract::TessBaseAPI *api = new tesseract::TessBaseAPI();
api->Init(NULL, "eng");
api->SetPageSegMode(tesseract::PSM_AUTO_OSD);
api->SetImage(image);
api->Recognize(0);

tesseract::PageIterator* it = api->AnalyseLayout();
it->Orientation(&orientation, &direction, &order, &deskew_angle);
printf("Orientation: %d;\nWritingDirection: %d\nTextlineOrder: %d\n" \
"Deskew angle: %.4f\n",
orientation, direction, order, deskew_angle);
api->End();
pixDestroy(&image);


return 0;
}


Binary file added test-tesseract-cpp/test.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added test-tesseract-cpp/testcn.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
36 changes: 36 additions & 0 deletions test-tesseract-cpp/worditer.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
#include <tesseract/baseapi.h>
#include <leptonica/allheaders.h>

int main() {
char *outText;

tesseract::TessBaseAPI *api = new tesseract::TessBaseAPI();
// Initialize tesseract-ocr with English, without specifying tessdata path
if (api->Init(NULL, "eng")) {
fprintf(stderr, "Could not initialize tesseract.\n");
exit(1);
}

// Open input image with leptonica library
Pix *image = pixRead("../test.png");
api->SetImage(image);
api->Recognize(0);
tesseract::ResultIterator* ri = api->GetIterator();
tesseract::PageIteratorLevel level = tesseract::RIL_WORD;
if (ri != 0) {
do {
const char* word = ri->GetUTF8Text(level);
float conf = ri->Confidence(level);
int x1, y1, x2, y2;
ri->BoundingBox(level, &x1, &y1, &x2, &y2);
printf("word: '%s'; \tconf: %.2f; BoundingBox: %d,%d,%d,%d;\n",
word, conf, x1, y1, x2, y2);
delete[] word;
} while (ri->Next(level));
}
api->End();
pixDestroy(&image);
return 0;
}


0 comments on commit 313c59a

Please sign in to comment.