tesseract  4.00.00dev
Advanced API

Functions

void tesseract::TessBaseAPI::SetImage (const unsigned char *imagedata, int width, int height, int bytes_per_pixel, int bytes_per_line)
 
void tesseract::TessBaseAPI::SetImage (Pix *pix)
 
void tesseract::TessBaseAPI::SetSourceResolution (int ppi)
 
void tesseract::TessBaseAPI::SetRectangle (int left, int top, int width, int height)
 
void tesseract::TessBaseAPI::SetThresholder (ImageThresholder *thresholder)
 
Pix * tesseract::TessBaseAPI::GetThresholdedImage ()
 
Boxa * tesseract::TessBaseAPI::GetRegions (Pixa **pixa)
 
Boxa * tesseract::TessBaseAPI::GetTextlines (const bool raw_image, const int raw_padding, Pixa **pixa, int **blockids, int **paraids)
 
Boxa * tesseract::TessBaseAPI::GetTextlines (Pixa **pixa, int **blockids)
 
Boxa * tesseract::TessBaseAPI::GetStrips (Pixa **pixa, int **blockids)
 
Boxa * tesseract::TessBaseAPI::GetWords (Pixa **pixa)
 
Boxa * tesseract::TessBaseAPI::GetConnectedComponents (Pixa **cc)
 
Boxa * tesseract::TessBaseAPI::GetComponentImages (const PageIteratorLevel level, const bool text_only, const bool raw_image, const int raw_padding, Pixa **pixa, int **blockids, int **paraids)
 
Boxa * tesseract::TessBaseAPI::GetComponentImages (const PageIteratorLevel level, const bool text_only, Pixa **pixa, int **blockids)
 
int tesseract::TessBaseAPI::GetThresholdedImageScaleFactor () const
 
void tesseract::TessBaseAPI::DumpPGM (const char *filename)
 
PageIterator * tesseract::TessBaseAPI::AnalyseLayout ()
 
PageIterator * tesseract::TessBaseAPI::AnalyseLayout (bool merge_similar_words)
 
int tesseract::TessBaseAPI::Recognize (ETEXT_DESC *monitor)
 
int tesseract::TessBaseAPI::RecognizeForChopTest (ETEXT_DESC *monitor)
 
bool tesseract::TessBaseAPI::ProcessPages (const char *filename, const char *retry_config, int timeout_millisec, TessResultRenderer *renderer)
 
bool tesseract::TessBaseAPI::ProcessPagesInternal (const char *filename, const char *retry_config, int timeout_millisec, TessResultRenderer *renderer)
 
bool tesseract::TessBaseAPI::ProcessPage (Pix *pix, int page_index, const char *filename, const char *retry_config, int timeout_millisec, TessResultRenderer *renderer)
 
ResultIterator * tesseract::TessBaseAPI::GetIterator ()
 
MutableIterator * tesseract::TessBaseAPI::GetMutableIterator ()
 
char * tesseract::TessBaseAPI::GetUTF8Text ()
 
char * tesseract::TessBaseAPI::GetHOCRText (ETEXT_DESC *monitor, int page_number)
 
char * tesseract::TessBaseAPI::GetHOCRText (int page_number)
 
char * tesseract::TessBaseAPI::GetTSVText (int page_number)
 
char * tesseract::TessBaseAPI::GetBoxText (int page_number)
 
char * tesseract::TessBaseAPI::GetUNLVText ()
 
bool tesseract::TessBaseAPI::DetectOrientationScript (int *orient_deg, float *orient_conf, const char **script_name, float *script_conf)
 
char * tesseract::TessBaseAPI::GetOsdText (int page_number)
 
int tesseract::TessBaseAPI::MeanTextConf ()
 
int * tesseract::TessBaseAPI::AllWordConfidences ()
 
bool tesseract::TessBaseAPI::AdaptToWordStr (PageSegMode mode, const char *wordstr)
 
void tesseract::TessBaseAPI::Clear ()
 
void tesseract::TessBaseAPI::End ()
 
static void tesseract::TessBaseAPI::ClearPersistentCache ()
 
int tesseract::TessBaseAPI::IsValidWord (const char *word)
 
bool tesseract::TessBaseAPI::IsValidCharacter (const char *utf8_character)
 
bool tesseract::TessBaseAPI::GetTextDirection (int *out_offset, float *out_slope)
 
void tesseract::TessBaseAPI::SetDictFunc (DictFunc f)
 
void tesseract::TessBaseAPI::SetProbabilityInContextFunc (ProbabilityInContextFunc f)
 
void tesseract::TessBaseAPI::SetFillLatticeFunc (FillLatticeFunc f)
 
bool tesseract::TessBaseAPI::DetectOS (OSResults *)
 
void tesseract::TessBaseAPI::GetFeaturesForBlob (TBLOB *blob, INT_FEATURE_STRUCT *int_features, int *num_features, int *feature_outline_index)
 
static ROW * tesseract::TessBaseAPI::FindRowForBox (BLOCK_LIST *blocks, int left, int top, int right, int bottom)
 
void tesseract::TessBaseAPI::RunAdaptiveClassifier (TBLOB *blob, int num_max_matches, int *unichar_ids, float *ratings, int *num_matches_returned)
 
const char * tesseract::TessBaseAPI::GetUnichar (int unichar_id)
 
const Dawg * tesseract::TessBaseAPI::GetDawg (int i) const
 
int tesseract::TessBaseAPI::NumDawgs () const
 
static ROW * tesseract::TessBaseAPI::MakeTessOCRRow (float baseline, float xheight, float descender, float ascender)
 
static TBLOB * tesseract::TessBaseAPI::MakeTBLOB (Pix *pix)
 
static void tesseract::TessBaseAPI::NormalizeTBLOB (TBLOB *tblob, ROW *row, bool numeric_mode)
 
Tesseract * tesseract::TessBaseAPI::tesseract () const
 
OcrEngineMode tesseract::TessBaseAPI::oem () const
 
void tesseract::TessBaseAPI::InitTruthCallback (TruthCallback *cb)
 
void tesseract::TessBaseAPI::set_min_orientation_margin (double margin)
 
void tesseract::TessBaseAPI::GetBlockTextOrientations (int **block_orientation, bool **vertical_writing)
 
BLOCK_LIST * tesseract::TessBaseAPI::FindLinesCreateBlockList ()
 
static void tesseract::TessBaseAPI::DeleteBlockList (BLOCK_LIST *block_list)
 

Detailed Description

The following methods break TesseractRect into pieces, so you can get hold of the thresholded image, get the text in different formats, get bounding boxes, confidences etc.

Function Documentation

◆ AdaptToWordStr()

bool tesseract::TessBaseAPI::AdaptToWordStr ( PageSegMode  mode,
const char *  wordstr 
)

Applies the given word to the adaptive classifier if possible. The word must be SPACE-DELIMITED UTF-8 - l i k e t h i s , so it can tell the boundaries of the graphemes. Assumes that SetImage/SetRectangle have been used to set the image to the given word. The mode arg should be PSM_SINGLE_WORD or PSM_CIRCLE_WORD, as that will be used to control layout analysis. The currently set PageSegMode is preserved. Returns false if adaption was not possible for some reason.

Definition at line 1979 of file baseapi.cpp.

1979  {
1980  int debug = 0;
1981  GetIntVariable("applybox_debug", &debug);
1982  bool success = true;
1983  PageSegMode current_psm = GetPageSegMode();
1985  SetVariable("classify_enable_learning", "0");
1986  const std::unique_ptr<const char[]> text(GetUTF8Text());
1987  if (debug) {
1988  tprintf("Trying to adapt \"%s\" to \"%s\"\n", text.get(), wordstr);
1989  }
1990  if (text != NULL) {
1991  PAGE_RES_IT it(page_res_);
1992  WERD_RES* word_res = it.word();
1993  if (word_res != NULL) {
1994  word_res->word->set_text(wordstr);
1995  } else {
1996  success = false;
1997  }
1998  // Check to see if text matches wordstr.
1999  int w = 0;
2000  int t = 0;
2001  for (t = 0; text[t] != '\0'; ++t) {
2002  if (text[t] == '\n' || text[t] == ' ')
2003  continue;
2004  while (wordstr[w] == ' ') ++w;
2005  if (text[t] != wordstr[w])
2006  break;
2007  ++w;
2008  }
2009  if (text[t] != '\0' || wordstr[w] != '\0') {
2010  // No match.
2011  delete page_res_;
2012  GenericVector<TBOX> boxes;
2016  PAGE_RES_IT pr_it(page_res_);
2017  if (pr_it.word() == NULL)
2018  success = false;
2019  else
2020  word_res = pr_it.word();
2021  } else {
2022  word_res->BestChoiceToCorrectText();
2023  }
2024  if (success) {
2025  tesseract_->EnableLearning = true;
2026  tesseract_->LearnWord(NULL, word_res);
2027  }
2028  } else {
2029  success = false;
2030  }
2031  SetPageSegMode(current_psm);
2032  return success;
2033 }
void ReSegmentByClassification(PAGE_RES *page_res)
Definition: applybox.cpp:509
BLOCK_LIST * block_list_
The page layout.
Definition: baseapi.h:876
bool SetVariable(const char *name, const char *value)
Definition: baseapi.cpp:218
#define tprintf(...)
Definition: tprintf.h:31
void BestChoiceToCorrectText()
Definition: pageres.cpp:918
void set_text(const char *new_text)
Definition: werd.h:126
PAGE_RES * page_res_
The page-level data.
Definition: baseapi.h:877
Tesseract * tesseract_
The underlying data object.
Definition: baseapi.h:870
void TidyUp(PAGE_RES *page_res)
Definition: applybox.cpp:706
const char int mode
Definition: ioapi.h:38
void SetPageSegMode(PageSegMode mode)
Definition: baseapi.cpp:482
PageSegMode GetPageSegMode() const
Definition: baseapi.cpp:489
WERD * word
Definition: pageres.h:175
PAGE_RES * SetupApplyBoxes(const GenericVector< TBOX > &boxes, BLOCK_LIST *block_list)
Definition: applybox.cpp:217
bool GetIntVariable(const char *name, int *value) const
Definition: baseapi.cpp:230
void LearnWord(const char *fontname, WERD_RES *word)
Definition: adaptmatch.cpp:244

◆ AllWordConfidences()

int * tesseract::TessBaseAPI::AllWordConfidences ( )

Returns all word confidences (between 0 and 100) in an array, terminated by -1. The calling function must delete [] after use. The number of confidences should correspond to the number of space-delimited words in GetUTF8Text.

Returns an array of all word confidences, terminated by -1.

Definition at line 1945 of file baseapi.cpp.

1945  {
1946  if (tesseract_ == NULL ||
1947  (!recognition_done_ && Recognize(NULL) < 0))
1948  return NULL;
1949  int n_word = 0;
1950  PAGE_RES_IT res_it(page_res_);
1951  for (res_it.restart_page(); res_it.word() != NULL; res_it.forward())
1952  n_word++;
1953 
1954  int* conf = new int[n_word+1];
1955  n_word = 0;
1956  for (res_it.restart_page(); res_it.word() != NULL; res_it.forward()) {
1957  WERD_RES *word = res_it.word();
1958  WERD_CHOICE* choice = word->best_choice;
1959  int w_conf = static_cast<int>(100 + 5 * choice->certainty());
1960  // This is the eq for converting Tesseract confidence to 1..100
1961  if (w_conf < 0) w_conf = 0;
1962  if (w_conf > 100) w_conf = 100;
1963  conf[n_word++] = w_conf;
1964  }
1965  conf[n_word] = -1;
1966  return conf;
1967 }
WERD_CHOICE * best_choice
Definition: pageres.h:219
PAGE_RES * page_res_
The page-level data.
Definition: baseapi.h:877
Tesseract * tesseract_
The underlying data object.
Definition: baseapi.h:870
int Recognize(ETEXT_DESC *monitor)
Definition: baseapi.cpp:807
float certainty() const
Definition: ratngs.h:328
WERD * word
Definition: pageres.h:175
bool recognition_done_
page_res_ contains recognition data.
Definition: baseapi.h:883

◆ AnalyseLayout() [1/2]

PageIterator * tesseract::TessBaseAPI::AnalyseLayout ( )

Runs page layout analysis in the mode set by SetPageSegMode. May optionally be called prior to Recognize to get access to just the page layout results. Returns an iterator to the results. If merge_similar_words is true, words are combined where suitable for use with a line recognizer. Use if you want to use AnalyseLayout to find the textlines, and then want to process textline fragments with an external line recognizer. Returns NULL on error or an empty page. The returned iterator must be deleted after use. WARNING! This class points to data held within the TessBaseAPI class, and therefore can only be used while the TessBaseAPI class still exists and has not been subjected to a call of Init, SetImage, Recognize, Clear, End, DetectOS, or anything else that changes the internal PAGE_RES.

Definition at line 787 of file baseapi.cpp.

787 { return AnalyseLayout(false); }
PageIterator * AnalyseLayout()
Definition: baseapi.cpp:787

◆ AnalyseLayout() [2/2]

PageIterator * tesseract::TessBaseAPI::AnalyseLayout ( bool  merge_similar_words)

Definition at line 789 of file baseapi.cpp.

789  {
790  if (FindLines() == 0) {
791  if (block_list_->empty())
792  return NULL; // The page was empty.
793  page_res_ = new PAGE_RES(merge_similar_words, block_list_, NULL);
794  DetectParagraphs(false);
795  return new PageIterator(
799  }
800  return NULL;
801 }
BLOCK_LIST * block_list_
The page layout.
Definition: baseapi.h:876
ImageThresholder * thresholder_
Image thresholding module.
Definition: baseapi.h:874
PAGE_RES * page_res_
The page-level data.
Definition: baseapi.h:877
TESS_LOCAL void DetectParagraphs(bool after_text_recognition)
Definition: baseapi.cpp:2580
Tesseract * tesseract_
The underlying data object.
Definition: baseapi.h:870
TESS_LOCAL int FindLines()
Definition: baseapi.cpp:2236
int GetScaledYResolution() const
Definition: thresholder.h:93

◆ Clear()

void tesseract::TessBaseAPI::Clear ( )

Free up recognition results and any stored image data, without actually freeing any recognition data that would be time-consuming to reload. Afterwards, you must call SetImage or TesseractRect before doing any Recognize or Get* operation.

Definition at line 2041 of file baseapi.cpp.

2041  {
2042  if (thresholder_ != NULL)
2043  thresholder_->Clear();
2044  ClearResults();
2045  if (tesseract_ != NULL) SetInputImage(NULL);
2046 }
ImageThresholder * thresholder_
Image thresholding module.
Definition: baseapi.h:874
Tesseract * tesseract_
The underlying data object.
Definition: baseapi.h:870
void SetInputImage(Pix *pix)
Definition: baseapi.cpp:920
virtual void Clear()
Destroy the Pix if there is one, freeing memory.
Definition: thresholder.cpp:45

◆ ClearPersistentCache()

void tesseract::TessBaseAPI::ClearPersistentCache ( )
static

Clear any library-level memory caches. There are a variety of expensive-to-load constant data structures (mostly language dictionaries) that are cached globally – surviving the Init() and End() of individual TessBaseAPI's. This function allows the clearing of these caches.

Definition at line 2090 of file baseapi.cpp.

2090  {
2092 }
void DeleteUnusedDawgs()
Definition: dawg_cache.h:43
static DawgCache * GlobalDawgCache()
Definition: dict.cpp:198

◆ DeleteBlockList()

void tesseract::TessBaseAPI::DeleteBlockList ( BLOCK_LIST *  block_list)
static

Delete a block list. This is to keep BLOCK_LIST pointer opaque and let go of including the other headers.

Definition at line 2446 of file baseapi.cpp.

2446  {
2447  delete block_list;
2448 }

◆ DetectOrientationScript()

bool tesseract::TessBaseAPI::DetectOrientationScript ( int *  orient_deg,
float *  orient_conf,
const char **  script_name,
float *  script_conf 
)

Detect the orientation of the input image and apparent script (alphabet). orient_deg is the detected clockwise rotation of the input image in degrees (0, 90, 180, 270). orient_conf is the confidence (15.0 is reasonably confident). script_name is an ASCII string, the name of the script, e.g. "Latin". script_conf is the confidence level in the script. Returns true on success and writes values to each parameter as an output.

Definition at line 1873 of file baseapi.cpp.

1875  {
1876  OSResults osr;
1877 
1878  bool osd = DetectOS(&osr);
1879  if (!osd) {
1880  return false;
1881  }
1882 
1883  int orient_id = osr.best_result.orientation_id;
1884  int script_id = osr.get_best_script(orient_id);
1885  if (orient_conf) *orient_conf = osr.best_result.oconfidence;
1886  if (orient_deg) *orient_deg = orient_id * 90; // convert quadrant to degrees
1887 
1888  if (script_name) {
1889  const char* script = osr.unicharset->get_script_from_script_id(script_id);
1890 
1891  *script_name = script;
1892  }
1893 
1894  if (script_conf) *script_conf = osr.best_result.sconfidence;
1895 
1896  return true;
1897 }
TESS_API int get_best_script(int orientation_id) const
Definition: osdetect.cpp:114
int orientation_id
Definition: osdetect.h:41
const char * get_script_from_script_id(int id) const
Definition: unicharset.h:814
UNICHARSET * unicharset
Definition: osdetect.h:78
float oconfidence
Definition: osdetect.h:44
float sconfidence
Definition: osdetect.h:43
OSBestResult best_result
Definition: osdetect.h:79
bool DetectOS(OSResults *)
Definition: baseapi.cpp:2355

◆ DetectOS()

bool tesseract::TessBaseAPI::DetectOS ( OSResults *  osr)

Estimates the Orientation And Script of the image.

Returns
true if the image was processed successfully.

Estimates the Orientation And Script of the image. Returns true if the image was processed successfully.

Definition at line 2355 of file baseapi.cpp.

2355  {
2356  if (tesseract_ == NULL)
2357  return false;
2358  ClearResults();
2359  if (tesseract_->pix_binary() == NULL &&
2361  return false;
2362  }
2363  if (input_file_ == NULL)
2364  input_file_ = new STRING(kInputFile);
2366 }
STRING * input_file_
Name used by training code.
Definition: baseapi.h:878
const char * kInputFile
Definition: baseapi.cpp:96
Pix * pix_binary() const
Tesseract * tesseract_
The underlying data object.
Definition: baseapi.h:870
Definition: strngs.h:45
virtual TESS_LOCAL bool Threshold(Pix **pix)
Definition: baseapi.cpp:2192
int orientation_and_script_detection(STRING &filename, OSResults *osr, tesseract::Tesseract *tess)
Definition: osdetect.cpp:191

◆ DumpPGM()

void tesseract::TessBaseAPI::DumpPGM ( const char *  filename)

Dump the internal binary image to a PGM file.

Deprecated:
Use GetThresholdedImage and write the image using pixWrite instead if possible.

Dump the internal binary image to a PGM file.

Definition at line 754 of file baseapi.cpp.

754  {
755  if (tesseract_ == NULL)
756  return;
757  FILE *fp = fopen(filename, "wb");
758  Pix* pix = tesseract_->pix_binary();
759  int width = pixGetWidth(pix);
760  int height = pixGetHeight(pix);
761  l_uint32* data = pixGetData(pix);
762  fprintf(fp, "P5 %d %d 255\n", width, height);
763  for (int y = 0; y < height; ++y, data += pixGetWpl(pix)) {
764  for (int x = 0; x < width; ++x) {
765  uint8_t b = GET_DATA_BIT(data, x) ? 0 : 255;
766  fwrite(&b, 1, 1, fp);
767  }
768  }
769  fclose(fp);
770 }
Pix * pix_binary() const
Tesseract * tesseract_
The underlying data object.
Definition: baseapi.h:870
const char * filename
Definition: ioapi.h:38

◆ End()

void tesseract::TessBaseAPI::End ( )

Close down tesseract and free up all memory. End() is equivalent to destructing and reconstructing your TessBaseAPI. Once End() has been used, none of the other API functions may be used other than Init and anything declared above it in the class definition.

Definition at line 2054 of file baseapi.cpp.

2054  {
2055  Clear();
2056  delete thresholder_;
2057  thresholder_ = NULL;
2058  delete page_res_;
2059  page_res_ = NULL;
2060  delete block_list_;
2061  block_list_ = NULL;
2062  if (paragraph_models_ != NULL) {
2064  delete paragraph_models_;
2065  paragraph_models_ = NULL;
2066  }
2067  if (osd_tesseract_ == tesseract_)
2068  osd_tesseract_ = nullptr;
2069  delete tesseract_;
2070  tesseract_ = nullptr;
2071  delete osd_tesseract_;
2072  osd_tesseract_ = NULL;
2073  delete equ_detect_;
2074  equ_detect_ = NULL;
2075  delete input_file_;
2076  input_file_ = NULL;
2077  delete output_file_;
2078  output_file_ = NULL;
2079  delete datapath_;
2080  datapath_ = NULL;
2081  delete language_;
2082  language_ = NULL;
2083 }
STRING * input_file_
Name used by training code.
Definition: baseapi.h:878
STRING * output_file_
Name used by debug code.
Definition: baseapi.h:879
BLOCK_LIST * block_list_
The page layout.
Definition: baseapi.h:876
ImageThresholder * thresholder_
Image thresholding module.
Definition: baseapi.h:874
PAGE_RES * page_res_
The page-level data.
Definition: baseapi.h:877
Tesseract * tesseract_
The underlying data object.
Definition: baseapi.h:870
EquationDetect * equ_detect_
The equation detector.
Definition: baseapi.h:872
STRING * language_
Last initialized language.
Definition: baseapi.h:881
Tesseract * osd_tesseract_
For orientation & script detection.
Definition: baseapi.h:871
STRING * datapath_
Current location of tessdata.
Definition: baseapi.h:880
void delete_data_pointers()
GenericVector< ParagraphModel * > * paragraph_models_
Definition: baseapi.h:875

◆ FindLinesCreateBlockList()

BLOCK_LIST * tesseract::TessBaseAPI::FindLinesCreateBlockList ( )

Find lines from the image making the BLOCK_LIST.

Definition at line 2434 of file baseapi.cpp.

2434  {
2435  FindLines();
2436  BLOCK_LIST* result = block_list_;
2437  block_list_ = NULL;
2438  return result;
2439 }
BLOCK_LIST * block_list_
The page layout.
Definition: baseapi.h:876
TESS_LOCAL int FindLines()
Definition: baseapi.cpp:2236

◆ FindRowForBox()

ROW * tesseract::TessBaseAPI::FindRowForBox ( BLOCK_LIST *  blocks,
int  left,
int  top,
int  right,
int  bottom 
)
static

This method returns the row to which a box of specified dimensions would belong. If no good match is found, it returns NULL.

Definition at line 2746 of file baseapi.cpp.

2747  {
2748  TBOX box(left, bottom, right, top);
2749  BLOCK_IT b_it(blocks);
2750  for (b_it.mark_cycle_pt(); !b_it.cycled_list(); b_it.forward()) {
2751  BLOCK* block = b_it.data();
2752  if (!box.major_overlap(block->bounding_box()))
2753  continue;
2754  ROW_IT r_it(block->row_list());
2755  for (r_it.mark_cycle_pt(); !r_it.cycled_list(); r_it.forward()) {
2756  ROW* row = r_it.data();
2757  if (!box.major_overlap(row->bounding_box()))
2758  continue;
2759  WERD_IT w_it(row->word_list());
2760  for (w_it.mark_cycle_pt(); !w_it.cycled_list(); w_it.forward()) {
2761  WERD* word = w_it.data();
2762  if (box.major_overlap(word->bounding_box()))
2763  return row;
2764  }
2765  }
2766  }
2767  return NULL;
2768 }
void bounding_box(ICOORD &bottom_left, ICOORD &top_right) const
get box
Definition: pdblock.h:59
WERD_LIST * word_list()
Definition: ocrrow.h:52
TBOX bounding_box() const
Definition: werd.cpp:160
TBOX bounding_box() const
Definition: ocrrow.h:85
Definition: rect.h:30
Definition: werd.h:60
Definition: ocrrow.h:32
ROW_LIST * row_list()
get rows
Definition: ocrblock.h:120
Definition: ocrblock.h:30

◆ GetBlockTextOrientations()

void tesseract::TessBaseAPI::GetBlockTextOrientations ( int **  block_orientation,
bool **  vertical_writing 
)

Return text orientation of each block as determined by an earlier run of layout analysis.

Return text orientation of each block as determined in an earlier page layout analysis operation. Orientation is returned as the number of ccw 90-degree rotations (in [0..3]) required to make the text in the block upright (readable). Note that this may not necessarily be the block orientation preferred for recognition (such as the case of vertical CJK text).

Also returns whether the text in the block is believed to have vertical writing direction (when in an upright page orientation).

The returned array is of length equal to the number of text blocks, which may be less than the total number of blocks. The ordering is intended to be consistent with GetTextLines().

Definition at line 2386 of file baseapi.cpp.

2387  {
2388  delete[] *block_orientation;
2389  *block_orientation = NULL;
2390  delete[] *vertical_writing;
2391  *vertical_writing = NULL;
2392  BLOCK_IT block_it(block_list_);
2393 
2394  block_it.move_to_first();
2395  int num_blocks = 0;
2396  for (block_it.mark_cycle_pt(); !block_it.cycled_list(); block_it.forward()) {
2397  if (!block_it.data()->poly_block()->IsText()) {
2398  continue;
2399  }
2400  ++num_blocks;
2401  }
2402  if (!num_blocks) {
2403  tprintf("WARNING: Found no blocks\n");
2404  return;
2405  }
2406  *block_orientation = new int[num_blocks];
2407  *vertical_writing = new bool[num_blocks];
2408  block_it.move_to_first();
2409  int i = 0;
2410  for (block_it.mark_cycle_pt(); !block_it.cycled_list();
2411  block_it.forward()) {
2412  if (!block_it.data()->poly_block()->IsText()) {
2413  continue;
2414  }
2415  FCOORD re_rotation = block_it.data()->re_rotation();
2416  float re_theta = re_rotation.angle();
2417  FCOORD classify_rotation = block_it.data()->classify_rotation();
2418  float classify_theta = classify_rotation.angle();
2419  double rot_theta = - (re_theta - classify_theta) * 2.0 / PI;
2420  if (rot_theta < 0) rot_theta += 4;
2421  int num_rotations = static_cast<int>(rot_theta + 0.5);
2422  (*block_orientation)[i] = num_rotations;
2423  // The classify_rotation is non-zero only if the text has vertical
2424  // writing direction.
2425  (*vertical_writing)[i] = classify_rotation.y() != 0.0f;
2426  ++i;
2427  }
2428 }
Definition: points.h:189
BLOCK_LIST * block_list_
The page layout.
Definition: baseapi.h:876
float angle() const
find angle
Definition: points.h:249
#define tprintf(...)
Definition: tprintf.h:31
#define PI
Definition: const.h:19
float y() const
Definition: points.h:212

◆ GetBoxText()

char * tesseract::TessBaseAPI::GetBoxText ( int  page_number)

The recognized text is returned as a char* which is coded in the same format as a box file used in training. Constructs coordinates in the original image - not just the rectangle. page_number is a 0-based page index that will appear in the box file. Returned string must be freed with the delete [] operator.

The recognized text is returned as a char* which is coded as a UTF8 box file. page_number is a 0-based page index that will appear in the box file. Returned string must be freed with the delete [] operator.

Definition at line 1709 of file baseapi.cpp.

1709  {
1710  if (tesseract_ == NULL ||
1711  (!recognition_done_ && Recognize(NULL) < 0))
1712  return NULL;
1713  int blob_count;
1714  int utf8_length = TextLength(&blob_count);
1715  int total_length = blob_count * kBytesPerBoxFileLine + utf8_length +
1717  char* result = new char[total_length];
1718  result[0] = '\0';
1719  int output_length = 0;
1720  LTRResultIterator* it = GetLTRIterator();
1721  do {
1722  int left, top, right, bottom;
1723  if (it->BoundingBox(RIL_SYMBOL, &left, &top, &right, &bottom)) {
1724  const std::unique_ptr</*non-const*/ char[]> text(it->GetUTF8Text(RIL_SYMBOL));
1725  // Tesseract uses space for recognition failure. Fix to a reject
1726  // character, kTesseractReject so we don't create illegal box files.
1727  for (int i = 0; text[i] != '\0'; ++i) {
1728  if (text[i] == ' ')
1729  text[i] = kTesseractReject;
1730  }
1731  snprintf(result + output_length, total_length - output_length,
1732  "%s %d %d %d %d %d\n",
1733  text.get(), left, image_height_ - bottom,
1734  right, image_height_ - top, page_number);
1735  output_length += strlen(result + output_length);
1736  // Just in case...
1737  if (output_length + kMaxBytesPerLine > total_length)
1738  break;
1739  }
1740  } while (it->Next(RIL_SYMBOL));
1741  delete it;
1742  return result;
1743 }
TESS_LOCAL LTRResultIterator * GetLTRIterator()
Definition: baseapi.cpp:1219
const int kMaxBytesPerLine
Definition: baseapi.cpp:1700
Tesseract * tesseract_
The underlying data object.
Definition: baseapi.h:870
const int kBytesPerBoxFileLine
Definition: baseapi.cpp:1691
int Recognize(ETEXT_DESC *monitor)
Definition: baseapi.cpp:807
TESS_LOCAL int TextLength(int *blob_count)
Definition: baseapi.cpp:2325
const char kTesseractReject
Definition: baseapi.cpp:87
bool recognition_done_
page_res_ contains recognition data.
Definition: baseapi.h:883

◆ GetComponentImages() [1/2]

Boxa * tesseract::TessBaseAPI::GetComponentImages ( const PageIteratorLevel  level,
const bool  text_only,
const bool  raw_image,
const int  raw_padding,
Pixa **  pixa,
int **  blockids,
int **  paraids 
)

Get the given level kind of components (block, textline, word etc.) as a leptonica-style Boxa, Pixa pair, in reading order. Can be called before or after Recognize. If blockids is not NULL, the block-id of each component is also returned as an array of one element per component. delete [] after use. If paraids is not NULL, the paragraph-id of each component within its block is also returned as an array of one element per component. delete [] after use. If raw_image is true, then portions of the original image are extracted instead of the thresholded image and padded with raw_padding. If text_only is true, then only text components are returned.

Get the given level kind of components (block, textline, word etc.) as a leptonica-style Boxa, Pixa pair, in reading order. Can be called before or after Recognize. If blockids is not NULL, the block-id of each component is also returned as an array of one element per component. delete [] after use. If text_only is true, then only text components are returned.

Definition at line 664 of file baseapi.cpp.

668  {
669  PageIterator* page_it = GetIterator();
670  if (page_it == NULL)
671  page_it = AnalyseLayout();
672  if (page_it == NULL)
673  return NULL; // Failed.
674 
675  // Count the components to get a size for the arrays.
676  int component_count = 0;
677  int left, top, right, bottom;
678 
679  TessResultCallback<bool>* get_bbox = NULL;
680  if (raw_image) {
681  // Get bounding box in original raw image with padding.
683  level, raw_padding,
684  &left, &top, &right, &bottom);
685  } else {
686  // Get bounding box from binarized imaged. Note that this could be
687  // differently scaled from the original image.
688  get_bbox = NewPermanentTessCallback(page_it,
690  level, &left, &top, &right, &bottom);
691  }
692  do {
693  if (get_bbox->Run() &&
694  (!text_only || PTIsTextType(page_it->BlockType())))
695  ++component_count;
696  } while (page_it->Next(level));
697 
698  Boxa* boxa = boxaCreate(component_count);
699  if (pixa != NULL)
700  *pixa = pixaCreate(component_count);
701  if (blockids != NULL)
702  *blockids = new int[component_count];
703  if (paraids != NULL)
704  *paraids = new int[component_count];
705 
706  int blockid = 0;
707  int paraid = 0;
708  int component_index = 0;
709  page_it->Begin();
710  do {
711  if (get_bbox->Run() &&
712  (!text_only || PTIsTextType(page_it->BlockType()))) {
713  Box* lbox = boxCreate(left, top, right - left, bottom - top);
714  boxaAddBox(boxa, lbox, L_INSERT);
715  if (pixa != NULL) {
716  Pix* pix = NULL;
717  if (raw_image) {
718  pix = page_it->GetImage(level, raw_padding, GetInputImage(), &left,
719  &top);
720  } else {
721  pix = page_it->GetBinaryImage(level);
722  }
723  pixaAddPix(*pixa, pix, L_INSERT);
724  pixaAddBox(*pixa, lbox, L_CLONE);
725  }
726  if (paraids != NULL) {
727  (*paraids)[component_index] = paraid;
728  if (page_it->IsAtFinalElement(RIL_PARA, level))
729  ++paraid;
730  }
731  if (blockids != NULL) {
732  (*blockids)[component_index] = blockid;
733  if (page_it->IsAtFinalElement(RIL_BLOCK, level)) {
734  ++blockid;
735  paraid = 0;
736  }
737  }
738  ++component_index;
739  }
740  } while (page_it->Next(level));
741  delete page_it;
742  delete get_bbox;
743  return boxa;
744 }
_ConstTessMemberResultCallback_0_0< false, R, T1 >::base * NewPermanentTessCallback(const T1 *obj, R(T2::*member)() const)
Definition: tesscallback.h:116
PageIterator * AnalyseLayout()
Definition: baseapi.cpp:787
ResultIterator * GetIterator()
Definition: baseapi.cpp:1236
virtual R Run()=0
bool PTIsTextType(PolyBlockType type)
Definition: publictypes.h:70
bool BoundingBoxInternal(PageIteratorLevel level, int *left, int *top, int *right, int *bottom) const
bool BoundingBox(PageIteratorLevel level, int *left, int *top, int *right, int *bottom) const

◆ GetComponentImages() [2/2]

Boxa* tesseract::TessBaseAPI::GetComponentImages ( const PageIteratorLevel  level,
const bool  text_only,
Pixa **  pixa,
int **  blockids 
)
inline

Definition at line 465 of file baseapi.h.

467  {
468  return GetComponentImages(level, text_only, false, 0, pixa, blockids, NULL);
469  }
Boxa * GetComponentImages(const PageIteratorLevel level, const bool text_only, const bool raw_image, const int raw_padding, Pixa **pixa, int **blockids, int **paraids)
Definition: baseapi.cpp:664

◆ GetConnectedComponents()

Boxa * tesseract::TessBaseAPI::GetConnectedComponents ( Pixa **  pixa)

Gets the individual connected (text) components (created after page segmentation step, but before recognition) as a leptonica-style Boxa, Pixa pair, in reading order. Can be called before or after Recognize. Note: the caller is responsible for calling boxaDestroy() on the returned Boxa array and pixaDestroy() on cc array.

Gets the individual connected (text) components (created after page segmentation step, but before recognition) as a leptonica-style Boxa, Pixa pair, in reading order. Can be called before or after Recognize.

Definition at line 652 of file baseapi.cpp.

652  {
653  return GetComponentImages(RIL_SYMBOL, true, pixa, NULL);
654 }
Boxa * GetComponentImages(const PageIteratorLevel level, const bool text_only, const bool raw_image, const int raw_padding, Pixa **pixa, int **blockids, int **paraids)
Definition: baseapi.cpp:664

◆ GetDawg()

const Dawg * tesseract::TessBaseAPI::GetDawg ( int  i) const

Return the pointer to the i-th dawg loaded into tesseract_ object.

Definition at line 2799 of file baseapi.cpp.

2799  {
2800  if (tesseract_ == NULL || i >= NumDawgs()) return NULL;
2801  return tesseract_->getDict().GetDawg(i);
2802 }
Dict & getDict()
Definition: classify.h:65
int NumDawgs() const
Definition: baseapi.cpp:2805
Tesseract * tesseract_
The underlying data object.
Definition: baseapi.h:870
const Dawg * GetDawg(int index) const
Return i-th dawg pointer recorded in the dawgs_ vector.
Definition: dict.h:414

◆ GetFeaturesForBlob()

void tesseract::TessBaseAPI::GetFeaturesForBlob ( TBLOB *  blob,
INT_FEATURE_STRUCT *  int_features,
int *  num_features,
int *  feature_outline_index 
)

This method returns the features associated with the input blob.

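This method returns the features associated with the input blob. The extracted features are copied into int_features and their count is stored in *num_features; if feature extraction fails (no features, or more than MAX_NUM_INT_FEATURES), *num_features is set to 0 and nothing is copied. If feature_outline_index is not NULL, it is filled with the index of the outline that each feature belongs to.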

Definition at line 2718 of file baseapi.cpp.

2721  {
2722  GenericVector<int> outline_counts;
2725  INT_FX_RESULT_STRUCT fx_info;
2726  tesseract_->ExtractFeatures(*blob, false, &bl_features,
2727  &cn_features, &fx_info, &outline_counts);
2728  if (cn_features.empty() || cn_features.size() > MAX_NUM_INT_FEATURES) {
2729  *num_features = 0;
2730  return; // Feature extraction failed.
2731  }
2732  *num_features = cn_features.size();
2733  memcpy(int_features, &cn_features[0], *num_features * sizeof(cn_features[0]));
2734  // TODO(rays) Pass outline_counts back and simplify the calling code.
2735  if (feature_outline_index != NULL) {
2736  int f = 0;
2737  for (int i = 0; i < outline_counts.size(); ++i) {
2738  while (f < outline_counts[i])
2739  feature_outline_index[f++] = i;
2740  }
2741  }
2742 }
bool empty() const
Definition: genericvector.h:90
int size() const
Definition: genericvector.h:72
Tesseract * tesseract_
The underlying data object.
Definition: baseapi.h:870
static void ExtractFeatures(const TBLOB &blob, bool nonlinear_norm, GenericVector< INT_FEATURE_STRUCT > *bl_features, GenericVector< INT_FEATURE_STRUCT > *cn_features, INT_FX_RESULT_STRUCT *results, GenericVector< int > *outline_cn_counts)
Definition: intfx.cpp:445
#define MAX_NUM_INT_FEATURES
Definition: intproto.h:132

◆ GetHOCRText() [1/2]

char * tesseract::TessBaseAPI::GetHOCRText ( ETEXT_DESC *  monitor,
int  page_number 
)

Make an HTML-formatted string with hOCR markup from the internal data structures. page_number is 0-based but will appear in the output as 1-based. monitor can be used to cancel the recognition and to receive progress callbacks. The returned string must be freed with the delete [] operator.

Make an HTML-formatted string with hOCR markup from the internal data structures. page_number is 0-based but will appear in the output as 1-based. The image name (input_file_) can be set by SetInputName before calling GetHOCRText. STL removed from original patch submission and refactored by rays. The returned string must be freed with the delete [] operator.

Definition at line 1411 of file baseapi.cpp.

1411  {
1412  if (tesseract_ == NULL || (page_res_ == NULL && Recognize(monitor) < 0))
1413  return NULL;
1414 
1415  int lcnt = 1, bcnt = 1, pcnt = 1, wcnt = 1;
1416  int page_id = page_number + 1; // hOCR uses 1-based page numbers.
1417  bool para_is_ltr = true; // Default direction is LTR
1418  const char* paragraph_lang = NULL;
1419  bool font_info = false;
1420  GetBoolVariable("hocr_font_info", &font_info);
1421 
1422  STRING hocr_str("");
1423 
1424  if (input_file_ == NULL)
1425  SetInputName(NULL);
1426 
1427 #ifdef _WIN32
1428  // convert input name from ANSI encoding to utf-8
1429  int str16_len =
1430  MultiByteToWideChar(CP_ACP, 0, input_file_->string(), -1, NULL, 0);
1431  wchar_t *uni16_str = new WCHAR[str16_len];
1432  str16_len = MultiByteToWideChar(CP_ACP, 0, input_file_->string(), -1,
1433  uni16_str, str16_len);
1434  int utf8_len = WideCharToMultiByte(CP_UTF8, 0, uni16_str, str16_len, NULL, 0,
1435  NULL, NULL);
1436  char *utf8_str = new char[utf8_len];
1437  WideCharToMultiByte(CP_UTF8, 0, uni16_str, str16_len, utf8_str,
1438  utf8_len, NULL, NULL);
1439  *input_file_ = utf8_str;
1440  delete[] uni16_str;
1441  delete[] utf8_str;
1442 #endif
1443 
1444  hocr_str += " <div class='ocr_page'";
1445  AddIdTohOCR(&hocr_str, "page", page_id, -1);
1446  hocr_str += " title='image \"";
1447  if (input_file_) {
1448  hocr_str += HOcrEscape(input_file_->string());
1449  } else {
1450  hocr_str += "unknown";
1451  }
1452  hocr_str.add_str_int("\"; bbox ", rect_left_);
1453  hocr_str.add_str_int(" ", rect_top_);
1454  hocr_str.add_str_int(" ", rect_width_);
1455  hocr_str.add_str_int(" ", rect_height_);
1456  hocr_str.add_str_int("; ppageno ", page_number);
1457  hocr_str += "'>\n";
1458 
1459  ResultIterator *res_it = GetIterator();
1460  while (!res_it->Empty(RIL_BLOCK)) {
1461  if (res_it->Empty(RIL_WORD)) {
1462  res_it->Next(RIL_WORD);
1463  continue;
1464  }
1465 
1466  // Open any new block/paragraph/textline.
1467  if (res_it->IsAtBeginningOf(RIL_BLOCK)) {
1468  para_is_ltr = true; // reset to default direction
1469  hocr_str += " <div class='ocr_carea'";
1470  AddIdTohOCR(&hocr_str, "block", page_id, bcnt);
1471  AddBoxTohOCR(res_it, RIL_BLOCK, &hocr_str);
1472  }
1473  if (res_it->IsAtBeginningOf(RIL_PARA)) {
1474  hocr_str += "\n <p class='ocr_par'";
1475  para_is_ltr = res_it->ParagraphIsLtr();
1476  if (!para_is_ltr) {
1477  hocr_str += " dir='rtl'";
1478  }
1479  AddIdTohOCR(&hocr_str, "par", page_id, pcnt);
1480  paragraph_lang = res_it->WordRecognitionLanguage();
1481  if (paragraph_lang) {
1482  hocr_str += " lang='";
1483  hocr_str += paragraph_lang;
1484  hocr_str += "'";
1485  }
1486  AddBoxTohOCR(res_it, RIL_PARA, &hocr_str);
1487  }
1488  if (res_it->IsAtBeginningOf(RIL_TEXTLINE)) {
1489  hocr_str += "\n <span class='ocr_line'";
1490  AddIdTohOCR(&hocr_str, "line", page_id, lcnt);
1491  AddBoxTohOCR(res_it, RIL_TEXTLINE, &hocr_str);
1492  }
1493 
1494  // Now, process the word...
1495  hocr_str += "<span class='ocrx_word'";
1496  AddIdTohOCR(&hocr_str, "word", page_id, wcnt);
1497  int left, top, right, bottom;
1498  bool bold, italic, underlined, monospace, serif, smallcaps;
1499  int pointsize, font_id;
1500  const char *font_name;
1501  res_it->BoundingBox(RIL_WORD, &left, &top, &right, &bottom);
1502  font_name = res_it->WordFontAttributes(&bold, &italic, &underlined,
1503  &monospace, &serif, &smallcaps,
1504  &pointsize, &font_id);
1505  hocr_str.add_str_int(" title='bbox ", left);
1506  hocr_str.add_str_int(" ", top);
1507  hocr_str.add_str_int(" ", right);
1508  hocr_str.add_str_int(" ", bottom);
1509  hocr_str.add_str_int("; x_wconf ", res_it->Confidence(RIL_WORD));
1510  if (font_info) {
1511  if (font_name) {
1512  hocr_str += "; x_font ";
1513  hocr_str += HOcrEscape(font_name);
1514  }
1515  hocr_str.add_str_int("; x_fsize ", pointsize);
1516  }
1517  hocr_str += "'";
1518  const char* lang = res_it->WordRecognitionLanguage();
1519  if (lang && (!paragraph_lang || strcmp(lang, paragraph_lang))) {
1520  hocr_str += " lang='";
1521  hocr_str += lang;
1522  hocr_str += "'";
1523  }
1524  switch (res_it->WordDirection()) {
1525  // Only emit direction if different from current paragraph direction
1526  case DIR_LEFT_TO_RIGHT:
1527  if (!para_is_ltr) hocr_str += " dir='ltr'";
1528  break;
1529  case DIR_RIGHT_TO_LEFT:
1530  if (para_is_ltr) hocr_str += " dir='rtl'";
1531  break;
1532  case DIR_MIX:
1533  case DIR_NEUTRAL:
1534  default: // Do nothing.
1535  break;
1536  }
1537  hocr_str += ">";
1538  bool last_word_in_line = res_it->IsAtFinalElement(RIL_TEXTLINE, RIL_WORD);
1539  bool last_word_in_para = res_it->IsAtFinalElement(RIL_PARA, RIL_WORD);
1540  bool last_word_in_block = res_it->IsAtFinalElement(RIL_BLOCK, RIL_WORD);
1541  if (bold) hocr_str += "<strong>";
1542  if (italic) hocr_str += "<em>";
1543  do {
1544  const std::unique_ptr<const char[]> grapheme(res_it->GetUTF8Text(RIL_SYMBOL));
1545  if (grapheme && grapheme[0] != 0) {
1546  hocr_str += HOcrEscape(grapheme.get());
1547  }
1548  res_it->Next(RIL_SYMBOL);
1549  } while (!res_it->Empty(RIL_BLOCK) && !res_it->IsAtBeginningOf(RIL_WORD));
1550  if (italic) hocr_str += "</em>";
1551  if (bold) hocr_str += "</strong>";
1552  hocr_str += "</span> ";
1553  wcnt++;
1554  // Close any ending block/paragraph/textline.
1555  if (last_word_in_line) {
1556  hocr_str += "\n </span>";
1557  lcnt++;
1558  }
1559  if (last_word_in_para) {
1560  hocr_str += "\n </p>\n";
1561  pcnt++;
1562  para_is_ltr = true; // back to default direction
1563  }
1564  if (last_word_in_block) {
1565  hocr_str += " </div>\n";
1566  bcnt++;
1567  }
1568  }
1569  hocr_str += " </div>\n";
1570 
1571  char *ret = new char[hocr_str.length() + 1];
1572  strcpy(ret, hocr_str.string());
1573  delete res_it;
1574  return ret;
1575 }
STRING * input_file_
Name used by training code.
Definition: baseapi.h:878
void add_str_int(const char *str, int number)
Definition: strngs.cpp:381
const char * string() const
Definition: strngs.cpp:198
PAGE_RES * page_res_
The page-level data.
Definition: baseapi.h:877
Tesseract * tesseract_
The underlying data object.
Definition: baseapi.h:870
bool GetBoolVariable(const char *name, bool *value) const
Definition: baseapi.cpp:238
int Recognize(ETEXT_DESC *monitor)
Definition: baseapi.cpp:807
Definition: strngs.h:45
ResultIterator * GetIterator()
Definition: baseapi.cpp:1236
STRING HOcrEscape(const char *text)
Definition: baseapi.cpp:2810
void SetInputName(const char *name)
Definition: baseapi.cpp:203

◆ GetHOCRText() [2/2]

char * tesseract::TessBaseAPI::GetHOCRText ( int  page_number)

Make a HTML-formatted string with hOCR markup from the internal data structures. page_number is 0-based but will appear in the output as 1-based. Returned string must be freed with the delete [] operator.

Make an HTML-formatted string with hOCR markup from the internal data structures. page_number is 0-based but will appear in the output as 1-based. The image name (input_file_) can be set by SetInputName before calling GetHOCRText. STL removed from original patch submission and refactored by rays. The returned string must be freed with the delete [] operator.

Definition at line 1398 of file baseapi.cpp.

1398  {
1399  return GetHOCRText(NULL, page_number);
1400 }
char * GetHOCRText(ETEXT_DESC *monitor, int page_number)
Definition: baseapi.cpp:1411

◆ GetIterator()

ResultIterator * tesseract::TessBaseAPI::GetIterator ( )

Get a reading-order iterator to the results of LayoutAnalysis and/or Recognize. The returned iterator must be deleted after use. WARNING! This class points to data held within the TessBaseAPI class, and therefore can only be used while the TessBaseAPI class still exists and has not been subjected to a call of Init, SetImage, Recognize, Clear, End, DetectOS, or anything else that changes the internal PAGE_RES.

Definition at line 1236 of file baseapi.cpp.

1236  {
1237  if (tesseract_ == NULL || page_res_ == NULL)
1238  return NULL;
1239  return ResultIterator::StartOfParagraph(LTRResultIterator(
1243 }
ImageThresholder * thresholder_
Image thresholding module.
Definition: baseapi.h:874
PAGE_RES * page_res_
The page-level data.
Definition: baseapi.h:877
Tesseract * tesseract_
The underlying data object.
Definition: baseapi.h:870
static ResultIterator * StartOfParagraph(const LTRResultIterator &resit)
int GetScaledYResolution() const
Definition: thresholder.h:93

◆ GetMutableIterator()

MutableIterator * tesseract::TessBaseAPI::GetMutableIterator ( )

Get a mutable iterator to the results of LayoutAnalysis and/or Recognize. The returned iterator must be deleted after use. WARNING! This class points to data held within the TessBaseAPI class, and therefore can only be used while the TessBaseAPI class still exists and has not been subjected to a call of Init, SetImage, Recognize, Clear, End, DetectOS, or anything else that changes the internal PAGE_RES.

Definition at line 1253 of file baseapi.cpp.

1253  {
1254  if (tesseract_ == NULL || page_res_ == NULL)
1255  return NULL;
1256  return new MutableIterator(page_res_, tesseract_,
1260 }
ImageThresholder * thresholder_
Image thresholding module.
Definition: baseapi.h:874
PAGE_RES * page_res_
The page-level data.
Definition: baseapi.h:877
Tesseract * tesseract_
The underlying data object.
Definition: baseapi.h:870
int GetScaledYResolution() const
Definition: thresholder.h:93

◆ GetOsdText()

char * tesseract::TessBaseAPI::GetOsdText ( int  page_number)

The recognized text is returned as a char* which is coded as UTF8 and must be freed with the delete [] operator. page_number is a 0-based page index that will appear in the osd file.

Definition at line 1904 of file baseapi.cpp.

1904  {
1905  int orient_deg;
1906  float orient_conf;
1907  const char* script_name;
1908  float script_conf;
1909 
1910  if (!DetectOrientationScript(&orient_deg, &orient_conf, &script_name,
1911  &script_conf))
1912  return NULL;
1913 
1914  // clockwise rotation needed to make the page upright
1915  int rotate = OrientationIdToValue(orient_deg / 90);
1916 
1917  const int kOsdBufsize = 255;
1918  char* osd_buf = new char[kOsdBufsize];
1919  snprintf(osd_buf, kOsdBufsize,
1920  "Page number: %d\n"
1921  "Orientation in degrees: %d\n"
1922  "Rotate: %d\n"
1923  "Orientation confidence: %.2f\n"
1924  "Script: %s\n"
1925  "Script confidence: %.2f\n",
1926  page_number, orient_deg, rotate, orient_conf, script_name,
1927  script_conf);
1928 
1929  return osd_buf;
1930 }
bool DetectOrientationScript(int *orient_deg, float *orient_conf, const char **script_name, float *script_conf)
Definition: baseapi.cpp:1873
int OrientationIdToValue(const int &id)
Definition: osdetect.cpp:565

◆ GetRegions()

Boxa * tesseract::TessBaseAPI::GetRegions ( Pixa **  pixa)

Get the result of page layout analysis as a leptonica-style Boxa, Pixa pair, in reading order. Can be called before or after Recognize.

Definition at line 607 of file baseapi.cpp.

607  {
608  return GetComponentImages(RIL_BLOCK, false, pixa, NULL);
609 }
Boxa * GetComponentImages(const PageIteratorLevel level, const bool text_only, const bool raw_image, const int raw_padding, Pixa **pixa, int **blockids, int **paraids)
Definition: baseapi.cpp:664

◆ GetStrips()

Boxa * tesseract::TessBaseAPI::GetStrips ( Pixa **  pixa,
int **  blockids 
)

Get textlines and strips of image regions as a leptonica-style Boxa, Pixa pair, in reading order. Enables downstream handling of non-rectangular regions. Can be called before or after Recognize. If blockids is not NULL, the block-id of each line is also returned as an array of one element per line. delete [] after use.

Definition at line 633 of file baseapi.cpp.

633  {
634  return GetComponentImages(RIL_TEXTLINE, false, pixa, blockids);
635 }
Boxa * GetComponentImages(const PageIteratorLevel level, const bool text_only, const bool raw_image, const int raw_padding, Pixa **pixa, int **blockids, int **paraids)
Definition: baseapi.cpp:664

◆ GetTextDirection()

bool tesseract::TessBaseAPI::GetTextDirection ( int *  out_offset,
float *  out_slope 
)

Definition at line 2109 of file baseapi.cpp.

2109  {
2110  PageIterator* it = AnalyseLayout();
2111  if (it == NULL) {
2112  return false;
2113  }
2114  int x1, x2, y1, y2;
2115  it->Baseline(RIL_TEXTLINE, &x1, &y1, &x2, &y2);
2116  // Calculate offset and slope (NOTE: Kind of ugly)
2117  if (x2 <= x1) x2 = x1 + 1;
2118  // Convert the point pair to slope/offset of the baseline (in image coords.)
2119  *out_slope = static_cast<float>(y2 - y1) / (x2 - x1);
2120  *out_offset = static_cast<int>(y1 - *out_slope * x1);
2121  // Get the y-coord of the baseline at the left and right edges of the
2122  // textline's bounding box.
2123  int left, top, right, bottom;
2124  if (!it->BoundingBox(RIL_TEXTLINE, &left, &top, &right, &bottom)) {
2125  delete it;
2126  return false;
2127  }
2128  int left_y = IntCastRounded(*out_slope * left + *out_offset);
2129  int right_y = IntCastRounded(*out_slope * right + *out_offset);
2130  // Shift the baseline down so it passes through the nearest bottom-corner
2131  // of the textline's bounding box. This is the difference between the y
2132  // at the lowest (max) edge of the box and the actual box bottom.
2133  *out_offset += bottom - MAX(left_y, right_y);
2134  // Switch back to bottom-up tesseract coordinates. Requires negation of
2135  // the slope and height - offset for the offset.
2136  *out_slope = -*out_slope;
2137  *out_offset = rect_height_ - *out_offset;
2138  delete it;
2139 
2140  return true;
2141 }
PageIterator * AnalyseLayout()
Definition: baseapi.cpp:787
int IntCastRounded(double x)
Definition: helpers.h:179
#define MAX(x, y)
Definition: ndminx.h:24

◆ GetTextlines() [1/2]

Boxa * tesseract::TessBaseAPI::GetTextlines ( const bool  raw_image,
const int  raw_padding,
Pixa **  pixa,
int **  blockids,
int **  paraids 
)

Get the textlines as a leptonica-style Boxa, Pixa pair, in reading order. Can be called before or after Recognize. If raw_image is true, then extract from the original image instead of the thresholded image and pad by raw_padding pixels. If blockids is not NULL, the block-id of each line is also returned as an array of one element per line. delete [] after use. If paraids is not NULL, the paragraph-id of each line within its block is also returned as an array of one element per line. delete [] after use.

Get the textlines as a leptonica-style Boxa, Pixa pair, in reading order. Can be called before or after Recognize. If blockids is not NULL, the block-id of each line is also returned as an array of one element per line. delete [] after use. If paraids is not NULL, the paragraph-id of each line within its block is also returned as an array of one element per line. delete [] after use.

Definition at line 619 of file baseapi.cpp.

620  {
621  return GetComponentImages(RIL_TEXTLINE, true, raw_image, raw_padding,
622  pixa, blockids, paraids);
623 }
Boxa * GetComponentImages(const PageIteratorLevel level, const bool text_only, const bool raw_image, const int raw_padding, Pixa **pixa, int **blockids, int **paraids)
Definition: baseapi.cpp:664

◆ GetTextlines() [2/2]

Boxa* tesseract::TessBaseAPI::GetTextlines ( Pixa **  pixa,
int **  blockids 
)
inline

Definition at line 416 of file baseapi.h.

416  {
417  return GetTextlines(false, 0, pixa, blockids, NULL);
418  }
Boxa * GetTextlines(const bool raw_image, const int raw_padding, Pixa **pixa, int **blockids, int **paraids)
Definition: baseapi.cpp:619

◆ GetThresholdedImage()

Pix * tesseract::TessBaseAPI::GetThresholdedImage ( )

Get a copy of the internal thresholded image from Tesseract. Caller takes ownership of the Pix and must pixDestroy it. May be called any time after SetImage, or after TesseractRect.

ONLY available after SetImage if you have Leptonica installed. Get a copy of the internal thresholded image from Tesseract.

Definition at line 593 of file baseapi.cpp.

593  {
594  if (tesseract_ == nullptr || thresholder_ == nullptr) return nullptr;
595  if (tesseract_->pix_binary() == nullptr &&
597  return nullptr;
598  }
599  return pixClone(tesseract_->pix_binary());
600 }
Pix * pix_binary() const
ImageThresholder * thresholder_
Image thresholding module.
Definition: baseapi.h:874
Tesseract * tesseract_
The underlying data object.
Definition: baseapi.h:870
virtual TESS_LOCAL bool Threshold(Pix **pix)
Definition: baseapi.cpp:2192

◆ GetThresholdedImageScaleFactor()

int tesseract::TessBaseAPI::GetThresholdedImageScaleFactor ( ) const

Returns the scale factor of the thresholded image that would be returned by GetThresholdedImage() and the various GetX() methods that call GetComponentImages(). Returns 0 if no thresholder has been set.

Definition at line 746 of file baseapi.cpp.

746  {
747  if (thresholder_ == NULL) {
748  return 0;
749  }
750  return thresholder_->GetScaleFactor();
751 }
ImageThresholder * thresholder_
Image thresholding module.
Definition: baseapi.h:874

◆ GetTSVText()

char * tesseract::TessBaseAPI::GetTSVText ( int  page_number)

Make a TSV-formatted string from the internal data structures. page_number is 0-based but will appear in the output as 1-based. Returned string must be freed with the delete [] operator.

Definition at line 1582 of file baseapi.cpp.

1582  {
1583  if (tesseract_ == NULL || (page_res_ == NULL && Recognize(NULL) < 0))
1584  return NULL;
1585 
1586  int lcnt = 1, bcnt = 1, pcnt = 1, wcnt = 1;
1587  int page_id = page_number + 1; // we use 1-based page numbers.
1588 
1589  STRING tsv_str("");
1590 
1591  int page_num = page_id, block_num = 0, par_num = 0, line_num = 0,
1592  word_num = 0;
1593 
1594  tsv_str.add_str_int("1\t", page_num); // level 1 - page
1595  tsv_str.add_str_int("\t", block_num);
1596  tsv_str.add_str_int("\t", par_num);
1597  tsv_str.add_str_int("\t", line_num);
1598  tsv_str.add_str_int("\t", word_num);
1599  tsv_str.add_str_int("\t", rect_left_);
1600  tsv_str.add_str_int("\t", rect_top_);
1601  tsv_str.add_str_int("\t", rect_width_);
1602  tsv_str.add_str_int("\t", rect_height_);
1603  tsv_str += "\t-1\t\n";
1604 
1605  ResultIterator* res_it = GetIterator();
1606  while (!res_it->Empty(RIL_BLOCK)) {
1607  if (res_it->Empty(RIL_WORD)) {
1608  res_it->Next(RIL_WORD);
1609  continue;
1610  }
1611 
1612  // Add rows for any new block/paragraph/textline.
1613  if (res_it->IsAtBeginningOf(RIL_BLOCK)) {
1614  block_num++, par_num = 0, line_num = 0, word_num = 0;
1615  tsv_str.add_str_int("2\t", page_num); // level 2 - block
1616  tsv_str.add_str_int("\t", block_num);
1617  tsv_str.add_str_int("\t", par_num);
1618  tsv_str.add_str_int("\t", line_num);
1619  tsv_str.add_str_int("\t", word_num);
1620  AddBoxToTSV(res_it, RIL_BLOCK, &tsv_str);
1621  tsv_str += "\t-1\t\n"; // end of row for block
1622  }
1623  if (res_it->IsAtBeginningOf(RIL_PARA)) {
1624  par_num++, line_num = 0, word_num = 0;
1625  tsv_str.add_str_int("3\t", page_num); // level 3 - paragraph
1626  tsv_str.add_str_int("\t", block_num);
1627  tsv_str.add_str_int("\t", par_num);
1628  tsv_str.add_str_int("\t", line_num);
1629  tsv_str.add_str_int("\t", word_num);
1630  AddBoxToTSV(res_it, RIL_PARA, &tsv_str);
1631  tsv_str += "\t-1\t\n"; // end of row for para
1632  }
1633  if (res_it->IsAtBeginningOf(RIL_TEXTLINE)) {
1634  line_num++, word_num = 0;
1635  tsv_str.add_str_int("4\t", page_num); // level 4 - line
1636  tsv_str.add_str_int("\t", block_num);
1637  tsv_str.add_str_int("\t", par_num);
1638  tsv_str.add_str_int("\t", line_num);
1639  tsv_str.add_str_int("\t", word_num);
1640  AddBoxToTSV(res_it, RIL_TEXTLINE, &tsv_str);
1641  tsv_str += "\t-1\t\n"; // end of row for line
1642  }
1643 
1644  // Now, process the word...
1645  int left, top, right, bottom;
1646  res_it->BoundingBox(RIL_WORD, &left, &top, &right, &bottom);
1647  word_num++;
1648  tsv_str.add_str_int("5\t", page_num); // level 5 - word
1649  tsv_str.add_str_int("\t", block_num);
1650  tsv_str.add_str_int("\t", par_num);
1651  tsv_str.add_str_int("\t", line_num);
1652  tsv_str.add_str_int("\t", word_num);
1653  tsv_str.add_str_int("\t", left);
1654  tsv_str.add_str_int("\t", top);
1655  tsv_str.add_str_int("\t", right - left);
1656  tsv_str.add_str_int("\t", bottom - top);
1657  tsv_str.add_str_int("\t", res_it->Confidence(RIL_WORD));
1658  tsv_str += "\t";
1659 
1660  // Increment counts if at end of block/paragraph/textline.
1661  if (res_it->IsAtFinalElement(RIL_TEXTLINE, RIL_WORD)) lcnt++;
1662  if (res_it->IsAtFinalElement(RIL_PARA, RIL_WORD)) pcnt++;
1663  if (res_it->IsAtFinalElement(RIL_BLOCK, RIL_WORD)) bcnt++;
1664 
1665  do {
1666  tsv_str += std::unique_ptr<const char[]>(res_it->GetUTF8Text(RIL_SYMBOL)).get();
1667  res_it->Next(RIL_SYMBOL);
1668  } while (!res_it->Empty(RIL_BLOCK) && !res_it->IsAtBeginningOf(RIL_WORD));
1669  tsv_str += "\n"; // end of row
1670  wcnt++;
1671  }
1672 
1673  char* ret = new char[tsv_str.length() + 1];
1674  strcpy(ret, tsv_str.string());
1675  delete res_it;
1676  return ret;
1677 }
PAGE_RES * page_res_
The page-level data.
Definition: baseapi.h:877
Tesseract * tesseract_
The underlying data object.
Definition: baseapi.h:870
int Recognize(ETEXT_DESC *monitor)
Definition: baseapi.cpp:807
Definition: strngs.h:45
ResultIterator * GetIterator()
Definition: baseapi.cpp:1236

◆ GetUnichar()

const char * tesseract::TessBaseAPI::GetUnichar ( int  unichar_id)

This method returns the string form of the specified unichar.

Definition at line 2794 of file baseapi.cpp.

2794  {
2795  return tesseract_->unicharset.id_to_unichar(unichar_id);
2796 }
Tesseract * tesseract_
The underlying data object.
Definition: baseapi.h:870
const char * id_to_unichar(UNICHAR_ID id) const
Definition: unicharset.cpp:266
UNICHARSET unicharset
Definition: ccutil.h:68

◆ GetUNLVText()

char * tesseract::TessBaseAPI::GetUNLVText ( )

The recognized text is returned as a char* coded in the UNLV format: Latin-1 text with specific reject and suspect codes. The returned string must be freed with the delete [] operator.

Definition at line 1763 of file baseapi.cpp.

1763  {
1764  if (tesseract_ == NULL ||
1765  (!recognition_done_ && Recognize(NULL) < 0))
1766  return NULL;
1767  bool tilde_crunch_written = false;
1768  bool last_char_was_newline = true;
1769  bool last_char_was_tilde = false;
1770 
1771  int total_length = TextLength(NULL);
1772  PAGE_RES_IT page_res_it(page_res_);
1773  char* result = new char[total_length];
1774  char* ptr = result;
1775  for (page_res_it.restart_page(); page_res_it.word () != NULL;
1776  page_res_it.forward()) {
1777  WERD_RES *word = page_res_it.word();
1778  // Process the current word.
1779  if (word->unlv_crunch_mode != CR_NONE) {
1780  if (word->unlv_crunch_mode != CR_DELETE &&
1781  (!tilde_crunch_written ||
1782  (word->unlv_crunch_mode == CR_KEEP_SPACE &&
1783  word->word->space() > 0 &&
1784  !word->word->flag(W_FUZZY_NON) &&
1785  !word->word->flag(W_FUZZY_SP)))) {
1786  if (!word->word->flag(W_BOL) &&
1787  word->word->space() > 0 &&
1788  !word->word->flag(W_FUZZY_NON) &&
1789  !word->word->flag(W_FUZZY_SP)) {
1790  /* Write a space to separate from preceding good text */
1791  *ptr++ = ' ';
1792  last_char_was_tilde = false;
1793  }
1794  if (!last_char_was_tilde) {
1795  // Write a reject char.
1796  last_char_was_tilde = true;
1797  *ptr++ = kUNLVReject;
1798  tilde_crunch_written = true;
1799  last_char_was_newline = false;
1800  }
1801  }
1802  } else {
1803  // NORMAL PROCESSING of non tilde crunched words.
1804  tilde_crunch_written = false;
1806  const char* wordstr = word->best_choice->unichar_string().string();
1807  const STRING& lengths = word->best_choice->unichar_lengths();
1808  int length = lengths.length();
1809  int i = 0;
1810  int offset = 0;
1811 
1812  if (last_char_was_tilde &&
1813  word->word->space() == 0 && wordstr[offset] == ' ') {
1814  // Prevent adjacent tilde across words - we know that adjacent tildes
1815  // within words have been removed.
1816  // Skip the first character.
1817  offset = lengths[i++];
1818  }
1819  if (i < length && wordstr[offset] != 0) {
1820  if (!last_char_was_newline)
1821  *ptr++ = ' ';
1822  else
1823  last_char_was_newline = false;
1824  for (; i < length; offset += lengths[i++]) {
1825  if (wordstr[offset] == ' ' ||
1826  wordstr[offset] == kTesseractReject) {
1827  *ptr++ = kUNLVReject;
1828  last_char_was_tilde = true;
1829  } else {
1830  if (word->reject_map[i].rejected())
1831  *ptr++ = kUNLVSuspect;
1832  UNICHAR ch(wordstr + offset, lengths[i]);
1833  int uni_ch = ch.first_uni();
1834  for (int j = 0; kUniChs[j] != 0; ++j) {
1835  if (kUniChs[j] == uni_ch) {
1836  uni_ch = kLatinChs[j];
1837  break;
1838  }
1839  }
1840  if (uni_ch <= 0xff) {
1841  *ptr++ = static_cast<char>(uni_ch);
1842  last_char_was_tilde = false;
1843  } else {
1844  *ptr++ = kUNLVReject;
1845  last_char_was_tilde = true;
1846  }
1847  }
1848  }
1849  }
1850  }
1851  if (word->word->flag(W_EOL) && !last_char_was_newline) {
1852  /* Add a new line output */
1853  *ptr++ = '\n';
1854  tilde_crunch_written = false;
1855  last_char_was_newline = true;
1856  last_char_was_tilde = false;
1857  }
1858  }
1859  *ptr++ = '\n';
1860  *ptr = '\0';
1861  return result;
1862 }
const char kUNLVSuspect
Definition: baseapi.cpp:91
WERD_CHOICE * best_choice
Definition: pageres.h:219
const STRING & unichar_lengths() const
Definition: ratngs.h:546
Definition: werd.h:36
const char * string() const
Definition: strngs.cpp:198
PAGE_RES * page_res_
The page-level data.
Definition: baseapi.h:877
voidpf uLong offset
Definition: ioapi.h:42
void set_unlv_suspects(WERD_RES *word)
Definition: output.cpp:305
inT32 length() const
Definition: strngs.cpp:193
const char kUNLVReject
Definition: baseapi.cpp:89
Tesseract * tesseract_
The underlying data object.
Definition: baseapi.h:870
BOOL8 flag(WERD_FLAGS mask) const
Definition: werd.h:128
const int kUniChs[]
Definition: baseapi.cpp:1750
int Recognize(ETEXT_DESC *monitor)
Definition: baseapi.cpp:807
TESS_LOCAL int TextLength(int *blob_count)
Definition: baseapi.cpp:2325
Definition: strngs.h:45
const int kLatinChs[]
Definition: baseapi.cpp:1754
Definition: werd.h:35
CRUNCH_MODE unlv_crunch_mode
Definition: pageres.h:294
const STRING & unichar_string() const
Definition: ratngs.h:539
WERD * word
Definition: pageres.h:175
const char kTesseractReject
Definition: baseapi.cpp:87
bool recognition_done_
page_res_ contains recognition data.
Definition: baseapi.h:883
REJMAP reject_map
Definition: pageres.h:271
uinT8 space()
Definition: werd.h:104

◆ GetUTF8Text()

char * tesseract::TessBaseAPI::GetUTF8Text ( )

The recognized text is returned as a char* which is coded as UTF8 and must be freed with the delete [] operator.

Make a text string from the internal data structures.

Definition at line 1263 of file baseapi.cpp.

1263  {
1264  if (tesseract_ == NULL ||
1265  (!recognition_done_ && Recognize(NULL) < 0))
1266  return NULL;
1267  STRING text("");
1268  ResultIterator *it = GetIterator();
1269  do {
1270  if (it->Empty(RIL_PARA)) continue;
1271  const std::unique_ptr<const char[]> para_text(it->GetUTF8Text(RIL_PARA));
1272  text += para_text.get();
1273  } while (it->Next(RIL_PARA));
1274  char* result = new char[text.length() + 1];
1275  strncpy(result, text.string(), text.length() + 1);
1276  delete it;
1277  return result;
1278 }
Tesseract * tesseract_
The underlying data object.
Definition: baseapi.h:870
int Recognize(ETEXT_DESC *monitor)
Definition: baseapi.cpp:807
Definition: strngs.h:45
ResultIterator * GetIterator()
Definition: baseapi.cpp:1236
bool recognition_done_
page_res_ contains recognition data.
Definition: baseapi.h:883

◆ GetWords()

Boxa * tesseract::TessBaseAPI::GetWords ( Pixa **  pixa)

Get the words as a leptonica-style Boxa, Pixa pair, in reading order. Can be called before or after Recognize.

Definition at line 642 of file baseapi.cpp.

642  {
643  return GetComponentImages(RIL_WORD, true, pixa, NULL);
644 }
Boxa * GetComponentImages(const PageIteratorLevel level, const bool text_only, const bool raw_image, const int raw_padding, Pixa **pixa, int **blockids, int **paraids)
Definition: baseapi.cpp:664

◆ InitTruthCallback()

void tesseract::TessBaseAPI::InitTruthCallback ( TruthCallback *  cb)
inline

Definition at line 773 of file baseapi.h.

773 { truth_cb_ = cb; }
TruthCallback * truth_cb_
Definition: baseapi.h:884

◆ IsValidCharacter()

bool tesseract::TessBaseAPI::IsValidCharacter ( const char *  utf8_character)

Definition at line 2102 of file baseapi.cpp.

2102  {
2103  return tesseract_->unicharset.contains_unichar(utf8_character);
2104 }
bool contains_unichar(const char *const unichar_repr) const
Definition: unicharset.cpp:644
Tesseract * tesseract_
The underlying data object.
Definition: baseapi.h:870
UNICHARSET unicharset
Definition: ccutil.h:68

◆ IsValidWord()

int tesseract::TessBaseAPI::IsValidWord ( const char *  word)

Check whether a word is valid according to Tesseract's language model

Returns
0 if the word is invalid, non-zero if valid.
Warning
temporary! This function will be removed from here and placed in a separate API at some future time.

Check whether a word is valid according to Tesseract's language model. Returns 0 if the word is invalid, non-zero if valid.

Definition at line 2098 of file baseapi.cpp.

2098  {
2099  return tesseract_->getDict().valid_word(word);
2100 }
Dict & getDict()
Definition: classify.h:65
Tesseract * tesseract_
The underlying data object.
Definition: baseapi.h:870
int valid_word(const WERD_CHOICE &word, bool numbers_ok) const
Definition: dict.cpp:750

◆ MakeTBLOB()

TBLOB * tesseract::TessBaseAPI::MakeTBLOB ( Pix *  pix)
static

Returns a TBLOB corresponding to the entire input image.

Creates a TBLOB* from the whole pix.

Definition at line 2468 of file baseapi.cpp.

2468  {
2469  int width = pixGetWidth(pix);
2470  int height = pixGetHeight(pix);
2471  BLOCK block("a character", TRUE, 0, 0, 0, 0, width, height);
2472 
2473  // Create C_BLOBs from the page
2474  extract_edges(pix, &block);
2475 
2476  // Merge all C_BLOBs
2477  C_BLOB_LIST *list = block.blob_list();
2478  C_BLOB_IT c_blob_it(list);
2479  if (c_blob_it.empty())
2480  return NULL;
2481  // Move all the outlines to the first blob.
2482  C_OUTLINE_IT ol_it(c_blob_it.data()->out_list());
2483  for (c_blob_it.forward();
2484  !c_blob_it.at_first();
2485  c_blob_it.forward()) {
2486  C_BLOB *c_blob = c_blob_it.data();
2487  ol_it.add_list_after(c_blob->out_list());
2488  }
2489  // Convert the first blob to the output TBLOB.
2490  return TBLOB::PolygonalCopy(false, c_blob_it.data());
2491 }
C_OUTLINE_LIST * out_list()
Definition: stepblob.h:64
#define TRUE
Definition: capi.h:45
void extract_edges(Pix *pix, BLOCK *block)
Definition: edgblob.cpp:334
static TBLOB * PolygonalCopy(bool allow_detailed_fx, C_BLOB *src)
Definition: blobs.cpp:344
Definition: ocrblock.h:30

◆ MakeTessOCRRow()

ROW * tesseract::TessBaseAPI::MakeTessOCRRow ( float  baseline,
float  xheight,
float  descender,
float  ascender 
)
static

Returns a ROW object created from the input row specification.

Definition at line 2451 of file baseapi.cpp.

2454  {
2455  inT32 xstarts[] = {-32000};
2456  double quad_coeffs[] = {0, 0, baseline};
2457  return new ROW(1,
2458  xstarts,
2459  quad_coeffs,
2460  xheight,
2461  ascender - (baseline + xheight),
2462  descender - baseline,
2463  0,
2464  0);
2465 }
int32_t inT32
Definition: host.h:38
Definition: ocrrow.h:32

◆ MeanTextConf()

int tesseract::TessBaseAPI::MeanTextConf ( )

Returns the (average) confidence value between 0 and 100.

Returns the average word confidence for Tesseract page result.

Definition at line 1933 of file baseapi.cpp.

1933  {
1934  int* conf = AllWordConfidences();
1935  if (!conf) return 0;
1936  int sum = 0;
1937  int *pt = conf;
1938  while (*pt >= 0) sum += *pt++;
1939  if (pt != conf) sum /= pt - conf;
1940  delete [] conf;
1941  return sum;
1942 }

◆ NormalizeTBLOB()

void tesseract::TessBaseAPI::NormalizeTBLOB ( TBLOB tblob,
ROW row,
bool  numeric_mode 
)
static

This method baseline normalizes a TBLOB in-place. The input row is used for normalization. The denorm is an optional parameter in which the normalization-antidote is returned.

Definition at line 2498 of file baseapi.cpp.

2498  {
2499  TBOX box = tblob->bounding_box();
2500  float x_center = (box.left() + box.right()) / 2.0f;
2501  float baseline = row->base_line(x_center);
2502  float scale = kBlnXHeight / row->x_height();
2503  tblob->Normalize(NULL, NULL, NULL, x_center, baseline, scale, scale,
2504  0.0f, static_cast<float>(kBlnBaselineOffset), false, NULL);
2505 }
const int kBlnXHeight
Definition: normalis.h:28
const int kBlnBaselineOffset
Definition: normalis.h:29
float x_height() const
Definition: ocrrow.h:61
inT16 left() const
Definition: rect.h:68
void Normalize(const BLOCK *block, const FCOORD *rotation, const DENORM *predecessor, float x_origin, float y_origin, float x_scale, float y_scale, float final_xshift, float final_yshift, bool inverse, Pix *pix)
Definition: blobs.cpp:413
Definition: rect.h:30
inT16 right() const
Definition: rect.h:75
float base_line(float xpos) const
Definition: ocrrow.h:56
TBOX bounding_box() const
Definition: blobs.cpp:482

◆ NumDawgs()

int tesseract::TessBaseAPI::NumDawgs ( ) const

Return the number of dawgs loaded into tesseract_ object.

Definition at line 2805 of file baseapi.cpp.

2805  {
2806  return tesseract_ == NULL ? 0 : tesseract_->getDict().NumDawgs();
2807 }
Dict & getDict()
Definition: classify.h:65
Tesseract * tesseract_
The underlying data object.
Definition: baseapi.h:870
int NumDawgs() const
Return the number of dawgs in the dawgs_ vector.
Definition: dict.h:412

◆ oem()

OcrEngineMode tesseract::TessBaseAPI::oem ( ) const
inline

Definition at line 771 of file baseapi.h.

771 { return last_oem_requested_; }
OcrEngineMode last_oem_requested_
Last ocr language mode requested.
Definition: baseapi.h:882

◆ ProcessPage()

bool tesseract::TessBaseAPI::ProcessPage ( Pix *  pix,
int  page_index,
const char *  filename,
const char *  retry_config,
int  timeout_millisec,
TessResultRenderer renderer 
)

Turn a single image into symbolic text.

The pix is the image processed. filename and page_index are metadata used by side-effect processes, such as reading a box file or formatting as hOCR.

See ProcessPages for descriptions of other parameters.

Definition at line 1154 of file baseapi.cpp.

1156  {
1157  PERF_COUNT_START("ProcessPage")
1159  SetImage(pix);
1160  bool failed = false;
1161 
1162  if (tesseract_->tessedit_pageseg_mode == PSM_AUTO_ONLY) {
1163  // Disabled character recognition
1164  PageIterator* it = AnalyseLayout();
1165 
1166  if (it == NULL) {
1167  failed = true;
1168  } else {
1169  delete it;
1170  }
1172  failed = FindLines() != 0;
1173  } else if (timeout_millisec > 0) {
1174  // Running with a timeout.
1175  ETEXT_DESC monitor;
1176  monitor.cancel = NULL;
1177  monitor.cancel_this = NULL;
1178  monitor.set_deadline_msecs(timeout_millisec);
1179 
1180  // Now run the main recognition.
1181  failed = Recognize(&monitor) < 0;
1182  } else {
1183  // Normal layout and character recognition with no timeout.
1184  failed = Recognize(NULL) < 0;
1185  }
1186 
1188 #ifndef ANDROID_BUILD
1189  Pix* page_pix = GetThresholdedImage();
1190  pixWrite("tessinput.tif", page_pix, IFF_TIFF_G4);
1191 #endif // ANDROID_BUILD
1192  }
1193 
1194  if (failed && retry_config != NULL && retry_config[0] != '\0') {
1195  // Save current config variables before switching modes.
1196  FILE* fp = fopen(kOldVarsFile, "wb");
1197  PrintVariables(fp);
1198  fclose(fp);
1199  // Switch to alternate mode for retry.
1200  ReadConfigFile(retry_config);
1201  SetImage(pix);
1202  Recognize(NULL);
1203  // Restore saved config variables.
1205  }
1206 
1207  if (renderer && !failed) {
1208  failed = !renderer->AddImage(this);
1209  }
1210 
1212  return !failed;
1213 }
void set_deadline_msecs(inT32 deadline_msecs)
Definition: ocrclass.h:146
#define PERF_COUNT_START(FUNCT_NAME)
void ReadConfigFile(const char *filename)
Definition: baseapi.cpp:468
void * cancel_this
called whenever progress increases
Definition: ocrclass.h:127
PageIterator * AnalyseLayout()
Definition: baseapi.cpp:787
Tesseract * tesseract_
The underlying data object.
Definition: baseapi.h:870
int Recognize(ETEXT_DESC *monitor)
Definition: baseapi.cpp:807
Automatic page segmentation, but no OSD, or OCR.
Definition: publictypes.h:155
void SetImage(const unsigned char *imagedata, int width, int height, int bytes_per_pixel, int bytes_per_line)
Definition: baseapi.cpp:545
TESS_LOCAL int FindLines()
Definition: baseapi.cpp:2236
Pix * GetThresholdedImage()
Definition: baseapi.cpp:593
CANCEL_FUNC cancel
for errcode use
Definition: ocrclass.h:125
#define PERF_COUNT_END
const char * kOldVarsFile
Definition: baseapi.cpp:100
const char * filename
Definition: ioapi.h:38
void PrintVariables(FILE *fp) const
Definition: baseapi.cpp:266
Orientation and script detection only.
Definition: publictypes.h:152
void SetInputName(const char *name)
Definition: baseapi.cpp:203

◆ ProcessPages()

bool tesseract::TessBaseAPI::ProcessPages ( const char *  filename,
const char *  retry_config,
int  timeout_millisec,
TessResultRenderer renderer 
)

Turns images into symbolic text.

filename can point to a single image, a multi-page TIFF, or a plain text list of image filenames.

retry_config is useful for debugging. If not NULL, you can fall back to an alternate configuration if a page fails for some reason.

timeout_millisec terminates processing if any single page takes too long. Set to 0 for unlimited time.

renderer is responsible for creating the output. For example, use the TessTextRenderer if you want plaintext output, or the TessPDFRenderer to produce searchable PDF.

If tessedit_page_number is non-negative, will only process that single page. Works for multi-page tiff file, or filelist.

Returns true if successful, false on error.

Definition at line 1036 of file baseapi.cpp.

1038  {
1039  bool result =
1040  ProcessPagesInternal(filename, retry_config, timeout_millisec, renderer);
1041  if (result) {
1044  tprintf("Write of TR file failed: %s\n", output_file_->string());
1045  return false;
1046  }
1047  }
1048  return result;
1049 }
STRING * output_file_
Name used by debug code.
Definition: baseapi.h:879
bool ProcessPagesInternal(const char *filename, const char *retry_config, int timeout_millisec, TessResultRenderer *renderer)
Definition: baseapi.cpp:1062
#define tprintf(...)
Definition: tprintf.h:31
const char * string() const
Definition: strngs.cpp:198
Tesseract * tesseract_
The underlying data object.
Definition: baseapi.h:870
const char * filename
Definition: ioapi.h:38
bool WriteTRFile(const STRING &filename)
Definition: blobclass.cpp:97

◆ ProcessPagesInternal()

bool tesseract::TessBaseAPI::ProcessPagesInternal ( const char *  filename,
const char *  retry_config,
int  timeout_millisec,
TessResultRenderer renderer 
)

Definition at line 1062 of file baseapi.cpp.

1065  {
1066  PERF_COUNT_START("ProcessPages")
1067  bool stdInput = !strcmp(filename, "stdin") || !strcmp(filename, "-");
1068  if (stdInput) {
1069 #ifdef WIN32
1070  if (_setmode(_fileno(stdin), _O_BINARY) == -1)
1071  tprintf("ERROR: cin to binary: %s", strerror(errno));
1072 #endif // WIN32
1073  }
1074 
1075  if (stream_filelist) {
1076  return ProcessPagesFileList(stdin, NULL, retry_config,
1077  timeout_millisec, renderer,
1079  }
1080 
1081  // At this point we are officially in autodection territory.
1082  // That means any data in stdin must be buffered, to make it
1083  // seekable.
1084  std::string buf;
1085  const l_uint8 *data = NULL;
1086  if (stdInput) {
1087  buf.assign((std::istreambuf_iterator<char>(std::cin)),
1088  (std::istreambuf_iterator<char>()));
1089  data = reinterpret_cast<const l_uint8 *>(buf.data());
1090  }
1091 
1092  // Here is our autodetection
1093  int format;
1094  int r = (stdInput) ?
1095  findFileFormatBuffer(data, &format) :
1096  findFileFormat(filename, &format);
1097 
1098  // Maybe we have a filelist
1099  if (r != 0 || format == IFF_UNKNOWN) {
1100  STRING s;
1101  if (stdInput) {
1102  s = buf.c_str();
1103  } else {
1104  std::ifstream t(filename);
1105  std::string u((std::istreambuf_iterator<char>(t)),
1106  std::istreambuf_iterator<char>());
1107  s = u.c_str();
1108  }
1109  return ProcessPagesFileList(NULL, &s, retry_config,
1110  timeout_millisec, renderer,
1112  }
1113 
1114  // Maybe we have a TIFF which is potentially multipage
1115  bool tiff = (format == IFF_TIFF || format == IFF_TIFF_PACKBITS ||
1116  format == IFF_TIFF_RLE || format == IFF_TIFF_G3 ||
1117  format == IFF_TIFF_G4 || format == IFF_TIFF_LZW ||
1118  format == IFF_TIFF_ZIP);
1119 
1120  // Fail early if we can, before producing any output
1121  Pix *pix = NULL;
1122  if (!tiff) {
1123  pix = (stdInput) ? pixReadMem(data, buf.size()) : pixRead(filename);
1124  if (pix == NULL) {
1125  return false;
1126  }
1127  }
1128 
1129  // Begin the output
1130  if (renderer && !renderer->BeginDocument(unknown_title_)) {
1131  pixDestroy(&pix);
1132  return false;
1133  }
1134 
1135  // Produce output
1136  r = (tiff) ?
1137  ProcessPagesMultipageTiff(data, buf.size(), filename, retry_config,
1138  timeout_millisec, renderer,
1140  ProcessPage(pix, 0, filename, retry_config,
1141  timeout_millisec, renderer);
1142 
1143  // Clean up memory as needed
1144  pixDestroy(&pix);
1145 
1146  // End the output
1147  if (!r || (renderer && !renderer->EndDocument())) {
1148  return false;
1149  }
1151  return true;
1152 }
double u[max]
#define PERF_COUNT_START(FUNCT_NAME)
#define tprintf(...)
Definition: tprintf.h:31
Tesseract * tesseract_
The underlying data object.
Definition: baseapi.h:870
Definition: strngs.h:45
#define PERF_COUNT_END
bool stream_filelist
Definition: baseapi.cpp:80
const char * filename
Definition: ioapi.h:38
const char * c_str() const
Definition: strngs.cpp:209
voidpf void * buf
Definition: ioapi.h:39
bool ProcessPage(Pix *pix, int page_index, const char *filename, const char *retry_config, int timeout_millisec, TessResultRenderer *renderer)
Definition: baseapi.cpp:1154

◆ Recognize()

int tesseract::TessBaseAPI::Recognize ( ETEXT_DESC monitor)

Recognize the image from SetAndThresholdImage, generating Tesseract internal structures. Returns 0 on success. Optional. The Get*Text functions below will call Recognize if needed. After Recognize, the output is kept internally until the next SetImage.

Recognize the tesseract global image and return the result as Tesseract internal structures.

Definition at line 807 of file baseapi.cpp.

807  {
808  if (tesseract_ == NULL)
809  return -1;
810  if (FindLines() != 0)
811  return -1;
812  delete page_res_;
813  if (block_list_->empty()) {
814  page_res_ = new PAGE_RES(false, block_list_,
816  return 0; // Empty page.
817  }
818 
820  recognition_done_ = true;
825  } else {
828  }
829  if (page_res_ == NULL) {
830  return -1;
831  }
835  return 0;
836  }
839  return 0;
840  }
841 
842  if (truth_cb_ != NULL) {
843  tesseract_->wordrec_run_blamer.set_value(true);
844  PageIterator *page_it = new PageIterator(
849  image_height_, page_it, this->tesseract()->pix_grey());
850  delete page_it;
851  }
852 
853  int result = 0;
855  #ifndef GRAPHICS_DISABLED
857  #endif // GRAPHICS_DISABLED
858  // The page_res is invalid after an interactive session, so cleanup
859  // in a way that lets us continue to the next page without crashing.
860  delete page_res_;
861  page_res_ = NULL;
862  return -1;
864  STRING fontname;
865  ExtractFontName(*output_file_, &fontname);
867  } else if (tesseract_->tessedit_ambigs_training) {
868  FILE *training_output_file = tesseract_->init_recog_training(*input_file_);
869  // OCR the page segmented into words by tesseract.
871  *input_file_, page_res_, monitor, training_output_file);
872  fclose(training_output_file);
873  } else {
874  // Now run the main recognition.
875  bool wait_for_text = true;
876  GetBoolVariable("paragraph_text_based", &wait_for_text);
877  if (!wait_for_text) DetectParagraphs(false);
878  if (tesseract_->recog_all_words(page_res_, monitor, NULL, NULL, 0)) {
879  if (wait_for_text) DetectParagraphs(true);
880  } else {
881  result = -1;
882  }
883  }
884  return result;
885 }
STRING * input_file_
Name used by training code.
Definition: baseapi.h:878
const UNICHARSET & getUnicharset() const
Definition: dict.h:97
Dict & getDict()
Definition: classify.h:65
void recog_training_segmented(const STRING &fname, PAGE_RES *page_res, volatile ETEXT_DESC *monitor, FILE *output_file)
STRING * output_file_
Name used by debug code.
Definition: baseapi.h:879
void ApplyBoxTraining(const STRING &fontname, PAGE_RES *page_res)
Definition: applybox.cpp:796
BLOCK_LIST * block_list_
The page layout.
Definition: baseapi.h:876
ImageThresholder * thresholder_
Image thresholding module.
Definition: baseapi.h:874
PAGE_RES * page_res_
The page-level data.
Definition: baseapi.h:877
FILE * init_recog_training(const STRING &fname)
TESS_LOCAL void DetectParagraphs(bool after_text_recognition)
Definition: baseapi.cpp:2580
void ExtractFontName(const STRING &filename, STRING *fontname)
Definition: blobclass.cpp:46
Tesseract * tesseract_
The underlying data object.
Definition: baseapi.h:870
bool GetBoolVariable(const char *name, bool *value) const
Definition: baseapi.cpp:238
TruthCallback * truth_cb_
Definition: baseapi.h:884
Pix * pix_grey() const
Definition: strngs.h:45
Tesseract * tesseract() const
Definition: baseapi.h:769
TESS_LOCAL int FindLines()
Definition: baseapi.cpp:2236
void TrainLineRecognizer(const STRING &input_imagename, const STRING &output_basename, BLOCK_LIST *block_list)
Definition: linerec.cpp:45
bool wordrec_run_blamer
Definition: wordrec.h:168
void pgeditor_main(int width, int height, PAGE_RES *page_res)
Definition: pgedit.cpp:337
PAGE_RES * ApplyBoxes(const STRING &fname, bool find_segmentation, BLOCK_LIST *block_list)
Definition: applybox.cpp:117
bool recognition_done_
page_res_ contains recognition data.
Definition: baseapi.h:883
void CorrectClassifyWords(PAGE_RES *page_res)
Definition: applybox.cpp:772
bool recog_all_words(PAGE_RES *page_res, ETEXT_DESC *monitor, const TBOX *target_word_box, const char *word_config, int dopasses)
Definition: control.cpp:300
bool AnyLSTMLang() const
bool tessedit_resegment_from_line_boxes
virtual void Run(A1, A2, A3, A4)=0
WERD_CHOICE * prev_word_best_choice_
Definition: wordrec.h:415
int GetScaledYResolution() const
Definition: thresholder.h:93

◆ RecognizeForChopTest()

int tesseract::TessBaseAPI::RecognizeForChopTest ( ETEXT_DESC monitor)

Methods to retrieve information after SetAndThresholdImage(), Recognize() or TesseractRect(). (Recognize is called implicitly if needed.) Variant on Recognize used for testing chopper.

Tests the chopper by exhaustively running chop_one_blob.

Definition at line 888 of file baseapi.cpp.

888  {
889  if (tesseract_ == NULL)
890  return -1;
891  if (thresholder_ == NULL || thresholder_->IsEmpty()) {
892  tprintf("Please call SetImage before attempting recognition.");
893  return -1;
894  }
895  if (page_res_ != NULL)
896  ClearResults();
897  if (FindLines() != 0)
898  return -1;
899  // Additional conditions under which chopper test cannot be run
900  if (tesseract_->interactive_display_mode) return -1;
901 
902  recognition_done_ = true;
903 
904  page_res_ = new PAGE_RES(false, block_list_,
906 
907  PAGE_RES_IT page_res_it(page_res_);
908 
909  while (page_res_it.word() != NULL) {
910  WERD_RES *word_res = page_res_it.word();
911  GenericVector<TBOX> boxes;
912  tesseract_->MaximallyChopWord(boxes, page_res_it.block()->block,
913  page_res_it.row()->row, word_res);
914  page_res_it.forward();
915  }
916  return 0;
917 }
bool IsEmpty() const
Return true if no image has been set.
Definition: thresholder.cpp:50
BLOCK_LIST * block_list_
The page layout.
Definition: baseapi.h:876
ImageThresholder * thresholder_
Image thresholding module.
Definition: baseapi.h:874
#define tprintf(...)
Definition: tprintf.h:31
PAGE_RES * page_res_
The page-level data.
Definition: baseapi.h:877
Tesseract * tesseract_
The underlying data object.
Definition: baseapi.h:870
TESS_LOCAL int FindLines()
Definition: baseapi.cpp:2236
WERD * word
Definition: pageres.h:175
bool recognition_done_
page_res_ contains recognition data.
Definition: baseapi.h:883
void MaximallyChopWord(const GenericVector< TBOX > &boxes, BLOCK *block, ROW *row, WERD_RES *word_res)
Definition: applybox.cpp:253
WERD_CHOICE * prev_word_best_choice_
Definition: wordrec.h:415

◆ RunAdaptiveClassifier()

void tesseract::TessBaseAPI::RunAdaptiveClassifier ( TBLOB blob,
int  num_max_matches,
int unichar_ids,
float *  ratings,
int num_matches_returned 
)

Method to run adaptive classifier on a blob. It returns at most num_max_matches results.

Method to run adaptive classifier on a blob.

Definition at line 2771 of file baseapi.cpp.

2775  {
2776  BLOB_CHOICE_LIST* choices = new BLOB_CHOICE_LIST;
2777  tesseract_->AdaptiveClassifier(blob, choices);
2778  BLOB_CHOICE_IT choices_it(choices);
2779  int& index = *num_matches_returned;
2780  index = 0;
2781  for (choices_it.mark_cycle_pt();
2782  !choices_it.cycled_list() && index < num_max_matches;
2783  choices_it.forward()) {
2784  BLOB_CHOICE* choice = choices_it.data();
2785  unichar_ids[index] = choice->unichar_id();
2786  ratings[index] = choice->rating();
2787  ++index;
2788  }
2789  *num_matches_returned = index;
2790  delete choices;
2791 }
float rating() const
Definition: ratngs.h:79
Tesseract * tesseract_
The underlying data object.
Definition: baseapi.h:870
void AdaptiveClassifier(TBLOB *Blob, BLOB_CHOICE_LIST *Choices)
Definition: adaptmatch.cpp:185
UNICHAR_ID unichar_id() const
Definition: ratngs.h:76

◆ set_min_orientation_margin()

void tesseract::TessBaseAPI::set_min_orientation_margin ( double  margin)

Definition at line 2368 of file baseapi.cpp.

2368  {
2369  tesseract_->min_orientation_margin.set_value(margin);
2370 }
Tesseract * tesseract_
The underlying data object.
Definition: baseapi.h:870

◆ SetDictFunc()

void tesseract::TessBaseAPI::SetDictFunc ( DictFunc  f)

Sets Dict::letter_is_okay_ function to point to the given function.

Definition at line 2144 of file baseapi.cpp.

2144  {
2145  if (tesseract_ != NULL) {
2147  }
2148 }
Dict & getDict()
Definition: classify.h:65
Tesseract * tesseract_
The underlying data object.
Definition: baseapi.h:870
int(Dict::* letter_is_okay_)(void *void_dawg_args, UNICHAR_ID unichar_id, bool word_end) const
Definition: dict.h:356

◆ SetFillLatticeFunc()

void tesseract::TessBaseAPI::SetFillLatticeFunc ( FillLatticeFunc  f)

Sets Wordrec::fill_lattice_ function to point to the given function.

Definition at line 2170 of file baseapi.cpp.

2170  {
2171  if (tesseract_ != NULL) tesseract_->fill_lattice_ = f;
2172 }
Tesseract * tesseract_
The underlying data object.
Definition: baseapi.h:870
void(Wordrec::* fill_lattice_)(const MATRIX &ratings, const WERD_CHOICE_LIST &best_choices, const UNICHARSET &unicharset, BlamerBundle *blamer_bundle)
Definition: wordrec.h:419

◆ SetImage() [1/2]

void tesseract::TessBaseAPI::SetImage ( const unsigned char *  imagedata,
int  width,
int  height,
int  bytes_per_pixel,
int  bytes_per_line 
)

Provide an image for Tesseract to recognize. Format is as TesseractRect above. Copies the image buffer and converts to Pix. SetImage clears all recognition results, and sets the rectangle to the full image, so it may be followed immediately by a GetUTF8Text, and it will automatically perform recognition.

Definition at line 545 of file baseapi.cpp.

547  {
548  if (InternalSetImage()) {
549  thresholder_->SetImage(imagedata, width, height,
550  bytes_per_pixel, bytes_per_line);
552  }
553 }
ImageThresholder * thresholder_
Image thresholding module.
Definition: baseapi.h:874
TESS_LOCAL bool InternalSetImage()
Definition: baseapi.cpp:2175
void SetImage(const unsigned char *imagedata, int width, int height, int bytes_per_pixel, int bytes_per_line)
Definition: thresholder.cpp:62
void SetInputImage(Pix *pix)
Definition: baseapi.cpp:920

◆ SetImage() [2/2]

void tesseract::TessBaseAPI::SetImage ( Pix *  pix)

Provide an image for Tesseract to recognize. As with SetImage above, Tesseract takes its own copy of the image, so it need not persist until after Recognize. Pix vs raw, which to use? Use Pix where possible. Tesseract uses Pix as its internal representation and it is therefore more efficient to provide a Pix directly.

Definition at line 570 of file baseapi.cpp.

570  {
571  if (InternalSetImage()) {
572  thresholder_->SetImage(pix);
574  }
575 }
ImageThresholder * thresholder_
Image thresholding module.
Definition: baseapi.h:874
TESS_LOCAL bool InternalSetImage()
Definition: baseapi.cpp:2175
void SetImage(const unsigned char *imagedata, int width, int height, int bytes_per_pixel, int bytes_per_line)
Definition: thresholder.cpp:62
void SetInputImage(Pix *pix)
Definition: baseapi.cpp:920

◆ SetProbabilityInContextFunc()

void tesseract::TessBaseAPI::SetProbabilityInContextFunc ( ProbabilityInContextFunc  f)

Sets Dict::probability_in_context_ function to point to the given function.

Sets Dict::probability_in_context_ function to point to the given function.

Parameters
f: A single function that returns the probability of the current "character" (in general a utf-8 string), given the context of a previous utf-8 string.

Definition at line 2158 of file baseapi.cpp.

2158  {
2159  if (tesseract_ != NULL) {
2161  // Set it for the sublangs too.
2162  int num_subs = tesseract_->num_sub_langs();
2163  for (int i = 0; i < num_subs; ++i) {
2165  }
2166  }
2167 }
int num_sub_langs() const
Dict & getDict()
Definition: classify.h:65
Tesseract * get_sub_lang(int index) const
Tesseract * tesseract_
The underlying data object.
Definition: baseapi.h:870
double(Dict::* probability_in_context_)(const char *lang, const char *context, int context_bytes, const char *character, int character_bytes)
Probability in context function used by the ngram permuter.
Definition: dict.h:366

◆ SetRectangle()

void tesseract::TessBaseAPI::SetRectangle ( int  left,
int  top,
int  width,
int  height 
)

Restrict recognition to a sub-rectangle of the image. Call after SetImage. Each SetRectangle clears the recognition results so multiple rectangles can be recognized with the same image.

Definition at line 582 of file baseapi.cpp.

582  {
583  if (thresholder_ == NULL)
584  return;
585  thresholder_->SetRectangle(left, top, width, height);
586  ClearResults();
587 }
ImageThresholder * thresholder_
Image thresholding module.
Definition: baseapi.h:874
void SetRectangle(int left, int top, int width, int height)

◆ SetSourceResolution()

void tesseract::TessBaseAPI::SetSourceResolution ( int  ppi)

Set the resolution of the source image in pixels per inch so font size information can be calculated in results. Call this after SetImage().

Definition at line 555 of file baseapi.cpp.

555  {
556  if (thresholder_)
558  else
559  tprintf("Please call SetImage before SetSourceResolution.\n");
560 }
ImageThresholder * thresholder_
Image thresholding module.
Definition: baseapi.h:874
#define tprintf(...)
Definition: tprintf.h:31
void SetSourceYResolution(int ppi)
Definition: thresholder.h:86

◆ SetThresholder()

void tesseract::TessBaseAPI::SetThresholder ( ImageThresholder thresholder)
inline

In extreme cases only, usually with a subclass of Thresholder, it is possible to provide a different Thresholder. The Thresholder may be preloaded with an image, settings etc, or they may be set after. Note that Tesseract takes ownership of the Thresholder and will delete it when it is replaced or the API is destructed.

Definition at line 380 of file baseapi.h.

380  {
381  delete thresholder_;
382  thresholder_ = thresholder;
383  ClearResults();
384  }
ImageThresholder * thresholder_
Image thresholding module.
Definition: baseapi.h:874

◆ tesseract()

Tesseract* tesseract::TessBaseAPI::tesseract ( ) const
inline

Definition at line 769 of file baseapi.h.

769 { return tesseract_; }
Tesseract * tesseract_
The underlying data object.
Definition: baseapi.h:870