tesseract  4.00.00dev
baseapi.h
Go to the documentation of this file.
1 // File: baseapi.h
3 // Description: Simple API for calling tesseract.
4 // Author: Ray Smith
5 // Created: Fri Oct 06 15:35:01 PDT 2006
6 //
7 // (C) Copyright 2006, Google Inc.
8 // Licensed under the Apache License, Version 2.0 (the "License");
9 // you may not use this file except in compliance with the License.
10 // You may obtain a copy of the License at
11 // http://www.apache.org/licenses/LICENSE-2.0
12 // Unless required by applicable law or agreed to in writing, software
13 // distributed under the License is distributed on an "AS IS" BASIS,
14 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 // See the License for the specific language governing permissions and
16 // limitations under the License.
17 //
19 
20 #ifndef TESSERACT_API_BASEAPI_H_
21 #define TESSERACT_API_BASEAPI_H_
22 
// Human-readable version string of this header.
23 #define TESSERACT_VERSION_STR "4.00.00alpha"
// Numeric version; 0x040000 is the value MAKE_VERSION(4, 0, 0) produces.
24 #define TESSERACT_VERSION 0x040000
// Packs a version triple into a single integer: major is shifted left by 16,
// minor by 8, and patch occupies the low bits. Every argument is parenthesized
// so expressions can be passed. Nothing is masked, so a minor or patch value
// above 255 spills into the next field.
25 #define MAKE_VERSION(major, minor, patch) (((major) << 16) | ((minor) << 8) | \
26  (patch))
27 
28 #include <stdio.h>
29 // To avoid collision with other typenames include the ABSOLUTE MINIMUM
30 // complexity of includes here. Use forward declarations wherever possible
31 // and hide includes of complex types in baseapi.cpp.
32 #include "apitypes.h"
33 #include "pageiterator.h"
34 #include "platform.h"
35 #include "publictypes.h"
36 #include "resultiterator.h"
37 #include "serialis.h"
38 #include "tesscallback.h"
39 #include "thresholder.h"
40 #include "unichar.h"
41 
42 template <typename T> class GenericVector;
43 class PAGE_RES;
44 class PAGE_RES_IT;
45 class ParagraphModel;
46 struct BlamerBundle;
47 class BLOCK_LIST;
48 class DENORM;
49 class MATRIX;
50 class ROW;
51 class STRING;
52 class WERD;
53 struct Pix;
54 struct Box;
55 struct Pixa;
56 struct Boxa;
57 class ETEXT_DESC;
58 struct OSResults;
59 class TBOX;
60 class UNICHARSET;
61 class WERD_CHOICE_LIST;
62 
65 struct TBLOB;
66 
67 namespace tesseract {
68 
69 class Dawg;
70 class Dict;
71 class EquationDetect;
72 class PageIterator;
73 class LTRResultIterator;
74 class ResultIterator;
75 class MutableIterator;
76 class TessResultRenderer;
77 class Tesseract;
78 class Trie;
79 class Wordrec;
80 
// Pointer to a const Dict member function taking (void*, UNICHAR_ID, bool)
// and returning int.
81 using DictFunc = int (Dict::*)(void* void_dawg_args,
82  UNICHAR_ID unichar_id, bool word_end) const;
// Pointer to a Dict member function that takes a language, a context buffer
// with its byte count and a character buffer with its byte count, and returns
// a double.
83 using ProbabilityInContextFunc = double (Dict::*)(const char* lang,
84  const char* context,
85  int context_bytes,
86  const char* character,
87  int character_bytes);
// Pointer to a Dict member function taking (const char*, void*) and
// returning float.
88 using ParamsModelClassifyFunc = float (Dict::*)(
89  const char *lang, void *path);
// Pointer to a Wordrec member function that receives the ratings matrix, the
// best choices, the unicharset and a BlamerBundle, and returns nothing.
90 using FillLatticeFunc = void (Wordrec::*)(const MATRIX &ratings,
91  const WERD_CHOICE_LIST &best_choices,
92  const UNICHARSET &unicharset,
93  BlamerBundle *blamer_bundle);
96 
106  public:
107  TessBaseAPI();
108  virtual ~TessBaseAPI();
109 
113  static const char* Version();
114 
122  static size_t getOpenCLDevice(void **device);
123 
128  static void CatchSignals();
129 
134  void SetInputName(const char* name);
142  const char* GetInputName();
143  // Takes ownership of the input pix.
144  void SetInputImage(Pix *pix);
145  Pix* GetInputImage();
146  int GetSourceYResolution();
147  const char* GetDatapath();
148 
150  void SetOutputName(const char* name);
151 
165  bool SetVariable(const char* name, const char* value);
166  bool SetDebugVariable(const char* name, const char* value);
167 
172  bool GetIntVariable(const char *name, int *value) const;
173  bool GetBoolVariable(const char *name, bool *value) const;
174  bool GetDoubleVariable(const char *name, double *value) const;
175 
180  const char *GetStringVariable(const char *name) const;
181 
185  void PrintVariables(FILE *fp) const;
186 
190  bool GetVariableAsString(const char *name, STRING *val);
191 
230  int Init(const char* datapath, const char* language, OcrEngineMode mode,
231  char **configs, int configs_size,
232  const GenericVector<STRING> *vars_vec,
233  const GenericVector<STRING> *vars_values,
234  bool set_only_non_debug_params);
// Convenience overload: forwards to the eight-argument Init with the caller's
// engine mode and no config files or variable overrides (configs = nullptr,
// configs_size = 0, vars_vec = nullptr, vars_values = nullptr,
// set_only_non_debug_params = false). Returns whatever that Init returns.
235  int Init(const char* datapath, const char* language, OcrEngineMode oem) {
236  return Init(datapath, language, oem, nullptr, 0, nullptr, nullptr, false);
237  }
// Convenience overload: same as the three-argument Init with the engine mode
// fixed to OEM_DEFAULT, which in turn supplies no config files and no
// variable overrides. Returns whatever that Init returns.
238  int Init(const char* datapath, const char* language) {
239  return Init(datapath, language, OEM_DEFAULT);
240  }
241  // In-memory version reads the traineddata file directly from the given
242  // data[data_size] array, and/or reads data via a FileReader.
243  int Init(const char* data, int data_size, const char* language,
244  OcrEngineMode mode, char** configs, int configs_size,
245  const GenericVector<STRING>* vars_vec,
246  const GenericVector<STRING>* vars_values,
247  bool set_only_non_debug_params, FileReader reader);
248 
257  const char* GetInitLanguagesAsString() const;
258 
264  void GetLoadedLanguagesAsVector(GenericVector<STRING>* langs) const;
265 
269  void GetAvailableLanguagesAsVector(GenericVector<STRING>* langs) const;
270 
277  int InitLangMod(const char* datapath, const char* language);
278 
283  void InitForAnalysePage();
284 
291  void ReadConfigFile(const char* filename);
293  void ReadDebugConfigFile(const char* filename);
294 
300  void SetPageSegMode(PageSegMode mode);
301 
303  PageSegMode GetPageSegMode() const;
304 
322  char* TesseractRect(const unsigned char* imagedata,
323  int bytes_per_pixel, int bytes_per_line,
324  int left, int top, int width, int height);
325 
330  void ClearAdaptiveClassifier();
331 
338  /* @{ */
339 
347  void SetImage(const unsigned char* imagedata, int width, int height,
348  int bytes_per_pixel, int bytes_per_line);
349 
358  void SetImage(Pix* pix);
359 
364  void SetSourceResolution(int ppi);
365 
371  void SetRectangle(int left, int top, int width, int height);
372 
// Installs `thresholder` as the image thresholding module, deletes the one
// that was installed before, and discards existing results via ClearResults().
// If `thresholder` is the module that is already installed it is kept alive;
// deleting it first would leave thresholder_ pointing at freed memory.
// NOTE(review): the delete implies this object owns the thresholder after the
// call - confirm against the destructor in baseapi.cpp.
380  void SetThresholder(ImageThresholder* thresholder) {
381  if (thresholder != thresholder_) delete thresholder_;
382  thresholder_ = thresholder;
383  ClearResults();
384  }
385 
391  Pix* GetThresholdedImage();
392 
398  Boxa* GetRegions(Pixa** pixa);
399 
411  Boxa* GetTextlines(const bool raw_image, const int raw_padding,
412  Pixa** pixa, int** blockids, int** paraids);
413  // Helper method to extract from the thresholded image (most common usage):
414  // forwards to the five-argument GetTextlines with raw_image = false,
415  // raw_padding = 0 and no paragraph-id output (paraids = nullptr).
416  Boxa* GetTextlines(Pixa** pixa, int** blockids) {
417  return GetTextlines(false, 0, pixa, blockids, nullptr);
418  }
419 
428  Boxa* GetStrips(Pixa** pixa, int** blockids);
429 
435  Boxa* GetWords(Pixa** pixa);
436 
445  Boxa* GetConnectedComponents(Pixa** cc);
446 
460  Boxa* GetComponentImages(const PageIteratorLevel level,
461  const bool text_only, const bool raw_image,
462  const int raw_padding,
463  Pixa** pixa, int** blockids, int** paraids);
// Forwards to the seven-argument GetComponentImages with raw_image = false,
// raw_padding = 0 and no paragraph-id output (paraids = NULL).
464  // Helper function to get binary images with no padding (most common usage).
465  Boxa* GetComponentImages(const PageIteratorLevel level,
466  const bool text_only,
467  Pixa** pixa, int** blockids) {
468  return GetComponentImages(level, text_only, false, 0, pixa, blockids, NULL);
469  }
470 
477  int GetThresholdedImageScaleFactor() const;
478 
484  void DumpPGM(const char* filename);
485 
501  PageIterator* AnalyseLayout();
502  PageIterator* AnalyseLayout(bool merge_similar_words);
503 
510  int Recognize(ETEXT_DESC* monitor);
511 
518  int RecognizeForChopTest(ETEXT_DESC* monitor);
519 
542  bool ProcessPages(const char* filename, const char* retry_config,
543  int timeout_millisec, TessResultRenderer* renderer);
544  // Does the real work of ProcessPages.
545  bool ProcessPagesInternal(const char* filename, const char* retry_config,
546  int timeout_millisec, TessResultRenderer* renderer);
547 
557  bool ProcessPage(Pix* pix, int page_index, const char* filename,
558  const char* retry_config, int timeout_millisec,
559  TessResultRenderer* renderer);
560 
569  ResultIterator* GetIterator();
570 
579  MutableIterator* GetMutableIterator();
580 
585  char* GetUTF8Text();
586 
596  char* GetHOCRText(ETEXT_DESC* monitor, int page_number);
597 
604  char* GetHOCRText(int page_number);
605 
611  char* GetTSVText(int page_number);
612 
620  char* GetBoxText(int page_number);
621 
627  char* GetUNLVText();
628 
638  bool DetectOrientationScript(int* orient_deg, float* orient_conf,
639  const char** script_name, float* script_conf);
640 
646  char* GetOsdText(int page_number);
647 
649  int MeanTextConf();
656  int* AllWordConfidences();
657 
668  bool AdaptToWordStr(PageSegMode mode, const char* wordstr);
669 
676  void Clear();
677 
684  void End();
685 
693  static void ClearPersistentCache();
694 
701  int IsValidWord(const char *word);
702  // Returns true if utf8_character is defined in the UniCharset.
703  bool IsValidCharacter(const char *utf8_character);
704 
705 
706  bool GetTextDirection(int* out_offset, float* out_slope);
707 
709  void SetDictFunc(DictFunc f);
710 
714  void SetProbabilityInContextFunc(ProbabilityInContextFunc f);
715 
717  void SetFillLatticeFunc(FillLatticeFunc f);
718 
723  bool DetectOS(OSResults*);
724 
726  void GetFeaturesForBlob(TBLOB* blob, INT_FEATURE_STRUCT* int_features,
727  int* num_features, int* feature_outline_index);
728 
733  static ROW* FindRowForBox(BLOCK_LIST* blocks, int left, int top,
734  int right, int bottom);
735 
740  void RunAdaptiveClassifier(TBLOB* blob,
741  int num_max_matches,
742  int* unichar_ids,
743  float* ratings,
744  int* num_matches_returned);
745 
747  const char* GetUnichar(int unichar_id);
748 
750  const Dawg *GetDawg(int i) const;
751 
753  int NumDawgs() const;
754 
756  static ROW *MakeTessOCRRow(float baseline, float xheight,
757  float descender, float ascender);
758 
760  static TBLOB *MakeTBLOB(Pix *pix);
761 
767  static void NormalizeTBLOB(TBLOB *tblob, ROW *row, bool numeric_mode);
768 
// Returns tesseract_, the underlying Tesseract data object.
769  Tesseract* tesseract() const { return tesseract_; }
770 
// Returns last_oem_requested_, the engine mode that was last requested.
771  OcrEngineMode oem() const { return last_oem_requested_; }
772 
// Stores the callback pointer in truth_cb_; the callback itself is not copied.
// NOTE(review): ownership and required lifetime of cb are not visible here -
// confirm against baseapi.cpp.
773  void InitTruthCallback(TruthCallback *cb) { truth_cb_ = cb; }
774 
775  void set_min_orientation_margin(double margin);
776 
781  void GetBlockTextOrientations(int** block_orientation,
782  bool** vertical_writing);
783 
785  BLOCK_LIST* FindLinesCreateBlockList();
786 
792  static void DeleteBlockList(BLOCK_LIST* block_list);
793  /* @} */
794 
795  protected:
796 
798  TESS_LOCAL bool InternalSetImage();
799 
804  TESS_LOCAL virtual bool Threshold(Pix** pix);
805 
810  TESS_LOCAL int FindLines();
811 
813  void ClearResults();
814 
820  TESS_LOCAL LTRResultIterator* GetLTRIterator();
821 
828  TESS_LOCAL int TextLength(int* blob_count);
829 
831  /* @{ */
832 
837  TESS_LOCAL void AdaptToCharacter(const char *unichar_repr,
838  int length,
839  float baseline,
840  float xheight,
841  float descender,
842  float ascender);
843 
845  TESS_LOCAL PAGE_RES* RecognitionPass1(BLOCK_LIST* block_list);
846  TESS_LOCAL PAGE_RES* RecognitionPass2(BLOCK_LIST* block_list,
847  PAGE_RES* pass1_result);
848 
850  TESS_LOCAL void DetectParagraphs(bool after_text_recognition);
851 
856  TESS_LOCAL static int TesseractExtractResult(char** text,
857  int** lengths,
858  float** costs,
859  int** x0,
860  int** y0,
861  int** x1,
862  int** y1,
863  PAGE_RES* page_res);
864 
// Read-only access to page_res_, the page-level data.
865  TESS_LOCAL const PAGE_RES* GetPageRes() const { return page_res_; }
866  /* @} */
867 
868 
869  protected:
876  BLOCK_LIST* block_list_;
885 
890  /* @{ */
897  /* @} */
898 
899  private:
900  // A list of image filenames gets special consideration
901  bool ProcessPagesFileList(FILE *fp,
902  STRING *buf,
903  const char* retry_config, int timeout_millisec,
904  TessResultRenderer* renderer,
905  int tessedit_page_number);
906  // TIFF supports multipage so gets special consideration.
907  bool ProcessPagesMultipageTiff(const unsigned char *data,
908  size_t size,
909  const char* filename,
910  const char* retry_config,
911  int timeout_millisec,
912  TessResultRenderer* renderer,
913  int tessedit_page_number);
914  // There's currently no way to pass a document title from the
915  // Tesseract command line, and we have multiple places that choose
916  // to set the title to an empty string. Using a single named
917  // variable will hopefully reduce confusion if the situation changes
918  // in the future.
919  const char *unknown_title_ = "";
920 }; // class TessBaseAPI.
921 
923 STRING HOcrEscape(const char* text);
924 } // namespace tesseract.
925 
926 #endif // TESSERACT_API_BASEAPI_H_
STRING * input_file_
Name used by training code.
Definition: baseapi.h:878
void DetectParagraphs(int debug_level, GenericVector< RowInfo > *row_infos, GenericVector< PARA *> *row_owners, PARA_LIST *paragraphs, GenericVector< ParagraphModel *> *models)
float(Dict::* ParamsModelClassifyFunc)(const char *lang, void *path)
Definition: baseapi.h:88
FileReader reader_
Reads files from any filesystem.
Definition: baseapi.h:873
void(Wordrec::* FillLatticeFunc)(const MATRIX &ratings, const WERD_CHOICE_LIST &best_choices, const UNICHARSET &unicharset, BlamerBundle *blamer_bundle)
Definition: baseapi.h:90
int UNICHAR_ID
Definition: unichar.h:33
bool(* FileReader)(const STRING &filename, GenericVector< char > *data)
#define TESS_API
Definition: platform.h:81
STRING * output_file_
Name used by debug code.
Definition: baseapi.h:879
BLOCK_LIST * block_list_
The page layout.
Definition: baseapi.h:876
struct TessBaseAPI TessBaseAPI
Definition: capi.h:83
int Init(const char *datapath, const char *language, OcrEngineMode oem)
Definition: baseapi.h:235
voidpf void uLong size
Definition: ioapi.h:39
void InitTruthCallback(TruthCallback *cb)
Definition: baseapi.h:773
ImageThresholder * thresholder_
Image thresholding module.
Definition: baseapi.h:874
PAGE_RES * page_res_
The page-level data.
Definition: baseapi.h:877
OcrEngineMode last_oem_requested_
Last OCR engine mode requested.
Definition: baseapi.h:882
Tesseract * tesseract_
The underlying data object.
Definition: baseapi.h:870
INT_FEATURE_STRUCT * INT_FEATURE
Definition: baseapi.h:63
TruthCallback * truth_cb_
Definition: baseapi.h:884
TESS_LOCAL const PAGE_RES * GetPageRes() const
Definition: baseapi.h:865
EquationDetect * equ_detect_
The equation detector.
Definition: baseapi.h:872
double(Dict::* ProbabilityInContextFunc)(const char *lang, const char *context, int context_bytes, const char *character, int character_bytes)
Definition: baseapi.h:83
int Init(const char *datapath, const char *language)
Definition: baseapi.h:238
Definition: strngs.h:45
const char int mode
Definition: ioapi.h:38
TessCallback4< const UNICHARSET &, int, PageIterator *, Pix * > TruthCallback
Definition: baseapi.h:95
Tesseract * tesseract() const
Definition: baseapi.h:769
OcrEngineMode oem() const
Definition: baseapi.h:771
void SetThresholder(ImageThresholder *thresholder)
Definition: baseapi.h:380
STRING * language_
Last initialized language.
Definition: baseapi.h:881
STRING HOcrEscape(const char *text)
Definition: baseapi.cpp:2810
Tesseract * osd_tesseract_
For orientation & script detection.
Definition: baseapi.h:871
int(Dict::* DictFunc)(void *void_dawg_args, UNICHAR_ID unichar_id, bool word_end) const
Definition: baseapi.h:81
Definition: rect.h:30
STRING * datapath_
Current location of tessdata.
Definition: baseapi.h:880
Definition: matrix.h:563
Definition: blobs.h:261
#define TESS_LOCAL
Definition: platform.h:82
const char * filename
Definition: ioapi.h:38
typedef int(ZCALLBACK *close_file_func) OF((voidpf opaque
voidpf void * buf
Definition: ioapi.h:39
struct TessResultRenderer TessResultRenderer
Definition: capi.h:77
bool recognition_done_
page_res_ contains recognition data.
Definition: baseapi.h:883
Boxa * GetTextlines(Pixa **pixa, int **blockids)
Definition: baseapi.h:416
Definition: werd.h:60
Definition: ocrrow.h:32
GenericVector< ParagraphModel * > * paragraph_models_
Definition: baseapi.h:875
Boxa * GetComponentImages(const PageIteratorLevel level, const bool text_only, Pixa **pixa, int **blockids)
Definition: baseapi.h:465