tesseract  4.00.00dev
tesseract::LTRResultIterator Class Reference

#include <ltrresultiterator.h>

Inheritance diagram for tesseract::LTRResultIterator:
tesseract::PageIterator tesseract::ResultIterator tesseract::MutableIterator

Public Member Functions

 LTRResultIterator (PAGE_RES *page_res, Tesseract *tesseract, int scale, int scaled_yres, int rect_left, int rect_top, int rect_width, int rect_height)
 
virtual ~LTRResultIterator ()
 
char * GetUTF8Text (PageIteratorLevel level) const
 
void SetLineSeparator (const char *new_line)
 
void SetParagraphSeparator (const char *new_para)
 
float Confidence (PageIteratorLevel level) const
 
void RowAttributes (float *row_height, float *descenders, float *ascenders) const
 
const char * WordFontAttributes (bool *is_bold, bool *is_italic, bool *is_underlined, bool *is_monospace, bool *is_serif, bool *is_smallcaps, int *pointsize, int *font_id) const
 
const char * WordRecognitionLanguage () const
 
StrongScriptDirection WordDirection () const
 
bool WordIsFromDictionary () const
 
int BlanksBeforeWord () const
 
bool WordIsNumeric () const
 
bool HasBlamerInfo () const
 
const void * GetParamsTrainingBundle () const
 
const char * GetBlamerDebug () const
 
const char * GetBlamerMisadaptionDebug () const
 
bool HasTruthString () const
 
bool EquivalentToTruth (const char *str) const
 
char * WordTruthUTF8Text () const
 
char * WordNormedUTF8Text () const
 
const char * WordLattice (int *lattice_size) const
 
bool SymbolIsSuperscript () const
 
bool SymbolIsSubscript () const
 
bool SymbolIsDropcap () const
 
- Public Member Functions inherited from tesseract::PageIterator
 PageIterator (PAGE_RES *page_res, Tesseract *tesseract, int scale, int scaled_yres, int rect_left, int rect_top, int rect_width, int rect_height)
 
virtual ~PageIterator ()
 
 PageIterator (const PageIterator &src)
 
const PageIterator & operator= (const PageIterator &src)
 
bool PositionedAtSameWord (const PAGE_RES_IT *other) const
 
virtual void Begin ()
 
virtual void RestartParagraph ()
 
bool IsWithinFirstTextlineOfParagraph () const
 
virtual void RestartRow ()
 
virtual bool Next (PageIteratorLevel level)
 
virtual bool IsAtBeginningOf (PageIteratorLevel level) const
 
virtual bool IsAtFinalElement (PageIteratorLevel level, PageIteratorLevel element) const
 
int Cmp (const PageIterator &other) const
 
void SetBoundingBoxComponents (bool include_upper_dots, bool include_lower_dots)
 
bool BoundingBox (PageIteratorLevel level, int *left, int *top, int *right, int *bottom) const
 
bool BoundingBox (PageIteratorLevel level, const int padding, int *left, int *top, int *right, int *bottom) const
 
bool BoundingBoxInternal (PageIteratorLevel level, int *left, int *top, int *right, int *bottom) const
 
bool Empty (PageIteratorLevel level) const
 
PolyBlockType BlockType () const
 
Pta * BlockPolygon () const
 
Pix * GetBinaryImage (PageIteratorLevel level) const
 
Pix * GetImage (PageIteratorLevel level, int padding, Pix *original_img, int *left, int *top) const
 
bool Baseline (PageIteratorLevel level, int *x1, int *y1, int *x2, int *y2) const
 
void Orientation (tesseract::Orientation *orientation, tesseract::WritingDirection *writing_direction, tesseract::TextlineOrder *textline_order, float *deskew_angle) const
 
void ParagraphInfo (tesseract::ParagraphJustification *justification, bool *is_list_item, bool *is_crown, int *first_line_indent) const
 
bool SetWordBlamerBundle (BlamerBundle *blamer_bundle)
 

Protected Attributes

const char * line_separator_
 
const char * paragraph_separator_
 
- Protected Attributes inherited from tesseract::PageIterator
PAGE_RES * page_res_
 
Tesseract * tesseract_
 
PAGE_RES_IT * it_
 
WERD * word_
 
int word_length_
 
int blob_index_
 
C_BLOB_IT * cblob_it_
 
bool include_upper_dots_
 
bool include_lower_dots_
 
int scale_
 
int scaled_yres_
 
int rect_left_
 
int rect_top_
 
int rect_width_
 
int rect_height_
 

Friends

class ChoiceIterator
 

Additional Inherited Members

- Protected Member Functions inherited from tesseract::PageIterator
TESS_LOCAL void BeginWord (int offset)
 

Detailed Description

Definition at line 46 of file ltrresultiterator.h.

Constructor & Destructor Documentation

◆ LTRResultIterator()

tesseract::LTRResultIterator::LTRResultIterator ( PAGE_RES * page_res,
Tesseract * tesseract,
int  scale,
int  scaled_yres,
int  rect_left,
int  rect_top,
int  rect_width,
int  rect_height 
)

Definition at line 30 of file ltrresultiterator.cpp.

34  : PageIterator(page_res, tesseract, scale, scaled_yres,
35  rect_left, rect_top, rect_width, rect_height),
36  line_separator_("\n"),
37  paragraph_separator_("\n") {
38 }
PageIterator(PAGE_RES *page_res, Tesseract *tesseract, int scale, int scaled_yres, int rect_left, int rect_top, int rect_width, int rect_height)

◆ ~LTRResultIterator()

tesseract::LTRResultIterator::~LTRResultIterator ( )
virtual

Definition at line 40 of file ltrresultiterator.cpp.

40  {
41 }

Member Function Documentation

◆ BlanksBeforeWord()

int tesseract::LTRResultIterator::BlanksBeforeWord ( ) const

Definition at line 224 of file ltrresultiterator.cpp.

224  {
225  if (it_->word() == NULL) return 1;
226  return it_->word()->word->space();
227 }
WERD * word
Definition: pageres.h:175
WERD_RES * word() const
Definition: pageres.h:736
uinT8 space()
Definition: werd.h:104

◆ Confidence()

float tesseract::LTRResultIterator::Confidence ( PageIteratorLevel  level) const

Definition at line 94 of file ltrresultiterator.cpp.

94  {
95  if (it_->word() == NULL) return 0.0f; // Already at the end!
96  float mean_certainty = 0.0f;
97  int certainty_count = 0;
98  PAGE_RES_IT res_it(*it_);
99  WERD_CHOICE* best_choice = res_it.word()->best_choice;
100  ASSERT_HOST(best_choice != NULL);
101  switch (level) {
102  case RIL_BLOCK:
103  do {
104  best_choice = res_it.word()->best_choice;
105  ASSERT_HOST(best_choice != NULL);
106  mean_certainty += best_choice->certainty();
107  ++certainty_count;
108  res_it.forward();
109  } while (res_it.block() == res_it.prev_block());
110  break;
111  case RIL_PARA:
112  do {
113  best_choice = res_it.word()->best_choice;
114  ASSERT_HOST(best_choice != NULL);
115  mean_certainty += best_choice->certainty();
116  ++certainty_count;
117  res_it.forward();
118  } while (res_it.block() == res_it.prev_block() &&
119  res_it.row()->row->para() == res_it.prev_row()->row->para());
120  break;
121  case RIL_TEXTLINE:
122  do {
123  best_choice = res_it.word()->best_choice;
124  ASSERT_HOST(best_choice != NULL);
125  mean_certainty += best_choice->certainty();
126  ++certainty_count;
127  res_it.forward();
128  } while (res_it.row() == res_it.prev_row());
129  break;
130  case RIL_WORD:
131  mean_certainty += best_choice->certainty();
132  ++certainty_count;
133  break;
134  case RIL_SYMBOL:
135  mean_certainty += best_choice->certainty(blob_index_);
136  ++certainty_count;
137  }
138  if (certainty_count > 0) {
139  mean_certainty /= certainty_count;
140  float confidence = 100 + 5 * mean_certainty;
141  if (confidence < 0.0f) confidence = 0.0f;
142  if (confidence > 100.0f) confidence = 100.0f;
143  return confidence;
144  }
145  return 0.0f;
146 }
#define ASSERT_HOST(x)
Definition: errcode.h:84
float certainty() const
Definition: ratngs.h:328
WERD_RES * word() const
Definition: pageres.h:736

◆ EquivalentToTruth()

bool tesseract::LTRResultIterator::EquivalentToTruth ( const char *  str) const

Definition at line 273 of file ltrresultiterator.cpp.

273  {
274  if (!HasTruthString()) return false;
275  ASSERT_HOST(it_->word()->uch_set != NULL);
276  WERD_CHOICE str_wd(str, *(it_->word()->uch_set));
277  return it_->word()->blamer_bundle->ChoiceIsCorrect(&str_wd);
278 }
BlamerBundle * blamer_bundle
Definition: pageres.h:230
#define ASSERT_HOST(x)
Definition: errcode.h:84
bool ChoiceIsCorrect(const WERD_CHOICE *word_choice) const
Definition: blamer.cpp:111
WERD_RES * word() const
Definition: pageres.h:736
const UNICHARSET * uch_set
Definition: pageres.h:192

◆ GetBlamerDebug()

const char * tesseract::LTRResultIterator::GetBlamerDebug ( ) const

Definition at line 251 of file ltrresultiterator.cpp.

251  {
252  return it_->word()->blamer_bundle->debug().string();
253 }
BlamerBundle * blamer_bundle
Definition: pageres.h:230
const char * string() const
Definition: strngs.cpp:198
const STRING & debug() const
Definition: blamer.h:116
WERD_RES * word() const
Definition: pageres.h:736

◆ GetBlamerMisadaptionDebug()

const char * tesseract::LTRResultIterator::GetBlamerMisadaptionDebug ( ) const

Definition at line 257 of file ltrresultiterator.cpp.

257  {
258  return it_->word()->blamer_bundle->misadaption_debug().string();
259 }
BlamerBundle * blamer_bundle
Definition: pageres.h:230
const char * string() const
Definition: strngs.cpp:198
const STRING & misadaption_debug() const
Definition: blamer.h:119
WERD_RES * word() const
Definition: pageres.h:736

◆ GetParamsTrainingBundle()

const void * tesseract::LTRResultIterator::GetParamsTrainingBundle ( ) const

Definition at line 244 of file ltrresultiterator.cpp.

244  {
245  return (it_->word() != NULL && it_->word()->blamer_bundle != NULL) ?
246  &(it_->word()->blamer_bundle->params_training_bundle()) : NULL;
247 }
BlamerBundle * blamer_bundle
Definition: pageres.h:230
const tesseract::ParamsTrainingBundle & params_training_bundle() const
Definition: blamer.h:150
WERD_RES * word() const
Definition: pageres.h:736

◆ GetUTF8Text()

char * tesseract::LTRResultIterator::GetUTF8Text ( PageIteratorLevel  level) const

Definition at line 45 of file ltrresultiterator.cpp.

45  {
46  if (it_->word() == NULL) return NULL; // Already at the end!
47  STRING text;
48  PAGE_RES_IT res_it(*it_);
49  WERD_CHOICE* best_choice = res_it.word()->best_choice;
50  ASSERT_HOST(best_choice != NULL);
51  if (level == RIL_SYMBOL) {
52  text = res_it.word()->BestUTF8(blob_index_, false);
53  } else if (level == RIL_WORD) {
54  text = best_choice->unichar_string();
55  } else {
56  bool eol = false; // end of line?
57  bool eop = false; // end of paragraph?
58  do { // for each paragraph in a block
59  do { // for each text line in a paragraph
60  do { // for each word in a text line
61  best_choice = res_it.word()->best_choice;
62  ASSERT_HOST(best_choice != NULL);
63  text += best_choice->unichar_string();
64  text += " ";
65  res_it.forward();
66  eol = res_it.row() != res_it.prev_row();
67  } while (!eol);
68  text.truncate_at(text.length() - 1);
69  text += line_separator_;
70  eop = res_it.block() != res_it.prev_block() ||
71  res_it.row()->row->para() != res_it.prev_row()->row->para();
72  } while (level != RIL_TEXTLINE && !eop);
73  if (eop) text += paragraph_separator_;
74  } while (level == RIL_BLOCK && res_it.block() == res_it.prev_block());
75  }
76  int length = text.length() + 1;
77  char* result = new char[length];
78  strncpy(result, text.string(), length);
79  return result;
80 }
void truncate_at(inT32 index)
Definition: strngs.cpp:269
const char * string() const
Definition: strngs.cpp:198
inT32 length() const
Definition: strngs.cpp:193
#define ASSERT_HOST(x)
Definition: errcode.h:84
Definition: strngs.h:45
const STRING & unichar_string() const
Definition: ratngs.h:539
WERD_RES * word() const
Definition: pageres.h:736

◆ HasBlamerInfo()

bool tesseract::LTRResultIterator::HasBlamerInfo ( ) const

Definition at line 237 of file ltrresultiterator.cpp.

237  {
238  return it_->word() != NULL && it_->word()->blamer_bundle != NULL &&
239  it_->word()->blamer_bundle->HasDebugInfo();
240 }
BlamerBundle * blamer_bundle
Definition: pageres.h:230
bool HasDebugInfo() const
Definition: blamer.h:113
WERD_RES * word() const
Definition: pageres.h:736

◆ HasTruthString()

bool tesseract::LTRResultIterator::HasTruthString ( ) const

Definition at line 262 of file ltrresultiterator.cpp.

262  {
263  if (it_->word() == NULL) return false; // Already at the end!
264  if (it_->word()->blamer_bundle == NULL ||
265  it_->word()->blamer_bundle->NoTruth()) {
266  return false; // no truth information for this word
267  }
268  return true;
269 }
BlamerBundle * blamer_bundle
Definition: pageres.h:230
bool NoTruth() const
Definition: blamer.h:109
WERD_RES * word() const
Definition: pageres.h:736

◆ RowAttributes()

void tesseract::LTRResultIterator::RowAttributes ( float *  row_height,
float *  descenders,
float *  ascenders 
) const

Definition at line 148 of file ltrresultiterator.cpp.

149  {
150  *row_height = it_->row()->row->x_height() + it_->row()->row->ascenders() -
151  it_->row()->row->descenders();
152  *descenders = it_->row()->row->descenders();
153  *ascenders = it_->row()->row->ascenders();
154 }
ROW * row
Definition: pageres.h:127
float x_height() const
Definition: ocrrow.h:61
ROW_RES * row() const
Definition: pageres.h:739
float ascenders() const
Definition: ocrrow.h:79
float descenders() const
Definition: ocrrow.h:82

◆ SetLineSeparator()

void tesseract::LTRResultIterator::SetLineSeparator ( const char *  new_line)

Definition at line 83 of file ltrresultiterator.cpp.

83  {
84  line_separator_ = new_line;
85 }
#define new_line()
Definition: cutil.h:83

◆ SetParagraphSeparator()

void tesseract::LTRResultIterator::SetParagraphSeparator ( const char *  new_para)

Definition at line 88 of file ltrresultiterator.cpp.

88  {
89  paragraph_separator_ = new_para;
90 }

◆ SymbolIsDropcap()

bool tesseract::LTRResultIterator::SymbolIsDropcap ( ) const

Definition at line 339 of file ltrresultiterator.cpp.

339  {
340  if (cblob_it_ == NULL && it_->word() != NULL)
341  return it_->word()->best_choice->BlobPosition(blob_index_) == SP_DROPCAP;
342  return false;
343 }
WERD_CHOICE * best_choice
Definition: pageres.h:219
tesseract::ScriptPos BlobPosition(int index) const
Definition: ratngs.h:320
WERD_RES * word() const
Definition: pageres.h:736

◆ SymbolIsSubscript()

bool tesseract::LTRResultIterator::SymbolIsSubscript ( ) const

Definition at line 330 of file ltrresultiterator.cpp.

330  {
331  if (cblob_it_ == NULL && it_->word() != NULL)
332  return it_->word()->best_choice->BlobPosition(blob_index_) == SP_SUBSCRIPT;
333  return false;
334 }
WERD_CHOICE * best_choice
Definition: pageres.h:219
tesseract::ScriptPos BlobPosition(int index) const
Definition: ratngs.h:320
WERD_RES * word() const
Definition: pageres.h:736

◆ SymbolIsSuperscript()

bool tesseract::LTRResultIterator::SymbolIsSuperscript ( ) const

Definition at line 320 of file ltrresultiterator.cpp.

320  {
321  if (cblob_it_ == NULL && it_->word() != NULL)
322  return it_->word()->best_choice->BlobPosition(blob_index_) ==
323  SP_SUPERSCRIPT;
324  return false;
325 }
WERD_CHOICE * best_choice
Definition: pageres.h:219
tesseract::ScriptPos BlobPosition(int index) const
Definition: ratngs.h:320
WERD_RES * word() const
Definition: pageres.h:736

◆ WordDirection()

StrongScriptDirection tesseract::LTRResultIterator::WordDirection ( ) const

Definition at line 202 of file ltrresultiterator.cpp.

202  {
203  if (it_->word() == NULL) return DIR_NEUTRAL;
204  bool has_rtl = it_->word()->AnyRtlCharsInWord();
205  bool has_ltr = it_->word()->AnyLtrCharsInWord();
206  if (has_rtl && !has_ltr)
207  return DIR_RIGHT_TO_LEFT;
208  if (has_ltr && !has_rtl)
209  return DIR_LEFT_TO_RIGHT;
210  if (!has_ltr && !has_rtl)
211  return DIR_NEUTRAL;
212  return DIR_MIX;
213 }
bool AnyRtlCharsInWord() const
Definition: pageres.h:375
WERD_RES * word() const
Definition: pageres.h:736
bool AnyLtrCharsInWord() const
Definition: pageres.h:392

◆ WordFontAttributes()

const char * tesseract::LTRResultIterator::WordFontAttributes ( bool *  is_bold,
bool *  is_italic,
bool *  is_underlined,
bool *  is_monospace,
bool *  is_serif,
bool *  is_smallcaps,
int *  pointsize,
int *  font_id 
) const

Definition at line 164 of file ltrresultiterator.cpp.

171  {
172  if (it_->word() == NULL) return NULL; // Already at the end!
173  if (it_->word()->fontinfo == NULL) {
174  *font_id = -1;
175  return NULL; // No font information.
176  }
177  const FontInfo& font_info = *it_->word()->fontinfo;
178  *font_id = font_info.universal_id;
179  *is_bold = font_info.is_bold();
180  *is_italic = font_info.is_italic();
181  *is_underlined = false; // TODO(rays) fix this!
182  *is_monospace = font_info.is_fixed_pitch();
183  *is_serif = font_info.is_serif();
184  *is_smallcaps = it_->word()->small_caps;
185  float row_height = it_->row()->row->x_height() +
186  it_->row()->row->ascenders() - it_->row()->row->descenders();
187  // Convert from pixels to printers points.
188  *pointsize = scaled_yres_ > 0
189  ? static_cast<int>(row_height * kPointsPerInch / scaled_yres_ + 0.5)
190  : 0;
191 
192  return font_info.name;
193 }
bool is_bold() const
Definition: fontinfo.h:112
ROW * row
Definition: pageres.h:127
float x_height() const
Definition: ocrrow.h:61
ROW_RES * row() const
Definition: pageres.h:739
const FontInfo * fontinfo
Definition: pageres.h:288
float ascenders() const
Definition: ocrrow.h:79
bool small_caps
Definition: pageres.h:283
bool is_italic() const
Definition: fontinfo.h:111
WERD_RES * word() const
Definition: pageres.h:736
bool is_fixed_pitch() const
Definition: fontinfo.h:113
const int kPointsPerInch
Definition: publictypes.h:33
bool is_serif() const
Definition: fontinfo.h:114
float descenders() const
Definition: ocrrow.h:82

◆ WordIsFromDictionary()

bool tesseract::LTRResultIterator::WordIsFromDictionary ( ) const

Definition at line 216 of file ltrresultiterator.cpp.

216  {
217  if (it_->word() == NULL) return false; // Already at the end!
218  int permuter = it_->word()->best_choice->permuter();
219  return permuter == SYSTEM_DAWG_PERM || permuter == FREQ_DAWG_PERM ||
220  permuter == USER_DAWG_PERM;
221 }
WERD_CHOICE * best_choice
Definition: pageres.h:219
uinT8 permuter() const
Definition: ratngs.h:344
WERD_RES * word() const
Definition: pageres.h:736

◆ WordIsNumeric()

bool tesseract::LTRResultIterator::WordIsNumeric ( ) const

Definition at line 230 of file ltrresultiterator.cpp.

230  {
231  if (it_->word() == NULL) return false; // Already at the end!
232  int permuter = it_->word()->best_choice->permuter();
233  return permuter == NUMBER_PERM;
234 }
WERD_CHOICE * best_choice
Definition: pageres.h:219
uinT8 permuter() const
Definition: ratngs.h:344
WERD_RES * word() const
Definition: pageres.h:736

◆ WordLattice()

const char * tesseract::LTRResultIterator::WordLattice ( int *  lattice_size) const

Definition at line 310 of file ltrresultiterator.cpp.

310  {
311  if (it_->word() == NULL) return NULL; // Already at the end!
312  if (it_->word()->blamer_bundle == NULL) return NULL;
313  *lattice_size = it_->word()->blamer_bundle->lattice_size();
314  return it_->word()->blamer_bundle->lattice_data();
315 }
const char * lattice_data() const
Definition: blamer.h:138
int lattice_size() const
Definition: blamer.h:141
BlamerBundle * blamer_bundle
Definition: pageres.h:230
WERD_RES * word() const
Definition: pageres.h:736

◆ WordNormedUTF8Text()

char * tesseract::LTRResultIterator::WordNormedUTF8Text ( ) const

Definition at line 293 of file ltrresultiterator.cpp.

293  {
294  if (it_->word() == NULL) return NULL; // Already at the end!
295  STRING ocr_text;
296  WERD_CHOICE* best_choice = it_->word()->best_choice;
297  const UNICHARSET *unicharset = it_->word()->uch_set;
298  ASSERT_HOST(best_choice != NULL);
299  for (int i = 0; i < best_choice->length(); ++i) {
300  ocr_text += unicharset->get_normed_unichar(best_choice->unichar_id(i));
301  }
302  int length = ocr_text.length() + 1;
303  char* result = new char[length];
304  strncpy(result, ocr_text.string(), length);
305  return result;
306 }
UNICHAR_ID unichar_id(int index) const
Definition: ratngs.h:313
WERD_CHOICE * best_choice
Definition: pageres.h:219
int length() const
Definition: ratngs.h:301
const char * string() const
Definition: strngs.cpp:198
inT32 length() const
Definition: strngs.cpp:193
#define ASSERT_HOST(x)
Definition: errcode.h:84
Definition: strngs.h:45
WERD_RES * word() const
Definition: pageres.h:736
const UNICHARSET * uch_set
Definition: pageres.h:192
const char * get_normed_unichar(UNICHAR_ID unichar_id) const
Definition: unicharset.h:788

◆ WordRecognitionLanguage()

const char * tesseract::LTRResultIterator::WordRecognitionLanguage ( ) const

Definition at line 196 of file ltrresultiterator.cpp.

196  {
197  if (it_->word() == NULL || it_->word()->tesseract == NULL) return NULL;
198  return it_->word()->tesseract->lang.string();
199 }
const char * string() const
Definition: strngs.cpp:198
tesseract::Tesseract * tesseract
Definition: pageres.h:266
STRING lang
Definition: ccutil.h:66
WERD_RES * word() const
Definition: pageres.h:736

◆ WordTruthUTF8Text()

char * tesseract::LTRResultIterator::WordTruthUTF8Text ( ) const

Definition at line 282 of file ltrresultiterator.cpp.

282  {
283  if (!HasTruthString()) return NULL;
284  STRING truth_text = it_->word()->blamer_bundle->TruthString();
285  int length = truth_text.length() + 1;
286  char* result = new char[length];
287  strncpy(result, truth_text.string(), length);
288  return result;
289 }
BlamerBundle * blamer_bundle
Definition: pageres.h:230
const char * string() const
Definition: strngs.cpp:198
inT32 length() const
Definition: strngs.cpp:193
Definition: strngs.h:45
WERD_RES * word() const
Definition: pageres.h:736
STRING TruthString() const
Definition: blamer.h:100

Friends And Related Function Documentation

◆ ChoiceIterator

friend class ChoiceIterator
friend

Definition at line 47 of file ltrresultiterator.h.

Member Data Documentation

◆ line_separator_

const char* tesseract::LTRResultIterator::line_separator_
protected

Definition at line 183 of file ltrresultiterator.h.

◆ paragraph_separator_

const char* tesseract::LTRResultIterator::paragraph_separator_
protected

Definition at line 184 of file ltrresultiterator.h.


The documentation for this class was generated from the following files: