tesseract  4.00.00dev
WERD_RES Class Reference

#include <pageres.h>

Inheritance diagram for WERD_RES:
ELIST_LINK

Public Member Functions

 WERD_RES ()
 
 WERD_RES (WERD *the_word)
 
 WERD_RES (const WERD_RES &source)
 
 ~WERD_RES ()
 
const char * BestUTF8 (int blob_index, bool in_rtl_context) const
 
const char * RawUTF8 (int blob_index) const
 
UNICHARSET::Direction SymbolDirection (int blob_index) const
 
bool AnyRtlCharsInWord () const
 
bool AnyLtrCharsInWord () const
 
bool UnicharsInReadingOrder () const
 
void InitNonPointers ()
 
void InitPointers ()
 
void Clear ()
 
void ClearResults ()
 
void ClearWordChoices ()
 
void ClearRatings ()
 
WERD_RES & operator= (const WERD_RES &source)
 
void CopySimpleFields (const WERD_RES &source)
 
void InitForRetryRecognition (const WERD_RES &source)
 
bool SetupForRecognition (const UNICHARSET &unicharset_in, tesseract::Tesseract *tesseract, Pix *pix, int norm_mode, const TBOX *norm_box, bool numeric_mode, bool use_body_size, bool allow_detailed_fx, ROW *row, const BLOCK *block)
 
void SetupBasicsFromChoppedWord (const UNICHARSET &unicharset_in)
 
void SetupFake (const UNICHARSET &uch)
 
void SetupWordScript (const UNICHARSET &unicharset_in)
 
void SetupBlamerBundle ()
 
void SetupBlobWidthsAndGaps ()
 
void InsertSeam (int blob_number, SEAM *seam)
 
bool AlternativeChoiceAdjustmentsWorseThan (float threshold) const
 
bool IsAmbiguous ()
 
bool StatesAllValid ()
 
void DebugWordChoices (bool debug, const char *word_to_debug)
 
void DebugTopChoice (const char *msg) const
 
void FilterWordChoices (int debug_level)
 
void ComputeAdaptionThresholds (float certainty_scale, float min_rating, float max_rating, float rating_margin, float *thresholds)
 
bool LogNewRawChoice (WERD_CHOICE *word_choice)
 
bool LogNewCookedChoice (int max_num_choices, bool debug, WERD_CHOICE *word_choice)
 
void PrintBestChoices () const
 
int GetBlobsWidth (int start_blob, int last_blob)
 
int GetBlobsGap (int blob_index)
 
BLOB_CHOICE * GetBlobChoice (int index) const
 
BLOB_CHOICE_LIST * GetBlobChoices (int index) const
 
void ConsumeWordResults (WERD_RES *word)
 
void ReplaceBestChoice (WERD_CHOICE *choice)
 
void RebuildBestState ()
 
void CloneChoppedToRebuild ()
 
void SetupBoxWord ()
 
void SetScriptPositions ()
 
void SetAllScriptPositions (tesseract::ScriptPos position)
 
void FakeClassifyWord (int blob_count, BLOB_CHOICE **choices)
 
void FakeWordFromRatings (PermuterType permuter)
 
void BestChoiceToCorrectText ()
 
bool ConditionalBlobMerge (TessResultCallback2< UNICHAR_ID, UNICHAR_ID, UNICHAR_ID > *class_cb, TessResultCallback2< bool, const TBOX &, const TBOX &> *box_cb)
 
void MergeAdjacentBlobs (int index)
 
UNICHAR_ID BothQuotes (UNICHAR_ID id1, UNICHAR_ID id2)
 
void fix_quotes ()
 
UNICHAR_ID BothHyphens (UNICHAR_ID id1, UNICHAR_ID id2)
 
bool HyphenBoxesOverlap (const TBOX &box1, const TBOX &box2)
 
void fix_hyphens ()
 
UNICHAR_ID BothSpaces (UNICHAR_ID id1, UNICHAR_ID id2)
 
void merge_tess_fails ()
 
void copy_on (WERD_RES *word_res)
 
bool PiecesAllNatural (int start, int count) const
 
- Public Member Functions inherited from ELIST_LINK
 ELIST_LINK ()
 
 ELIST_LINK (const ELIST_LINK &)
 
void operator= (const ELIST_LINK &)
 

Static Public Member Functions

static WERD_RES * deep_copy (const WERD_RES *src)
 

Public Attributes

WERD * word
 
tesseract::BoxWord * bln_boxes
 
ROW * blob_row
 
DENORM denorm
 
const UNICHARSET * uch_set
 
TWERD * chopped_word
 
GenericVector< SEAM * > seam_array
 
GenericVector< int > blob_widths
 
GenericVector< int > blob_gaps
 
MATRIX * ratings
 
WERD_CHOICE * best_choice
 
WERD_CHOICE * raw_choice
 
WERD_CHOICE_LIST best_choices
 
BlamerBundle * blamer_bundle
 
TWERD * rebuild_word
 
tesseract::BoxWord * box_word
 
GenericVector< int > best_state
 
GenericVector< STRING > correct_text
 
tesseract::Tesseract * tesseract
 
WERD_CHOICE * ep_choice
 
REJMAP reject_map
 
BOOL8 tess_failed
 
BOOL8 tess_accepted
 
BOOL8 tess_would_adapt
 
BOOL8 done
 
bool small_caps
 
bool odd_size
 
inT8 italic
 
inT8 bold
 
const FontInfo * fontinfo
 
const FontInfo * fontinfo2
 
inT8 fontinfo_id_count
 
inT8 fontinfo_id2_count
 
BOOL8 guessed_x_ht
 
BOOL8 guessed_caps_ht
 
CRUNCH_MODE unlv_crunch_mode
 
float x_height
 
float caps_height
 
float baseline_shift
 
float space_certainty
 
BOOL8 combination
 
BOOL8 part_of_combo
 
BOOL8 reject_spaces
 

Detailed Description

Definition at line 155 of file pageres.h.

Constructor & Destructor Documentation

◆ WERD_RES() [1/3]

WERD_RES::WERD_RES ( )
inline

Definition at line 322 of file pageres.h.

322  {
323  InitNonPointers();
324  InitPointers();
325  }
void InitPointers()
Definition: pageres.cpp:1117
void InitNonPointers()
Definition: pageres.cpp:1089

◆ WERD_RES() [2/3]

WERD_RES::WERD_RES ( WERD *  the_word)
inline

Definition at line 326 of file pageres.h.

326  {
327  InitNonPointers();
328  InitPointers();
329  word = the_word;
330  }
void InitPointers()
Definition: pageres.cpp:1117
WERD * word
Definition: pageres.h:175
void InitNonPointers()
Definition: pageres.cpp:1089

◆ WERD_RES() [3/3]

WERD_RES::WERD_RES ( const WERD_RES &  source)
inline

Definition at line 333 of file pageres.h.

333  : ELIST_LINK(source) {
334  InitPointers();
335  *this = source; // see operator=
336  }
void InitPointers()
Definition: pageres.cpp:1117
ELIST_LINK()
Definition: elst.h:92

◆ ~WERD_RES()

WERD_RES::~WERD_RES ( )

Definition at line 1085 of file pageres.cpp.

1085  {
1086  Clear();
1087 }
void Clear()
Definition: pageres.cpp:1132

Member Function Documentation

◆ AlternativeChoiceAdjustmentsWorseThan()

bool WERD_RES::AlternativeChoiceAdjustmentsWorseThan ( float  threshold) const

Definition at line 431 of file pageres.cpp.

431  {
432  // The choices are not changed by this iteration.
433  WERD_CHOICE_IT wc_it(const_cast<WERD_CHOICE_LIST*>(&best_choices));
434  for (wc_it.forward(); !wc_it.at_first(); wc_it.forward()) {
435  WERD_CHOICE* choice = wc_it.data();
436  if (choice->adjust_factor() <= threshold)
437  return false;
438  }
439  return true;
440 }
WERD_CHOICE_LIST best_choices
Definition: pageres.h:227
float adjust_factor() const
Definition: ratngs.h:304

◆ AnyLtrCharsInWord()

bool WERD_RES::AnyLtrCharsInWord ( ) const
inline

Definition at line 392 of file pageres.h.

392  {
393  if (uch_set == NULL || best_choice == NULL || best_choice->length() < 1)
394  return false;
395  for (int id = 0; id < best_choice->length(); id++) {
396  int unichar_id = best_choice->unichar_id(id);
397  if (unichar_id < 0 || unichar_id >= uch_set->size())
398  continue; // Ignore illegal chars.
399  UNICHARSET::Direction dir = uch_set->get_direction(unichar_id);
400  if (dir == UNICHARSET::U_LEFT_TO_RIGHT)
401  return true;
402  }
403  return false;
404  }
UNICHAR_ID unichar_id(int index) const
Definition: ratngs.h:313
WERD_CHOICE * best_choice
Definition: pageres.h:219
int length() const
Definition: ratngs.h:301
Direction get_direction(UNICHAR_ID unichar_id) const
Definition: unicharset.h:650
int size() const
Definition: unicharset.h:299
const UNICHARSET * uch_set
Definition: pageres.h:192

◆ AnyRtlCharsInWord()

bool WERD_RES::AnyRtlCharsInWord ( ) const
inline

Definition at line 375 of file pageres.h.

375  {
376  if (uch_set == NULL || best_choice == NULL || best_choice->length() < 1)
377  return false;
378  for (int id = 0; id < best_choice->length(); id++) {
379  int unichar_id = best_choice->unichar_id(id);
380  if (unichar_id < 0 || unichar_id >= uch_set->size())
381  continue; // Ignore illegal chars.
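382  UNICHARSET::Direction dir =
383  uch_set->get_direction(unichar_id);
384  if (dir == UNICHARSET::U_RIGHT_TO_LEFT ||
385  dir == UNICHARSET::U_RIGHT_TO_LEFT_ARABIC ||
386  dir == UNICHARSET::U_ARABIC_NUMBER)
387  return true;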
388  }
389  return false;
390  }
UNICHAR_ID unichar_id(int index) const
Definition: ratngs.h:313
WERD_CHOICE * best_choice
Definition: pageres.h:219
int length() const
Definition: ratngs.h:301
Direction get_direction(UNICHAR_ID unichar_id) const
Definition: unicharset.h:650
int size() const
Definition: unicharset.h:299
const UNICHARSET * uch_set
Definition: pageres.h:192

◆ BestChoiceToCorrectText()

void WERD_RES::BestChoiceToCorrectText ( )

Definition at line 918 of file pageres.cpp.

918  {
920  ASSERT_HOST(best_choice != NULL);
921  for (int i = 0; i < best_choice->length(); ++i) {
922  UNICHAR_ID choice_id = best_choice->unichar_id(i);
923  const char* blob_choice = uch_set->id_to_unichar(choice_id);
924  correct_text.push_back(STRING(blob_choice));
925  }
926 }
UNICHAR_ID unichar_id(int index) const
Definition: ratngs.h:313
int UNICHAR_ID
Definition: unichar.h:33
WERD_CHOICE * best_choice
Definition: pageres.h:219
int length() const
Definition: ratngs.h:301
GenericVector< STRING > correct_text
Definition: pageres.h:259
int push_back(T object)
#define ASSERT_HOST(x)
Definition: errcode.h:84
const char * id_to_unichar(UNICHAR_ID id) const
Definition: unicharset.cpp:266
Definition: strngs.h:45
const UNICHARSET * uch_set
Definition: pageres.h:192

◆ BestUTF8()

const char* WERD_RES::BestUTF8 ( int  blob_index,
bool  in_rtl_context 
) const
inline

Definition at line 345 of file pageres.h.

345  {
346  if (blob_index < 0 || best_choice == NULL ||
347  blob_index >= best_choice->length())
348  return NULL;
349  UNICHAR_ID id = best_choice->unichar_id(blob_index);
350  if (id < 0 || id >= uch_set->size() || id == INVALID_UNICHAR_ID)
351  return NULL;
352  UNICHAR_ID mirrored = uch_set->get_mirror(id);
353  if (in_rtl_context && mirrored > 0 && mirrored != INVALID_UNICHAR_ID)
354  id = mirrored;
355  return uch_set->id_to_unichar_ext(id);
356  }
UNICHAR_ID unichar_id(int index) const
Definition: ratngs.h:313
int UNICHAR_ID
Definition: unichar.h:33
WERD_CHOICE * best_choice
Definition: pageres.h:219
int length() const
Definition: ratngs.h:301
const char * id_to_unichar_ext(UNICHAR_ID id) const
Definition: unicharset.cpp:274
UNICHAR_ID get_mirror(UNICHAR_ID unichar_id) const
Definition: unicharset.h:657
int size() const
Definition: unicharset.h:299
const UNICHARSET * uch_set
Definition: pageres.h:192

◆ BothHyphens()

UNICHAR_ID WERD_RES::BothHyphens ( UNICHAR_ID  id1,
UNICHAR_ID  id2 
)

Definition at line 1025 of file pageres.cpp.

1025  {
1026  const char *ch = uch_set->id_to_unichar(id1);
1027  const char *next_ch = uch_set->id_to_unichar(id2);
1028  if (strlen(ch) == 1 && strlen(next_ch) == 1 &&
1029  (*ch == '-' || *ch == '~') && (*next_ch == '-' || *next_ch == '~'))
1030  return uch_set->unichar_to_id("-");
1031  return INVALID_UNICHAR_ID;
1032 }
const char * id_to_unichar(UNICHAR_ID id) const
Definition: unicharset.cpp:266
const UNICHARSET * uch_set
Definition: pageres.h:192
UNICHAR_ID unichar_to_id(const char *const unichar_repr) const
Definition: unicharset.cpp:194

◆ BothQuotes()

UNICHAR_ID WERD_RES::BothQuotes ( UNICHAR_ID  id1,
UNICHAR_ID  id2 
)

Definition at line 1003 of file pageres.cpp.

1003  {
1004  const char *ch = uch_set->id_to_unichar(id1);
1005  const char *next_ch = uch_set->id_to_unichar(id2);
1006  if (is_simple_quote(ch, strlen(ch)) &&
1007  is_simple_quote(next_ch, strlen(next_ch)))
1008  return uch_set->unichar_to_id("\"");
1009  return INVALID_UNICHAR_ID;
1010 }
const char * id_to_unichar(UNICHAR_ID id) const
Definition: unicharset.cpp:266
const UNICHARSET * uch_set
Definition: pageres.h:192
UNICHAR_ID unichar_to_id(const char *const unichar_repr) const
Definition: unicharset.cpp:194

◆ BothSpaces()

UNICHAR_ID WERD_RES::BothSpaces ( UNICHAR_ID  id1,
UNICHAR_ID  id2 
)

Definition at line 1054 of file pageres.cpp.

1054  {
1055  if (id1 == id2 && id1 == uch_set->unichar_to_id(" "))
1056  return id1;
1057  else
1058  return INVALID_UNICHAR_ID;
1059 }
const UNICHARSET * uch_set
Definition: pageres.h:192
UNICHAR_ID unichar_to_id(const char *const unichar_repr) const
Definition: unicharset.cpp:194

◆ Clear()

void WERD_RES::Clear ( )

Definition at line 1132 of file pageres.cpp.

1132  {
1133  if (word != NULL && combination) {
1134  delete word;
1135  }
1136  word = NULL;
1137  delete blamer_bundle;
1138  blamer_bundle = NULL;
1139  ClearResults();
1140 }
BlamerBundle * blamer_bundle
Definition: pageres.h:230
void ClearResults()
Definition: pageres.cpp:1142
BOOL8 combination
Definition: pageres.h:318
WERD * word
Definition: pageres.h:175

◆ ClearRatings()

void WERD_RES::ClearRatings ( )

Definition at line 1187 of file pageres.cpp.

1187  {
1188  if (ratings != NULL) {
1189  ratings->delete_matrix_pointers();
1190  delete ratings;
1191  ratings = NULL;
1192  }
1193 }
MATRIX * ratings
Definition: pageres.h:215
void delete_matrix_pointers()
Definition: matrix.h:447

◆ ClearResults()

void WERD_RES::ClearResults ( )

Definition at line 1142 of file pageres.cpp.

1142  {
1143  done = false;
1144  fontinfo = NULL;
1145  fontinfo2 = NULL;
1146  fontinfo_id_count = 0;
1147  fontinfo_id2_count = 0;
1148  if (bln_boxes != NULL) {
1149  delete bln_boxes;
1150  bln_boxes = NULL;
1151  }
1152  blob_row = NULL;
1153  if (chopped_word != NULL) {
1154  delete chopped_word;
1155  chopped_word = NULL;
1156  }
1157  if (rebuild_word != NULL) {
1158  delete rebuild_word;
1159  rebuild_word = NULL;
1160  }
1161  if (box_word != NULL) {
1162  delete box_word;
1163  box_word = NULL;
1164  }
1165  best_state.clear();
1166  correct_text.clear();
1167  seam_array.delete_data_pointers();
1168  seam_array.clear();
1169  blob_widths.clear();
1170  blob_gaps.clear();
1171  ClearRatings();
1172  ClearWordChoices();
1173  if (blamer_bundle != NULL) blamer_bundle->ClearResults();
1174 }
GenericVector< int > best_state
Definition: pageres.h:255
const FontInfo * fontinfo2
Definition: pageres.h:289
BlamerBundle * blamer_bundle
Definition: pageres.h:230
void ClearRatings()
Definition: pageres.cpp:1187
GenericVector< STRING > correct_text
Definition: pageres.h:259
TWERD * rebuild_word
Definition: pageres.h:244
GenericVector< int > blob_widths
Definition: pageres.h:205
tesseract::BoxWord * box_word
Definition: pageres.h:250
void ClearResults()
Definition: blamer.h:173
const FontInfo * fontinfo
Definition: pageres.h:288
void ClearWordChoices()
Definition: pageres.cpp:1175
inT8 fontinfo_id2_count
Definition: pageres.h:291
tesseract::BoxWord * bln_boxes
Definition: pageres.h:184
inT8 fontinfo_id_count
Definition: pageres.h:290
GenericVector< int > blob_gaps
Definition: pageres.h:208
void delete_data_pointers()
TWERD * chopped_word
Definition: pageres.h:201
ROW * blob_row
Definition: pageres.h:186
BOOL8 done
Definition: pageres.h:282
GenericVector< SEAM * > seam_array
Definition: pageres.h:203

◆ ClearWordChoices()

void WERD_RES::ClearWordChoices ( )

Definition at line 1175 of file pageres.cpp.

1175  {
1176  best_choice = NULL;
1177  if (raw_choice != NULL) {
1178  delete raw_choice;
1179  raw_choice = NULL;
1180  }
1181  best_choices.clear();
1182  if (ep_choice != NULL) {
1183  delete ep_choice;
1184  ep_choice = NULL;
1185  }
1186 }
WERD_CHOICE * best_choice
Definition: pageres.h:219
WERD_CHOICE_LIST best_choices
Definition: pageres.h:227
WERD_CHOICE * raw_choice
Definition: pageres.h:224
WERD_CHOICE * ep_choice
Definition: pageres.h:270

◆ CloneChoppedToRebuild()

void WERD_RES::CloneChoppedToRebuild ( )

Definition at line 828 of file pageres.cpp.

828  {
829  if (rebuild_word != NULL)
830  delete rebuild_word;
831  rebuild_word = new TWERD(*chopped_word);
832  SetupBoxWord();
833  int word_len = box_word->length();
834  best_state.reserve(word_len);
835  correct_text.reserve(word_len);
836  for (int i = 0; i < word_len; ++i) {
837  best_state.push_back(1);
838  correct_text.push_back(STRING(""));
839  }
840 }
void reserve(int size)
GenericVector< int > best_state
Definition: pageres.h:255
GenericVector< STRING > correct_text
Definition: pageres.h:259
int push_back(T object)
TWERD * rebuild_word
Definition: pageres.h:244
tesseract::BoxWord * box_word
Definition: pageres.h:250
Definition: blobs.h:395
Definition: strngs.h:45
void SetupBoxWord()
Definition: pageres.cpp:843
TWERD * chopped_word
Definition: pageres.h:201
int length() const
Definition: boxword.h:85

◆ ComputeAdaptionThresholds()

void WERD_RES::ComputeAdaptionThresholds ( float  certainty_scale,
float  min_rating,
float  max_rating,
float  rating_margin,
float *  thresholds 
)

Definition at line 553 of file pageres.cpp.

557  {
558  int chunk = 0;
559  int end_chunk = best_choice->state(0);
560  int end_raw_chunk = raw_choice->state(0);
561  int raw_blob = 0;
562  for (int i = 0; i < best_choice->length(); i++, thresholds++) {
563  float avg_rating = 0.0f;
564  int num_error_chunks = 0;
565 
566  // For each chunk in best choice blob i, count non-matching raw results.
567  while (chunk < end_chunk) {
568  if (chunk >= end_raw_chunk) {
569  ++raw_blob;
570  end_raw_chunk += raw_choice->state(raw_blob);
571  }
572  if (best_choice->unichar_id(i) !=
573  raw_choice->unichar_id(raw_blob)) {
574  avg_rating += raw_choice->certainty(raw_blob);
575  ++num_error_chunks;
576  }
577  ++chunk;
578  }
579 
580  if (num_error_chunks > 0) {
581  avg_rating /= num_error_chunks;
582  *thresholds = (avg_rating / -certainty_scale) * (1.0 - rating_margin);
583  } else {
584  *thresholds = max_rating;
585  }
586 
587  if (*thresholds > max_rating)
588  *thresholds = max_rating;
589  if (*thresholds < min_rating)
590  *thresholds = min_rating;
591  }
592 }
UNICHAR_ID unichar_id(int index) const
Definition: ratngs.h:313
WERD_CHOICE * best_choice
Definition: pageres.h:219
int length() const
Definition: ratngs.h:301
WERD_CHOICE * raw_choice
Definition: pageres.h:224
float certainty() const
Definition: ratngs.h:328
int state(int index) const
Definition: ratngs.h:317

◆ ConditionalBlobMerge()

bool WERD_RES::ConditionalBlobMerge ( TessResultCallback2< UNICHAR_ID, UNICHAR_ID, UNICHAR_ID > *  class_cb,
TessResultCallback2< bool, const TBOX &, const TBOX &> *  box_cb 
)

Definition at line 933 of file pageres.cpp.

935  {
936  ASSERT_HOST(best_choice->length() == 0 || ratings != NULL);
937  bool modified = false;
938  for (int i = 0; i + 1 < best_choice->length(); ++i) {
939  UNICHAR_ID new_id = class_cb->Run(best_choice->unichar_id(i),
940  best_choice->unichar_id(i+1));
941  if (new_id != INVALID_UNICHAR_ID &&
942  (box_cb == NULL || box_cb->Run(box_word->BlobBox(i),
943  box_word->BlobBox(i + 1)))) {
944  // Raw choice should not be fixed.
945  best_choice->set_unichar_id(new_id, i);
946  modified = true;
947  MergeAdjacentBlobs(i);
948  const MATRIX_COORD& coord = best_choice->MatrixCoord(i);
949  if (!coord.Valid(*ratings)) {
950  ratings->IncreaseBandSize(coord.row + 1 - coord.col);
951  }
952  BLOB_CHOICE_LIST* blob_choices = GetBlobChoices(i);
953  if (FindMatchingChoice(new_id, blob_choices) == NULL) {
954  // Insert a fake result.
955  BLOB_CHOICE* blob_choice = new BLOB_CHOICE;
956  blob_choice->set_unichar_id(new_id);
957  BLOB_CHOICE_IT bc_it(blob_choices);
958  bc_it.add_before_then_move(blob_choice);
959  }
960  }
961  }
962  delete class_cb;
963  delete box_cb;
964  return modified;
965 }
const TBOX & BlobBox(int index) const
Definition: boxword.h:86
UNICHAR_ID unichar_id(int index) const
Definition: ratngs.h:313
int UNICHAR_ID
Definition: unichar.h:33
WERD_CHOICE * best_choice
Definition: pageres.h:219
int length() const
Definition: ratngs.h:301
virtual R Run(A1, A2)=0
MATRIX_COORD MatrixCoord(int index) const
Definition: ratngs.cpp:280
void MergeAdjacentBlobs(int index)
Definition: pageres.cpp:969
tesseract::BoxWord * box_word
Definition: pageres.h:250
#define ASSERT_HOST(x)
Definition: errcode.h:84
MATRIX * ratings
Definition: pageres.h:215
void set_unichar_id(UNICHAR_ID newunichar_id)
Definition: ratngs.h:144
void set_unichar_id(UNICHAR_ID unichar_id, int index)
Definition: ratngs.h:357
bool Valid(const MATRIX &m) const
Definition: matrix.h:601
BLOB_CHOICE_LIST * GetBlobChoices(int index) const
Definition: pageres.cpp:751
BLOB_CHOICE * FindMatchingChoice(UNICHAR_ID char_id, BLOB_CHOICE_LIST *bc_list)
Definition: ratngs.cpp:160
void IncreaseBandSize(int bandwidth)
Definition: matrix.cpp:49

◆ ConsumeWordResults()

void WERD_RES::ConsumeWordResults ( WERD_RES *  word)

Definition at line 757 of file pageres.cpp.

757  {
758  denorm = word->denorm;
759  blob_row = word->blob_row;
760  MovePointerData(&chopped_word, &word->chopped_word);
761  MovePointerData(&rebuild_word, &word->rebuild_word);
762  MovePointerData(&box_word, &word->box_word);
763  seam_array.delete_data_pointers();
764  seam_array = word->seam_array;
765  word->seam_array.clear();
766  best_state.move(&word->best_state);
767  correct_text.move(&word->correct_text);
768  blob_widths.move(&word->blob_widths);
769  blob_gaps.move(&word->blob_gaps);
770  if (ratings != NULL) ratings->delete_matrix_pointers();
771  MovePointerData(&ratings, &word->ratings);
772  best_choice = word->best_choice;
773  MovePointerData(&raw_choice, &word->raw_choice);
774  best_choices.clear();
775  WERD_CHOICE_IT wc_it(&best_choices);
776  wc_it.add_list_after(&word->best_choices);
777  reject_map = word->reject_map;
778  if (word->blamer_bundle != NULL) {
779  assert(blamer_bundle != NULL);
780  blamer_bundle->CopyResults(*(word->blamer_bundle));
781  }
782  CopySimpleFields(*word);
783 }
void CopyResults(const BlamerBundle &other)
Definition: blamer.h:194
GenericVector< int > best_state
Definition: pageres.h:255
WERD_CHOICE * best_choice
Definition: pageres.h:219
BlamerBundle * blamer_bundle
Definition: pageres.h:230
GenericVector< STRING > correct_text
Definition: pageres.h:259
TWERD * rebuild_word
Definition: pageres.h:244
void move(GenericVector< T > *from)
GenericVector< int > blob_widths
Definition: pageres.h:205
tesseract::BoxWord * box_word
Definition: pageres.h:250
MATRIX * ratings
Definition: pageres.h:215
WERD_CHOICE_LIST best_choices
Definition: pageres.h:227
WERD_CHOICE * raw_choice
Definition: pageres.h:224
DENORM denorm
Definition: pageres.h:190
void delete_matrix_pointers()
Definition: matrix.h:447
GenericVector< int > blob_gaps
Definition: pageres.h:208
void delete_data_pointers()
TWERD * chopped_word
Definition: pageres.h:201
REJMAP reject_map
Definition: pageres.h:271
ROW * blob_row
Definition: pageres.h:186
void CopySimpleFields(const WERD_RES &source)
Definition: pageres.cpp:241
GenericVector< SEAM * > seam_array
Definition: pageres.h:203

◆ copy_on()

void WERD_RES::copy_on ( WERD_RES *  word_res)
inline

Definition at line 644 of file pageres.h.

644  { //from this word
645  word->set_flag(W_BOL, word->flag(W_BOL) || word_res->word->flag(W_BOL));
646  word->set_flag(W_EOL, word->flag(W_EOL) || word_res->word->flag(W_EOL));
647  word->copy_on(word_res->word);
648  }
Definition: werd.h:36
void set_flag(WERD_FLAGS mask, BOOL8 value)
Definition: werd.h:129
BOOL8 flag(WERD_FLAGS mask) const
Definition: werd.h:128
Definition: werd.h:35
WERD * word
Definition: pageres.h:175
void copy_on(WERD *other)
Definition: werd.cpp:234

◆ CopySimpleFields()

void WERD_RES::CopySimpleFields ( const WERD_RES &  source)

Definition at line 241 of file pageres.cpp.

241  {
242  tess_failed = source.tess_failed;
243  tess_accepted = source.tess_accepted;
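244  tess_would_adapt = source.tess_would_adapt;
245  done = source.done;
246  unlv_crunch_mode = source.unlv_crunch_mode;
247  small_caps = source.small_caps;
248  odd_size = source.odd_size;
249  italic = source.italic;
250  bold = source.bold;
251  fontinfo = source.fontinfo;
252  fontinfo2 = source.fontinfo2;
253  fontinfo_id_count = source.fontinfo_id_count;
254  fontinfo_id2_count = source.fontinfo_id2_count;
255  x_height = source.x_height;
256  caps_height = source.caps_height;
257  baseline_shift = source.baseline_shift;
258  guessed_x_ht = source.guessed_x_ht;
259  guessed_caps_ht = source.guessed_caps_ht;
260  reject_spaces = source.reject_spaces;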
261  uch_set = source.uch_set;
262  tesseract = source.tesseract;
263 }
float baseline_shift
Definition: pageres.h:297
BOOL8 tess_failed
Definition: pageres.h:272
const FontInfo * fontinfo2
Definition: pageres.h:289
inT8 bold
Definition: pageres.h:286
BOOL8 guessed_caps_ht
Definition: pageres.h:293
inT8 italic
Definition: pageres.h:285
tesseract::Tesseract * tesseract
Definition: pageres.h:266
BOOL8 guessed_x_ht
Definition: pageres.h:292
BOOL8 reject_spaces
Definition: pageres.h:320
float caps_height
Definition: pageres.h:296
const FontInfo * fontinfo
Definition: pageres.h:288
BOOL8 tess_would_adapt
Definition: pageres.h:281
BOOL8 tess_accepted
Definition: pageres.h:280
bool odd_size
Definition: pageres.h:284
inT8 fontinfo_id2_count
Definition: pageres.h:291
CRUNCH_MODE unlv_crunch_mode
Definition: pageres.h:294
inT8 fontinfo_id_count
Definition: pageres.h:290
bool small_caps
Definition: pageres.h:283
const UNICHARSET * uch_set
Definition: pageres.h:192
float x_height
Definition: pageres.h:295
BOOL8 done
Definition: pageres.h:282

◆ DebugTopChoice()

void WERD_RES::DebugTopChoice ( const char *  msg) const

Definition at line 491 of file pageres.cpp.

491  {
492  tprintf("Best choice: accepted=%d, adaptable=%d, done=%d : ",
493  tess_accepted, tess_would_adapt, done);
494  if (best_choice == NULL)
495  tprintf("<Null choice>\n");
496  else
497  best_choice->print(msg);
498 }
void print() const
Definition: ratngs.h:578
WERD_CHOICE * best_choice
Definition: pageres.h:219
#define tprintf(...)
Definition: tprintf.h:31
BOOL8 tess_would_adapt
Definition: pageres.h:281
BOOL8 tess_accepted
Definition: pageres.h:280
BOOL8 done
Definition: pageres.h:282

◆ DebugWordChoices()

void WERD_RES::DebugWordChoices ( bool  debug,
const char *  word_to_debug 
)

Definition at line 472 of file pageres.cpp.

472  {
473  if (debug ||
474  (word_to_debug != NULL && *word_to_debug != '\0' && best_choice != NULL &&
475  best_choice->unichar_string() == STRING(word_to_debug))) {
476  if (raw_choice != NULL)
477  raw_choice->print("\nBest Raw Choice");
478 
479  WERD_CHOICE_IT it(&best_choices);
480  int index = 0;
481  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward(), ++index) {
482  WERD_CHOICE* choice = it.data();
483  STRING label;
484  label.add_str_int("\nCooked Choice #", index);
485  choice->print(label.string());
486  }
487  }
488 }
void add_str_int(const char *str, int number)
Definition: strngs.cpp:381
void print() const
Definition: ratngs.h:578
WERD_CHOICE * best_choice
Definition: pageres.h:219
const char * string() const
Definition: strngs.cpp:198
Definition: strngs.h:45
WERD_CHOICE_LIST best_choices
Definition: pageres.h:227
WERD_CHOICE * raw_choice
Definition: pageres.h:224
const STRING & unichar_string() const
Definition: ratngs.h:539

◆ deep_copy()

static WERD_RES* WERD_RES::deep_copy ( const WERD_RES *  src)
inline static

Definition at line 633 of file pageres.h.

633  {
634  WERD_RES* result = new WERD_RES(*src);
635  // That didn't copy the ratings, but we want a copy if there is one to
636  // begin with.
637  if (src->ratings != NULL)
638  result->ratings = src->ratings->DeepCopy();
639  return result;
640  }
MATRIX * ratings
Definition: pageres.h:215
WERD_RES()
Definition: pageres.h:322
MATRIX * DeepCopy() const
Definition: matrix.cpp:94

◆ FakeClassifyWord()

void WERD_RES::FakeClassifyWord ( int  blob_count,
BLOB_CHOICE **  choices 
)

Definition at line 872 of file pageres.cpp.

872  {
873  // Setup the WERD_RES.
874  ASSERT_HOST(box_word != NULL);
875  ASSERT_HOST(blob_count == box_word->length());
876  ClearWordChoices();
877  ClearRatings();
878  ratings = new MATRIX(blob_count, 1);
879  for (int c = 0; c < blob_count; ++c) {
880  BLOB_CHOICE_LIST* choice_list = new BLOB_CHOICE_LIST;
881  BLOB_CHOICE_IT choice_it(choice_list);
882  choice_it.add_after_then_move(choices[c]);
883  ratings->put(c, c, choice_list);
884  }
885  FakeWordFromRatings(TOP_CHOICE_PERM);
886  reject_map.initialise(blob_count);
887  best_state.init_to_size(blob_count, 1);
888  done = true;
889 }
void init_to_size(int size, T t)
GenericVector< int > best_state
Definition: pageres.h:255
void ClearRatings()
Definition: pageres.cpp:1187
tesseract::BoxWord * box_word
Definition: pageres.h:250
#define ASSERT_HOST(x)
Definition: errcode.h:84
MATRIX * ratings
Definition: pageres.h:215
void FakeWordFromRatings(PermuterType permuter)
Definition: pageres.cpp:893
void ClearWordChoices()
Definition: pageres.cpp:1175
void put(ICOORD pos, const T &thing)
Definition: matrix.h:215
Definition: matrix.h:563
void initialise(inT16 length)
Definition: rejctmap.cpp:318
REJMAP reject_map
Definition: pageres.h:271
BOOL8 done
Definition: pageres.h:282
int length() const
Definition: boxword.h:85

◆ FakeWordFromRatings()

void WERD_RES::FakeWordFromRatings ( PermuterType  permuter)

Definition at line 893 of file pageres.cpp.

893  {
894  int num_blobs = ratings->dimension();
895  WERD_CHOICE* word_choice = new WERD_CHOICE(uch_set, num_blobs);
896  word_choice->set_permuter(permuter);
897  for (int b = 0; b < num_blobs; ++b) {
898  UNICHAR_ID unichar_id = UNICHAR_SPACE;
899  float rating = MAX_INT32;
900  float certainty = -MAX_INT32;
901  BLOB_CHOICE_LIST* choices = ratings->get(b, b);
902  if (choices != NULL && !choices->empty()) {
903  BLOB_CHOICE_IT bc_it(choices);
904  BLOB_CHOICE* choice = bc_it.data();
905  unichar_id = choice->unichar_id();
906  rating = choice->rating();
907  certainty = choice->certainty();
908  }
909  word_choice->append_unichar_id_space_allocated(unichar_id, 1, rating,
910  certainty);
911  }
912  LogNewRawChoice(word_choice);
913  // Ownership of word_choice taken by word here.
914  LogNewCookedChoice(1, false, word_choice);
915 }
int UNICHAR_ID
Definition: unichar.h:33
bool LogNewCookedChoice(int max_num_choices, bool debug, WERD_CHOICE *word_choice)
Definition: pageres.cpp:612
#define MAX_INT32
Definition: host.h:62
T get(ICOORD pos) const
Definition: matrix.h:223
float rating() const
Definition: ratngs.h:79
float certainty() const
Definition: ratngs.h:82
MATRIX * ratings
Definition: pageres.h:215
int dimension() const
Definition: matrix.h:521
void append_unichar_id_space_allocated(UNICHAR_ID unichar_id, int blob_count, float rating, float certainty)
Definition: ratngs.h:450
UNICHAR_ID unichar_id() const
Definition: ratngs.h:76
bool LogNewRawChoice(WERD_CHOICE *word_choice)
Definition: pageres.cpp:596
const UNICHARSET * uch_set
Definition: pageres.h:192
void set_permuter(uinT8 perm)
Definition: ratngs.h:373

◆ FilterWordChoices()

void WERD_RES::FilterWordChoices ( int  debug_level)

Definition at line 505 of file pageres.cpp.

505  {
506  if (best_choice == NULL || best_choices.singleton())
507  return;
508 
509  if (debug_level >= 2)
510  best_choice->print("\nFiltering against best choice");
511  WERD_CHOICE_IT it(&best_choices);
512  int index = 0;
513  for (it.forward(); !it.at_first(); it.forward(), ++index) {
514  WERD_CHOICE* choice = it.data();
515  float threshold = StopperAmbigThreshold(best_choice->adjust_factor(),
516  choice->adjust_factor());
517  // i, j index the blob choice in choice, best_choice.
518  // chunk is an index into the chopped_word blobs (AKA chunks).
519  // Since the two words may use different segmentations of the chunks, we
520  // iterate over the chunks to find out whether a comparable blob
521  // classification is much worse than the best result.
522  int i = 0, j = 0, chunk = 0;
523  // Each iteration of the while deals with 1 chunk. On entry choice_chunk
524  // and best_chunk are the indices of the first chunk in the NEXT blob,
525  // i.e. we don't have to increment i, j while chunk < choice_chunk and
526  // best_chunk respectively.
527  int choice_chunk = choice->state(0), best_chunk = best_choice->state(0);
528  while (i < choice->length() && j < best_choice->length()) {
529  if (choice->unichar_id(i) != best_choice->unichar_id(j) &&
530  choice->certainty(i) - best_choice->certainty(j) < threshold) {
531  if (debug_level >= 2) {
532  choice->print("WorstCertaintyDiffWorseThan");
533  tprintf(
534  "i %d j %d Choice->Blob[i].Certainty %.4g"
535  " WorstOtherChoiceCertainty %g Threshold %g\n",
536  i, j, choice->certainty(i), best_choice->certainty(j), threshold);
537  tprintf("Discarding bad choice #%d\n", index);
538  }
539  delete it.extract();
540  break;
541  }
542  ++chunk;
543  // If needed, advance choice_chunk to keep up with chunk.
544  while (choice_chunk < chunk && ++i < choice->length())
545  choice_chunk += choice->state(i);
546  // If needed, advance best_chunk to keep up with chunk.
547  while (best_chunk < chunk && ++j < best_choice->length())
548  best_chunk += best_choice->state(j);
549  }
550  }
551 }
UNICHAR_ID unichar_id(int index) const
Definition: ratngs.h:313
void print() const
Definition: ratngs.h:578
WERD_CHOICE * best_choice
Definition: pageres.h:219
#define tprintf(...)
Definition: tprintf.h:31
WERD_CHOICE_LIST best_choices
Definition: pageres.h:227
float certainty() const
Definition: ratngs.h:328
int state(int index) const
Definition: ratngs.h:317
float adjust_factor() const
Definition: ratngs.h:304

◆ fix_hyphens()

void WERD_RES::fix_hyphens ( )

Definition at line 1042 of file pageres.cpp.

1042  {
1043  if (!uch_set->contains_unichar("-") ||
1045  return; // Don't create it if it is disallowed.
1046 
1050 }
_ConstTessMemberResultCallback_0_0< false, R, T1 >::base * NewPermanentTessCallback(const T1 *obj, R(T2::*member)() const)
Definition: tesscallback.h:116
bool contains_unichar(const char *const unichar_repr) const
Definition: unicharset.cpp:644
UNICHAR_ID BothHyphens(UNICHAR_ID id1, UNICHAR_ID id2)
Definition: pageres.cpp:1025
bool ConditionalBlobMerge(TessResultCallback2< UNICHAR_ID, UNICHAR_ID, UNICHAR_ID > *class_cb, TessResultCallback2< bool, const TBOX &, const TBOX &> *box_cb)
Definition: pageres.cpp:933
bool HyphenBoxesOverlap(const TBOX &box1, const TBOX &box2)
Definition: pageres.cpp:1036
bool get_enabled(UNICHAR_ID unichar_id) const
Definition: unicharset.h:838
const UNICHARSET * uch_set
Definition: pageres.h:192
UNICHAR_ID unichar_to_id(const char *const unichar_repr) const
Definition: unicharset.cpp:194

◆ fix_quotes()

void WERD_RES::fix_quotes ( )

Definition at line 1013 of file pageres.cpp.

1013  {
1014  if (!uch_set->contains_unichar("\"") ||
1016  return; // Don't create it if it is disallowed.
1017 
1020  NULL);
1021 }
_ConstTessMemberResultCallback_0_0< false, R, T1 >::base * NewPermanentTessCallback(const T1 *obj, R(T2::*member)() const)
Definition: tesscallback.h:116
bool contains_unichar(const char *const unichar_repr) const
Definition: unicharset.cpp:644
bool ConditionalBlobMerge(TessResultCallback2< UNICHAR_ID, UNICHAR_ID, UNICHAR_ID > *class_cb, TessResultCallback2< bool, const TBOX &, const TBOX &> *box_cb)
Definition: pageres.cpp:933
UNICHAR_ID BothQuotes(UNICHAR_ID id1, UNICHAR_ID id2)
Definition: pageres.cpp:1003
bool get_enabled(UNICHAR_ID unichar_id) const
Definition: unicharset.h:838
const UNICHARSET * uch_set
Definition: pageres.h:192
UNICHAR_ID unichar_to_id(const char *const unichar_repr) const
Definition: unicharset.cpp:194

◆ GetBlobChoice()

BLOB_CHOICE * WERD_RES::GetBlobChoice ( int  index) const

Definition at line 742 of file pageres.cpp.

742  {
743  if (index < 0 || index >= best_choice->length()) return NULL;
744  BLOB_CHOICE_LIST* choices = GetBlobChoices(index);
745  return FindMatchingChoice(best_choice->unichar_id(index), choices);
746 }
UNICHAR_ID unichar_id(int index) const
Definition: ratngs.h:313
WERD_CHOICE * best_choice
Definition: pageres.h:219
int length() const
Definition: ratngs.h:301
BLOB_CHOICE_LIST * GetBlobChoices(int index) const
Definition: pageres.cpp:751
BLOB_CHOICE * FindMatchingChoice(UNICHAR_ID char_id, BLOB_CHOICE_LIST *bc_list)
Definition: ratngs.cpp:160

◆ GetBlobChoices()

BLOB_CHOICE_LIST * WERD_RES::GetBlobChoices ( int  index) const

Definition at line 751 of file pageres.cpp.

751  {
752  return best_choice->blob_choices(index, ratings);
753 }
BLOB_CHOICE_LIST * blob_choices(int index, MATRIX *ratings) const
Definition: ratngs.cpp:268
WERD_CHOICE * best_choice
Definition: pageres.h:219
MATRIX * ratings
Definition: pageres.h:215

◆ GetBlobsGap()

int WERD_RES::GetBlobsGap ( int  blob_index)

Definition at line 732 of file pageres.cpp.

732  {
733  if (blob_index < 0 || blob_index >= blob_gaps.size())
734  return 0;
735  return blob_gaps[blob_index];
736 }
int size() const
Definition: genericvector.h:72
GenericVector< int > blob_gaps
Definition: pageres.h:208

◆ GetBlobsWidth()

int WERD_RES::GetBlobsWidth ( int  start_blob,
int  last_blob 
)

Definition at line 722 of file pageres.cpp.

722  {
723  int result = 0;
724  for (int b = start_blob; b <= last_blob; ++b) {
725  result += blob_widths[b];
726  if (b < last_blob)
727  result += blob_gaps[b];
728  }
729  return result;
730 }
GenericVector< int > blob_widths
Definition: pageres.h:205
GenericVector< int > blob_gaps
Definition: pageres.h:208

◆ HyphenBoxesOverlap()

bool WERD_RES::HyphenBoxesOverlap ( const TBOX & box1,
const TBOX & box2 
)

Definition at line 1036 of file pageres.cpp.

1036  {
1037  return box1.right() >= box2.left();
1038 }
inT16 left() const
Definition: rect.h:68
inT16 right() const
Definition: rect.h:75

◆ InitForRetryRecognition()

void WERD_RES::InitForRetryRecognition ( const WERD_RES & source)

Definition at line 269 of file pageres.cpp.

269  {
270  word = source.word;
271  CopySimpleFields(source);
272  if (source.blamer_bundle != NULL) {
273  blamer_bundle = new BlamerBundle();
275  }
276 }
void CopyTruth(const BlamerBundle &other)
Definition: blamer.h:187
BlamerBundle * blamer_bundle
Definition: pageres.h:230
WERD * word
Definition: pageres.h:175
void CopySimpleFields(const WERD_RES &source)
Definition: pageres.cpp:241

◆ InitNonPointers()

void WERD_RES::InitNonPointers ( )

Definition at line 1089 of file pageres.cpp.

1089  {
1090  tess_failed = FALSE;
1091  tess_accepted = FALSE;
1093  done = FALSE;
1095  small_caps = false;
1096  odd_size = false;
1097  italic = FALSE;
1098  bold = FALSE;
1099  // The fontinfos and tesseract count as non-pointers as they point to
1100  // data owned elsewhere.
1101  fontinfo = NULL;
1102  fontinfo2 = NULL;
1103  tesseract = NULL;
1104  fontinfo_id_count = 0;
1105  fontinfo_id2_count = 0;
1106  x_height = 0.0;
1107  caps_height = 0.0;
1108  baseline_shift = 0.0f;
1109  space_certainty = 0.0f;
1110  guessed_x_ht = TRUE;
1112  combination = FALSE;
1113  part_of_combo = FALSE;
1114  reject_spaces = FALSE;
1115 }
#define TRUE
Definition: capi.h:45
float baseline_shift
Definition: pageres.h:297
BOOL8 tess_failed
Definition: pageres.h:272
const FontInfo * fontinfo2
Definition: pageres.h:289
inT8 bold
Definition: pageres.h:286
BOOL8 guessed_caps_ht
Definition: pageres.h:293
inT8 italic
Definition: pageres.h:285
BOOL8 guessed_x_ht
Definition: pageres.h:292
BOOL8 reject_spaces
Definition: pageres.h:320
BOOL8 combination
Definition: pageres.h:318
float caps_height
Definition: pageres.h:296
#define FALSE
Definition: capi.h:46
const FontInfo * fontinfo
Definition: pageres.h:288
BOOL8 tess_would_adapt
Definition: pageres.h:281
BOOL8 tess_accepted
Definition: pageres.h:280
bool odd_size
Definition: pageres.h:284
BOOL8 part_of_combo
Definition: pageres.h:319
inT8 fontinfo_id2_count
Definition: pageres.h:291
CRUNCH_MODE unlv_crunch_mode
Definition: pageres.h:294
inT8 fontinfo_id_count
Definition: pageres.h:290
bool small_caps
Definition: pageres.h:283
float space_certainty
Definition: pageres.h:300
float x_height
Definition: pageres.h:295
BOOL8 done
Definition: pageres.h:282

◆ InitPointers()

void WERD_RES::InitPointers ( )

Definition at line 1117 of file pageres.cpp.

1117  {
1118  word = NULL;
1119  bln_boxes = NULL;
1120  blob_row = NULL;
1121  uch_set = NULL;
1122  chopped_word = NULL;
1123  rebuild_word = NULL;
1124  box_word = NULL;
1125  ratings = NULL;
1126  best_choice = NULL;
1127  raw_choice = NULL;
1128  ep_choice = NULL;
1129  blamer_bundle = NULL;
1130 }
WERD_CHOICE * best_choice
Definition: pageres.h:219
BlamerBundle * blamer_bundle
Definition: pageres.h:230
TWERD * rebuild_word
Definition: pageres.h:244
tesseract::BoxWord * box_word
Definition: pageres.h:250
MATRIX * ratings
Definition: pageres.h:215
WERD_CHOICE * raw_choice
Definition: pageres.h:224
tesseract::BoxWord * bln_boxes
Definition: pageres.h:184
WERD * word
Definition: pageres.h:175
WERD_CHOICE * ep_choice
Definition: pageres.h:270
const UNICHARSET * uch_set
Definition: pageres.h:192
TWERD * chopped_word
Definition: pageres.h:201
ROW * blob_row
Definition: pageres.h:186

◆ InsertSeam()

void WERD_RES::InsertSeam ( int  blob_number,
SEAM * seam 
)

Definition at line 410 of file pageres.cpp.

410  {
411  // Insert the seam into the SEAMS array.
412  seam->PrepareToInsertSeam(seam_array, chopped_word->blobs, blob_number, true);
413  seam_array.insert(seam, blob_number);
414  if (ratings != NULL) {
415  // Expand the ratings matrix.
416  ratings = ratings->ConsumeAndMakeBigger(blob_number);
417  // Fix all the segmentation states.
418  if (raw_choice != NULL)
419  raw_choice->UpdateStateForSplit(blob_number);
420  WERD_CHOICE_IT wc_it(&best_choices);
421  for (wc_it.mark_cycle_pt(); !wc_it.cycled_list(); wc_it.forward()) {
422  WERD_CHOICE* choice = wc_it.data();
423  choice->UpdateStateForSplit(blob_number);
424  }
426  }
427 }
MATRIX * ConsumeAndMakeBigger(int ind)
Definition: matrix.cpp:58
bool PrepareToInsertSeam(const GenericVector< SEAM *> &seams, const GenericVector< TBLOB *> &blobs, int insert_index, bool modify)
Definition: seam.cpp:82
MATRIX * ratings
Definition: pageres.h:215
void insert(T t, int index)
WERD_CHOICE_LIST best_choices
Definition: pageres.h:227
WERD_CHOICE * raw_choice
Definition: pageres.h:224
void SetupBlobWidthsAndGaps()
Definition: pageres.cpp:392
GenericVector< TBLOB * > blobs
Definition: blobs.h:436
void UpdateStateForSplit(int blob_position)
Definition: ratngs.cpp:685
TWERD * chopped_word
Definition: pageres.h:201
GenericVector< SEAM * > seam_array
Definition: pageres.h:203

◆ IsAmbiguous()

bool WERD_RES::IsAmbiguous ( )

Definition at line 444 of file pageres.cpp.

444  {
445  return !best_choices.singleton() || best_choice->dangerous_ambig_found();
446 }
WERD_CHOICE * best_choice
Definition: pageres.h:219
WERD_CHOICE_LIST best_choices
Definition: pageres.h:227
bool dangerous_ambig_found() const
Definition: ratngs.h:361

◆ LogNewCookedChoice()

bool WERD_RES::LogNewCookedChoice ( int  max_num_choices,
bool  debug,
WERD_CHOICE * word_choice 
)

Definition at line 612 of file pageres.cpp.

613  {
614  if (best_choice != NULL) {
615  // Throw out obviously bad choices to save some work.
616  // TODO(rays) Get rid of this! This piece of code produces different
617  // results according to the order in which words are found, which is an
618  // undesirable behavior. It would be better to keep all the choices and
619  // prune them later when more information is available.
620  float max_certainty_delta =
621  StopperAmbigThreshold(best_choice->adjust_factor(),
622  word_choice->adjust_factor());
623  if (max_certainty_delta > -kStopperAmbiguityThresholdOffset)
624  max_certainty_delta = -kStopperAmbiguityThresholdOffset;
625  if (word_choice->certainty() - best_choice->certainty() <
626  max_certainty_delta) {
627  if (debug) {
628  STRING bad_string;
629  word_choice->string_and_lengths(&bad_string, NULL);
630  tprintf("Discarding choice \"%s\" with an overly low certainty"
631  " %.3f vs best choice certainty %.3f (Threshold: %.3f)\n",
632  bad_string.string(), word_choice->certainty(),
634  max_certainty_delta + best_choice->certainty());
635  }
636  delete word_choice;
637  return false;
638  }
639  }
640 
641  // Insert in the list in order of increasing rating, but knock out worse
642  // string duplicates.
643  WERD_CHOICE_IT it(&best_choices);
644  const STRING& new_str = word_choice->unichar_string();
645  bool inserted = false;
646  int num_choices = 0;
647  if (!it.empty()) {
648  do {
649  WERD_CHOICE* choice = it.data();
650  if (choice->rating() > word_choice->rating() && !inserted) {
651  // Time to insert.
652  it.add_before_stay_put(word_choice);
653  inserted = true;
654  if (num_choices == 0)
655  best_choice = word_choice; // This is the new best.
656  ++num_choices;
657  }
658  if (choice->unichar_string() == new_str) {
659  if (inserted) {
660  // New is better.
661  delete it.extract();
662  } else {
663  // Old is better.
664  if (debug) {
665  tprintf("Discarding duplicate choice \"%s\", rating %g vs %g\n",
666  new_str.string(), word_choice->rating(), choice->rating());
667  }
668  delete word_choice;
669  return false;
670  }
671  } else {
672  ++num_choices;
673  if (num_choices > max_num_choices)
674  delete it.extract();
675  }
676  it.forward();
677  } while (!it.at_first());
678  }
679  if (!inserted && num_choices < max_num_choices) {
680  it.add_to_end(word_choice);
681  inserted = true;
682  if (num_choices == 0)
683  best_choice = word_choice; // This is the new best.
684  }
685  if (debug) {
686  if (inserted)
687  tprintf("New %s", best_choice == word_choice ? "Best" : "Secondary");
688  else
689  tprintf("Poor");
690  word_choice->print(" Word Choice");
691  }
692  if (!inserted) {
693  delete word_choice;
694  return false;
695  }
696  return true;
697 }
void print() const
Definition: ratngs.h:578
WERD_CHOICE * best_choice
Definition: pageres.h:219
#define tprintf(...)
Definition: tprintf.h:31
const char * string() const
Definition: strngs.cpp:198
void string_and_lengths(STRING *word_str, STRING *word_lengths_str) const
Definition: ratngs.cpp:427
Definition: strngs.h:45
WERD_CHOICE_LIST best_choices
Definition: pageres.h:227
float certainty() const
Definition: ratngs.h:328
const STRING & unichar_string() const
Definition: ratngs.h:539
float adjust_factor() const
Definition: ratngs.h:304
float rating() const
Definition: ratngs.h:325

◆ LogNewRawChoice()

bool WERD_RES::LogNewRawChoice ( WERD_CHOICE * word_choice)

Definition at line 596 of file pageres.cpp.

596  {
597  if (raw_choice == NULL || word_choice->rating() < raw_choice->rating()) {
598  delete raw_choice;
599  raw_choice = new WERD_CHOICE(*word_choice);
601  return true;
602  }
603  return false;
604 }
WERD_CHOICE * raw_choice
Definition: pageres.h:224
void set_permuter(uinT8 perm)
Definition: ratngs.h:373
float rating() const
Definition: ratngs.h:325

◆ merge_tess_fails()

void WERD_RES::merge_tess_fails ( )

Definition at line 1062 of file pageres.cpp.

1062  {
1065  int len = best_choice->length();
1066  ASSERT_HOST(reject_map.length() == len);
1067  ASSERT_HOST(box_word->length() == len);
1068  }
1069 }
_ConstTessMemberResultCallback_0_0< false, R, T1 >::base * NewPermanentTessCallback(const T1 *obj, R(T2::*member)() const)
Definition: tesscallback.h:116
WERD_CHOICE * best_choice
Definition: pageres.h:219
int length() const
Definition: ratngs.h:301
bool ConditionalBlobMerge(TessResultCallback2< UNICHAR_ID, UNICHAR_ID, UNICHAR_ID > *class_cb, TessResultCallback2< bool, const TBOX &, const TBOX &> *box_cb)
Definition: pageres.cpp:933
tesseract::BoxWord * box_word
Definition: pageres.h:250
#define ASSERT_HOST(x)
Definition: errcode.h:84
inT32 length() const
Definition: rejctmap.h:235
UNICHAR_ID BothSpaces(UNICHAR_ID id1, UNICHAR_ID id2)
Definition: pageres.cpp:1054
REJMAP reject_map
Definition: pageres.h:271
int length() const
Definition: boxword.h:85

◆ MergeAdjacentBlobs()

void WERD_RES::MergeAdjacentBlobs ( int  index)

Definition at line 969 of file pageres.cpp.

969  {
970  if (reject_map.length() == best_choice->length())
971  reject_map.remove_pos(index);
972  best_choice->remove_unichar_id(index + 1);
973  rebuild_word->MergeBlobs(index, index + 2);
974  box_word->MergeBoxes(index, index + 2);
975  if (index + 1 < best_state.length()) {
976  best_state[index] += best_state[index + 1];
977  best_state.remove(index + 1);
978  }
979 }
void remove_pos(inT16 pos)
Definition: rejctmap.cpp:363
GenericVector< int > best_state
Definition: pageres.h:255
WERD_CHOICE * best_choice
Definition: pageres.h:219
int length() const
Definition: ratngs.h:301
void MergeBoxes(int start, int end)
Definition: boxword.cpp:134
void remove(int index)
TWERD * rebuild_word
Definition: pageres.h:244
tesseract::BoxWord * box_word
Definition: pageres.h:250
void remove_unichar_id(int index)
Definition: ratngs.h:482
inT32 length() const
Definition: rejctmap.h:235
int length() const
Definition: genericvector.h:85
void MergeBlobs(int start, int end)
Definition: blobs.cpp:890
REJMAP reject_map
Definition: pageres.h:271

◆ operator=()

WERD_RES & WERD_RES::operator= ( const WERD_RES & source)

Definition at line 178 of file pageres.cpp.

178  {
179  this->ELIST_LINK::operator=(source);
180  Clear();
181  if (source.combination) {
182  word = new WERD;
183  *word = *(source.word); // deep copy
184  } else {
185  word = source.word; // pt to same word
186  }
187  if (source.bln_boxes != NULL)
188  bln_boxes = new tesseract::BoxWord(*source.bln_boxes);
189  if (source.chopped_word != NULL)
190  chopped_word = new TWERD(*source.chopped_word);
191  if (source.rebuild_word != NULL)
192  rebuild_word = new TWERD(*source.rebuild_word);
193  // TODO(rays) Do we ever need to copy the seam_array?
194  blob_row = source.blob_row;
195  denorm = source.denorm;
196  if (source.box_word != NULL)
197  box_word = new tesseract::BoxWord(*source.box_word);
198  best_state = source.best_state;
199  correct_text = source.correct_text;
200  blob_widths = source.blob_widths;
201  blob_gaps = source.blob_gaps;
202  // None of the uses of operator= require the ratings matrix to be copied,
203  // so don't as it would be really slow.
204 
205  // Copy the cooked choices.
206  WERD_CHOICE_IT wc_it(const_cast<WERD_CHOICE_LIST*>(&source.best_choices));
207  WERD_CHOICE_IT wc_dest_it(&best_choices);
208  for (wc_it.mark_cycle_pt(); !wc_it.cycled_list(); wc_it.forward()) {
209  const WERD_CHOICE *choice = wc_it.data();
210  wc_dest_it.add_after_then_move(new WERD_CHOICE(*choice));
211  }
212  if (!wc_dest_it.empty()) {
213  wc_dest_it.move_to_first();
214  best_choice = wc_dest_it.data();
215  } else {
216  best_choice = NULL;
217  }
218 
219  if (source.raw_choice != NULL) {
220  raw_choice = new WERD_CHOICE(*source.raw_choice);
221  } else {
222  raw_choice = NULL;
223  }
224  if (source.ep_choice != NULL) {
225  ep_choice = new WERD_CHOICE(*source.ep_choice);
226  } else {
227  ep_choice = NULL;
228  }
229  reject_map = source.reject_map;
230  combination = source.combination;
231  part_of_combo = source.part_of_combo;
232  CopySimpleFields(source);
233  if (source.blamer_bundle != NULL) {
234  blamer_bundle = new BlamerBundle(*(source.blamer_bundle));
235  }
236  return *this;
237 }
GenericVector< int > best_state
Definition: pageres.h:255
WERD_CHOICE * best_choice
Definition: pageres.h:219
BlamerBundle * blamer_bundle
Definition: pageres.h:230
GenericVector< STRING > correct_text
Definition: pageres.h:259
TWERD * rebuild_word
Definition: pageres.h:244
GenericVector< int > blob_widths
Definition: pageres.h:205
tesseract::BoxWord * box_word
Definition: pageres.h:250
Definition: blobs.h:395
BOOL8 combination
Definition: pageres.h:318
void Clear()
Definition: pageres.cpp:1132
WERD_CHOICE_LIST best_choices
Definition: pageres.h:227
WERD_CHOICE * raw_choice
Definition: pageres.h:224
BOOL8 part_of_combo
Definition: pageres.h:319
tesseract::BoxWord * bln_boxes
Definition: pageres.h:184
DENORM denorm
Definition: pageres.h:190
WERD * word
Definition: pageres.h:175
WERD_CHOICE * ep_choice
Definition: pageres.h:270
GenericVector< int > blob_gaps
Definition: pageres.h:208
void operator=(const ELIST_LINK &)
Definition: elst.h:101
Definition: werd.h:60
TWERD * chopped_word
Definition: pageres.h:201
REJMAP reject_map
Definition: pageres.h:271
ROW * blob_row
Definition: pageres.h:186
void CopySimpleFields(const WERD_RES &source)
Definition: pageres.cpp:241

◆ PiecesAllNatural()

bool WERD_RES::PiecesAllNatural ( int  start,
int  count 
) const

Definition at line 1073 of file pageres.cpp.

1073  {
1074  // all seams must have no splits.
1075  for (int index = start; index < start + count - 1; ++index) {
1076  if (index >= 0 && index < seam_array.size()) {
1077  SEAM* seam = seam_array[index];
1078  if (seam != NULL && seam->HasAnySplits()) return false;
1079  }
1080  }
1081  return true;
1082 }
int size() const
Definition: genericvector.h:72
Definition: seam.h:44
int count(LIST var_list)
Definition: oldlist.cpp:103
GenericVector< SEAM * > seam_array
Definition: pageres.h:203
bool HasAnySplits() const
Definition: seam.h:67

◆ PrintBestChoices()

void WERD_RES::PrintBestChoices ( ) const

Definition at line 709 of file pageres.cpp.

709  {
710  STRING alternates_str;
711  WERD_CHOICE_IT it(const_cast<WERD_CHOICE_LIST*>(&best_choices));
712  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
713  if (!it.at_first()) alternates_str += "\", \"";
714  alternates_str += it.data()->unichar_string();
715  }
716  tprintf("Alternates for \"%s\": {\"%s\"}\n",
717  best_choice->unichar_string().string(), alternates_str.string());
718 }
WERD_CHOICE * best_choice
Definition: pageres.h:219
#define tprintf(...)
Definition: tprintf.h:31
const char * string() const
Definition: strngs.cpp:198
Definition: strngs.h:45
WERD_CHOICE_LIST best_choices
Definition: pageres.h:227
const STRING & unichar_string() const
Definition: ratngs.h:539

◆ RawUTF8()

const char* WERD_RES::RawUTF8 ( int  blob_index) const
inline

Definition at line 358 of file pageres.h.

358  {
359  if (blob_index < 0 || blob_index >= raw_choice->length())
360  return NULL;
361  UNICHAR_ID id = raw_choice->unichar_id(blob_index);
362  if (id < 0 || id >= uch_set->size() || id == INVALID_UNICHAR_ID)
363  return NULL;
364  return uch_set->id_to_unichar(id);
365  }
UNICHAR_ID unichar_id(int index) const
Definition: ratngs.h:313
int UNICHAR_ID
Definition: unichar.h:33
int length() const
Definition: ratngs.h:301
const char * id_to_unichar(UNICHAR_ID id) const
Definition: unicharset.cpp:266
WERD_CHOICE * raw_choice
Definition: pageres.h:224
int size() const
Definition: unicharset.h:299
const UNICHARSET * uch_set
Definition: pageres.h:192

◆ RebuildBestState()

void WERD_RES::RebuildBestState ( )

Definition at line 800 of file pageres.cpp.

800  {
801  ASSERT_HOST(best_choice != NULL);
802  if (rebuild_word != NULL)
803  delete rebuild_word;
804  rebuild_word = new TWERD;
805  if (seam_array.empty())
807  best_state.truncate(0);
808  int start = 0;
809  for (int i = 0; i < best_choice->length(); ++i) {
810  int length = best_choice->state(i);
811  best_state.push_back(length);
812  if (length > 1) {
814  start + length - 1);
815  }
816  TBLOB* blob = chopped_word->blobs[start];
817  rebuild_word->blobs.push_back(new TBLOB(*blob));
818  if (length > 1) {
820  start + length - 1);
821  }
822  start += length;
823  }
824 }
static void BreakPieces(const GenericVector< SEAM *> &seams, const GenericVector< TBLOB *> &blobs, int first, int last)
Definition: seam.cpp:194
void start_seam_list(TWERD *word, GenericVector< SEAM *> *seam_array)
Definition: seam.cpp:269
GenericVector< int > best_state
Definition: pageres.h:255
WERD_CHOICE * best_choice
Definition: pageres.h:219
int length() const
Definition: ratngs.h:301
int push_back(T object)
TWERD * rebuild_word
Definition: pageres.h:244
bool empty() const
Definition: genericvector.h:90
void truncate(int size)
#define ASSERT_HOST(x)
Definition: errcode.h:84
Definition: blobs.h:395
static void JoinPieces(const GenericVector< SEAM *> &seams, const GenericVector< TBLOB *> &blobs, int first, int last)
Definition: seam.cpp:216
int state(int index) const
Definition: ratngs.h:317
GenericVector< TBLOB * > blobs
Definition: blobs.h:436
Definition: blobs.h:261
TWERD * chopped_word
Definition: pageres.h:201
GenericVector< SEAM * > seam_array
Definition: pageres.h:203

◆ ReplaceBestChoice()

void WERD_RES::ReplaceBestChoice ( WERD_CHOICE * choice)

Definition at line 787 of file pageres.cpp.

787  {
788  best_choice = choice;
790  SetupBoxWord();
791  // Make up a fake reject map of the right length to keep the
792  // rejection pass happy.
796 }
void RebuildBestState()
Definition: pageres.cpp:800
GenericVector< int > best_state
Definition: pageres.h:255
WERD_CHOICE * best_choice
Definition: pageres.h:219
void SetScriptPositions()
Definition: pageres.cpp:853
BOOL8 tess_would_adapt
Definition: pageres.h:281
BOOL8 tess_accepted
Definition: pageres.h:280
int length() const
Definition: genericvector.h:85
void SetupBoxWord()
Definition: pageres.cpp:843
void initialise(inT16 length)
Definition: rejctmap.cpp:318
REJMAP reject_map
Definition: pageres.h:271
BOOL8 done
Definition: pageres.h:282

◆ SetAllScriptPositions()

void WERD_RES::SetAllScriptPositions ( tesseract::ScriptPos  position)

Definition at line 860 of file pageres.cpp.

860  {
862  WERD_CHOICE_IT wc_it(&best_choices);
863  for (wc_it.mark_cycle_pt(); !wc_it.cycled_list(); wc_it.forward())
864  wc_it.data()->SetAllScriptPositions(position);
865 }
WERD_CHOICE_LIST best_choices
Definition: pageres.h:227
WERD_CHOICE * raw_choice
Definition: pageres.h:224
void SetAllScriptPositions(tesseract::ScriptPos position)
Definition: ratngs.cpp:609

◆ SetScriptPositions()

void WERD_RES::SetScriptPositions ( )

Definition at line 853 of file pageres.cpp.

853  {
855 }
WERD_CHOICE * best_choice
Definition: pageres.h:219
void SetScriptPositions(bool small_caps, TWERD *word)
Definition: ratngs.cpp:528
bool small_caps
Definition: pageres.h:283
TWERD * chopped_word
Definition: pageres.h:201

◆ SetupBasicsFromChoppedWord()

void WERD_RES::SetupBasicsFromChoppedWord ( const UNICHARSET & unicharset_in)

Definition at line 335 of file pageres.cpp.

335  {
340 }
void start_seam_list(TWERD *word, GenericVector< SEAM *> *seam_array)
Definition: seam.cpp:269
static BoxWord * CopyFromNormalized(TWERD *tessword)
Definition: boxword.cpp:59
void SetupBlobWidthsAndGaps()
Definition: pageres.cpp:392
void ClearWordChoices()
Definition: pageres.cpp:1175
tesseract::BoxWord * bln_boxes
Definition: pageres.h:184
TWERD * chopped_word
Definition: pageres.h:201
GenericVector< SEAM * > seam_array
Definition: pageres.h:203

◆ SetupBlamerBundle()

void WERD_RES::SetupBlamerBundle ( )

Definition at line 385 of file pageres.cpp.

385  {
386  if (blamer_bundle != NULL) {
388  }
389 }
BlamerBundle * blamer_bundle
Definition: pageres.h:230
void SetupNormTruthWord(const DENORM &denorm)
Definition: blamer.cpp:145
DENORM denorm
Definition: pageres.h:190

◆ SetupBlobWidthsAndGaps()

void WERD_RES::SetupBlobWidthsAndGaps ( )

Definition at line 392 of file pageres.cpp.

392  {
394  blob_gaps.truncate(0);
395  int num_blobs = chopped_word->NumBlobs();
396  for (int b = 0; b < num_blobs; ++b) {
397  TBLOB *blob = chopped_word->blobs[b];
398  TBOX box = blob->bounding_box();
399  blob_widths.push_back(box.width());
400  if (b + 1 < num_blobs) {
402  chopped_word->blobs[b + 1]->bounding_box().left() - box.right());
403  }
404  }
405 }
int push_back(T object)
GenericVector< int > blob_widths
Definition: pageres.h:205
void truncate(int size)
int NumBlobs() const
Definition: blobs.h:425
Definition: rect.h:30
GenericVector< TBLOB * > blobs
Definition: blobs.h:436
Definition: blobs.h:261
inT16 right() const
Definition: rect.h:75
inT16 width() const
Definition: rect.h:111
GenericVector< int > blob_gaps
Definition: pageres.h:208
TBOX bounding_box() const
Definition: blobs.cpp:482
TWERD * chopped_word
Definition: pageres.h:201

◆ SetupBoxWord()

void WERD_RES::SetupBoxWord ( )

Definition at line 843 of file pageres.cpp.

843  {
844  if (box_word != NULL)
845  delete box_word;
849 }
static BoxWord * CopyFromNormalized(TWERD *tessword)
Definition: boxword.cpp:59
TWERD * rebuild_word
Definition: pageres.h:244
void ComputeBoundingBoxes()
Definition: blobs.cpp:873
tesseract::BoxWord * box_word
Definition: pageres.h:250
const BLOCK * block() const
Definition: normalis.h:275
DENORM denorm
Definition: pageres.h:190
WERD * word
Definition: pageres.h:175
void ClipToOriginalWord(const BLOCK *block, WERD *original_word)
Definition: boxword.cpp:95

◆ SetupFake()

void WERD_RES::SetupFake ( const UNICHARSET & uch)

Definition at line 344 of file pageres.cpp.

344  {
345  ClearResults();
346  SetupWordScript(unicharset_in);
347  chopped_word = new TWERD;
348  rebuild_word = new TWERD;
351  int blob_count = word->cblob_list()->length();
352  if (blob_count > 0) {
353  BLOB_CHOICE** fake_choices = new BLOB_CHOICE*[blob_count];
354  // For non-text blocks, just pass any blobs through to the box_word
355  // and call the word failed with a fake classification.
356  C_BLOB_IT b_it(word->cblob_list());
357  int blob_id = 0;
358  for (b_it.mark_cycle_pt(); !b_it.cycled_list(); b_it.forward()) {
359  TBOX box = b_it.data()->bounding_box();
360  box_word->InsertBox(box_word->length(), box);
361  fake_choices[blob_id++] = new BLOB_CHOICE;
362  }
363  FakeClassifyWord(blob_count, fake_choices);
364  delete [] fake_choices;
365  } else {
366  WERD_CHOICE* word = new WERD_CHOICE(&unicharset_in);
367  word->make_bad();
368  LogNewRawChoice(word);
369  // Ownership of word is taken by *this WERD_RES in LogNewCookedChoice.
370  LogNewCookedChoice(1, false, word);
371  }
372  tess_failed = true;
373  done = true;
374 }
bool LogNewCookedChoice(int max_num_choices, bool debug, WERD_CHOICE *word_choice)
Definition: pageres.cpp:612
BOOL8 tess_failed
Definition: pageres.h:272
TWERD * rebuild_word
Definition: pageres.h:244
void ClearResults()
Definition: pageres.cpp:1142
tesseract::BoxWord * box_word
Definition: pageres.h:250
Definition: blobs.h:395
void FakeClassifyWord(int blob_count, BLOB_CHOICE **choices)
Definition: pageres.cpp:872
void SetupWordScript(const UNICHARSET &unicharset_in)
Definition: pageres.cpp:376
void InsertBox(int index, const TBOX &box)
Definition: boxword.cpp:151
Definition: rect.h:30
tesseract::BoxWord * bln_boxes
Definition: pageres.h:184
C_BLOB_LIST * cblob_list()
Definition: werd.h:100
void make_bad()
Set the fields in this choice to be default (bad) values.
Definition: ratngs.h:441
WERD * word
Definition: pageres.h:175
bool LogNewRawChoice(WERD_CHOICE *word_choice)
Definition: pageres.cpp:596
TWERD * chopped_word
Definition: pageres.h:201
BOOL8 done
Definition: pageres.h:282
int length() const
Definition: boxword.h:85

◆ SetupForRecognition()

bool WERD_RES::SetupForRecognition ( const UNICHARSET & unicharset_in,
tesseract::Tesseract * tesseract,
Pix *  pix,
int  norm_mode,
const TBOX * norm_box,
bool  numeric_mode,
bool  use_body_size,
bool  allow_detailed_fx,
ROW * row,
const BLOCK * block 
)

Definition at line 294 of file pageres.cpp.

301  {
302  tesseract::OcrEngineMode norm_mode_hint =
303  static_cast<tesseract::OcrEngineMode>(norm_mode);
304  tesseract = tess;
305  POLY_BLOCK* pb = block != NULL ? block->poly_block() : NULL;
306  if ((norm_mode_hint != tesseract::OEM_LSTM_ONLY &&
307  word->cblob_list()->empty()) ||
308  (pb != NULL && !pb->IsText())) {
309  // Empty words occur when all the blobs have been moved to the rej_blobs
310  // list, which seems to occur frequently in junk.
311  SetupFake(unicharset_in);
312  word->set_flag(W_REP_CHAR, false);
313  return false;
314  }
315  ClearResults();
316  SetupWordScript(unicharset_in);
317  chopped_word = TWERD::PolygonalCopy(allow_detailed_fx, word);
318  float word_xheight = use_body_size && row != NULL && row->body_size() > 0.0f
319  ? row->body_size() : x_height;
320  chopped_word->BLNormalize(block, row, pix, word->flag(W_INVERSE),
321  word_xheight, baseline_shift, numeric_mode,
322  norm_mode_hint, norm_box, &denorm);
323  blob_row = row;
324  SetupBasicsFromChoppedWord(unicharset_in);
326  int num_blobs = chopped_word->NumBlobs();
327  ratings = new MATRIX(num_blobs, kWordrecMaxNumJoinChunks);
328  tess_failed = false;
329  return true;
330 }
Definition: werd.h:44
float body_size() const
Definition: ocrrow.h:70
float baseline_shift
Definition: pageres.h:297
BOOL8 tess_failed
Definition: pageres.h:272
const int kWordrecMaxNumJoinChunks
Definition: pageres.cpp:41
void set_flag(WERD_FLAGS mask, BOOL8 value)
Definition: werd.h:129
void ClearResults()
Definition: pageres.cpp:1142
BOOL8 flag(WERD_FLAGS mask) const
Definition: werd.h:128
MATRIX * ratings
Definition: pageres.h:215
void BLNormalize(const BLOCK *block, const ROW *row, Pix *pix, bool inverse, float x_height, float baseline_shift, bool numeric_mode, tesseract::OcrEngineMode hint, const TBOX *norm_box, DENORM *word_denorm)
Definition: blobs.cpp:807
bool IsText() const
Definition: polyblk.h:52
void SetupWordScript(const UNICHARSET &unicharset_in)
Definition: pageres.cpp:376
void SetupBasicsFromChoppedWord(const UNICHARSET &unicharset_in)
Definition: pageres.cpp:335
void SetupFake(const UNICHARSET &uch)
Definition: pageres.cpp:344
int NumBlobs() const
Definition: blobs.h:425
POLY_BLOCK * poly_block() const
Definition: pdblock.h:55
Definition: matrix.h:563
DENORM denorm
Definition: pageres.h:190
C_BLOB_LIST * cblob_list()
Definition: werd.h:100
void SetupBlamerBundle()
Definition: pageres.cpp:385
WERD * word
Definition: pageres.h:175
TWERD * chopped_word
Definition: pageres.h:201
float x_height
Definition: pageres.h:295
static TWERD * PolygonalCopy(bool allow_detailed_fx, WERD *src)
Definition: blobs.cpp:793
ROW * blob_row
Definition: pageres.h:186

◆ SetupWordScript()

void WERD_RES::SetupWordScript ( const UNICHARSET & unicharset_in)

Definition at line 376 of file pageres.cpp.

376  {
377  uch_set = &uch;
378  int script = uch.default_sid();
379  word->set_script_id(script);
380  word->set_flag(W_SCRIPT_HAS_XHEIGHT, uch.script_has_xheight());
381  word->set_flag(W_SCRIPT_IS_LATIN, script == uch.latin_sid());
382 }
void set_flag(WERD_FLAGS mask, BOOL8 value)
Definition: werd.h:129
void set_script_id(int id)
Definition: werd.h:113
WERD * word
Definition: pageres.h:175
const UNICHARSET * uch_set
Definition: pageres.h:192
int default_sid() const
Definition: unicharset.h:853

◆ StatesAllValid()

bool WERD_RES::StatesAllValid ( )

Definition at line 450 of file pageres.cpp.

450  {
451  int ratings_dim = ratings->dimension();
452  if (raw_choice->TotalOfStates() != ratings_dim) {
453  tprintf("raw_choice has total of states = %d vs ratings dim of %d\n",
454  raw_choice->TotalOfStates(), ratings_dim);
455  return false;
456  }
457  WERD_CHOICE_IT it(&best_choices);
458  int index = 0;
459  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward(), ++index) {
460  WERD_CHOICE* choice = it.data();
461  if (choice->TotalOfStates() != ratings_dim) {
462  tprintf("Cooked #%d has total of states = %d vs ratings dim of %d\n",
463  index, choice->TotalOfStates(), ratings_dim);
464  return false;
465  }
466  }
467  return true;
468 }
#define tprintf(...)
Definition: tprintf.h:31
int TotalOfStates() const
Definition: ratngs.cpp:697
MATRIX * ratings
Definition: pageres.h:215
WERD_CHOICE_LIST best_choices
Definition: pageres.h:227
WERD_CHOICE * raw_choice
Definition: pageres.h:224
int dimension() const
Definition: matrix.h:521

◆ SymbolDirection()

UNICHARSET::Direction WERD_RES::SymbolDirection ( int  blob_index) const
inline

Definition at line 367 of file pageres.h.

367  {
368  if (best_choice == NULL ||
369  blob_index >= best_choice->length() ||
370  blob_index < 0)
371  return UNICHARSET::U_OTHER_NEUTRAL;
372  return uch_set->get_direction(best_choice->unichar_id(blob_index));
373  }
UNICHAR_ID unichar_id(int index) const
Definition: ratngs.h:313
WERD_CHOICE * best_choice
Definition: pageres.h:219
int length() const
Definition: ratngs.h:301
Direction get_direction(UNICHAR_ID unichar_id) const
Definition: unicharset.h:650
const UNICHARSET * uch_set
Definition: pageres.h:192

◆ UnicharsInReadingOrder()

bool WERD_RES::UnicharsInReadingOrder ( ) const
inline

Definition at line 409 of file pageres.h.

409  {
410  return best_choice->unichars_in_script_order();
411  }
WERD_CHOICE * best_choice
Definition: pageres.h:219
bool unichars_in_script_order() const
Definition: ratngs.h:533

Member Data Documentation

◆ baseline_shift

float WERD_RES::baseline_shift

Definition at line 297 of file pageres.h.

◆ best_choice

WERD_CHOICE* WERD_RES::best_choice

Definition at line 219 of file pageres.h.

◆ best_choices

WERD_CHOICE_LIST WERD_RES::best_choices

Definition at line 227 of file pageres.h.

◆ best_state

GenericVector<int> WERD_RES::best_state

Definition at line 255 of file pageres.h.

◆ blamer_bundle

BlamerBundle* WERD_RES::blamer_bundle

Definition at line 230 of file pageres.h.

◆ bln_boxes

tesseract::BoxWord* WERD_RES::bln_boxes

Definition at line 184 of file pageres.h.

◆ blob_gaps

GenericVector<int> WERD_RES::blob_gaps

Definition at line 208 of file pageres.h.

◆ blob_row

ROW* WERD_RES::blob_row

Definition at line 186 of file pageres.h.

◆ blob_widths

GenericVector<int> WERD_RES::blob_widths

Definition at line 205 of file pageres.h.

◆ bold

inT8 WERD_RES::bold

Definition at line 286 of file pageres.h.

◆ box_word

tesseract::BoxWord* WERD_RES::box_word

Definition at line 250 of file pageres.h.

◆ caps_height

float WERD_RES::caps_height

Definition at line 296 of file pageres.h.

◆ chopped_word

TWERD* WERD_RES::chopped_word

Definition at line 201 of file pageres.h.

◆ combination

BOOL8 WERD_RES::combination

Definition at line 318 of file pageres.h.

◆ correct_text

GenericVector<STRING> WERD_RES::correct_text

Definition at line 259 of file pageres.h.

◆ denorm

DENORM WERD_RES::denorm

Definition at line 190 of file pageres.h.

◆ done

BOOL8 WERD_RES::done

Definition at line 282 of file pageres.h.

◆ ep_choice

WERD_CHOICE* WERD_RES::ep_choice

Definition at line 270 of file pageres.h.

◆ fontinfo

const FontInfo* WERD_RES::fontinfo

Definition at line 288 of file pageres.h.

◆ fontinfo2

const FontInfo* WERD_RES::fontinfo2

Definition at line 289 of file pageres.h.

◆ fontinfo_id2_count

inT8 WERD_RES::fontinfo_id2_count

Definition at line 291 of file pageres.h.

◆ fontinfo_id_count

inT8 WERD_RES::fontinfo_id_count

Definition at line 290 of file pageres.h.

◆ guessed_caps_ht

BOOL8 WERD_RES::guessed_caps_ht

Definition at line 293 of file pageres.h.

◆ guessed_x_ht

BOOL8 WERD_RES::guessed_x_ht

Definition at line 292 of file pageres.h.

◆ italic

inT8 WERD_RES::italic

Definition at line 285 of file pageres.h.

◆ odd_size

bool WERD_RES::odd_size

Definition at line 284 of file pageres.h.

◆ part_of_combo

BOOL8 WERD_RES::part_of_combo

Definition at line 319 of file pageres.h.

◆ ratings

MATRIX* WERD_RES::ratings

Definition at line 215 of file pageres.h.

◆ raw_choice

WERD_CHOICE* WERD_RES::raw_choice

Definition at line 224 of file pageres.h.

◆ rebuild_word

TWERD* WERD_RES::rebuild_word

Definition at line 244 of file pageres.h.

◆ reject_map

REJMAP WERD_RES::reject_map

Definition at line 271 of file pageres.h.

◆ reject_spaces

BOOL8 WERD_RES::reject_spaces

Definition at line 320 of file pageres.h.

◆ seam_array

GenericVector<SEAM*> WERD_RES::seam_array

Definition at line 203 of file pageres.h.

◆ small_caps

bool WERD_RES::small_caps

Definition at line 283 of file pageres.h.

◆ space_certainty

float WERD_RES::space_certainty

Definition at line 300 of file pageres.h.

◆ tess_accepted

BOOL8 WERD_RES::tess_accepted

Definition at line 280 of file pageres.h.

◆ tess_failed

BOOL8 WERD_RES::tess_failed

Definition at line 272 of file pageres.h.

◆ tess_would_adapt

BOOL8 WERD_RES::tess_would_adapt

Definition at line 281 of file pageres.h.

◆ tesseract

tesseract::Tesseract* WERD_RES::tesseract

Definition at line 266 of file pageres.h.

◆ uch_set

const UNICHARSET* WERD_RES::uch_set

Definition at line 192 of file pageres.h.

◆ unlv_crunch_mode

CRUNCH_MODE WERD_RES::unlv_crunch_mode

Definition at line 294 of file pageres.h.

◆ word

WERD* WERD_RES::word

Definition at line 175 of file pageres.h.

◆ x_height

float WERD_RES::x_height

Definition at line 295 of file pageres.h.


The documentation for this class was generated from the following files: