36 static const double kStopperAmbiguityThresholdGain = 8.0;
39 static const double kStopperAmbiguityThresholdOffset = 1.5;
53 static double StopperAmbigThreshold(
double f1,
double f2) {
54 return (f2 - f1) * kStopperAmbiguityThresholdGain -
55 kStopperAmbiguityThresholdOffset;
64 bool merge_similar_words,
65 BLOCK_LIST *the_block_list,
68 BLOCK_IT block_it(the_block_list);
70 for (block_it.mark_cycle_pt();
71 !block_it.cycled_list(); block_it.forward()) {
72 block_res_it.add_to_end(
new BLOCK_RES(merge_similar_words,
85 ROW_IT row_it (the_block->
row_list ());
86 ROW_RES_IT row_res_it(&row_res_list);
92 font_assigned =
FALSE;
99 for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
100 row_res_it.add_to_end(
new ROW_RES(merge_similar_words, row_it.data()));
112 WERD_RES_IT word_res_it(&word_res_list);
118 whole_word_rej_count = 0;
121 bool add_next_word =
false;
125 for (word_it.mark_cycle_pt(); !word_it.cycled_list(); word_it.forward()) {
133 }
else if (merge_similar_words) {
137 word_res->
odd_size = !add_next_word;
139 WERD* next_word = word_it.data_relative(1);
140 if (merge_similar_words) {
148 int prev_right = union_box.
right();
149 union_box += next_box;
153 add_next_word =
false;
162 copy_word =
new WERD;
163 *copy_word = *(word_it.data());
167 word_res_it.add_to_end(combo);
173 word_res_it.add_to_end(word_res);
183 *word = *(source.
word);
206 WERD_CHOICE_IT wc_it(const_cast<WERD_CHOICE_LIST*>(&source.
best_choices));
207 WERD_CHOICE_IT wc_dest_it(&best_choices);
208 for (wc_it.mark_cycle_pt(); !wc_it.cycled_list(); wc_it.forward()) {
210 wc_dest_it.add_after_then_move(
new WERD_CHOICE(*choice));
212 if (!wc_dest_it.empty()) {
213 wc_dest_it.move_to_first();
214 best_choice = wc_dest_it.data();
232 CopySimpleFields(source);
271 CopySimpleFields(source);
297 const TBOX* norm_box,
300 bool allow_detailed_fx,
307 word->cblob_list()->empty()) ||
308 (pb != NULL && !pb->
IsText())) {
311 SetupFake(unicharset_in);
316 SetupWordScript(unicharset_in);
318 float word_xheight = use_body_size && row != NULL && row->
body_size() > 0.0f
320 chopped_word->BLNormalize(block, row, pix, word->flag(
W_INVERSE),
321 word_xheight, baseline_shift, numeric_mode,
322 norm_mode_hint, norm_box, &denorm);
324 SetupBasicsFromChoppedWord(unicharset_in);
326 int num_blobs = chopped_word->NumBlobs();
338 SetupBlobWidthsAndGaps();
346 SetupWordScript(unicharset_in);
347 chopped_word =
new TWERD;
348 rebuild_word =
new TWERD;
351 int blob_count = word->cblob_list()->
length();
352 if (blob_count > 0) {
356 C_BLOB_IT b_it(word->cblob_list());
358 for (b_it.mark_cycle_pt(); !b_it.cycled_list(); b_it.forward()) {
359 TBOX box = b_it.data()->bounding_box();
360 box_word->InsertBox(box_word->length(), box);
363 FakeClassifyWord(blob_count, fake_choices);
364 delete [] fake_choices;
368 LogNewRawChoice(word);
370 LogNewCookedChoice(1,
false, word);
379 word->set_script_id(script);
386 if (blamer_bundle != NULL) {
387 blamer_bundle->SetupNormTruthWord(denorm);
393 blob_widths.truncate(0);
394 blob_gaps.truncate(0);
395 int num_blobs = chopped_word->NumBlobs();
396 for (
int b = 0; b < num_blobs; ++b) {
397 TBLOB *blob = chopped_word->blobs[b];
399 blob_widths.push_back(box.
width());
400 if (b + 1 < num_blobs) {
402 chopped_word->blobs[b + 1]->bounding_box().left() - box.
right());
413 seam_array.insert(seam, blob_number);
414 if (ratings != NULL) {
416 ratings = ratings->ConsumeAndMakeBigger(blob_number);
418 if (raw_choice != NULL)
419 raw_choice->UpdateStateForSplit(blob_number);
420 WERD_CHOICE_IT wc_it(&best_choices);
421 for (wc_it.mark_cycle_pt(); !wc_it.cycled_list(); wc_it.forward()) {
425 SetupBlobWidthsAndGaps();
433 WERD_CHOICE_IT wc_it(const_cast<WERD_CHOICE_LIST*>(&best_choices));
434 for (wc_it.forward(); !wc_it.at_first(); wc_it.forward()) {
445 return !best_choices.singleton() || best_choice->dangerous_ambig_found();
451 int ratings_dim = ratings->dimension();
452 if (raw_choice->TotalOfStates() != ratings_dim) {
453 tprintf(
"raw_choice has total of states = %d vs ratings dim of %d\n",
454 raw_choice->TotalOfStates(), ratings_dim);
457 WERD_CHOICE_IT it(&best_choices);
459 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward(), ++index) {
462 tprintf(
"Cooked #%d has total of states = %d vs ratings dim of %d\n",
474 (word_to_debug != NULL && *word_to_debug !=
'\0' && best_choice != NULL &&
475 best_choice->unichar_string() ==
STRING(word_to_debug))) {
476 if (raw_choice != NULL)
477 raw_choice->print(
"\nBest Raw Choice");
479 WERD_CHOICE_IT it(&best_choices);
481 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward(), ++index) {
492 tprintf(
"Best choice: accepted=%d, adaptable=%d, done=%d : ",
493 tess_accepted, tess_would_adapt, done);
494 if (best_choice == NULL)
497 best_choice->print(msg);
506 if (best_choice == NULL || best_choices.singleton())
509 if (debug_level >= 2)
510 best_choice->print(
"\nFiltering against best choice");
511 WERD_CHOICE_IT it(&best_choices);
513 for (it.forward(); !it.at_first(); it.forward(), ++index) {
515 float threshold = StopperAmbigThreshold(best_choice->adjust_factor(),
522 int i = 0, j = 0, chunk = 0;
527 int choice_chunk = choice->
state(0), best_chunk = best_choice->state(0);
528 while (i < choice->length() && j < best_choice->length()) {
529 if (choice->
unichar_id(i) != best_choice->unichar_id(j) &&
530 choice->
certainty(i) - best_choice->certainty(j) < threshold) {
531 if (debug_level >= 2) {
532 choice->
print(
"WorstCertaintyDiffWorseThan");
534 "i %d j %d Choice->Blob[i].Certainty %.4g" 535 " WorstOtherChoiceCertainty %g Threshold %g\n",
536 i, j, choice->
certainty(i), best_choice->certainty(j), threshold);
537 tprintf(
"Discarding bad choice #%d\n", index);
544 while (choice_chunk < chunk && ++i < choice->length())
545 choice_chunk += choice->
state(i);
547 while (best_chunk < chunk && ++j < best_choice->length())
548 best_chunk += best_choice->state(j);
559 int end_chunk = best_choice->state(0);
560 int end_raw_chunk = raw_choice->state(0);
562 for (
int i = 0; i < best_choice->length(); i++, thresholds++) {
563 float avg_rating = 0.0f;
564 int num_error_chunks = 0;
567 while (chunk < end_chunk) {
568 if (chunk >= end_raw_chunk) {
570 end_raw_chunk += raw_choice->state(raw_blob);
572 if (best_choice->unichar_id(i) !=
573 raw_choice->unichar_id(raw_blob)) {
574 avg_rating += raw_choice->certainty(raw_blob);
580 if (num_error_chunks > 0) {
581 avg_rating /= num_error_chunks;
582 *thresholds = (avg_rating / -certainty_scale) * (1.0 - rating_margin);
584 *thresholds = max_rating;
587 if (*thresholds > max_rating)
588 *thresholds = max_rating;
589 if (*thresholds < min_rating)
590 *thresholds = min_rating;
597 if (raw_choice == NULL || word_choice->
rating() < raw_choice->rating()) {
614 if (best_choice != NULL) {
620 float max_certainty_delta =
621 StopperAmbigThreshold(best_choice->adjust_factor(),
623 if (max_certainty_delta > -kStopperAmbiguityThresholdOffset)
624 max_certainty_delta = -kStopperAmbiguityThresholdOffset;
625 if (word_choice->
certainty() - best_choice->certainty() <
626 max_certainty_delta) {
630 tprintf(
"Discarding choice \"%s\" with an overly low certainty" 631 " %.3f vs best choice certainty %.3f (Threshold: %.3f)\n",
633 best_choice->certainty(),
634 max_certainty_delta + best_choice->certainty());
643 WERD_CHOICE_IT it(&best_choices);
645 bool inserted =
false;
650 if (choice->
rating() > word_choice->
rating() && !inserted) {
652 it.add_before_stay_put(word_choice);
654 if (num_choices == 0)
655 best_choice = word_choice;
665 tprintf(
"Discarding duplicate choice \"%s\", rating %g vs %g\n",
673 if (num_choices > max_num_choices)
677 }
while (!it.at_first());
679 if (!inserted && num_choices < max_num_choices) {
680 it.add_to_end(word_choice);
682 if (num_choices == 0)
683 best_choice = word_choice;
687 tprintf(
"New %s", best_choice == word_choice ?
"Best" :
"Secondary");
690 word_choice->
print(
" Word Choice");
702 template<
class T>
static void MovePointerData(T** dest, T**src) {
711 WERD_CHOICE_IT it(const_cast<WERD_CHOICE_LIST*>(&best_choices));
712 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
713 if (!it.at_first()) alternates_str +=
"\", \"";
714 alternates_str += it.data()->unichar_string();
716 tprintf(
"Alternates for \"%s\": {\"%s\"}\n",
717 best_choice->unichar_string().string(), alternates_str.
string());
724 for (
int b = start_blob; b <= last_blob; ++b) {
725 result += blob_widths[b];
727 result += blob_gaps[b];
733 if (blob_index < 0 || blob_index >= blob_gaps.size())
735 return blob_gaps[blob_index];
743 if (index < 0 || index >= best_choice->length())
return NULL;
744 BLOB_CHOICE_LIST* choices = GetBlobChoices(index);
752 return best_choice->blob_choices(index, ratings);
762 MovePointerData(&box_word, &word->
box_word);
763 seam_array.delete_data_pointers();
770 if (ratings != NULL) ratings->delete_matrix_pointers();
771 MovePointerData(&ratings, &word->
ratings);
773 MovePointerData(&raw_choice, &word->
raw_choice);
774 best_choices.clear();
775 WERD_CHOICE_IT wc_it(&best_choices);
779 assert(blamer_bundle != NULL);
782 CopySimpleFields(*word);
788 best_choice = choice;
793 reject_map.initialise(best_state.length());
794 done = tess_accepted = tess_would_adapt =
true;
795 SetScriptPositions();
802 if (rebuild_word != NULL)
804 rebuild_word =
new TWERD;
805 if (seam_array.empty())
807 best_state.truncate(0);
809 for (
int i = 0; i < best_choice->length(); ++i) {
810 int length = best_choice->state(i);
811 best_state.push_back(length);
816 TBLOB* blob = chopped_word->blobs[start];
817 rebuild_word->blobs.push_back(
new TBLOB(*blob));
829 if (rebuild_word != NULL)
831 rebuild_word =
new TWERD(*chopped_word);
833 int word_len = box_word->length();
834 best_state.reserve(word_len);
835 correct_text.reserve(word_len);
836 for (
int i = 0; i < word_len; ++i) {
837 best_state.push_back(1);
838 correct_text.push_back(
STRING(
""));
844 if (box_word != NULL)
846 rebuild_word->ComputeBoundingBoxes();
848 box_word->ClipToOriginalWord(denorm.block(), word);
854 best_choice->SetScriptPositions(small_caps, chopped_word);
861 raw_choice->SetAllScriptPositions(position);
862 WERD_CHOICE_IT wc_it(&best_choices);
863 for (wc_it.mark_cycle_pt(); !wc_it.cycled_list(); wc_it.forward())
864 wc_it.data()->SetAllScriptPositions(position);
878 ratings =
new MATRIX(blob_count, 1);
879 for (
int c = 0; c < blob_count; ++c) {
880 BLOB_CHOICE_LIST* choice_list =
new BLOB_CHOICE_LIST;
881 BLOB_CHOICE_IT choice_it(choice_list);
882 choice_it.add_after_then_move(choices[c]);
883 ratings->put(c, c, choice_list);
886 reject_map.initialise(blob_count);
887 best_state.init_to_size(blob_count, 1);
894 int num_blobs = ratings->dimension();
897 for (
int b = 0; b < num_blobs; ++b) {
901 BLOB_CHOICE_LIST* choices = ratings->get(b, b);
902 if (choices != NULL && !choices->empty()) {
903 BLOB_CHOICE_IT bc_it(choices);
906 rating = choice->
rating();
912 LogNewRawChoice(word_choice);
914 LogNewCookedChoice(1,
false, word_choice);
919 correct_text.clear();
921 for (
int i = 0; i < best_choice->length(); ++i) {
922 UNICHAR_ID choice_id = best_choice->unichar_id(i);
923 const char* blob_choice = uch_set->id_to_unichar(choice_id);
924 correct_text.push_back(
STRING(blob_choice));
936 ASSERT_HOST(best_choice->length() == 0 || ratings != NULL);
937 bool modified =
false;
938 for (
int i = 0; i + 1 < best_choice->length(); ++i) {
939 UNICHAR_ID new_id = class_cb->
Run(best_choice->unichar_id(i),
940 best_choice->unichar_id(i+1));
941 if (new_id != INVALID_UNICHAR_ID &&
942 (box_cb == NULL || box_cb->
Run(box_word->BlobBox(i),
943 box_word->BlobBox(i + 1)))) {
945 best_choice->set_unichar_id(new_id, i);
947 MergeAdjacentBlobs(i);
948 const MATRIX_COORD& coord = best_choice->MatrixCoord(i);
949 if (!coord.
Valid(*ratings)) {
950 ratings->IncreaseBandSize(coord.
row + 1 - coord.
col);
952 BLOB_CHOICE_LIST* blob_choices = GetBlobChoices(i);
957 BLOB_CHOICE_IT bc_it(blob_choices);
958 bc_it.add_before_then_move(blob_choice);
970 if (reject_map.length() == best_choice->length())
971 reject_map.remove_pos(index);
972 best_choice->remove_unichar_id(index + 1);
973 rebuild_word->MergeBlobs(index, index + 2);
974 box_word->MergeBoxes(index, index + 2);
975 if (index + 1 < best_state.length()) {
976 best_state[index] += best_state[index + 1];
977 best_state.remove(index + 1);
987 static int is_simple_quote(
const char* signed_str,
int length) {
988 const unsigned char* str =
989 reinterpret_cast<const unsigned char*
>(signed_str);
991 return (length == 1 && (*str ==
'\'' || *str ==
'`')) ||
993 (length == 3 && ((*str == 0xe2 &&
994 *(str + 1) == 0x80 &&
995 *(str + 2) == 0x98) ||
997 *(str + 1) == 0x80 &&
998 *(str + 2) == 0x99)));
1004 const char *ch = uch_set->id_to_unichar(id1);
1005 const char *next_ch = uch_set->id_to_unichar(id2);
1006 if (is_simple_quote(ch, strlen(ch)) &&
1007 is_simple_quote(next_ch, strlen(next_ch)))
1008 return uch_set->unichar_to_id(
"\"");
1009 return INVALID_UNICHAR_ID;
1014 if (!uch_set->contains_unichar(
"\"") ||
1015 !uch_set->get_enabled(uch_set->unichar_to_id(
"\"")))
1018 ConditionalBlobMerge(
1026 const char *ch = uch_set->id_to_unichar(id1);
1027 const char *next_ch = uch_set->id_to_unichar(id2);
1028 if (strlen(ch) == 1 && strlen(next_ch) == 1 &&
1029 (*ch ==
'-' || *ch ==
'~') && (*next_ch ==
'-' || *next_ch ==
'~'))
1030 return uch_set->unichar_to_id(
"-");
1031 return INVALID_UNICHAR_ID;
1043 if (!uch_set->contains_unichar(
"-") ||
1044 !uch_set->get_enabled(uch_set->unichar_to_id(
"-")))
1047 ConditionalBlobMerge(
1055 if (id1 == id2 && id1 == uch_set->unichar_to_id(
" "))
1058 return INVALID_UNICHAR_ID;
1063 if (ConditionalBlobMerge(
1065 int len = best_choice->length();
1075 for (
int index = start; index < start + count - 1; ++index) {
1076 if (index >= 0 && index < seam_array.size()) {
1077 SEAM* seam = seam_array[index];
1078 if (seam != NULL && seam->
HasAnySplits())
return false;
1090 tess_failed =
FALSE;
1091 tess_accepted =
FALSE;
1092 tess_would_adapt =
FALSE;
1104 fontinfo_id_count = 0;
1105 fontinfo_id2_count = 0;
1108 baseline_shift = 0.0f;
1109 space_certainty = 0.0f;
1110 guessed_x_ht =
TRUE;
1111 guessed_caps_ht =
TRUE;
1112 combination =
FALSE;
1113 part_of_combo =
FALSE;
1114 reject_spaces =
FALSE;
1122 chopped_word = NULL;
1123 rebuild_word = NULL;
1129 blamer_bundle = NULL;
1133 if (word != NULL && combination) {
1137 delete blamer_bundle;
1138 blamer_bundle = NULL;
1146 fontinfo_id_count = 0;
1147 fontinfo_id2_count = 0;
1148 if (bln_boxes != NULL) {
1153 if (chopped_word != NULL) {
1154 delete chopped_word;
1155 chopped_word = NULL;
1157 if (rebuild_word != NULL) {
1158 delete rebuild_word;
1159 rebuild_word = NULL;
1161 if (box_word != NULL) {
1166 correct_text.clear();
1167 seam_array.delete_data_pointers();
1169 blob_widths.clear();
1173 if (blamer_bundle != NULL) blamer_bundle->ClearResults();
1177 if (raw_choice != NULL) {
1181 best_choices.clear();
1182 if (ep_choice != NULL) {
1188 if (ratings != NULL) {
1189 ratings->delete_matrix_pointers();
1197 return word_res == other.word_res &&
1198 row_res == other.row_res &&
1199 block_res == other.block_res;
1204 if (other.block_res == NULL) {
1206 if (block_res == NULL)
1210 if (block_res == NULL) {
1213 if (block_res == other.block_res) {
1214 if (other.row_res == NULL || row_res == NULL) {
1218 if (row_res == other.row_res) {
1220 ASSERT_HOST(other.word_res != NULL && word_res != NULL);
1221 if (word_res == other.word_res) {
1226 WERD_RES_IT word_res_it(&row_res->word_res_list);
1227 for (word_res_it.mark_cycle_pt(); !word_res_it.cycled_list();
1228 word_res_it.forward()) {
1229 if (word_res_it.data() == word_res) {
1231 }
else if (word_res_it.data() == other.word_res) {
1235 ASSERT_HOST(
"Error: Incomparable PAGE_RES_ITs" == NULL);
1239 ROW_RES_IT row_res_it(&block_res->row_res_list);
1240 for (row_res_it.mark_cycle_pt(); !row_res_it.cycled_list();
1241 row_res_it.forward()) {
1242 if (row_res_it.data() == row_res) {
1244 }
else if (row_res_it.data() == other.row_res) {
1248 ASSERT_HOST(
"Error: Incomparable PAGE_RES_ITs" == NULL);
1252 BLOCK_RES_IT block_res_it(&page_res->block_res_list);
1253 for (block_res_it.mark_cycle_pt();
1254 !block_res_it.cycled_list(); block_res_it.forward()) {
1255 if (block_res_it.data() == block_res) {
1257 }
else if (block_res_it.data() == other.block_res) {
1262 ASSERT_HOST(
"Error: Incomparable PAGE_RES_ITs" == NULL);
1277 WERD_RES_IT wr_it(&row()->word_res_list);
1278 for (wr_it.mark_cycle_pt(); !wr_it.cycled_list(); wr_it.forward()) {
1280 if (word == word_res)
1284 wr_it.add_before_then_move(new_res);
1285 if (wr_it.at_first()) {
1288 ResetWordIterator();
1296 static void ComputeBlobEnds(
const WERD_RES& word, C_BLOB_LIST* next_word_blobs,
1302 TBOX blob_box = blob_it.data()->bounding_box();
1304 for (
int b = 1; b < length; ++b) {
1305 blob_box += blob_it.data()->bounding_box();
1311 if (!blob_it.at_first() || next_word_blobs != NULL) {
1312 if (blob_it.at_first())
1313 blob_it.set_to_list(next_word_blobs);
1314 blob_end = (blob_box.
right() + blob_it.data()->bounding_box().left()) / 2;
1325 if (words->
empty()) {
1326 DeleteCurrentWord();
1332 (*words)[0]->word->set_flag(
W_BOL,
true);
1334 (*words)[0]->word->set_blanks(1);
1344 WERD_IT w_it(row()->row->word_list());
1346 for (w_it.mark_cycle_pt(); !w_it.cycled_list(); w_it.forward()) {
1347 WERD* word = w_it.data();
1348 if (word == input_word->
word)
1355 WERD_RES_IT wr_it(&row()->word_res_list);
1356 for (wr_it.mark_cycle_pt(); !wr_it.cycled_list(); wr_it.forward()) {
1358 if (word == input_word)
1368 for (
int w = 0; w < words->
size(); ++w) {
1372 C_BLOB_LIST* next_word_blobs =
1373 w + 1 < words->
size() ? (*words)[w + 1]->word->cblob_list() : NULL;
1374 ComputeBlobEnds(*word_w, next_word_blobs, &blob_ends);
1380 for (
int i = 0; i < blob_ends.
size(); ++i) {
1381 int end_x = blob_ends[i];
1384 while (!src_b_it.empty() &&
1385 src_b_it.data()->bounding_box().x_middle() < end_x) {
1386 blob_box += src_b_it.data()->bounding_box();
1387 dest_it.add_after_then_move(src_b_it.extract());
1390 while (!rej_b_it.empty() &&
1391 rej_b_it.data()->bounding_box().x_middle() < end_x) {
1392 blob_box += rej_b_it.data()->bounding_box();
1393 dest_it.add_after_then_move(rej_b_it.extract());
1399 if (i > 0 && blob_box.
left() < blob_ends[i - 1])
1400 blob_box.
set_left(blob_ends[i - 1]);
1401 if (blob_box.
right() > end_x)
1403 box_word->InsertBox(i, blob_box);
1408 for (
int i = 0; i < box_word->length(); ++i) {
1409 TBOX box = box_word->BlobBox(i);
1413 for (dest_it.mark_cycle_pt(); !dest_it.cycled_list();
1414 dest_it.forward()) {
1415 TBOX blob_box = dest_it.data()->bounding_box();
1416 if (blob_box.
left() < blob_ends[i] &&
1417 (i == 0 || blob_box.
right() >= blob_ends[i - 1])) {
1418 if (i > 0 && blob_box.
left() < blob_ends[i - 1])
1419 blob_box.
set_left(blob_ends[i - 1]);
1420 if (blob_box.
right() > blob_ends[i])
1422 box_word->ChangeBox(i, blob_box);
1433 w_it.add_before_stay_put(word_w->
word);
1437 wr_it.add_before_stay_put(word_w);
1445 delete w_it.extract();
1446 delete wr_it.extract();
1447 ResetWordIterator();
1455 if (!word_res->combination) {
1459 WERD_IT w_it(row()->row->word_list());
1460 for (w_it.mark_cycle_pt(); !w_it.cycled_list(); w_it.forward()) {
1461 if (w_it.data() == word_res->word) {
1466 delete w_it.extract();
1470 WERD_RES_IT wr_it(&row()->word_res_list);
1471 for (wr_it.mark_cycle_pt(); !wr_it.cycled_list(); wr_it.forward()) {
1472 if (wr_it.data() == word_res) {
1478 delete wr_it.extract();
1479 ResetWordIterator();
1485 WERD* real_word = word_res->word;
1488 if (word_res->combination) {
1491 WERD_RES_IT wr_it(&row()->word_res_list);
1492 for (wr_it.mark_cycle_pt();
1493 !wr_it.cycled_list() && wr_it.data() != word_res; wr_it.forward()) {
1497 real_word = wr_it.data()->word;
1512 block_res_it.set_to_list(&page_res->block_res_list);
1513 block_res_it.mark_cycle_pt();
1514 prev_block_res = NULL;
1515 prev_row_res = NULL;
1516 prev_word_res = NULL;
1520 next_block_res = NULL;
1521 next_row_res = NULL;
1522 next_word_res = NULL;
1523 internal_forward(
true, empty_ok);
1524 return internal_forward(
false, empty_ok);
1535 if (row_res == next_row_res) {
1538 word_res_it.move_to_first();
1539 for (word_res_it.mark_cycle_pt();
1540 !word_res_it.cycled_list() && word_res_it.data() != next_word_res;
1541 word_res_it.forward()) {
1542 if (!word_res_it.data()->part_of_combo) {
1543 if (prev_row_res == row_res) prev_word_res = word_res;
1544 word_res = word_res_it.data();
1548 word_res_it.forward();
1551 WERD_RES_IT wr_it(&row_res->word_res_list);
1552 for (wr_it.mark_cycle_pt(); !wr_it.cycled_list(); wr_it.forward()) {
1553 if (!wr_it.data()->part_of_combo) {
1554 if (prev_row_res == row_res) prev_word_res = word_res;
1555 word_res = wr_it.data();
1576 WERD_RES *PAGE_RES_IT::internal_forward(
bool new_block,
bool empty_ok) {
1577 bool new_row =
false;
1579 prev_block_res = block_res;
1580 prev_row_res = row_res;
1581 prev_word_res = word_res;
1582 block_res = next_block_res;
1583 row_res = next_row_res;
1584 word_res = next_word_res;
1585 next_block_res = NULL;
1586 next_row_res = NULL;
1587 next_word_res = NULL;
1589 while (!block_res_it.cycled_list()) {
1592 row_res_it.set_to_list(&block_res_it.data()->row_res_list);
1593 row_res_it.mark_cycle_pt();
1594 if (row_res_it.empty() && empty_ok) {
1595 next_block_res = block_res_it.data();
1600 while (!row_res_it.cycled_list()) {
1603 word_res_it.set_to_list(&row_res_it.data()->word_res_list);
1604 word_res_it.mark_cycle_pt();
1607 while (!word_res_it.cycled_list() && word_res_it.data()->part_of_combo)
1608 word_res_it.forward();
1609 if (!word_res_it.cycled_list()) {
1610 next_block_res = block_res_it.data();
1611 next_row_res = row_res_it.data();
1612 next_word_res = word_res_it.data();
1613 word_res_it.forward();
1617 row_res_it.forward();
1621 block_res_it.forward();
1626 if (page_res != NULL && page_res->prev_word_best_choice != NULL) {
1627 *page_res->prev_word_best_choice =
1628 (new_block || prev_word_res == NULL) ? NULL : prev_word_res->
best_choice;
1640 if (!row)
return NULL;
1641 for (restart_page(); this->row() != row; forward()) {
1654 while (block_res == next_block_res &&
1655 (next_row_res != NULL && next_row_res->row != NULL &&
1656 row_res->row->para() == next_row_res->row->para())) {
1657 internal_forward(
false,
true);
1659 return internal_forward(
false,
true);
1669 while (block_res == next_block_res) {
1670 internal_forward(
false,
true);
1672 return internal_forward(
false,
true);
1676 inT16 chars_in_word;
1677 inT16 rejects_in_word = 0;
1679 chars_in_word = word_res->reject_map.length ();
1680 page_res->char_count += chars_in_word;
1681 block_res->char_count += chars_in_word;
1682 row_res->char_count += chars_in_word;
1684 rejects_in_word = word_res->reject_map.reject_count ();
1686 page_res->rej_count += rejects_in_word;
1687 block_res->rej_count += rejects_in_word;
1688 row_res->rej_count += rejects_in_word;
1689 if (chars_in_word == rejects_in_word)
1690 row_res->whole_word_rej_count += rejects_in_word;
void add_str_int(const char *str, int number)
void InitForRetryRecognition(const WERD_RES &source)
const double kMaxWordGapRatio
void ReplaceBestChoice(WERD_CHOICE *choice)
UNICHAR_ID unichar_id(int index) const
const double kMaxLineSizeRatio
bool PrepareToInsertSeam(const GenericVector< SEAM *> &seams, const GenericVector< TBLOB *> &blobs, int insert_index, bool modify)
_ConstTessMemberResultCallback_0_0< false, R, T1 >::base * NewPermanentTessCallback(const T1 *obj, R(T2::*member)() const)
static void BreakPieces(const GenericVector< SEAM *> &seams, const GenericVector< TBLOB *> &blobs, int first, int last)
bool LogNewCookedChoice(int max_num_choices, bool debug, WERD_CHOICE *word_choice)
void start_seam_list(TWERD *word, GenericVector< SEAM *> *seam_array)
GenericVector< int > best_state
WERD_RES * forward_paragraph()
const FontInfo * fontinfo2
WERD_CHOICE * best_choice
const int kWordrecMaxNumJoinChunks
void SetAllScriptPositions(tesseract::ScriptPos position)
void SetScriptPositions()
BlamerBundle * blamer_bundle
static BoxWord * CopyFromNormalized(TWERD *tessword)
BLOCK_RES_LIST block_res_list
void ReplaceCurrentWord(tesseract::PointerVector< WERD_RES > *words)
GenericVector< STRING > correct_text
void set_flag(WERD_FLAGS mask, BOOL8 value)
WERD_RES & operator=(const WERD_RES &source)
void BestChoiceToCorrectText()
const char * string() const
int TotalOfStates() const
GenericVector< int > blob_widths
void CloneChoppedToRebuild()
UNICHAR_ID BothHyphens(UNICHAR_ID id1, UNICHAR_ID id2)
void FilterWordChoices(int debug_level)
bool ConditionalBlobMerge(TessResultCallback2< UNICHAR_ID, UNICHAR_ID, UNICHAR_ID > *class_cb, TessResultCallback2< bool, const TBOX &, const TBOX &> *box_cb)
void MergeAdjacentBlobs(int index)
TBOX bounding_box() const
tesseract::Tesseract * tesseract
bool HyphenBoxesOverlap(const TBOX &box1, const TBOX &box2)
tesseract::BoxWord * box_word
UNICHAR_ID BothQuotes(UNICHAR_ID id1, UNICHAR_ID id2)
void string_and_lengths(STRING *word_str, STRING *word_lengths_str) const
BOOL8 flag(WERD_FLAGS mask) const
void copy_on(WERD_RES *word_res)
int cmp(const PAGE_RES_IT &other) const
WERD_CHOICE ** prev_word_best_choice
static int SortByXMiddle(const void *v1, const void *v2)
void InsertSeam(int blob_number, SEAM *seam)
void FakeClassifyWord(int blob_count, BLOB_CHOICE **choices)
void SetupWordScript(const UNICHARSET &unicharset_in)
WERD_RES * start_page(bool empty_ok)
static void JoinPieces(const GenericVector< SEAM *> &seams, const GenericVector< TBLOB *> &blobs, int first, int last)
WERD_CHOICE_LIST best_choices
const FontInfo * fontinfo
void FakeWordFromRatings(PermuterType permuter)
void SetupBasicsFromChoppedWord(const UNICHARSET &unicharset_in)
WERD_RES * forward_block()
void SetupFake(const UNICHARSET &uch)
void set_unichar_id(UNICHAR_ID newunichar_id)
void SetupBlobWidthsAndGaps()
bool AlternativeChoiceAdjustmentsWorseThan(float threshold) const
CRUNCH_MODE unlv_crunch_mode
int state(int index) const
const STRING & unichar_string() const
tesseract::BoxWord * bln_boxes
bool operator==(const PAGE_RES_IT &other) const
POLY_BLOCK * poly_block() const
void append_unichar_id_space_allocated(UNICHAR_ID unichar_id, int blob_count, float rating, float certainty)
float adjust_factor() const
const double kMaxWordSizeRatio
UNICHAR_ID BothSpaces(UNICHAR_ID id1, UNICHAR_ID id2)
C_BLOB_LIST * cblob_list()
void PrintBestChoices() const
BLOB_CHOICE * GetBlobChoice(int index) const
void make_bad()
Set the fields in this choice to be default (bad) values.
bool Valid(const MATRIX &m) const
void UpdateStateForSplit(int blob_position)
BLOB_CHOICE_LIST * GetBlobChoices(int index) const
UNICHAR_ID unichar_id() const
bool SetupForRecognition(const UNICHARSET &unicharset_in, tesseract::Tesseract *tesseract, Pix *pix, int norm_mode, const TBOX *norm_box, bool numeric_mode, bool use_body_size, bool allow_detailed_fx, ROW *row, const BLOCK *block)
C_BLOB_LIST * rej_cblob_list()
void ComputeAdaptionThresholds(float certainty_scale, float min_rating, float max_rating, float rating_margin, float *thresholds)
GenericVector< int > blob_gaps
void operator=(const ELIST_LINK &)
void ConsumeWordResults(WERD_RES *word)
#define ELISTIZE(CLASSNAME)
bool LogNewRawChoice(WERD_CHOICE *word_choice)
void DebugTopChoice(const char *msg) const
WERD_RES * InsertSimpleCloneWord(const WERD_RES &clone_res, WERD *new_word)
BLOB_CHOICE * FindMatchingChoice(UNICHAR_ID char_id, BLOB_CHOICE_LIST *bc_list)
const UNICHARSET * uch_set
CLISTIZE(BLOCK_RES) ELISTIZE(ROW_RES) ELISTIZE(WERD_RES) static const double kStopperAmbiguityThresholdGain
void DebugWordChoices(bool debug, const char *word_to_debug)
TBOX bounding_box() const
void set_permuter(uinT8 perm)
ROW_LIST * row_list()
get rows
int GetBlobsWidth(int start_blob, int last_blob)
void MakeCurrentWordFuzzy()
static TWERD * PolygonalCopy(bool allow_detailed_fx, WERD *src)
bool script_has_xheight() const
void CopySimpleFields(const WERD_RES &source)
GenericVector< SEAM * > seam_array
bool PiecesAllNatural(int start, int count) const
bool HasAnySplits() const
int GetBlobsGap(int blob_index)