tesseract/a00158_source.html

 /******************************************************************
  * File:        superscript.cpp
  * Description: Correction pass to fix superscripts and subscripts.
  * Author:      David Eger
  * Created:     Mon Mar 12 14:05:00 PDT 2012
  *
  * (C) Copyright 2012, Google, Inc.
  ** Licensed under the Apache License, Version 2.0 (the "License");
  ** you may not use this file except in compliance with the License.
  ** You may obtain a copy of the License at
  ** http://www.apache.org/licenses/LICENSE-2.0
  ** Unless required by applicable law or agreed to in writing, software
  ** distributed under the License is distributed on an "AS IS" BASIS,
  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  ** See the License for the specific language governing permissions and
  ** limitations under the License.
  *
  **********************************************************************/

 #include "normalis.h"
 #include "tesseractclass.h"

 // Returns the sum of the first num_unichars entries of word->best_state,
 // i.e. the number of chopped pieces accounted for by the leading
 // num_unichars positions. Returns 0 when num_unichars <= 0.
 static int LeadingUnicharsToChopped(WERD_RES *word, int num_unichars) {
   int chopped_total = 0;
   int idx = 0;
   while (idx < num_unichars) {
     chopped_total += word->best_state[idx];
     ++idx;
   }
   return chopped_total;
 }

 // Returns the sum of the last num_unichars entries of word->best_state,
 // i.e. the number of chopped pieces accounted for by the trailing
 // num_unichars positions. Returns 0 when num_unichars <= 0.
 static int TrailingUnicharsToChopped(WERD_RES *word, int num_unichars) {
   int chopped_total = 0;
   // Walk backwards from the final entry, consuming one entry per unichar.
   int idx = word->best_state.size() - 1;
   for (int remaining = num_unichars; remaining > 0; --remaining, --idx) {
     chopped_total += word->best_state[idx];
   }
   return chopped_total;
 }


 namespace tesseract {

 // Examines the chopped pieces that make up the rebuilt blob at
 // rebuilt_blob_index and reports runs of vertically out-of-place pieces at
 // either end of that blob.
 //
 // A piece counts as SP_SUPERSCRIPT when the bottom of its bounding box is
 // >= super_y_bottom, otherwise as SP_SUBSCRIPT when the top of its bounding
 // box is <= sub_y_top, otherwise as SP_NORMAL.
 //
 // Outputs (any of which may be NULL if the caller is not interested):
 //   leading_pos / num_leading_outliers   - position and length of the run of
 //       same-position outliers that starts at the first piece and is
 //       terminated by a normal piece (0 / SP_NORMAL if there is none).
 //   trailing_pos / num_trailing_outliers - position of the last piece and
 //       length of the run of same-position outliers that ends at it.
 void YOutlierPieces(WERD_RES *word, int rebuilt_blob_index,
                     int super_y_bottom, int sub_y_top,
                     ScriptPos *leading_pos, int *num_leading_outliers,
                     ScriptPos *trailing_pos, int *num_trailing_outliers) {
   // Redirect NULL outputs to scratch storage so that the rest of the
   // function can write through every pointer unconditionally.
   ScriptPos scratch_leading_pos, scratch_trailing_pos;
   int scratch_leading_count, scratch_trailing_count;
   if (leading_pos == NULL) leading_pos = &scratch_leading_pos;
   if (num_leading_outliers == NULL) {
     num_leading_outliers = &scratch_leading_count;
   }
   if (trailing_pos == NULL) trailing_pos = &scratch_trailing_pos;
   if (num_trailing_outliers == NULL) {
     num_trailing_outliers = &scratch_trailing_count;
   }

   *num_trailing_outliers = 0;
   *num_leading_outliers = 0;
   *trailing_pos = SP_NORMAL;
   *leading_pos = SP_NORMAL;

   const int first_piece = LeadingUnicharsToChopped(word, rebuilt_blob_index);
   const int piece_count = word->best_state[rebuilt_blob_index];
   ScriptPos prev_pos = SP_NORMAL;
   int run_length = 0;  // Length of the current run of same-position outliers.
   for (int piece = 0; piece < piece_count; ++piece) {
     const TBOX piece_box =
         word->chopped_word->blobs[first_piece + piece]->bounding_box();
     ScriptPos cur_pos;
     if (piece_box.bottom() >= super_y_bottom) {
       cur_pos = SP_SUPERSCRIPT;
     } else if (piece_box.top() <= sub_y_top) {
       cur_pos = SP_SUBSCRIPT;
     } else {
       cur_pos = SP_NORMAL;
     }

     if (cur_pos != SP_NORMAL) {
       // Extend the run if the position is unchanged, else start a new one.
       run_length = (cur_pos == prev_pos) ? run_length + 1 : 1;
     } else {
       // A run as long as the number of pieces seen so far began at piece 0,
       // so it is the leading run.
       if (run_length == piece) {
         *num_leading_outliers = run_length;
         *leading_pos = prev_pos;
       }
       run_length = 0;
     }
     prev_pos = cur_pos;
   }
   *num_trailing_outliers = run_length;
   *trailing_pos = prev_pos;
 }

 // Tries to improve the recognition of a word whose leading and/or trailing
 // characters look like superscripts or subscripts.
 //
 // Steps:
 //   1. GetSubAndSuperscriptCandidates() counts whole suspicious characters
 //      at each edge of the word.
 //   2. YOutlierPieces() looks inside the first/last remaining character for
 //      chopped pieces that are themselves vertical outliers ("remainders").
 //   3. The counts are converted to chopped-blob counts and handed to
 //      TrySuperscriptSplits(); if that is not good but reports retry counts,
 //      a second attempt is made on the revised word.
 //
 // Returns true iff a revised result was accepted, in which case it has been
 // consumed into *word via ConsumeWordResults(). Returns false without
 // touching the word when it failed recognition, is flagged W_REP_CHAR, has
 // no best_choice, or has no candidates.
 bool Tesseract::SubAndSuperscriptFix(WERD_RES *word) {
   if (word->tess_failed || word->word->flag(W_REP_CHAR) ||
       !word->best_choice) {
     return false;
   }
   int num_leading, num_trailing;
   ScriptPos sp_leading, sp_trailing;
   float leading_certainty, trailing_certainty;
   float avg_certainty, unlikely_threshold;

   // Calculate the number of whole suspicious characters at the edges.
   GetSubAndSuperscriptCandidates(
           word, &num_leading, &sp_leading, &leading_certainty,
           &num_trailing, &sp_trailing, &trailing_certainty,
           &avg_certainty, &unlikely_threshold);

   // Labels used only by the debug output below.
   const char *leading_pos = sp_leading == SP_SUBSCRIPT ? "sub" : "super";
   const char *trailing_pos = sp_trailing == SP_SUBSCRIPT ? "sub" : "super";

   int num_blobs = word->best_choice->length();

   // Calculate the remainder (partial characters) at the edges.
   // This accounts for us having classified the best version of
   // a word as [speaker?'] when it was instead [speaker.^{21}]
   // (that is we accidentally thought the 2 was attached to the period).
   int num_remainder_leading = 0, num_remainder_trailing = 0;
   if (num_leading + num_trailing < num_blobs && unlikely_threshold < 0.0) {
     int super_y_bottom =
         kBlnBaselineOffset + kBlnXHeight * superscript_min_y_bottom;
     int sub_y_top =
         kBlnBaselineOffset + kBlnXHeight * subscript_max_y_top;
     int last_word_char = num_blobs - 1 - num_trailing;
     float last_char_certainty = word->best_choice->certainty(last_word_char);
     if (word->best_choice->unichar_id(last_word_char) != 0 &&
         last_char_certainty <= unlikely_threshold) {
       ScriptPos rpos;
       YOutlierPieces(word, last_word_char, super_y_bottom, sub_y_top,
                      NULL, NULL, &rpos, &num_remainder_trailing);
       // A remainder only extends whole trailing characters that share its
       // script position.
       if (num_trailing > 0 && rpos != sp_trailing) num_remainder_trailing = 0;
       if (num_remainder_trailing > 0 &&
           last_char_certainty < trailing_certainty) {
         trailing_certainty = last_char_certainty;
       }
     }
     // If the trailing remainder already claimed part of a character, only
     // look for a leading remainder when a different character is left.
     bool another_blob_available = (num_remainder_trailing == 0) ||
         num_leading + num_trailing + 1 < num_blobs;
     // Must be float: certainties are fractional, and storing this in an int
     // truncates the value before it is compared against unlikely_threshold
     // and possibly copied into leading_certainty.
     float first_char_certainty = word->best_choice->certainty(num_leading);
     if (another_blob_available &&
         word->best_choice->unichar_id(num_leading) != 0 &&
         first_char_certainty <= unlikely_threshold) {
       ScriptPos lpos;
       YOutlierPieces(word, num_leading, super_y_bottom, sub_y_top,
                      &lpos, &num_remainder_leading, NULL, NULL);
       // A remainder only extends whole leading characters that share its
       // script position.
       if (num_leading > 0 && lpos != sp_leading) num_remainder_leading = 0;
       if (num_remainder_leading > 0 &&
           first_char_certainty < leading_certainty) {
         leading_certainty = first_char_certainty;
       }
     }
   }

   // If nothing to do, bail now.
   if (num_leading + num_trailing +
       num_remainder_leading + num_remainder_trailing == 0) {
     return false;
   }

   if (superscript_debug >= 1) {
     tprintf("Candidate for superscript detection: %s (",
             word->best_choice->unichar_string().string());
     if (num_leading || num_remainder_leading) {
       tprintf("%d.%d %s-leading ", num_leading, num_remainder_leading,
               leading_pos);
     }
     if (num_trailing || num_remainder_trailing) {
       tprintf("%d.%d %s-trailing ", num_trailing, num_remainder_trailing,
               trailing_pos);
     }
     tprintf(")\n");
   }
   if (superscript_debug >= 3) {
     word->best_choice->print();
   }
   if (superscript_debug >= 2) {
     tprintf(" Certainties -- Average: %.2f  Unlikely thresh: %.2f  ",
             avg_certainty, unlikely_threshold);
     if (num_leading)
       tprintf("Orig. leading (min): %.2f  ", leading_certainty);
     if (num_trailing)
       tprintf("Orig. trailing (min): %.2f  ", trailing_certainty);
     tprintf("\n");
   }

   // We've now calculated the number of rebuilt blobs we want to carve off.
   // However, split_word() works from TBLOBs in chopped_word, so we need to
   // convert to those.
   int num_chopped_leading =
       LeadingUnicharsToChopped(word, num_leading) + num_remainder_leading;
   int num_chopped_trailing =
       TrailingUnicharsToChopped(word, num_trailing) + num_remainder_trailing;

   int retry_leading = 0;
   int retry_trailing = 0;
   bool is_good = false;
   WERD_RES *revised = TrySuperscriptSplits(
       num_chopped_leading, leading_certainty, sp_leading,
       num_chopped_trailing, trailing_certainty, sp_trailing,
       word, &is_good, &retry_leading, &retry_trailing);
   if (is_good) {
     word->ConsumeWordResults(revised);
   } else if (retry_leading || retry_trailing) {
     // The first attempt was only partly believable: retry on the revised
     // word with the smaller counts it reported, which are in units of the
     // revised word's best_state entries.
     int retry_chopped_leading =
         LeadingUnicharsToChopped(revised, retry_leading);
     int retry_chopped_trailing =
         TrailingUnicharsToChopped(revised, retry_trailing);
     WERD_RES *revised2 = TrySuperscriptSplits(
         retry_chopped_leading, leading_certainty, sp_leading,
         retry_chopped_trailing, trailing_certainty, sp_trailing,
         revised, &is_good, &retry_leading, &retry_trailing);
     if (is_good) {
       word->ConsumeWordResults(revised2);
     }
     delete revised2;
   }
   // revised may be NULL here (TrySuperscriptSplits found nothing usable);
   // deleting NULL is a no-op.
   delete revised;
   return is_good;
 }

 void Tesseract::GetSubAndSuperscriptCandidates(const WERD_RES *word,
                                                int *num_rebuilt_leading,
                                                ScriptPos *leading_pos,
                                                float *leading_certainty,
                                                int *num_rebuilt_trailing,
                                                ScriptPos *trailing_pos,
                                                float *trailing_certainty,
                                                float *avg_certainty,
                                                float *unlikely_threshold) {
   *avg_certainty = *unlikely_threshold = 0.0f;
   *num_rebuilt_leading = *num_rebuilt_trailing = 0;
   *leading_certainty = *trailing_certainty = 0.0f;

   int super_y_bottom =
       kBlnBaselineOffset + kBlnXHeight * superscript_min_y_bottom;
   int sub_y_top =
       kBlnBaselineOffset + kBlnXHeight * subscript_max_y_top;

   // Step one: Get an average certainty for "normally placed" characters.

   // Counts here are of blobs in the rebuild_word / unichars in best_choice.
   *leading_pos = *trailing_pos = SP_NORMAL;
   int leading_outliers = 0;
   int trailing_outliers = 0;
   int num_normal = 0;
   float normal_certainty_total = 0.0f;
   float worst_normal_certainty = 0.0f;
   ScriptPos last_pos = SP_NORMAL;
   int num_blobs = word->rebuild_word->NumBlobs();
   for (int b = 0; b < num_blobs; ++b) {
     TBOX box = word->rebuild_word->blobs[b]->bounding_box();
     ScriptPos pos = SP_NORMAL;
     if (box.bottom() >= super_y_bottom) {
       pos = SP_SUPERSCRIPT;
     } else if (box.top() <= sub_y_top) {
       pos = SP_SUBSCRIPT;
     }
     if (pos == SP_NORMAL) {
       if (word->best_choice->unichar_id(b) != 0) {
         float char_certainty = word->best_choice->certainty(b);
         if (char_certainty < worst_normal_certainty) {
           worst_normal_certainty = char_certainty;
         }
         num_normal++;
         normal_certainty_total += char_certainty;
       }
       if (trailing_outliers == b) {
         leading_outliers = trailing_outliers;
         *leading_pos = last_pos;
       }
       trailing_outliers = 0;
     } else {
       if (last_pos == pos) {
         trailing_outliers++;
       } else {
         trailing_outliers = 1;
       }
     }
     last_pos = pos;
   }
   *trailing_pos = last_pos;
   if (num_normal >= 3) {  // throw out the worst as an outlier.
     num_normal--;
     normal_certainty_total -= worst_normal_certainty;
   }
   if (num_normal > 0) {
     *avg_certainty = normal_certainty_total / num_normal;
     *unlikely_threshold = superscript_worse_certainty * (*avg_certainty);
   }
   if (num_normal == 0 ||
       (leading_outliers == 0 && trailing_outliers == 0)) {
     return;
   }

   // Step two: Try to split off bits of the word that are both outliers
   //           and have much lower certainty than average
   // Calculate num_leading and leading_certainty.
   for (*leading_certainty = 0.0f, *num_rebuilt_leading = 0;
        *num_rebuilt_leading < leading_outliers;
        (*num_rebuilt_leading)++) {
     float char_certainty = word->best_choice->certainty(*num_rebuilt_leading);
     if (char_certainty > *unlikely_threshold) {
       break;
     }
     if (char_certainty < *leading_certainty) {
       *leading_certainty = char_certainty;
     }
   }

   // Calculate num_trailing and trailing_certainty.
   for (*trailing_certainty = 0.0f, *num_rebuilt_trailing = 0;
        *num_rebuilt_trailing < trailing_outliers;
        (*num_rebuilt_trailing)++) {
     int blob_idx = num_blobs - 1 - *num_rebuilt_trailing;
     float char_certainty = word->best_choice->certainty(blob_idx);
     if (char_certainty > *unlikely_threshold) {
       break;
     }
     if (char_certainty < *trailing_certainty) {
       *trailing_certainty = char_certainty;
     }
   }
 }


 // Splits a copy of `word` into up to three pieces (prefix, core, suffix) at
 // chopped-blob boundaries, re-recognizes the prefix and suffix with the
 // classifier's y-position multipliers set to 0, and uses
 // BelievableSuperscript() to judge whether the result should be accepted.
 //
 // Arguments:
 //   num_chopped_leading   - number of chopped blobs to split off the front;
 //                           no prefix is created when this is <= 0.
 //   leading_certainty     - multiplied by superscript_bettered_certainty to
 //                           form the certainty threshold for the prefix.
 //   leading_pos           - script position assigned to the prefix before it
 //                           is joined back (also printed in debug output).
 //   num_chopped_trailing, trailing_certainty, trailing_pos
 //                         - the same three values for the suffix.
 //   word                  - the source word; it is only read and copied here.
 //   is_good               - output: true iff every piece that was split off
 //                           was judged believable.
 //   retry_rebuild_leading, retry_rebuild_trailing
 //                         - outputs: zeroed on entry, then passed to
 //                           BelievableSuperscript() as its left_ok (prefix)
 //                           and right_ok (suffix) outputs.
 //
 // Returns NULL when the result is not good and both retry counts are 0.
 // Otherwise returns the rejoined word, even when *is_good is false; the
 // caller in this file deletes the returned pointer.
 WERD_RES *Tesseract::TrySuperscriptSplits(
     int num_chopped_leading, float leading_certainty, ScriptPos leading_pos,
     int num_chopped_trailing, float trailing_certainty,
     ScriptPos trailing_pos,
     WERD_RES *word,
     bool *is_good,
     int *retry_rebuild_leading, int *retry_rebuild_trailing) {
   int num_chopped = word->chopped_word->NumBlobs();

   *retry_rebuild_leading = *retry_rebuild_trailing = 0;

   // Chop apart the word into up to three pieces.

   // bb0 / bb1 receive the BlamerBundle pointers that split_word() hands
   // back; they are later passed to join_words() for the matching join.
   BlamerBundle *bb0 = NULL;
   BlamerBundle *bb1 = NULL;
   WERD_RES *prefix = NULL;
   WERD_RES *core = NULL;
   WERD_RES *suffix = NULL;
   if (num_chopped_leading > 0) {
     // Work on a copy; split_word() supplies the right-hand piece as core.
     prefix = new WERD_RES(*word);
     split_word(prefix, num_chopped_leading, &core, &bb0);
   } else {
     core = new WERD_RES(*word);
   }

   if (num_chopped_trailing > 0) {
     // The split point is relative to core, which no longer contains the
     // leading blobs when a prefix was split off.
     int split_pt = num_chopped - num_chopped_trailing - num_chopped_leading;
     split_word(core, split_pt, &suffix, &bb1);
   }

   //  Recognize the pieces in turn.
   // Remember the current multipliers so they can be restored after each
   // piece has been recognized with them set to 0.
   int saved_cp_multiplier = classify_class_pruner_multiplier;
   int saved_im_multiplier = classify_integer_matcher_multiplier;
   if (prefix) {
     // Turn off Tesseract's y-position penalties for the leading superscript.
     classify_class_pruner_multiplier.set_value(0);
     classify_integer_matcher_multiplier.set_value(0);

     // Adjust our expectations about the baseline for this prefix.
     if (superscript_debug >= 3) {
       tprintf(" recognizing first %d chopped blobs\n", num_chopped_leading);
     }
     recog_word_recursive(prefix);
     if (superscript_debug >= 2) {
       tprintf(" The leading bits look like %s %s\n",
               ScriptPosToString(leading_pos),
               prefix->best_choice->unichar_string().string());
     }

     // Restore the normal y-position penalties.
     classify_class_pruner_multiplier.set_value(saved_cp_multiplier);
     classify_integer_matcher_multiplier.set_value(saved_im_multiplier);
   }

   // Note: this message is only informational at this point; core itself is
   // recognized further down, after the prefix and suffix have been judged.
   if (superscript_debug >= 3) {
     tprintf(" recognizing middle %d chopped blobs\n",
             num_chopped - num_chopped_leading - num_chopped_trailing);
   }

   if (suffix) {
     // Turn off Tesseract's y-position penalties for the trailing superscript.
     classify_class_pruner_multiplier.set_value(0);
     classify_integer_matcher_multiplier.set_value(0);

     if (superscript_debug >= 3) {
       tprintf(" recognizing last %d chopped blobs\n", num_chopped_trailing);
     }
     recog_word_recursive(suffix);
     if (superscript_debug >= 2) {
       tprintf(" The trailing bits look like %s %s\n",
               ScriptPosToString(trailing_pos),
               suffix->best_choice->unichar_string().string());
     }

     // Restore the normal y-position penalties.
     classify_class_pruner_multiplier.set_value(saved_cp_multiplier);
     classify_integer_matcher_multiplier.set_value(saved_im_multiplier);
   }

   // Evaluate whether we think the results are believably better
   // than what we already had.
   // A missing piece counts as good. For the prefix only the left_ok output
   // is requested, for the suffix only right_ok.
   bool good_prefix = !prefix || BelievableSuperscript(
       superscript_debug >= 1, *prefix,
       superscript_bettered_certainty * leading_certainty,
       retry_rebuild_leading, NULL);
   bool good_suffix = !suffix || BelievableSuperscript(
       superscript_debug >= 1, *suffix,
       superscript_bettered_certainty * trailing_certainty,
       NULL, retry_rebuild_trailing);

   *is_good = good_prefix && good_suffix;
   if (!*is_good && !*retry_rebuild_leading && !*retry_rebuild_trailing) {
     // None of it is any good. Quit now.
     // NOTE(review): bb0 / bb1 obtained from split_word() are neither
     // released nor passed to join_words() on this path -- confirm who owns
     // them.
     delete core;
     delete prefix;
     delete suffix;
     return NULL;
   }
   // Only now recognize the middle piece (with the normal multipliers).
   recog_word_recursive(core);

   // Now paste the results together into core.
   if (suffix) {
     suffix->SetAllScriptPositions(trailing_pos);
     join_words(core, suffix, bb1);
   }
   if (prefix) {
     prefix->SetAllScriptPositions(leading_pos);
     join_words(prefix, core, bb0);
     // The joined result lives in prefix, so return that as core.
     core = prefix;
     prefix = NULL;
   }

   if (superscript_debug >= 1) {
     tprintf("%s superscript fix: %s\n", *is_good ? "ACCEPT" : "REJECT",
             core->best_choice->unichar_string().string());
   }
   return core;
 }


 bool Tesseract::BelievableSuperscript(bool debug,
                                       const WERD_RES &word,
                                       float certainty_threshold,
                                       int *left_ok,
                                       int *right_ok) const {
   int initial_ok_run_count = 0;
   int ok_run_count = 0;
   float worst_certainty = 0.0f;
   const WERD_CHOICE &wc = *word.best_choice;

   const UnicityTable<FontInfo>& fontinfo_table = get_fontinfo_table();
   for (int i = 0; i < wc.length(); i++) {
     TBLOB *blob = word.rebuild_word->blobs[i];
     UNICHAR_ID unichar_id = wc.unichar_id(i);
     float char_certainty = wc.certainty(i);
     bool bad_certainty = char_certainty < certainty_threshold;
     bool is_punc = wc.unicharset()->get_ispunctuation(unichar_id);
     bool is_italic = word.fontinfo && word.fontinfo->is_italic();
     BLOB_CHOICE *choice = word.GetBlobChoice(i);
     if (choice && fontinfo_table.size() > 0) {
       // Get better information from the specific choice, if available.
       int font_id1 = choice->fontinfo_id();
       bool font1_is_italic = font_id1 >= 0
           ? fontinfo_table.get(font_id1).is_italic() : false;
       int font_id2 = choice->fontinfo_id2();
       is_italic = font1_is_italic &&
           (font_id2 < 0 || fontinfo_table.get(font_id2).is_italic());
     }

     float height_fraction = 1.0f;
     float char_height = blob->bounding_box().height();
     float normal_height = char_height;
     if (wc.unicharset()->top_bottom_useful()) {
       int min_bot, max_bot, min_top, max_top;
       wc.unicharset()->get_top_bottom(unichar_id,
                                       &min_bot, &max_bot,
                                       &min_top, &max_top);
       float hi_height = max_top - max_bot;
       float lo_height = min_top - min_bot;
       normal_height = (hi_height + lo_height) / 2;
       if (normal_height >= kBlnXHeight) {
         // Only ding characters that we have decent information for because
         // they're supposed to be normal sized, not tiny specks or dashes.
         height_fraction = char_height / normal_height;
       }
     }
     bool bad_height = height_fraction < superscript_scaledown_ratio;

     if (debug) {
       if (is_italic) {
         tprintf(" Rejecting: superscript is italic.\n");
       }
       if (is_punc) {
         tprintf(" Rejecting: punctuation present.\n");
       }
       const char *char_str = wc.unicharset()->id_to_unichar(unichar_id);
       if (bad_certainty) {
         tprintf(" Rejecting: don't believe character %s with certainty %.2f "
                 "which is less than threshold %.2f\n", char_str,
                 char_certainty, certainty_threshold);
       }
       if (bad_height) {
         tprintf(" Rejecting: character %s seems too small @ %.2f versus "
                 "expected %.2f\n", char_str, char_height, normal_height);
       }
     }
     if (bad_certainty || bad_height || is_punc || is_italic) {
       if (ok_run_count == i) {
         initial_ok_run_count = ok_run_count;
       }
       ok_run_count = 0;
     } else {
       ok_run_count++;
     }
     if (char_certainty < worst_certainty) {
       worst_certainty = char_certainty;
     }
   }
   bool all_ok = ok_run_count == wc.length();
   if (all_ok && debug) {
     tprintf(" Accept: worst revised certainty is %.2f\n", worst_certainty);
   }
   if (!all_ok) {
     if (left_ok) *left_ok = initial_ok_run_count;
     if (right_ok) *right_ok = ok_run_count;
   }
   return all_ok;
 }


 }  // namespace tesseract
tesseract::Tesseract::split_word
void split_word(WERD_RES *word, int split_pt, WERD_RES **right_piece, BlamerBundle **orig_blamer_bundle) const
Definition: tfacepp.cpp:182

tesseract::Tesseract::TrySuperscriptSplits
WERD_RES * TrySuperscriptSplits(int num_chopped_leading, float leading_certainty, ScriptPos leading_pos, int num_chopped_trailing, float trailing_certainty, ScriptPos trailing_pos, WERD_RES *word, bool *is_good, int *retry_leading, int *retry_trailing)
Definition: superscript.cpp:382

WERD_CHOICE::unichar_id
UNICHAR_ID unichar_id(int index) const
Definition: ratngs.h:313

WERD_CHOICE::print
void print() const
Definition: ratngs.h:578

UNICHAR_ID
int UNICHAR_ID
Definition: unichar.h:33

UNICHARSET::get_ispunctuation
bool get_ispunctuation(UNICHAR_ID unichar_id) const
Definition: unicharset.h:479

tesseract::Tesseract::superscript_min_y_bottom
double superscript_min_y_bottom
Definition: tesseractclass.h:1012

WERD_RES::tess_failed
BOOL8 tess_failed
Definition: pageres.h:272

WERD_RES::best_state
GenericVector< int > best_state
Definition: pageres.h:255

WERD_RES::best_choice
WERD_CHOICE * best_choice
Definition: pageres.h:219

BLOB_CHOICE::fontinfo_id2
inT16 fontinfo_id2() const
Definition: ratngs.h:88

WERD_RES::SetAllScriptPositions
void SetAllScriptPositions(tesseract::ScriptPos position)
Definition: pageres.cpp:860

WERD_CHOICE::length
int length() const
Definition: ratngs.h:301

tesseract::Tesseract::subscript_max_y_top
double subscript_max_y_top
Definition: tesseractclass.h:1008

tesseractclass.h

UnicityTable::size
int size() const
Return the size used.
Definition: unicity_table.h:119

tesseract::SP_SUBSCRIPT
Definition: ratngs.h:262

kBlnXHeight
const int kBlnXHeight
Definition: normalis.h:28

tesseract::Classify::classify_integer_matcher_multiplier
int classify_integer_matcher_multiplier
Definition: classify.h:468

WERD_RES::rebuild_word
TWERD * rebuild_word
Definition: pageres.h:244

tprintf
#define tprintf(...)
Definition: tprintf.h:31

tesseract::Tesseract::superscript_worse_certainty
double superscript_worse_certainty
Definition: tesseractclass.h:996

STRING::string
const char * string() const
Definition: strngs.cpp:198

kBlnBaselineOffset
const int kBlnBaselineOffset
Definition: normalis.h:29

tesseract::Classify::get_fontinfo_table
UnicityTable< FontInfo > & get_fontinfo_table()
Definition: classify.h:344

GenericVector::size
int size() const
Definition: genericvector.h:72

tesseract
Definition: baseapi.cpp:82

WERD::flag
BOOL8 flag(WERD_FLAGS mask) const
Definition: werd.h:128

UNICHARSET::id_to_unichar
const char * id_to_unichar(UNICHAR_ID id) const
Definition: unicharset.cpp:266

BLOB_CHOICE
Definition: ratngs.h:48

tesseract::SP_NORMAL
Definition: ratngs.h:261

tesseract::Classify::classify_class_pruner_multiplier
int classify_class_pruner_multiplier
Definition: classify.h:464

BLOB_CHOICE::fontinfo_id
inT16 fontinfo_id() const
Definition: ratngs.h:85

normalis.h

tesseract::YOutlierPieces
void YOutlierPieces(WERD_RES *word, int rebuilt_blob_index, int super_y_bottom, int sub_y_top, ScriptPos *leading_pos, int *num_leading_outliers, ScriptPos *trailing_pos, int *num_trailing_outliers)
Definition: superscript.cpp:46

UNICHARSET::get_top_bottom
void get_top_bottom(UNICHAR_ID unichar_id, int *min_bottom, int *max_bottom, int *min_top, int *max_top) const
Definition: unicharset.h:528

WERD_RES::fontinfo
const FontInfo * fontinfo
Definition: pageres.h:288

WERD_RES
Definition: pageres.h:155

UnicityTable
Definition: fontinfo.h:28

WERD_CHOICE::certainty
float certainty() const
Definition: ratngs.h:328

tesseract::ScriptPosToString
const char * ScriptPosToString(enum ScriptPos script_pos)
Definition: ratngs.cpp:180

tesseract::Tesseract::GetSubAndSuperscriptCandidates
void GetSubAndSuperscriptCandidates(const WERD_RES *word, int *num_rebuilt_leading, ScriptPos *leading_pos, float *leading_certainty, int *num_rebuilt_trailing, ScriptPos *trailing_pos, float *trailing_certainty, float *avg_certainty, float *unlikely_threshold)
Definition: superscript.cpp:253

TWERD::NumBlobs
int NumBlobs() const
Definition: blobs.h:425

TBOX::top
inT16 top() const
Definition: rect.h:54

WERD_CHOICE::unicharset
const UNICHARSET * unicharset() const
Definition: ratngs.h:298

tesseract::Tesseract::BelievableSuperscript
bool BelievableSuperscript(bool debug, const WERD_RES &word, float certainty_threshold, int *left_ok, int *right_ok) const
Definition: superscript.cpp:520

WERD_CHOICE::unichar_string
const STRING & unichar_string() const
Definition: ratngs.h:539

TBOX
Definition: rect.h:30

TWERD::blobs
GenericVector< TBLOB * > blobs
Definition: blobs.h:436

tesseract::Tesseract::join_words
void join_words(WERD_RES *word, WERD_RES *word2, BlamerBundle *orig_bb) const
Definition: tfacepp.cpp:240

TBLOB
Definition: blobs.h:261

TBOX::height
inT16 height() const
Definition: rect.h:104

tesseract::Tesseract::superscript_scaledown_ratio
double superscript_scaledown_ratio
Definition: tesseractclass.h:1004

tesseract::Tesseract::recog_word_recursive
void recog_word_recursive(WERD_RES *word)
Definition: tfacepp.cpp:110

WERD_RES::GetBlobChoice
BLOB_CHOICE * GetBlobChoice(int index) const
Definition: pageres.cpp:742

WERD_RES::word
WERD * word
Definition: pageres.h:175

W_REP_CHAR
Definition: werd.h:41

tesseract::SP_SUPERSCRIPT
Definition: ratngs.h:263

tesseract::FontInfo::is_italic
bool is_italic() const
Definition: fontinfo.h:111

tesseract::Tesseract::superscript_bettered_certainty
double superscript_bettered_certainty
Definition: tesseractclass.h:1000

TBOX::bottom
inT16 bottom() const
Definition: rect.h:61

WERD_RES::ConsumeWordResults
void ConsumeWordResults(WERD_RES *word)
Definition: pageres.cpp:757

WERD_CHOICE
Definition: ratngs.h:271

tesseract::Tesseract::superscript_debug
int superscript_debug
Definition: tesseractclass.h:993

UnicityTable::get
const T & get(int id) const
Return the object from an id.
Definition: unicity_table.h:132

TBLOB::bounding_box
TBOX bounding_box() const
Definition: blobs.cpp:482

WERD_RES::chopped_word
TWERD * chopped_word
Definition: pageres.h:201

tesseract::Tesseract::SubAndSuperscriptFix
bool SubAndSuperscriptFix(WERD_RES *word_res)
Definition: superscript.cpp:101

UNICHARSET::top_bottom_useful
bool top_bottom_useful() const
Definition: unicharset.h:497

tesseract::ScriptPos
ScriptPos
Definition: ratngs.h:260

BlamerBundle
Definition: blamer.h:88