tesseract/a00101_source.html

 /******************************************************************
  * File:        output.cpp  (Formerly output.c)
  * Description: Output pass
  * Author:          Phil Cheatle
  * Created:         Thu Aug  4 10:56:08 BST 1994
  *
  * (C) Copyright 1994, Hewlett-Packard Ltd.
  ** Licensed under the Apache License, Version 2.0 (the "License");
  ** you may not use this file except in compliance with the License.
  ** You may obtain a copy of the License at
  ** http://www.apache.org/licenses/LICENSE-2.0
  ** Unless required by applicable law or agreed to in writing, software
  ** distributed under the License is distributed on an "AS IS" BASIS,
  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  ** See the License for the specific language governing permissions and
  ** limitations under the License.
  *
  **********************************************************************/

 #ifdef _MSC_VER
 #pragma warning(disable:4244)  // Conversion warnings
 #endif

 #include <string.h>
 #include <ctype.h>
 #ifdef __UNIX__
 #include          <assert.h>
 #include          <unistd.h>
 #include          <errno.h>
 #endif
 #include "helpers.h"
 #include "tessvars.h"
 #include "control.h"
 #include "reject.h"
 #include "docqual.h"
 #include "output.h"
 #include "globals.h"
 #include "tesseractclass.h"

 #define EPAPER_EXT      ".ep"
 #define PAGE_YSIZE      3508
 #define CTRL_INSET      '\024'   //dc4=text inset
 #define CTRL_FONT       '\016'   //so=font change
 #define CTRL_DEFAULT      '\017' //si=default font
 #define CTRL_SHIFT      '\022'   //dc2=x shift
 #define CTRL_TAB        '\011'   //tab
 #define CTRL_NEWLINE      '\012' //newline
 #define CTRL_HARDLINE   '\015'   //cr

 /**********************************************************************
  * pixels_to_pts
  *
  * Convert an integer number of pixels to the nearest integer
  * number of points, using 72 points per unit of resolution
  * (presumably pix_res is in pixels per inch - confirm with callers).
  *
  * pixels   - length in pixels; may be negative.
  * pix_res  - resolution; must be non-zero (not checked here).
  * Returns the length in points, with halves rounded away from zero.
  **********************************************************************/

 inT32 pixels_to_pts(               //convert coords
                     inT32 pixels,
                     inT32 pix_res  //resolution
                    ) {
   // Keep the intermediate in double: a float cannot represent every
   // inT32 exactly, so large values were rounded before the 0.5 was added.
   const double pts = pixels * 72.0 / pix_res;

   // Conversion to an integer truncates toward zero, so the rounding
   // offset has to carry the sign of the value being rounded.
   return static_cast<inT32>(pts < 0.0 ? pts - 0.5 : pts + 0.5);
 }

 namespace tesseract {
 /**********************************************************************
  * output_pass
  *
  * Visit every word of the page in order and hand it to write_results(),
  * together with the newline type that follows it and a flag saying
  * whether an end-of-line must be forced after it.
  *
  * page_res_it     - page iterator; it is restarted here and left at the end.
  * target_word_box - optional filter; when non-NULL only words whose
  *                   bounding-box centre lies inside this box are processed.
  **********************************************************************/
 void Tesseract::output_pass(  //Tess output pass //send to api
                             PAGE_RES_IT &page_res_it,
                             const TBOX *target_word_box) {
   BLOCK_RES *block_of_last_word = NULL;

   for (page_res_it.restart_page(); page_res_it.word() != NULL;
        page_res_it.forward()) {
     check_debug_pt(page_res_it.word(), 120);

     // Skip words whose centre falls outside the requested region.
     if (target_word_box != NULL) {
       const TBOX current_word_box = page_res_it.word()->word->bounding_box();
       const FCOORD center_pt(
           (current_word_box.right() + current_word_box.left()) / 2,
           (current_word_box.bottom() + current_word_box.top()) / 2);
       if (!target_word_box->contains(center_pt))
         continue;
     }

     // Track the block of the most recent word. Nothing in this function
     // reads the value back; the bookkeeping is kept as it was.
     if (tessedit_write_block_separators &&
         block_of_last_word != page_res_it.block()) {
       block_of_last_word = page_res_it.block();
     }

     WERD_RES *following_word_res = page_res_it.next_word();
     BLOCK_RES *following_block_res = page_res_it.next_block();

     // Force a line end on the last word of the page and, when block
     // separators are requested, on the last word of every block.
     const BOOL8 force_eol =
         (tessedit_write_block_separators &&
          (page_res_it.block() != following_block_res)) ||
         (following_word_res == NULL);

     WERD *nextword =
         (following_word_res != NULL) ? following_word_res->word : NULL;
     BLOCK *nextblock =
         (following_block_res != NULL) ? following_block_res->block : NULL;

     // The newline type is computed regardless of tilde crunching.
     const char newline_type =
         determine_newline_type(page_res_it.word()->word,
                                page_res_it.block()->block,
                                nextword, nextblock);
     write_results(page_res_it, newline_type, force_eol);
   }
 }


 /*************************************************************************
  * write_results()
  *
  * Per-word bookkeeping for the output pass; output_pass() calls it once
  * for every word it visits. All recognition and rejection has been done
  * by the time it runs.
  *
  * In the code visible here nothing is written to a file. The function
  *   - updates the stats_ output-state flags (tilde_crunch_written,
  *     last_char_was_tilde, last_char_was_newline,
  *     write_results_empty_block), and
  *   - for words that are not crunched, calls set_unlv_suspects() and
  *     applies the tessedit_zero_rejection / tessedit_minimal_rejection
  *     overrides to the word's reject map.
  *
  * The description below is the original one. It lists products that this
  * code no longer appears to generate - treat it as historical and confirm
  * before relying on it:
  *   .txt file     - giving the final best choices with NO highlighting
  *   .raw file     - giving the tesseract top choice output for each word
  *   .map file     - showing how the .txt file has been rejected in the .ep file
  *   epchoice list - a list of one element per word, containing the text for the
  *                   epaper. Reject strings are inserted.
  *   inset list    - a list of bounding boxes of reject insets - indexed by the
  *                   reject strings in the epchoice text.
  *
  * page_res_it  - iterator positioned on the word to process.
  * newline_type - result of determine_newline_type() for this word;
  *                non-zero means the word is followed by a newline.
  * force_eol    - TRUE when a line end must be recorded after this word
  *                even if the word itself is crunched.
  *************************************************************************/
 void Tesseract::write_results(PAGE_RES_IT &page_res_it,
                               char newline_type,  // type of newline
                               BOOL8 force_eol) {  // override tilde crunch?
   WERD_RES *word = page_res_it.word();
   const UNICHARSET &uchset = *word->uch_set;
   int i;
   BOOL8 need_reject = FALSE;
   // Id of the space unichar; the comparisons below treat it as the
   // marker of a character that should stay rejected.
   UNICHAR_ID space = uchset.unichar_to_id(" ");

   // CRUNCHED OR EMPTY WORDS: taken when the word has a crunch mode or an
   // empty best choice, unless zero-kelvin rejection or word-for-word mode
   // is on. Only the state flags are updated; the reject map is untouched.
   if ((word->unlv_crunch_mode != CR_NONE ||
        word->best_choice->length() == 0) &&
       !tessedit_zero_kelvin_rejection && !tessedit_word_for_word) {
     // A reject marker is needed unless the word is to be deleted, and
     // either none has been recorded since the last real word/line end or
     // this is a CR_KEEP_SPACE word preceded by a definite (non-fuzzy) space.
     if ((word->unlv_crunch_mode != CR_DELETE) &&
         (!stats_.tilde_crunch_written ||
          ((word->unlv_crunch_mode == CR_KEEP_SPACE) &&
           (word->word->space () > 0) &&
           !word->word->flag (W_FUZZY_NON) &&
           !word->word->flag (W_FUZZY_SP)))) {
       // A definite space before a word that does not start a line
       // separates it from any previous reject marker.
       if (!word->word->flag (W_BOL) &&
           (word->word->space () > 0) &&
           !word->word->flag (W_FUZZY_NON) &&
           !word->word->flag (W_FUZZY_SP)) {
         stats_.last_char_was_tilde = false;
       }
       need_reject = TRUE;
     }
     // Record the reject marker, either because one is needed and the
     // previous output was not already one, or because a forced line end
     // would otherwise follow another forced line end with nothing between.
     if ((need_reject && !stats_.last_char_was_tilde) ||
         (force_eol && stats_.write_results_empty_block)) {
       /* Write a reject char - mark as rejected unless zero_rejection mode */
       stats_.last_char_was_tilde = TRUE;
       stats_.tilde_crunch_written = true;
       stats_.last_char_was_newline = false;
       stats_.write_results_empty_block = false;
     }

     // Record a line end: either the word is flagged end-of-line and a
     // newline was not the last thing recorded, or the caller forces one.
     if ((word->word->flag (W_EOL) && !stats_.last_char_was_newline) || force_eol) {
       stats_.tilde_crunch_written = false;
       stats_.last_char_was_newline = true;
       stats_.last_char_was_tilde = false;
     }

     // Remember that the last thing recorded was a forced line end.
     if (force_eol)
       stats_.write_results_empty_block = true;
     return;
   }

   /* NORMAL PROCESSING of non tilde crunched words */

   stats_.tilde_crunch_written = false;
   if (newline_type)
     stats_.last_char_was_newline = true;
   else
     stats_.last_char_was_newline = false;
   stats_.write_results_empty_block = force_eol;  // about to write a real word

   // NOTE(review): this path is reachable with an empty best_choice when
   // tessedit_zero_kelvin_rejection or tessedit_word_for_word is set;
   // confirm that unichar_id(0) is safe in that case.
   if (unlv_tilde_crunching &&
       stats_.last_char_was_tilde &&
       (word->word->space() == 0) &&
       !(word->word->flag(W_REP_CHAR) && tessedit_write_rep_codes) &&
       (word->best_choice->unichar_id(0) == space)) {
     /* Prevent adjacent tilde across words - we know that adjacent tildes within
        words have been removed */
     word->MergeAdjacentBlobs(0);
   }
   // Work out whether this word leaves the output ending in a reject marker.
   if (newline_type ||
     (word->word->flag (W_REP_CHAR) && tessedit_write_rep_codes))
     stats_.last_char_was_tilde = false;
   else {
     if (word->reject_map.length () > 0) {
       // NOTE(review): best_choice is indexed with the reject map's length
       // before the ASSERT_HOST below checks that the two lengths agree.
       if (word->best_choice->unichar_id(word->reject_map.length() - 1) == space)
         stats_.last_char_was_tilde = true;
       else
         stats_.last_char_was_tilde = false;
     }
     else if (word->word->space () > 0)
       stats_.last_char_was_tilde = false;
     /* else it is unchanged as there are no output chars */
   }

   // The reject map must have exactly one entry per character of the choice.
   ASSERT_HOST (word->best_choice->length() == word->reject_map.length());

   // Apply the suspect_level policy to the reject map.
   set_unlv_suspects(word);
   check_debug_pt (word, 120);
   if (tessedit_rejection_debug) {
     tprintf ("Dict word: \"%s\": %d\n",
              word->best_choice->debug_string().string(),
              dict_word(*(word->best_choice)));
   }
   // The overrides below are skipped for repeated-character words when
   // repetition codes are being written.
   if (!word->word->flag(W_REP_CHAR) || !tessedit_write_rep_codes) {
     if (tessedit_zero_rejection) {
       /* OVERRIDE ALL REJECTION MECHANISMS - accept every rejected character */
       for (i = 0; i < word->best_choice->length(); ++i) {
         if (word->reject_map[i].rejected())
           word->reject_map[i].setrej_minimal_rej_accept();
       }
     }
     if (tessedit_minimal_rejection) {
       /* OVERRIDE ALL REJECTION MECHANISMS - ONLY REJECT TESS FAILURES
          (characters whose unichar id is the space id stay rejected) */
       for (i = 0; i < word->best_choice->length(); ++i) {
         if ((word->best_choice->unichar_id(i) != space) &&
             word->reject_map[i].rejected())
           word->reject_map[i].setrej_minimal_rej_accept();
       }
     }
   }
 }
 }  // namespace tesseract

 /**********************************************************************
  * determine_newline_type
  *
  * Classify the line break, if any, that follows a word.
  *
  * Returns FALSE when the word is not flagged W_EOL.
  * Returns CTRL_NEWLINE when there is no next word or next block, when the
  * next word is in a different block, or when the next word would not have
  * fitted in the gap left at the end of this line.
  * Returns CTRL_HARDLINE when the next word has leading space, or when the
  * gap left at the end of this line is wider than the next word.
  **********************************************************************/

 char determine_newline_type(                   //test line ends
                             WERD *word,        //word to do
                             BLOCK *block,      //current block
                             WERD *next_word,   //next word
                             BLOCK *next_block  //block of next word
                            ) {
   if (!word->flag(W_EOL)) {
     return FALSE;                // not at the end of a line
   }

   const bool continues_in_block =
       next_word != NULL && next_block != NULL && block == next_block;
   if (!continues_in_block) {
     return CTRL_NEWLINE;
   }

   if (next_word->space() > 0) {
     return CTRL_HARDLINE;        // next word is tabbed in
   }

   const TBOX word_box = word->bounding_box();
   const TBOX next_box = next_word->bounding_box();
   const TBOX block_box = block->bounding_box();

   // Room left between this word and the block's right edge, less one
   // block space value.
   inT16 room_at_line_end = block_box.right() - word_box.right();
   room_at_line_end -= (inT32) block->space();

   const inT16 next_word_width = next_box.right() - next_box.left();

   // If the next word would have fitted, the break was deliberate.
   if (room_at_line_end > next_word_width) {
     return CTRL_HARDLINE;
   }
   return CTRL_NEWLINE;
 }

 /*************************************************************************
  * get_rep_char()
  * Return the first accepted character from the repetition string. This is the
  * character which is repeated - as determined earlier by fix_rep_char()
  *************************************************************************/
 namespace tesseract {
 UNICHAR_ID Tesseract::get_rep_char(WERD_RES *word) {  // what char is repeated?
   const int map_length = word->reject_map.length();

   // The first character that is not rejected is the repeated one.
   for (int pos = 0; pos < map_length; ++pos) {
     if (!word->reject_map[pos].rejected()) {
       return word->best_choice->unichar_id(pos);
     }
   }

   // Every character is rejected (or the map is empty): fall back to the
   // id of the configured unrecognised_char string.
   return word->uch_set->unichar_to_id(unrecognised_char.string());
 }

 /*************************************************************************
  * SUSPECT LEVELS
  *
  * 0 - don't reject ANYTHING
  * 1,2 - partial rejection
  * 3 - BEST
  *
  * NOTE: to reject JUST tess failures in the .map file set suspect_level 3 and
  * tessedit_minimal_rejection.
  *
  * set_unlv_suspects() applies suspect_level to one word. The only change
  * it makes is to call setrej_minimal_rej_accept() on entries of
  * word_res->reject_map that are currently rejected:
  *   level 0     - every rejected entry is accepted.
  *   level >= 3  - the map is left exactly as it is.
  *   levels 1, 2 - alphas of safe dictionary words, characters of
  *                 well-rated or tess-accepted words, and document/block/row
  *                 rejections are accepted.
  *   level 1     - additionally relaxes 1/I/l and MM rejections and most
  *                 rejections in acceptable word or number strings.
  * An empty reject map is left alone.
  *************************************************************************/
 void Tesseract::set_unlv_suspects(WERD_RES *word_res) {
   const int len = word_res->reject_map.length();
   const WERD_CHOICE &word = *(word_res->best_choice);
   const UNICHARSET &uchset = *word.unicharset();
   int i;

   if (suspect_level == 0) {
     for (i = 0; i < len; i++) {
       if (word_res->reject_map[i].rejected())
         word_res->reject_map[i].setrej_minimal_rej_accept();
     }
     return;
   }

   if (suspect_level >= 3)
     return;                      //Use defaults

   // With no characters there is nothing to unreject, and the rating per
   // character computed below would be a division by zero.
   if (len == 0)
     return;

   /* NOW FOR LEVELS 1 and 2 Find some stuff to unreject*/

   if (safe_dict_word(word_res) &&
       (count_alphas(word) > suspect_short_words)) {
     /* Unreject alphas in dictionary words */
     for (i = 0; i < len; ++i) {
       if (word_res->reject_map[i].rejected() &&
           uchset.get_isalpha(word.unichar_id(i)))
         word_res->reject_map[i].setrej_minimal_rej_accept();
     }
   }

   const float rating_per_ch = word.rating() / len;

   if (rating_per_ch >= suspect_rating_per_ch)
     return;  // Don't touch bad ratings

   if ((word_res->tess_accepted) || (rating_per_ch < suspect_accept_rating)) {
     /* Unreject any Tess Acceptable word - but NOT tess reject chs*/
     for (i = 0; i < len; ++i) {
       if (word_res->reject_map[i].rejected() &&
           (!uchset.eq(word.unichar_id(i), " ")))
         word_res->reject_map[i].setrej_minimal_rej_accept();
     }
   }

   /* Unreject anything rejected at document, block or row level */
   for (i = 0; i < len; i++) {
     if (word_res->reject_map[i].rejected()) {
       if (word_res->reject_map[i].flag(R_DOC_REJ))
         word_res->reject_map[i].setrej_minimal_rej_accept();
       if (word_res->reject_map[i].flag(R_BLOCK_REJ))
         word_res->reject_map[i].setrej_minimal_rej_accept();
       if (word_res->reject_map[i].flag(R_ROW_REJ))
         word_res->reject_map[i].setrej_minimal_rej_accept();
     }
   }

   if (suspect_level == 2)
     return;

   /* LEVEL 1 ONLY from here on */

   if (!suspect_constrain_1Il || (len <= suspect_short_words)) {
     for (i = 0; i < len; i++) {
       if (word_res->reject_map[i].rejected()) {
         if ((word_res->reject_map[i].flag(R_1IL_CONFLICT) ||
           word_res->reject_map[i].flag(R_POSTNN_1IL)))
           word_res->reject_map[i].setrej_minimal_rej_accept();

         if (!suspect_constrain_1Il &&
           word_res->reject_map[i].flag(R_MM_REJECT))
           word_res->reject_map[i].setrej_minimal_rej_accept();
       }
     }
   }

   /* In longer words that look like an acceptable word or number, keep only
      the permanent rejections that are not 1Il/MM rejections */
   if (acceptable_word_string(*word_res->uch_set,
                              word.unichar_string().string(),
                              word.unichar_lengths().string()) !=
                                  AC_UNACCEPTABLE ||
       acceptable_number_string(word.unichar_string().string(),
                                word.unichar_lengths().string())) {
     if (len > suspect_short_words) {
       for (i = 0; i < len; i++) {
         if (word_res->reject_map[i].rejected() &&
           (!word_res->reject_map[i].perm_rejected() ||
            word_res->reject_map[i].flag (R_1IL_CONFLICT) ||
            word_res->reject_map[i].flag (R_POSTNN_1IL) ||
            word_res->reject_map[i].flag (R_MM_REJECT))) {
           word_res->reject_map[i].setrej_minimal_rej_accept();
         }
       }
     }
   }
 }

 // Returns how many characters of the word its unicharset classifies
 // as alphabetic.
 inT16 Tesseract::count_alphas(const WERD_CHOICE &word) {
   const UNICHARSET *charset = word.unicharset();
   const int num_chars = word.length();
   int alpha_total = 0;

   for (int pos = 0; pos < num_chars; ++pos) {
     if (charset->get_isalpha(word.unichar_id(pos))) {
       ++alpha_total;
     }
   }
   return alpha_total;
 }


 // Returns how many characters of the word its unicharset classifies
 // as either alphabetic or a digit.
 inT16 Tesseract::count_alphanums(const WERD_CHOICE &word) {
   const UNICHARSET *charset = word.unicharset();
   const int num_chars = word.length();
   int alnum_total = 0;

   for (int pos = 0; pos < num_chars; ++pos) {
     const UNICHAR_ID id = word.unichar_id(pos);
     if (charset->get_isalpha(id) || charset->get_isdigit(id)) {
       ++alnum_total;
     }
   }
   return alnum_total;
 }


 /*************************************************************************
  * acceptable_number_string()
  *
  * Decide whether a string has the shape of a number.
  *
  * s       - NUL-terminated text of the word.
  * lengths - NUL-terminated array giving, for each character of s in
  *           order, the number of bytes it occupies in s.
  *
  * Accepted shape:
  *   - an optional leading '(';
  *   - then an optional single '$', '.', '+' or '-';
  *   - then digits, where a single '.', ',' or '-' may follow a digit;
  *   - optionally ending, directly after a digit, in '%', ')' or "%)".
  * Returns TRUE for such strings (including one with nothing left after
  * the optional prefix) and FALSE otherwise.
  *************************************************************************/
 BOOL8 Tesseract::acceptable_number_string(const char *s,
                                           const char *lengths) {
   BOOL8 prev_digit = FALSE;

   // Each skipped prefix character must also consume its entry in
   // lengths; otherwise every later character is stepped over and tested
   // with the byte count of an earlier one.
   if (*lengths == 1 && *s == '(') {
     s++;
     lengths++;
   }

   if (*lengths == 1 &&
       ((*s == '$') || (*s == '.') || (*s == '+') || (*s == '-'))) {
     s++;
     lengths++;
   }

   for (; *s != '\0'; s += *(lengths++)) {
     if (unicharset.get_isdigit(s, *lengths))
       prev_digit = TRUE;
     else if (prev_digit &&
              (*lengths == 1 && ((*s == '.') || (*s == ',') || (*s == '-'))))
       prev_digit = FALSE;        // one separator allowed after a digit
     else if (prev_digit && *lengths == 1 &&
              (*(s + *lengths) == '\0') && ((*s == '%') || (*s == ')')))
       return TRUE;               // final '%' or ')'
     else if (prev_digit &&
              *lengths == 1 && (*s == '%') &&
              (*(lengths + 1) == 1 && *(s + *lengths) == ')') &&
              (*(s + *lengths + *(lengths + 1)) == '\0'))
       return TRUE;               // final "%)"
     else
       return FALSE;
   }
   return TRUE;
 }
 }  // namespace tesseract
tesseract::Tesseract::suspect_short_words
int suspect_short_words
Definition: tesseractclass.h:1029

PAGE_RES_IT
Definition: pageres.h:659

helpers.h

BLOCK_RES
Definition: pageres.h:97

CTRL_HARDLINE
#define CTRL_HARDLINE
Definition: output.cpp:48

tesseract::Tesseract::count_alphas
inT16 count_alphas(const WERD_CHOICE &word)
Definition: output.cpp:398

tesseract::Tesseract::output_pass
void output_pass(PAGE_RES_IT &page_res_it, const TBOX *target_word_box)
Definition: output.cpp:68

tesseract::Tesseract::get_rep_char
UNICHAR_ID get_rep_char(WERD_RES *word)
Definition: output.cpp:283

UNICHARSET
Definition: unicharset.h:139

PAGE_RES_IT::next_word
WERD_RES * next_word() const
Definition: pageres.h:745

R_DOC_REJ
Definition: rejctmap.h:79

AC_UNACCEPTABLE
Unacceptable word.
Definition: control.h:36

TRUE
#define TRUE
Definition: capi.h:45

WERD_CHOICE::unichar_id
UNICHAR_ID unichar_id(int index) const
Definition: ratngs.h:313

FCOORD
Definition: points.h:189

tesseract::Tesseract::tessedit_zero_rejection
bool tessedit_zero_rejection
Definition: tesseractclass.h:1034

inT32
int32_t inT32
Definition: host.h:38

UNICHAR_ID
int UNICHAR_ID
Definition: unichar.h:33

WERD_RES::best_choice
WERD_CHOICE * best_choice
Definition: pageres.h:219

PDBLK::bounding_box
void bounding_box(ICOORD &bottom_left, ICOORD &top_right) const
get box
Definition: pdblock.h:59

WERD_CHOICE::length
int length() const
Definition: ratngs.h:301

WERD_CHOICE::unichar_lengths
const STRING & unichar_lengths() const
Definition: ratngs.h:546

R_MM_REJECT
Definition: rejctmap.h:59

control.h

tesseract::Tesseract::tessedit_write_rep_codes
bool tessedit_write_rep_codes
Definition: tesseractclass.h:1016

tesseract::TesseractStats::tilde_crunch_written
bool tilde_crunch_written
Definition: tesseractclass.h:131

W_FUZZY_NON
Definition: werd.h:43

tesseract::Tesseract::tessedit_minimal_rejection
bool tessedit_minimal_rejection
Definition: tesseractclass.h:1033

tesseractclass.h

R_BLOCK_REJ
Definition: rejctmap.h:80

W_EOL
Definition: werd.h:36

pixels_to_pts
inT32 pixels_to_pts(inT32 pixels, inT32 pix_res)
Definition: output.cpp:57

WERD_CHOICE::debug_string
const STRING debug_string() const
Definition: ratngs.h:503

tesseract::Tesseract::check_debug_pt
BOOL8 check_debug_pt(WERD_RES *word, int location)
Definition: control.cpp:1794

tprintf
#define tprintf(...)
Definition: tprintf.h:31

BLOCK_RES::block
BLOCK * block
Definition: pageres.h:99

STRING::string
const char * string() const
Definition: strngs.cpp:198

tesseract::Tesseract::set_unlv_suspects
void set_unlv_suspects(WERD_RES *word)
Definition: output.cpp:305

tessvars.h

tesseract::TesseractStats::last_char_was_newline
bool last_char_was_newline
Definition: tesseractclass.h:132

WERD_RES::MergeAdjacentBlobs
void MergeAdjacentBlobs(int index)
Definition: pageres.cpp:969

tesseract
Definition: baseapi.cpp:82

WERD::bounding_box
TBOX bounding_box() const
Definition: werd.cpp:160

inT16
int16_t inT16
Definition: host.h:36

R_ROW_REJ
Definition: rejctmap.h:81

ASSERT_HOST
#define ASSERT_HOST(x)
Definition: errcode.h:84

WERD::flag
BOOL8 flag(WERD_FLAGS mask) const
Definition: werd.h:128

TBOX::left
inT16 left() const
Definition: rect.h:68

REJMAP::length
inT32 length() const
Definition: rejctmap.h:235

PAGE_RES_IT::restart_page
WERD_RES * restart_page()
Definition: pageres.h:683

tesseract::Tesseract::safe_dict_word
inT16 safe_dict_word(const WERD_RES *werd_res)
Definition: reject.cpp:607

tesseract::Tesseract::suspect_level
int suspect_level
Definition: tesseractclass.h:1026

tesseract::TesseractStats::last_char_was_tilde
bool last_char_was_tilde
Definition: tesseractclass.h:133

UNICHARSET::get_isalpha
bool get_isalpha(UNICHAR_ID unichar_id) const
Definition: unicharset.h:451

tesseract::Tesseract::tessedit_rejection_debug
bool tessedit_rejection_debug
Definition: tesseractclass.h:1041

W_FUZZY_SP
Definition: werd.h:42

PAGE_RES_IT::forward
WERD_RES * forward()
Definition: pageres.h:716

UNICHARSET::get_isdigit
bool get_isdigit(UNICHAR_ID unichar_id) const
Definition: unicharset.h:472

tesseract::Tesseract::tessedit_word_for_word
bool tessedit_word_for_word
Definition: tesseractclass.h:1036

R_POSTNN_1IL
Definition: rejctmap.h:57

tesseract::Tesseract::unlv_tilde_crunching
bool unlv_tilde_crunching
Definition: tesseractclass.h:947

BOOL8
unsigned char BOOL8
Definition: host.h:44

tesseract::Tesseract::tessedit_zero_kelvin_rejection
bool tessedit_zero_kelvin_rejection
Definition: tesseractclass.h:1038

FALSE
#define FALSE
Definition: capi.h:46

CR_DELETE
Definition: pageres.h:150

CR_NONE
Definition: pageres.h:147

tesseract::Tesseract::count_alphanums
inT16 count_alphanums(const WERD_CHOICE &word)
Definition: output.cpp:408

tesseract::Tesseract::tessedit_write_block_separators
bool tessedit_write_block_separators
Definition: tesseractclass.h:1014

WERD_RES::tess_accepted
BOOL8 tess_accepted
Definition: pageres.h:280

tesseract::Tesseract::write_results
void write_results(PAGE_RES_IT &page_res_it, char newline_type, BOOL8 force_eol)
Definition: output.cpp:130

TBOX::contains
bool contains(const FCOORD pt) const
Definition: rect.h:323

WERD_RES
Definition: pageres.h:155

tesseract::CCUtil::unicharset
UNICHARSET unicharset
Definition: ccutil.h:68

tesseract::Tesseract::unrecognised_char
char * unrecognised_char
Definition: tesseractclass.h:1025

W_BOL
Definition: werd.h:35

reject.h

WERD_RES::unlv_crunch_mode
CRUNCH_MODE unlv_crunch_mode
Definition: pageres.h:294

TBOX::top
inT16 top() const
Definition: rect.h:54

PAGE_RES_IT::next_block
BLOCK_RES * next_block() const
Definition: pageres.h:751

WERD_CHOICE::unicharset
const UNICHARSET * unicharset() const
Definition: ratngs.h:298

tesseract::Tesseract::acceptable_number_string
BOOL8 acceptable_number_string(const char *s, const char *lengths)
Definition: output.cpp:419

WERD_CHOICE::unichar_string
const STRING & unichar_string() const
Definition: ratngs.h:539

TBOX
Definition: rect.h:30

tesseract::Tesseract::suspect_accept_rating
double suspect_accept_rating
Definition: tesseractclass.h:1032

tesseract::Tesseract::acceptable_word_string
ACCEPTABLE_WERD_TYPE acceptable_word_string(const UNICHARSET &char_set, const char *s, const char *lengths)
Definition: control.cpp:1690

CTRL_NEWLINE
#define CTRL_NEWLINE
Definition: output.cpp:47

UNICHARSET::eq
bool eq(UNICHAR_ID unichar_id, const char *const unichar_repr) const
Definition: unicharset.cpp:656

WERD_RES::word
WERD * word
Definition: pageres.h:175

TBOX::right
inT16 right() const
Definition: rect.h:75

W_REP_CHAR
Definition: werd.h:41

CR_KEEP_SPACE
Definition: pageres.h:148

tesseract::Wordrec::dict_word
int dict_word(const WERD_CHOICE &word)
Definition: tface.cpp:128

tesseract::Tesseract::suspect_constrain_1Il
bool suspect_constrain_1Il
Definition: tesseractclass.h:1030

globals.h

PAGE_RES_IT::word
WERD_RES * word() const
Definition: pageres.h:736

TBOX::bottom
inT16 bottom() const
Definition: rect.h:61

WERD_CHOICE
Definition: ratngs.h:271

tesseract::Tesseract::suspect_rating_per_ch
double suspect_rating_per_ch
Definition: tesseractclass.h:1031

docqual.h

WERD_RES::uch_set
const UNICHARSET * uch_set
Definition: pageres.h:192

UNICHARSET::unichar_to_id
UNICHAR_ID unichar_to_id(const char *const unichar_repr) const
Definition: unicharset.cpp:194

WERD
Definition: werd.h:60

WERD_RES::reject_map
REJMAP reject_map
Definition: pageres.h:271

determine_newline_type
char determine_newline_type(WERD *word, BLOCK *block, WERD *next_word, BLOCK *next_block)
Definition: output.cpp:245

WERD::space
uinT8 space()
Definition: werd.h:104

count
int count(LIST var_list)
Definition: oldlist.cpp:103

output.h

BLOCK::space
inT16 space() const
return spacing
Definition: ocrblock.h:102

tesseract::TesseractStats::write_results_empty_block
bool write_results_empty_block
Definition: tesseractclass.h:134

BLOCK
Definition: ocrblock.h:30

R_1IL_CONFLICT
Definition: rejctmap.h:56

PAGE_RES_IT::block
BLOCK_RES * block() const
Definition: pageres.h:742

WERD_CHOICE::rating
float rating() const
Definition: ratngs.h:325