tesseract  4.00.00dev
tesseract::Textord Class Reference

#include <textord.h>

Public Member Functions

 Textord (CCStruct *ccstruct)
 
 ~Textord ()
 
void TextordPage (PageSegMode pageseg_mode, const FCOORD &reskew, int width, int height, Pix *binary_pix, Pix *thresholds_pix, Pix *grey_pix, bool use_box_bottoms, BLOBNBOX_LIST *diacritic_blobs, BLOCK_LIST *blocks, TO_BLOCK_LIST *to_blocks)
 
void CleanupSingleRowResult (PageSegMode pageseg_mode, PAGE_RES *page_res)
 
bool use_cjk_fp_model () const
 
void set_use_cjk_fp_model (bool flag)
 
void to_spacing (ICOORD page_tr, TO_BLOCK_LIST *blocks)
 
ROW * make_prop_words (TO_ROW *row, FCOORD rotation)
 
ROW * make_blob_words (TO_ROW *row, FCOORD rotation)
 
void find_components (Pix *pix, BLOCK_LIST *blocks, TO_BLOCK_LIST *to_blocks)
 
void filter_blobs (ICOORD page_tr, TO_BLOCK_LIST *blocks, BOOL8 testing_on)
 
void compute_block_xheight (TO_BLOCK *block, float gradient)
 
void make_spline_rows (TO_BLOCK *block, float gradient, BOOL8 testing_on)
 
compute_row_xheight

Estimate the xheight of this row. Compute the ascender rise and descender drop at the same time. Set xheight_evidence to the number of blobs with the chosen xheight that appear in this row.

void compute_row_xheight (TO_ROW *row, const FCOORD &rotation, float gradient, int block_line_size)
 

Public Attributes

bool textord_single_height_mode = false
 
bool tosp_old_to_method = false
 
bool tosp_old_to_constrain_sp_kn = false
 
bool tosp_only_use_prop_rows = true
 
bool tosp_force_wordbreak_on_punct = false
 
bool tosp_use_pre_chopping = false
 
bool tosp_old_to_bug_fix = false
 
bool tosp_block_use_cert_spaces = true
 
bool tosp_row_use_cert_spaces = true
 
bool tosp_narrow_blobs_not_cert = true
 
bool tosp_row_use_cert_spaces1 = true
 
bool tosp_recovery_isolated_row_stats = true
 
bool tosp_only_small_gaps_for_kern = false
 
bool tosp_all_flips_fuzzy = false
 
bool tosp_fuzzy_limit_all = true
 
bool tosp_stats_use_xht_gaps = true
 
bool tosp_use_xht_gaps = true
 
bool tosp_only_use_xht_gaps = false
 
bool tosp_rule_9_test_punct = false
 
bool tosp_flip_fuzz_kn_to_sp = true
 
bool tosp_flip_fuzz_sp_to_kn = true
 
bool tosp_improve_thresh = false
 
int tosp_debug_level = 0
 
int tosp_enough_space_samples_for_median = 3
 
int tosp_redo_kern_limit = 10
 
int tosp_few_samples = 40
 
int tosp_short_row = 20
 
int tosp_sanity_method = 1
 
double tosp_old_sp_kn_th_factor = 2.0
 
double tosp_threshold_bias1 = 0
 
double tosp_threshold_bias2 = 0
 
double tosp_narrow_fraction = 0.3
 
double tosp_narrow_aspect_ratio = 0.48
 
double tosp_wide_fraction = 0.52
 
double tosp_wide_aspect_ratio = 0.0
 
double tosp_fuzzy_space_factor = 0.6
 
double tosp_fuzzy_space_factor1 = 0.5
 
double tosp_fuzzy_space_factor2 = 0.72
 
double tosp_gap_factor = 0.83
 
double tosp_kern_gap_factor1 = 2.0
 
double tosp_kern_gap_factor2 = 1.3
 
double tosp_kern_gap_factor3 = 2.5
 
double tosp_ignore_big_gaps = -1
 
double tosp_ignore_very_big_gaps = 3.5
 
double tosp_rep_space = 1.6
 
double tosp_enough_small_gaps = 0.65
 
double tosp_table_kn_sp_ratio = 2.25
 
double tosp_table_xht_sp_ratio = 0.33
 
double tosp_table_fuzzy_kn_sp_ratio = 3.0
 
double tosp_fuzzy_kn_fraction = 0.5
 
double tosp_fuzzy_sp_fraction = 0.5
 
double tosp_min_sane_kn_sp = 1.5
 
double tosp_init_guess_kn_mult = 2.2
 
double tosp_init_guess_xht_mult = 0.28
 
double tosp_max_sane_kn_thresh = 5.0
 
double tosp_flip_caution = 0.0
 
double tosp_large_kerning = 0.19
 
double tosp_dont_fool_with_small_kerns = -1
 
double tosp_near_lh_edge = 0
 
double tosp_silly_kn_sp_gap = 0.2
 
double tosp_pass_wide_fuzz_sp_to_context = 0.75
 
bool textord_no_rejects = false
 
bool textord_show_blobs = false
 
bool textord_show_boxes = false
 
int textord_max_noise_size = 7
 
int textord_baseline_debug = 0
 
double textord_blob_size_bigile = 95
 
double textord_noise_area_ratio = 0.7
 
double textord_blob_size_smallile = 20
 
double textord_initialx_ile = 0.75
 
double textord_initialasc_ile = 0.90
 
int textord_noise_sizefraction = 10
 
double textord_noise_sizelimit = 0.5
 
int textord_noise_translimit = 16
 
double textord_noise_normratio = 2.0
 
bool textord_noise_rejwords = true
 
bool textord_noise_rejrows = true
 
double textord_noise_syfract = 0.2
 
double textord_noise_sxfract = 0.4
 
double textord_noise_hfract = 1.0/64
 
int textord_noise_sncount = 1
 
double textord_noise_rowratio = 6.0
 
bool textord_noise_debug = FALSE
 
double textord_blshift_maxshift = 0.00
 
double textord_blshift_xfraction = 9.99
 

Detailed Description

Definition at line 68 of file textord.h.

Constructor & Destructor Documentation

◆ Textord()

tesseract::Textord::Textord ( CCStruct * ccstruct)
explicit

Definition at line 35 of file textord.cpp.

36  : ccstruct_(ccstruct),
37  use_cjk_fp_model_(false),
38  // makerow.cpp ///////////////////////////////////////////
40  "Script has no xheight, so use a single mode",
41  ccstruct_->params()),
42  // tospace.cpp ///////////////////////////////////////////
43  BOOL_MEMBER(tosp_old_to_method, false, "Space stats use prechopping?",
44  ccstruct_->params()),
46  "Constrain relative values of inter and intra-word gaps for "
47  "old_to_method.",
48  ccstruct_->params()),
50  "Block stats to use fixed pitch rows?", ccstruct_->params()),
52  "Force word breaks on punct to break long lines in non-space "
53  "delimited langs",
54  ccstruct_->params()),
55  BOOL_MEMBER(tosp_use_pre_chopping, false, "Space stats use prechopping?",
56  ccstruct_->params()),
57  BOOL_MEMBER(tosp_old_to_bug_fix, false, "Fix suspected bug in old code",
58  ccstruct_->params()),
59  BOOL_MEMBER(tosp_block_use_cert_spaces, true, "Only stat OBVIOUS spaces",
60  ccstruct_->params()),
61  BOOL_MEMBER(tosp_row_use_cert_spaces, true, "Only stat OBVIOUS spaces",
62  ccstruct_->params()),
63  BOOL_MEMBER(tosp_narrow_blobs_not_cert, true, "Only stat OBVIOUS spaces",
64  ccstruct_->params()),
65  BOOL_MEMBER(tosp_row_use_cert_spaces1, true, "Only stat OBVIOUS spaces",
66  ccstruct_->params()),
68  "Use row alone when inadequate cert spaces",
69  ccstruct_->params()),
70  BOOL_MEMBER(tosp_only_small_gaps_for_kern, false, "Better guess",
71  ccstruct_->params()),
72  BOOL_MEMBER(tosp_all_flips_fuzzy, false, "Pass ANY flip to context?",
73  ccstruct_->params()),
75  "Don't restrict kn->sp fuzzy limit to tables",
76  ccstruct_->params()),
78  "Use within xht gap for wd breaks", ccstruct_->params()),
79  BOOL_MEMBER(tosp_use_xht_gaps, true, "Use within xht gap for wd breaks",
80  ccstruct_->params()),
82  "Only use within xht gap for wd breaks", ccstruct_->params()),
84  "Don't chng kn to space next to punct", ccstruct_->params()),
85  BOOL_MEMBER(tosp_flip_fuzz_kn_to_sp, true, "Default flip",
86  ccstruct_->params()),
87  BOOL_MEMBER(tosp_flip_fuzz_sp_to_kn, true, "Default flip",
88  ccstruct_->params()),
89  BOOL_MEMBER(tosp_improve_thresh, false, "Enable improvement heuristic",
90  ccstruct_->params()),
91  INT_MEMBER(tosp_debug_level, 0, "Debug data", ccstruct_->params()),
93  "or should we use mean", ccstruct_->params()),
95  "No.samples reqd to reestimate for row", ccstruct_->params()),
97  "No.gaps reqd with 1 large gap to treat as a table",
98  ccstruct_->params()),
100  "No.gaps reqd with few cert spaces to use certs",
101  ccstruct_->params()),
102  INT_MEMBER(tosp_sanity_method, 1, "How to avoid being silly",
103  ccstruct_->params()),
105  "Factor for defining space threshold in terms of space and "
106  "kern sizes",
107  ccstruct_->params()),
108  double_MEMBER(tosp_threshold_bias1, 0, "how far between kern and space?",
109  ccstruct_->params()),
110  double_MEMBER(tosp_threshold_bias2, 0, "how far between kern and space?",
111  ccstruct_->params()),
112  double_MEMBER(tosp_narrow_fraction, 0.3, "Fract of xheight for narrow",
113  ccstruct_->params()),
115  "narrow if w/h less than this", ccstruct_->params()),
116  double_MEMBER(tosp_wide_fraction, 0.52, "Fract of xheight for wide",
117  ccstruct_->params()),
118  double_MEMBER(tosp_wide_aspect_ratio, 0.0, "wide if w/h less than this",
119  ccstruct_->params()),
121  "Fract of xheight for fuzz sp", ccstruct_->params()),
123  "Fract of xheight for fuzz sp", ccstruct_->params()),
125  "Fract of xheight for fuzz sp", ccstruct_->params()),
126  double_MEMBER(tosp_gap_factor, 0.83, "gap ratio to flip sp->kern",
127  ccstruct_->params()),
128  double_MEMBER(tosp_kern_gap_factor1, 2.0, "gap ratio to flip kern->sp",
129  ccstruct_->params()),
130  double_MEMBER(tosp_kern_gap_factor2, 1.3, "gap ratio to flip kern->sp",
131  ccstruct_->params()),
132  double_MEMBER(tosp_kern_gap_factor3, 2.5, "gap ratio to flip kern->sp",
133  ccstruct_->params()),
134  double_MEMBER(tosp_ignore_big_gaps, -1, "xht multiplier",
135  ccstruct_->params()),
136  double_MEMBER(tosp_ignore_very_big_gaps, 3.5, "xht multiplier",
137  ccstruct_->params()),
138  double_MEMBER(tosp_rep_space, 1.6, "rep gap multiplier for space",
139  ccstruct_->params()),
141  "Fract of kerns reqd for isolated row stats",
142  ccstruct_->params()),
144  "Min difference of kn & sp in table", ccstruct_->params()),
146  "Expect spaces bigger than this", ccstruct_->params()),
148  "Fuzzy if less than this", ccstruct_->params()),
149  double_MEMBER(tosp_fuzzy_kn_fraction, 0.5, "New fuzzy kn alg",
150  ccstruct_->params()),
151  double_MEMBER(tosp_fuzzy_sp_fraction, 0.5, "New fuzzy sp alg",
152  ccstruct_->params()),
154  "Don't trust spaces less than this time kn",
155  ccstruct_->params()),
157  "Thresh guess - mult kn by this", ccstruct_->params()),
159  "Thresh guess - mult xht by this", ccstruct_->params()),
161  "Multiplier on kn to limit thresh", ccstruct_->params()),
163  "Don't autoflip kn to sp when large separation",
164  ccstruct_->params()),
166  "Limit use of xht gap with large kns", ccstruct_->params()),
168  "Limit use of xht gap with odd small kns",
169  ccstruct_->params()),
171  "Don't reduce box if the top left is non blank",
172  ccstruct_->params()),
174  "Don't let sp minus kn get too small", ccstruct_->params()),
176  "How wide fuzzies need context", ccstruct_->params()),
177  // tordmain.cpp ///////////////////////////////////////////
178  BOOL_MEMBER(textord_no_rejects, false, "Don't remove noise blobs",
179  ccstruct_->params()),
180  BOOL_MEMBER(textord_show_blobs, false, "Display unsorted blobs",
181  ccstruct_->params()),
182  BOOL_MEMBER(textord_show_boxes, false, "Display unsorted blobs",
183  ccstruct_->params()),
184  INT_MEMBER(textord_max_noise_size, 7, "Pixel size of noise",
185  ccstruct_->params()),
186  INT_MEMBER(textord_baseline_debug, 0, "Baseline debug level",
187  ccstruct_->params()),
188  double_MEMBER(textord_blob_size_bigile, 95, "Percentile for large blobs",
189  ccstruct_->params()),
191  "Fraction of bounding box for noise", ccstruct_->params()),
193  "Percentile for small blobs", ccstruct_->params()),
195  "Ile of sizes for xheight guess", ccstruct_->params()),
197  "Ile of sizes for xheight guess", ccstruct_->params()),
198  INT_MEMBER(textord_noise_sizefraction, 10, "Fraction of size for maxima",
199  ccstruct_->params()),
201  "Fraction of x for big t count", ccstruct_->params()),
202  INT_MEMBER(textord_noise_translimit, 16, "Transitions for normal blob",
203  ccstruct_->params()),
205  "Dot to norm ratio for deletion", ccstruct_->params()),
206  BOOL_MEMBER(textord_noise_rejwords, true, "Reject noise-like words",
207  ccstruct_->params()),
208  BOOL_MEMBER(textord_noise_rejrows, true, "Reject noise-like rows",
209  ccstruct_->params()),
211  "xh fract height error for norm blobs",
212  ccstruct_->params()),
214  "xh fract width error for norm blobs", ccstruct_->params()),
216  "Height fraction to discard outlines as speckle noise",
217  ccstruct_->params()),
218  INT_MEMBER(textord_noise_sncount, 1, "super norm blobs to save row",
219  ccstruct_->params()),
221  "Dot to norm ratio for deletion", ccstruct_->params()),
222  BOOL_MEMBER(textord_noise_debug, false, "Debug row garbage detector",
223  ccstruct_->params()),
224  double_MEMBER(textord_blshift_maxshift, 0.00, "Max baseline shift",
225  ccstruct_->params()),
227  "Min size of baseline shift", ccstruct_->params()) {}
double tosp_rep_space
Definition: textord.h:341
double tosp_fuzzy_space_factor
Definition: textord.h:327
double tosp_silly_kn_sp_gap
Definition: textord.h:369
bool tosp_improve_thresh
Definition: textord.h:301
double textord_blshift_maxshift
Definition: textord.h:398
double tosp_ignore_big_gaps
Definition: textord.h:339
bool tosp_old_to_method
Definition: textord.h:263
double tosp_threshold_bias2
Definition: textord.h:318
int textord_noise_sncount
Definition: textord.h:395
ParamsVectors * params()
Definition: ccutil.h:62
double textord_blob_size_bigile
Definition: textord.h:378
bool tosp_row_use_cert_spaces1
Definition: textord.h:283
bool tosp_old_to_constrain_sp_kn
Definition: textord.h:266
double tosp_fuzzy_space_factor1
Definition: textord.h:329
bool textord_no_rejects
Definition: textord.h:373
#define double_MEMBER(name, val, comment, vec)
Definition: params.h:309
double tosp_min_sane_kn_sp
Definition: textord.h:353
bool textord_single_height_mode
Definition: textord.h:261
bool tosp_row_use_cert_spaces
Definition: textord.h:279
double textord_noise_area_ratio
Definition: textord.h:380
bool tosp_recovery_isolated_row_stats
Definition: textord.h:285
bool tosp_flip_fuzz_kn_to_sp
Definition: textord.h:298
double tosp_table_kn_sp_ratio
Definition: textord.h:345
double tosp_pass_wide_fuzz_sp_to_context
Definition: textord.h:371
int textord_max_noise_size
Definition: textord.h:376
double tosp_table_fuzzy_kn_sp_ratio
Definition: textord.h:349
double textord_noise_sxfract
Definition: textord.h:392
bool tosp_fuzzy_limit_all
Definition: textord.h:289
bool textord_noise_rejrows
Definition: textord.h:389
bool tosp_block_use_cert_spaces
Definition: textord.h:277
bool tosp_old_to_bug_fix
Definition: textord.h:275
bool textord_show_blobs
Definition: textord.h:374
double tosp_wide_aspect_ratio
Definition: textord.h:325
int textord_baseline_debug
Definition: textord.h:377
bool tosp_use_pre_chopping
Definition: textord.h:273
double tosp_old_sp_kn_th_factor
Definition: textord.h:314
bool tosp_rule_9_test_punct
Definition: textord.h:297
double tosp_enough_small_gaps
Definition: textord.h:343
double textord_noise_sizelimit
Definition: textord.h:385
double textord_blob_size_smallile
Definition: textord.h:381
bool tosp_all_flips_fuzzy
Definition: textord.h:287
bool tosp_use_xht_gaps
Definition: textord.h:293
int textord_noise_sizefraction
Definition: textord.h:384
double tosp_table_xht_sp_ratio
Definition: textord.h:347
bool textord_noise_debug
Definition: textord.h:397
double tosp_kern_gap_factor2
Definition: textord.h:336
double textord_noise_syfract
Definition: textord.h:390
bool tosp_only_use_prop_rows
Definition: textord.h:268
double textord_noise_rowratio
Definition: textord.h:396
bool tosp_stats_use_xht_gaps
Definition: textord.h:291
double tosp_init_guess_xht_mult
Definition: textord.h:357
int tosp_redo_kern_limit
Definition: textord.h:306
int tosp_enough_space_samples_for_median
Definition: textord.h:304
bool tosp_only_use_xht_gaps
Definition: textord.h:295
int tosp_sanity_method
Definition: textord.h:311
double textord_noise_hfract
Definition: textord.h:394
double tosp_threshold_bias1
Definition: textord.h:316
double textord_initialx_ile
Definition: textord.h:382
double tosp_flip_caution
Definition: textord.h:361
double tosp_gap_factor
Definition: textord.h:332
double tosp_dont_fool_with_small_kerns
Definition: textord.h:365
double tosp_fuzzy_space_factor2
Definition: textord.h:331
double tosp_wide_fraction
Definition: textord.h:323
bool tosp_only_small_gaps_for_kern
Definition: textord.h:286
bool textord_show_boxes
Definition: textord.h:375
double tosp_kern_gap_factor3
Definition: textord.h:338
bool tosp_force_wordbreak_on_punct
Definition: textord.h:271
double tosp_ignore_very_big_gaps
Definition: textord.h:340
double tosp_init_guess_kn_mult
Definition: textord.h:355
bool tosp_flip_fuzz_sp_to_kn
Definition: textord.h:299
double tosp_near_lh_edge
Definition: textord.h:367
double tosp_narrow_fraction
Definition: textord.h:320
double tosp_kern_gap_factor1
Definition: textord.h:334
double tosp_max_sane_kn_thresh
Definition: textord.h:359
bool textord_noise_rejwords
Definition: textord.h:388
int textord_noise_translimit
Definition: textord.h:386
double tosp_narrow_aspect_ratio
Definition: textord.h:322
#define BOOL_MEMBER(name, val, comment, vec)
Definition: params.h:303
double tosp_fuzzy_sp_fraction
Definition: textord.h:351
#define INT_MEMBER(name, val, comment, vec)
Definition: params.h:300
bool tosp_narrow_blobs_not_cert
Definition: textord.h:281
double textord_noise_normratio
Definition: textord.h:387
double tosp_large_kerning
Definition: textord.h:363
double textord_blshift_xfraction
Definition: textord.h:399
double textord_initialasc_ile
Definition: textord.h:383
double tosp_fuzzy_kn_fraction
Definition: textord.h:350

◆ ~Textord()

tesseract::Textord::~Textord ( )

Definition at line 229 of file textord.cpp.

229  {
230 }

Member Function Documentation

◆ CleanupSingleRowResult()

void tesseract::Textord::CleanupSingleRowResult ( PageSegMode  pageseg_mode,
PAGE_RES * page_res 
)

Definition at line 325 of file textord.cpp.

326  {
327  if (PSM_LINE_FIND_ENABLED(pageseg_mode) || PSM_SPARSE(pageseg_mode))
328  return; // No cleanup required.
329  PAGE_RES_IT it(page_res);
330  // Find the best row, being the greatest mean word conf.
331  float row_total_conf = 0.0f;
332  int row_word_count = 0;
333  ROW_RES* best_row = NULL;
334  float best_conf = 0.0f;
335  for (it.restart_page(); it.word() != NULL; it.forward()) {
336  WERD_RES* word = it.word();
337  row_total_conf += word->best_choice->certainty();
338  ++row_word_count;
339  if (it.next_row() != it.row()) {
340  row_total_conf /= row_word_count;
341  if (best_row == NULL || best_conf < row_total_conf) {
342  best_row = it.row();
343  best_conf = row_total_conf;
344  }
345  row_total_conf = 0.0f;
346  row_word_count = 0;
347  }
348  }
349  // Now eliminate any word not in the best row.
350  for (it.restart_page(); it.word() != NULL; it.forward()) {
351  if (it.row() != best_row)
352  it.DeleteCurrentWord();
353  }
354 }
bool PSM_SPARSE(int pageseg_mode)
Definition: publictypes.h:188
WERD_CHOICE * best_choice
Definition: pageres.h:219
ROW * row
Definition: pageres.h:127
bool PSM_LINE_FIND_ENABLED(int pageseg_mode)
Definition: publictypes.h:194
float certainty() const
Definition: ratngs.h:328
WERD * word
Definition: pageres.h:175

◆ compute_block_xheight()

void tesseract::Textord::compute_block_xheight ( TO_BLOCK * block,
float  gradient 
)

Definition at line 1271 of file makerow.cpp.

1271  {
1272  TO_ROW *row; // current row
1273  float asc_frac_xheight = CCStruct::kAscenderFraction /
1275  float desc_frac_xheight = CCStruct::kDescenderFraction /
1277  inT32 min_height, max_height; // limits on xheight
1278  TO_ROW_IT row_it = block->get_rows();
1279  if (row_it.empty()) return; // no rows
1280 
1281  // Compute the best guess of xheight of each row individually.
1282  // Use xheight and ascrise values of the rows where ascenders were found.
1283  get_min_max_xheight(block->line_size, &min_height, &max_height);
1284  STATS row_asc_xheights(min_height, max_height + 1);
1285  STATS row_asc_ascrise(static_cast<int>(min_height * asc_frac_xheight),
1286  static_cast<int>(max_height * asc_frac_xheight) + 1);
1287  int min_desc_height = static_cast<int>(min_height * desc_frac_xheight);
1288  int max_desc_height = static_cast<int>(max_height * desc_frac_xheight);
1289  STATS row_asc_descdrop(min_desc_height, max_desc_height + 1);
1290  STATS row_desc_xheights(min_height, max_height + 1);
1291  STATS row_desc_descdrop(min_desc_height, max_desc_height + 1);
1292  STATS row_cap_xheights(min_height, max_height + 1);
1293  STATS row_cap_floating_xheights(min_height, max_height + 1);
1294  for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
1295  row = row_it.data();
1296  // Compute the xheight of this row if it has not been computed before.
1297  if (row->xheight <= 0.0) {
1299  gradient, block->line_size);
1300  }
1301  ROW_CATEGORY row_category = get_row_category(row);
1302  if (row_category == ROW_ASCENDERS_FOUND) {
1303  row_asc_xheights.add(static_cast<inT32>(row->xheight),
1304  row->xheight_evidence);
1305  row_asc_ascrise.add(static_cast<inT32>(row->ascrise),
1306  row->xheight_evidence);
1307  row_asc_descdrop.add(static_cast<inT32>(-row->descdrop),
1308  row->xheight_evidence);
1309  } else if (row_category == ROW_DESCENDERS_FOUND) {
1310  row_desc_xheights.add(static_cast<inT32>(row->xheight),
1311  row->xheight_evidence);
1312  row_desc_descdrop.add(static_cast<inT32>(-row->descdrop),
1313  row->xheight_evidence);
1314  } else if (row_category == ROW_UNKNOWN) {
1315  fill_heights(row, gradient, min_height, max_height,
1316  &row_cap_xheights, &row_cap_floating_xheights);
1317  }
1318  }
1319 
1320  float xheight = 0.0;
1321  float ascrise = 0.0;
1322  float descdrop = 0.0;
1323  // Compute our best guess of xheight of this block.
1324  if (row_asc_xheights.get_total() > 0) {
1325  // Determine xheight from rows where ascenders were found.
1326  xheight = row_asc_xheights.median();
1327  ascrise = row_asc_ascrise.median();
1328  descdrop = -row_asc_descdrop.median();
1329  } else if (row_desc_xheights.get_total() > 0) {
1330  // Determine xheight from rows where descenders were found.
1331  xheight = row_desc_xheights.median();
1332  descdrop = -row_desc_descdrop.median();
1333  } else if (row_cap_xheights.get_total() > 0) {
1334  // All the rows in the block were (a/de)scenderless.
1335  // Try to search for two modes in row_cap_heights that could
1336  // be the xheight and the capheight (e.g. some of the rows
1337  // were lowercase, but did not have enough (a/de)scenders.
1338  // If such two modes can not be found, this block is most
1339  // likely all caps (or all small caps, in which case the code
1340  // still works as intended).
1341  compute_xheight_from_modes(&row_cap_xheights, &row_cap_floating_xheights,
1343  block->block->classify_rotation().y() == 0.0,
1344  min_height, max_height, &(xheight), &(ascrise));
1345  if (ascrise == 0) { // assume only caps in the whole block
1346  xheight = row_cap_xheights.median() * CCStruct::kXHeightCapRatio;
1347  }
1348  } else { // default block sizes
1349  xheight = block->line_size * CCStruct::kXHeightFraction;
1350  }
1351  // Correct xheight, ascrise and descdrop if necessary.
1352  bool corrected_xheight = false;
1353  if (xheight < textord_min_xheight) {
1354  xheight = static_cast<float>(textord_min_xheight);
1355  corrected_xheight = true;
1356  }
1357  if (corrected_xheight || ascrise <= 0.0) {
1358  ascrise = xheight * asc_frac_xheight;
1359  }
1360  if (corrected_xheight || descdrop >= 0.0) {
1361  descdrop = -(xheight * desc_frac_xheight);
1362  }
1363  block->xheight = xheight;
1364 
1365  if (textord_debug_xheights) {
1366  tprintf("Block average xheight=%.4f, ascrise=%.4f, descdrop=%.4f\n",
1367  xheight, ascrise, descdrop);
1368  }
1369  // Correct xheight, ascrise, descdrop of rows based on block averages.
1370  for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
1371  correct_row_xheight(row_it.data(), xheight, ascrise, descdrop);
1372  }
1373 }
bool textord_debug_xheights
Definition: makerow.cpp:57
ROW_CATEGORY get_row_category(const TO_ROW *row)
Definition: makerow.h:123
int xheight_evidence
Definition: blobbox.h:654
int32_t inT32
Definition: host.h:38
static const double kAscenderFraction
Definition: ccstruct.h:35
static const double kDescenderFraction
Definition: ccstruct.h:33
static const double kXHeightCapRatio
Definition: ccstruct.h:37
float xheight
Definition: blobbox.h:653
float descdrop
Definition: blobbox.h:656
bool textord_single_height_mode
Definition: textord.h:261
static const double kXHeightFraction
Definition: ccstruct.h:34
#define tprintf(...)
Definition: tprintf.h:31
void get_min_max_xheight(int block_linesize, int *min_height, int *max_height)
Definition: makerow.h:116
void fill_heights(TO_ROW *row, float gradient, int min_height, int max_height, STATS *heights, STATS *floating_heights)
Definition: makerow.cpp:1423
float line_size
Definition: blobbox.h:781
void correct_row_xheight(TO_ROW *row, float xheight, float ascrise, float descdrop)
Definition: makerow.cpp:1702
int compute_xheight_from_modes(STATS *heights, STATS *floating_heights, bool cap_only, int min_height, int max_height, float *xheight, float *ascrise)
Definition: makerow.cpp:1484
ROW_CATEGORY
Definition: makerow.h:36
void compute_row_xheight(TO_ROW *row, const FCOORD &rotation, float gradient, int block_line_size)
Definition: makerow.cpp:1383
int textord_min_xheight
Definition: makerow.cpp:69
float xheight
Definition: blobbox.h:784
float y() const
Definition: points.h:212
Definition: statistc.h:33
BLOCK * block
Definition: blobbox.h:773
TO_ROW_LIST * get_rows()
Definition: blobbox.h:700
FCOORD classify_rotation() const
Definition: ocrblock.h:144
float ascrise
Definition: blobbox.h:655

◆ compute_row_xheight()

void tesseract::Textord::compute_row_xheight ( TO_ROW * row,
const FCOORD & rotation,
float  gradient,
int  block_line_size 
)

Definition at line 1383 of file makerow.cpp.

1386  {
1387  // Find blobs representing repeated characters in rows and mark them.
1388  // This information is used for computing row xheight and at a later
1389  // stage when words are formed by make_words.
1390  if (!row->rep_chars_marked()) {
1391  mark_repeated_chars(row);
1392  }
1393 
1394  int min_height, max_height;
1395  get_min_max_xheight(block_line_size, &min_height, &max_height);
1396  STATS heights(min_height, max_height + 1);
1397  STATS floating_heights(min_height, max_height + 1);
1398  fill_heights(row, gradient, min_height, max_height,
1399  &heights, &floating_heights);
1400  row->ascrise = 0.0f;
1401  row->xheight = 0.0f;
1402  row->xheight_evidence =
1403  compute_xheight_from_modes(&heights, &floating_heights,
1405  rotation.y() == 0.0,
1406  min_height, max_height,
1407  &(row->xheight), &(row->ascrise));
1408  row->descdrop = 0.0f;
1409  if (row->xheight > 0.0) {
1410  row->descdrop = static_cast<float>(
1411  compute_row_descdrop(row, gradient, row->xheight_evidence, &heights));
1412  }
1413 }
int xheight_evidence
Definition: blobbox.h:654
float xheight
Definition: blobbox.h:653
float descdrop
Definition: blobbox.h:656
bool textord_single_height_mode
Definition: textord.h:261
void get_min_max_xheight(int block_linesize, int *min_height, int *max_height)
Definition: makerow.h:116
void fill_heights(TO_ROW *row, float gradient, int min_height, int max_height, STATS *heights, STATS *floating_heights)
Definition: makerow.cpp:1423
int compute_xheight_from_modes(STATS *heights, STATS *floating_heights, bool cap_only, int min_height, int max_height, float *xheight, float *ascrise)
Definition: makerow.cpp:1484
inT32 compute_row_descdrop(TO_ROW *row, float gradient, int xheight_blob_count, STATS *asc_heights)
Definition: makerow.cpp:1580
bool rep_chars_marked() const
Definition: blobbox.h:627
float y() const
Definition: points.h:212
Definition: statistc.h:33
void mark_repeated_chars(TO_ROW *row)
Definition: makerow.cpp:2657
float ascrise
Definition: blobbox.h:655

◆ filter_blobs()

void tesseract::Textord::filter_blobs ( ICOORD  page_tr,
TO_BLOCK_LIST *  blocks,
BOOL8  testing_on 
)

Definition at line 236 of file tordmain.cpp.

238  { // for plotting
239  TO_BLOCK_IT block_it = blocks; // destination iterator
240  TO_BLOCK *block; // created block
241 
242  #ifndef GRAPHICS_DISABLED
243  if (to_win != NULL)
244  to_win->Clear();
245  #endif // GRAPHICS_DISABLED
246 
247  for (block_it.mark_cycle_pt(); !block_it.cycled_list();
248  block_it.forward()) {
249  block = block_it.data();
250  block->line_size = filter_noise_blobs(&block->blobs,
251  &block->noise_blobs,
252  &block->small_blobs,
253  &block->large_blobs);
254  if (block->line_size == 0) block->line_size = 1;
255  block->line_spacing = block->line_size *
262 
263  #ifndef GRAPHICS_DISABLED
264  if (textord_show_blobs && testing_on) {
265  if (to_win == NULL)
266  create_to_win(page_tr);
267  block->plot_graded_blobs(to_win);
268  }
269  if (textord_show_boxes && testing_on) {
270  if (to_win == NULL)
271  create_to_win(page_tr);
276  }
277  #endif // GRAPHICS_DISABLED
278  }
279 }
static const double kAscenderFraction
Definition: ccstruct.h:35
static const double kDescenderFraction
Definition: ccstruct.h:33
void plot_graded_blobs(ScrollView *to_win)
Definition: blobbox.cpp:1067
BLOBNBOX_LIST large_blobs
Definition: blobbox.h:772
double textord_excess_blobsize
Definition: makerow.cpp:85
static const double kXHeightFraction
Definition: ccstruct.h:34
bool textord_show_blobs
Definition: textord.h:374
void Clear()
Definition: scrollview.cpp:595
float line_size
Definition: blobbox.h:781
BLOBNBOX_LIST small_blobs
Definition: blobbox.h:771
BLOBNBOX_LIST blobs
Definition: blobbox.h:768
EXTERN ScrollView * to_win
Definition: drawtord.cpp:38
ScrollView * create_to_win(ICOORD page_tr)
Definition: drawtord.cpp:47
double textord_min_linesize
Definition: makerow.cpp:83
bool textord_show_boxes
Definition: textord.h:375
float max_blob_size
Definition: blobbox.h:782
BLOBNBOX_LIST noise_blobs
Definition: blobbox.h:770
float line_spacing
Definition: blobbox.h:775
void plot_box_list(ScrollView *win, BLOBNBOX_LIST *list, ScrollView::Color body_colour)
Definition: drawtord.cpp:70

◆ find_components()

void tesseract::Textord::find_components ( Pix *  pix,
BLOCK_LIST *  blocks,
TO_BLOCK_LIST *  to_blocks 
)

Definition at line 205 of file tordmain.cpp.

206  {
207  int width = pixGetWidth(pix);
208  int height = pixGetHeight(pix);
209  if (width > MAX_INT16 || height > MAX_INT16) {
210  tprintf("Input image too large! (%d, %d)\n", width, height);
211  return; // Can't handle it.
212  }
213 
215 
216  BLOCK_IT block_it(blocks); // iterator
217  for (block_it.mark_cycle_pt(); !block_it.cycled_list();
218  block_it.forward()) {
219  BLOCK* block = block_it.data();
220  if (block->poly_block() == NULL || block->poly_block()->IsText()) {
221  extract_edges(pix, block);
222  }
223  }
224 
225  assign_blobs_to_blocks2(pix, blocks, to_blocks);
226  ICOORD page_tr(width, height);
227  filter_blobs(page_tr, to_blocks, !textord_test_landscape);
228 }
void set_global_loc_code(int loc_code)
Definition: globaloc.cpp:79
void filter_blobs(ICOORD page_tr, TO_BLOCK_LIST *blocks, BOOL8 testing_on)
Definition: tordmain.cpp:236
#define MAX_INT16
Definition: host.h:61
#define tprintf(...)
Definition: tprintf.h:31
bool IsText() const
Definition: polyblk.h:52
void assign_blobs_to_blocks2(Pix *pix, BLOCK_LIST *blocks, TO_BLOCK_LIST *port_blocks)
Definition: tordmain.cpp:154
POLY_BLOCK * poly_block() const
Definition: pdblock.h:55
bool textord_test_landscape
Definition: makerow.cpp:50
#define LOC_EDGE_PROG
Definition: errcode.h:44
void extract_edges(Pix *pix, BLOCK *block)
Definition: edgblob.cpp:334
Definition: ocrblock.h:30
integer coordinate
Definition: points.h:30

◆ make_blob_words()

ROW * tesseract::Textord::make_blob_words ( TO_ROW *  row,
FCOORD  rotation 
)

Definition at line 1183 of file tospace.cpp.

1186  {
1187  bool bol; // start of line
1188  ROW *real_row; // output row
1189  C_OUTLINE_IT cout_it;
1190  C_BLOB_LIST cblobs;
1191  C_BLOB_IT cblob_it = &cblobs;
1192  WERD_LIST words;
1193  WERD_IT word_it; // new words
1194  WERD *word; // new word
1195  BLOBNBOX *bblob; // current blob
1196  TBOX blob_box; // bounding box
1197  BLOBNBOX_IT box_it; // iterator
1198  inT16 word_count = 0;
1199 
1200  cblob_it.set_to_list(&cblobs);
1201  box_it.set_to_list(row->blob_list());
1202  word_it.set_to_list(&words);
1203  bol = TRUE;
1204  if (!box_it.empty()) {
1205 
1206  do {
1207  bblob = box_it.data();
1208  blob_box = bblob->bounding_box();
1209  if (bblob->joined_to_prev()) {
1210  if (bblob->cblob() != NULL) {
1211  cout_it.set_to_list(cblob_it.data()->out_list());
1212  cout_it.move_to_last();
1213  cout_it.add_list_after(bblob->cblob()->out_list());
1214  delete bblob->cblob();
1215  }
1216  } else {
1217  if (bblob->cblob() != NULL)
1218  cblob_it.add_after_then_move(bblob->cblob());
1219  }
1220  box_it.forward(); // next one
1221  bblob = box_it.data();
1222  blob_box = bblob->bounding_box();
1223 
1224  if (!bblob->joined_to_prev() && !cblobs.empty()) {
1225  word = new WERD(&cblobs, 1, NULL);
1226  word_count++;
1227  word_it.add_after_then_move(word);
1228  if (bol) {
1229  word->set_flag(W_BOL, TRUE);
1230  bol = FALSE;
1231  }
1232  if (box_it.at_first()) { // at end of line
1233  word->set_flag(W_EOL, TRUE);
1234  }
1235  }
1236  }
1237  while (!box_it.at_first()); // until back at start
1238  /* Setup the row with created words. */
1239  real_row = new ROW(row, (inT16) row->kern_size, (inT16) row->space_size);
1240  word_it.set_to_list(real_row->word_list());
1241  //put words in row
1242  word_it.add_list_after(&words);
1243  real_row->recalc_bounding_box();
1244  if (tosp_debug_level > 4) {
1245  tprintf ("Row:Made %d words in row ((%d,%d)(%d,%d))\n",
1246  word_count,
1247  real_row->bounding_box().left(),
1248  real_row->bounding_box().bottom(),
1249  real_row->bounding_box().right(),
1250  real_row->bounding_box().top());
1251  }
1252  return real_row;
1253  }
1254  return NULL;
1255 }
C_OUTLINE_LIST * out_list()
Definition: stepblob.h:64
#define TRUE
Definition: capi.h:45
float kern_size
Definition: blobbox.h:662
float space_size
Definition: blobbox.h:663
Definition: werd.h:36
void set_flag(WERD_FLAGS mask, BOOL8 value)
Definition: werd.h:129
#define tprintf(...)
Definition: tprintf.h:31
bool joined_to_prev() const
Definition: blobbox.h:241
WERD_LIST * word_list()
Definition: ocrrow.h:52
C_BLOB * cblob() const
Definition: blobbox.h:253
int16_t inT16
Definition: host.h:36
inT16 left() const
Definition: rect.h:68
#define FALSE
Definition: capi.h:46
Definition: werd.h:35
TBOX bounding_box() const
Definition: ocrrow.h:85
inT16 top() const
Definition: rect.h:54
Definition: rect.h:30
inT16 right() const
Definition: rect.h:75
inT16 bottom() const
Definition: rect.h:61
BLOBNBOX_LIST * blob_list()
Definition: blobbox.h:595
Definition: werd.h:60
void recalc_bounding_box()
Definition: ocrrow.cpp:101
Definition: ocrrow.h:32
const TBOX & bounding_box() const
Definition: blobbox.h:215

◆ make_prop_words()

ROW * tesseract::Textord::make_prop_words ( TO_ROW *  row,
FCOORD  rotation 
)

Definition at line 890 of file tospace.cpp.

893  {
894  BOOL8 bol; // start of line
895  /* prev_ values are for start of word being built. non prev_ values are for
896  the gap between the word being built and the next one. */
897  BOOL8 prev_fuzzy_sp; // probably space
898  BOOL8 prev_fuzzy_non; // probably not
899  uinT8 prev_blanks; // in front of word
900  BOOL8 fuzzy_sp = false; // probably space
901  BOOL8 fuzzy_non = false; // probably not
902  uinT8 blanks = 0; // in front of word
903  BOOL8 prev_gap_was_a_space = FALSE;
904  BOOL8 break_at_next_gap = FALSE;
905  ROW *real_row; // output row
906  C_OUTLINE_IT cout_it;
907  C_BLOB_LIST cblobs;
908  C_BLOB_IT cblob_it = &cblobs;
909  WERD_LIST words;
910  WERD_IT word_it; // new words
911  WERD *word; // new word
912  WERD_IT rep_char_it; // repeated char words
913  inT32 next_rep_char_word_right = MAX_INT32;
914  float repetition_spacing; // gap between repetitions
915  inT32 xstarts[2]; // row ends
916  inT32 prev_x; // end of prev blob
917  BLOBNBOX *bblob; // current blob
918  TBOX blob_box; // bounding box
919  BLOBNBOX_IT box_it; // iterator
920  TBOX prev_blob_box;
921  TBOX next_blob_box;
922  inT16 prev_gap = MAX_INT16;
923  inT16 current_gap = MAX_INT16;
924  inT16 next_gap = MAX_INT16;
925  inT16 prev_within_xht_gap = MAX_INT16;
926  inT16 current_within_xht_gap = MAX_INT16;
927  inT16 next_within_xht_gap = MAX_INT16;
928  inT16 word_count = 0;
929 
930  rep_char_it.set_to_list (&(row->rep_words));
931  if (!rep_char_it.empty ()) {
932  next_rep_char_word_right =
933  rep_char_it.data ()->bounding_box ().right ();
934  }
935 
936  prev_x = -MAX_INT16;
937  cblob_it.set_to_list (&cblobs);
938  box_it.set_to_list (row->blob_list ());
939  word_it.set_to_list (&words);
940  bol = TRUE;
941  prev_blanks = 0;
942  prev_fuzzy_sp = FALSE;
943  prev_fuzzy_non = FALSE;
944  if (!box_it.empty ()) {
945  xstarts[0] = box_it.data ()->bounding_box ().left ();
946  if (xstarts[0] > next_rep_char_word_right) {
947  /* We need to insert a repeated char word at the start of the row */
948  word = rep_char_it.extract ();
949  word_it.add_after_then_move (word);
950  /* Set spaces before repeated char word */
951  word->set_flag (W_BOL, TRUE);
952  bol = FALSE;
953  word->set_blanks (0);
954  //NO uncertainty
955  word->set_flag (W_FUZZY_SP, FALSE);
956  word->set_flag (W_FUZZY_NON, FALSE);
957  xstarts[0] = word->bounding_box ().left ();
958  /* Set spaces after repeated char word (and leave current word set) */
959  repetition_spacing = find_mean_blob_spacing (word);
960  current_gap = box_it.data ()->bounding_box ().left () -
961  next_rep_char_word_right;
962  current_within_xht_gap = current_gap;
963  if (current_gap > tosp_rep_space * repetition_spacing) {
964  prev_blanks = (uinT8) floor (current_gap / row->space_size);
965  if (prev_blanks < 1)
966  prev_blanks = 1;
967  }
968  else
969  prev_blanks = 0;
970  if (tosp_debug_level > 5)
971  tprintf ("Repch wd at BOL(%d, %d). rep spacing %5.2f; Rgap:%d ",
972  box_it.data ()->bounding_box ().left (),
973  box_it.data ()->bounding_box ().bottom (),
974  repetition_spacing, current_gap);
975  prev_fuzzy_sp = FALSE;
976  prev_fuzzy_non = FALSE;
977  if (rep_char_it.empty ()) {
978  next_rep_char_word_right = MAX_INT32;
979  }
980  else {
981  rep_char_it.forward ();
982  next_rep_char_word_right =
983  rep_char_it.data ()->bounding_box ().right ();
984  }
985  }
986 
987  peek_at_next_gap(row,
988  box_it,
989  next_blob_box,
990  next_gap,
991  next_within_xht_gap);
992  do {
993  bblob = box_it.data ();
994  blob_box = bblob->bounding_box ();
995  if (bblob->joined_to_prev ()) {
996  if (bblob->cblob () != NULL) {
997  cout_it.set_to_list (cblob_it.data ()->out_list ());
998  cout_it.move_to_last ();
999  cout_it.add_list_after (bblob->cblob ()->out_list ());
1000  delete bblob->cblob ();
1001  }
1002  } else {
1003  if (bblob->cblob() != NULL)
1004  cblob_it.add_after_then_move (bblob->cblob ());
1005  prev_x = blob_box.right ();
1006  }
1007  box_it.forward (); //next one
1008  bblob = box_it.data ();
1009  blob_box = bblob->bounding_box ();
1010 
1011  if (!bblob->joined_to_prev() && bblob->cblob() != NULL) {
1012  /* Real Blob - not multiple outlines or pre-chopped */
1013  prev_gap = current_gap;
1014  prev_within_xht_gap = current_within_xht_gap;
1015  prev_blob_box = next_blob_box;
1016  current_gap = next_gap;
1017  current_within_xht_gap = next_within_xht_gap;
1018  peek_at_next_gap(row,
1019  box_it,
1020  next_blob_box,
1021  next_gap,
1022  next_within_xht_gap);
1023 
1024  inT16 prev_gap_arg = prev_gap;
1025  inT16 next_gap_arg = next_gap;
1026  if (tosp_only_use_xht_gaps) {
1027  prev_gap_arg = prev_within_xht_gap;
1028  next_gap_arg = next_within_xht_gap;
1029  }
1030  // Decide if a word-break should be inserted
1031  if (blob_box.left () > next_rep_char_word_right ||
1032  make_a_word_break(row, blob_box, prev_gap_arg, prev_blob_box,
1033  current_gap, current_within_xht_gap,
1034  next_blob_box, next_gap_arg,
1035  blanks, fuzzy_sp, fuzzy_non,
1036  prev_gap_was_a_space,
1037  break_at_next_gap) ||
1038  box_it.at_first()) {
1039  /* Form a new word out of the blobs collected */
1040  word = new WERD (&cblobs, prev_blanks, NULL);
1041  word_count++;
1042  word_it.add_after_then_move (word);
1043  if (bol) {
1044  word->set_flag (W_BOL, TRUE);
1045  bol = FALSE;
1046  }
1047  if (prev_fuzzy_sp)
1048  //probably space
1049  word->set_flag (W_FUZZY_SP, TRUE);
1050  else if (prev_fuzzy_non)
1051  word->set_flag (W_FUZZY_NON, TRUE);
1052  //probably not
1053 
1054  if (blob_box.left () > next_rep_char_word_right) {
1055  /* We need to insert a repeated char word */
1056  word = rep_char_it.extract ();
1057  word_it.add_after_then_move (word);
1058 
1059  /* Set spaces before repeated char word */
1060  repetition_spacing = find_mean_blob_spacing (word);
1061  current_gap = word->bounding_box ().left () - prev_x;
1062  current_within_xht_gap = current_gap;
1063  if (current_gap > tosp_rep_space * repetition_spacing) {
1064  blanks =
1065  (uinT8) floor (current_gap / row->space_size);
1066  if (blanks < 1)
1067  blanks = 1;
1068  }
1069  else
1070  blanks = 0;
1071  if (tosp_debug_level > 5)
1072  tprintf
1073  ("Repch wd (%d,%d) rep gap %5.2f; Lgap:%d (%d blanks);",
1074  word->bounding_box ().left (),
1075  word->bounding_box ().bottom (),
1076  repetition_spacing, current_gap, blanks);
1077  word->set_blanks (blanks);
1078  //NO uncertainty
1079  word->set_flag (W_FUZZY_SP, FALSE);
1080  word->set_flag (W_FUZZY_NON, FALSE);
1081 
1082  /* Set spaces after repeated char word (and leave current word set) */
1083  current_gap =
1084  blob_box.left () - next_rep_char_word_right;
1085  if (current_gap > tosp_rep_space * repetition_spacing) {
1086  blanks = (uinT8) (current_gap / row->space_size);
1087  if (blanks < 1)
1088  blanks = 1;
1089  }
1090  else
1091  blanks = 0;
1092  if (tosp_debug_level > 5)
1093  tprintf (" Rgap:%d (%d blanks)\n",
1094  current_gap, blanks);
1095  fuzzy_sp = FALSE;
1096  fuzzy_non = FALSE;
1097 
1098  if (rep_char_it.empty ()) {
1099  next_rep_char_word_right = MAX_INT32;
1100  }
1101  else {
1102  rep_char_it.forward ();
1103  next_rep_char_word_right =
1104  rep_char_it.data ()->bounding_box ().right ();
1105  }
1106  }
1107 
1108  if (box_it.at_first () && rep_char_it.empty ()) {
1109  //at end of line
1110  word->set_flag (W_EOL, TRUE);
1111  xstarts[1] = prev_x;
1112  }
1113  else {
1114  prev_blanks = blanks;
1115  prev_fuzzy_sp = fuzzy_sp;
1116  prev_fuzzy_non = fuzzy_non;
1117  }
1118  }
1119  }
1120  }
1121  while (!box_it.at_first ()); //until back at start
1122 
1123  /* Insert any further repeated char words */
1124  while (!rep_char_it.empty ()) {
1125  word = rep_char_it.extract ();
1126  word_it.add_after_then_move (word);
1127 
1128  /* Set spaces before repeated char word */
1129  repetition_spacing = find_mean_blob_spacing (word);
1130  current_gap = word->bounding_box ().left () - prev_x;
1131  if (current_gap > tosp_rep_space * repetition_spacing) {
1132  blanks = (uinT8) floor (current_gap / row->space_size);
1133  if (blanks < 1)
1134  blanks = 1;
1135  }
1136  else
1137  blanks = 0;
1138  if (tosp_debug_level > 5)
1139  tprintf(
1140  "Repch wd at EOL (%d,%d). rep spacing %5.2f; Lgap:%d (%d blanks)\n",
1141  word->bounding_box().left(), word->bounding_box().bottom(),
1142  repetition_spacing, current_gap, blanks);
1143  word->set_blanks (blanks);
1144  //NO uncertainty
1145  word->set_flag (W_FUZZY_SP, FALSE);
1146  word->set_flag (W_FUZZY_NON, FALSE);
1147  prev_x = word->bounding_box ().right ();
1148  if (rep_char_it.empty ()) {
1149  //at end of line
1150  word->set_flag (W_EOL, TRUE);
1151  xstarts[1] = prev_x;
1152  }
1153  else {
1154  rep_char_it.forward ();
1155  }
1156  }
1157  real_row = new ROW (row,
1158  (inT16) row->kern_size, (inT16) row->space_size);
1159  word_it.set_to_list (real_row->word_list ());
1160  //put words in row
1161  word_it.add_list_after (&words);
1162  real_row->recalc_bounding_box ();
1163 
1164  if (tosp_debug_level > 4) {
1165  tprintf ("Row: Made %d words in row ((%d,%d)(%d,%d))\n",
1166  word_count,
1167  real_row->bounding_box ().left (),
1168  real_row->bounding_box ().bottom (),
1169  real_row->bounding_box ().right (),
1170  real_row->bounding_box ().top ());
1171  }
1172  return real_row;
1173  }
1174  return NULL;
1175 }
double tosp_rep_space
Definition: textord.h:341
C_OUTLINE_LIST * out_list()
Definition: stepblob.h:64
#define TRUE
Definition: capi.h:45
int32_t inT32
Definition: host.h:38
float kern_size
Definition: blobbox.h:662
#define MAX_INT32
Definition: host.h:62
float space_size
Definition: blobbox.h:663
#define MAX_INT16
Definition: host.h:61
Definition: werd.h:36
void set_flag(WERD_FLAGS mask, BOOL8 value)
Definition: werd.h:129
#define tprintf(...)
Definition: tprintf.h:31
bool joined_to_prev() const
Definition: blobbox.h:241
WERD_LIST * word_list()
Definition: ocrrow.h:52
C_BLOB * cblob() const
Definition: blobbox.h:253
TBOX bounding_box() const
Definition: werd.cpp:160
int16_t inT16
Definition: host.h:36
inT16 left() const
Definition: rect.h:68
WERD_LIST rep_words
Definition: blobbox.h:664
unsigned char BOOL8
Definition: host.h:44
#define FALSE
Definition: capi.h:46
Definition: werd.h:35
TBOX bounding_box() const
Definition: ocrrow.h:85
inT16 top() const
Definition: rect.h:54
bool tosp_only_use_xht_gaps
Definition: textord.h:295
Definition: rect.h:30
uint8_t uinT8
Definition: host.h:35
inT16 right() const
Definition: rect.h:75
inT16 bottom() const
Definition: rect.h:61
void set_blanks(uinT8 new_blanks)
Definition: werd.h:107
BLOBNBOX_LIST * blob_list()
Definition: blobbox.h:595
Definition: werd.h:60
void recalc_bounding_box()
Definition: ocrrow.cpp:101
Definition: ocrrow.h:32
const TBOX & bounding_box() const
Definition: blobbox.h:215

◆ make_spline_rows()

void tesseract::Textord::make_spline_rows ( TO_BLOCK *  block,
float  gradient,
BOOL8  testing_on 
)

Definition at line 2020 of file makerow.cpp.

2022  {
2023 #ifndef GRAPHICS_DISABLED
2024  ScrollView::Color colour; //of row
2025 #endif
2026  TO_ROW_IT row_it = block->get_rows ();
2027 
2028  row_it.move_to_first ();
2029  for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
2030  if (row_it.data ()->blob_list ()->empty ())
2031  delete row_it.extract (); //nothing in it
2032  else
2033  make_baseline_spline (row_it.data (), block);
2034  }
2035  if (textord_old_baselines) {
2036 #ifndef GRAPHICS_DISABLED
2037  if (testing_on) {
2038  colour = ScrollView::RED;
2039  for (row_it.mark_cycle_pt (); !row_it.cycled_list ();
2040  row_it.forward ()) {
2041  row_it.data ()->baseline.plot (to_win, colour);
2042  colour = (ScrollView::Color) (colour + 1);
2043  if (colour > ScrollView::MAGENTA)
2044  colour = ScrollView::RED;
2045  }
2046  }
2047 #endif
2048  make_old_baselines(block, testing_on, gradient);
2049  }
2050 #ifndef GRAPHICS_DISABLED
2051  if (testing_on) {
2052  colour = ScrollView::RED;
2053  for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
2054  row_it.data ()->baseline.plot (to_win, colour);
2055  colour = (ScrollView::Color) (colour + 1);
2056  if (colour > ScrollView::MAGENTA)
2057  colour = ScrollView::RED;
2058  }
2059  }
2060 #endif
2061 }
bool textord_old_baselines
Definition: makerow.cpp:53
EXTERN ScrollView * to_win
Definition: drawtord.cpp:38
void make_baseline_spline(TO_ROW *row, TO_BLOCK *block)
Definition: makerow.cpp:2073
TO_ROW_LIST * get_rows()
Definition: blobbox.h:700

◆ set_use_cjk_fp_model()

void tesseract::Textord::set_use_cjk_fp_model ( bool  flag)
inline

Definition at line 95 of file textord.h.

95  {
96  use_cjk_fp_model_ = flag;
97  }

◆ TextordPage()

void tesseract::Textord::TextordPage ( PageSegMode  pageseg_mode,
const FCOORD &  reskew,
int  width,
int  height,
Pix *  binary_pix,
Pix *  thresholds_pix,
Pix *  grey_pix,
bool  use_box_bottoms,
BLOBNBOX_LIST *  diacritic_blobs,
BLOCK_LIST *  blocks,
TO_BLOCK_LIST *  to_blocks 
)

Definition at line 233 of file textord.cpp.

237  {
238  page_tr_.set_x(width);
239  page_tr_.set_y(height);
240  if (to_blocks->empty()) {
241  // AutoPageSeg was not used, so we need to find_components first.
242  find_components(binary_pix, blocks, to_blocks);
243  TO_BLOCK_IT it(to_blocks);
244  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
245  TO_BLOCK* to_block = it.data();
246  // Compute the edge offsets whether or not there is a grey_pix.
247  // We have by-passed auto page seg, so we have to run it here.
248  // By page segmentation mode there is no non-text to avoid running on.
249  to_block->ComputeEdgeOffsets(thresholds_pix, grey_pix);
250  }
251  } else if (!PSM_SPARSE(pageseg_mode)) {
252  // AutoPageSeg does not need to find_components as it did that already.
253  // Filter_blobs sets up the TO_BLOCKs the same as find_components does.
254  filter_blobs(page_tr_, to_blocks, true);
255  }
256 
257  ASSERT_HOST(!to_blocks->empty());
258  if (pageseg_mode == PSM_SINGLE_BLOCK_VERT_TEXT) {
259  const FCOORD anticlockwise90(0.0f, 1.0f);
260  const FCOORD clockwise90(0.0f, -1.0f);
261  TO_BLOCK_IT it(to_blocks);
262  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
263  TO_BLOCK* to_block = it.data();
264  BLOCK* block = to_block->block;
265  // Create a fake poly_block in block from its bounding box.
266  block->set_poly_block(new POLY_BLOCK(block->bounding_box(),
267  PT_VERTICAL_TEXT));
268  // Rotate the to_block along with its contained block and blobnbox lists.
269  to_block->rotate(anticlockwise90);
270  // Set the block's rotation values to obey the convention followed in
271  // layout analysis for vertical text.
272  block->set_re_rotation(clockwise90);
273  block->set_classify_rotation(clockwise90);
274  }
275  }
276 
277  TO_BLOCK_IT to_block_it(to_blocks);
278  TO_BLOCK* to_block = to_block_it.data();
279  // Make the rows in the block.
280  float gradient;
281  // Do it the old fashioned way.
282  if (PSM_LINE_FIND_ENABLED(pageseg_mode)) {
283  gradient = make_rows(page_tr_, to_blocks);
284  } else if (!PSM_SPARSE(pageseg_mode)) {
285  // RAW_LINE, SINGLE_LINE, SINGLE_WORD and SINGLE_CHAR all need a single row.
286  gradient = make_single_row(page_tr_, pageseg_mode != PSM_RAW_LINE,
287  to_block, to_blocks);
288  } else {
289  gradient = 0.0f;
290  }
291  BaselineDetect baseline_detector(textord_baseline_debug,
292  reskew, to_blocks);
293  baseline_detector.ComputeStraightBaselines(use_box_bottoms);
294  baseline_detector.ComputeBaselineSplinesAndXheights(
295  page_tr_, pageseg_mode != PSM_RAW_LINE, textord_heavy_nr,
296  textord_show_final_rows, this);
297  // Now make the words in the lines.
298  if (PSM_WORD_FIND_ENABLED(pageseg_mode)) {
299  // SINGLE_LINE uses the old word maker on the single line.
300  make_words(this, page_tr_, gradient, blocks, to_blocks);
301  } else {
302  // SINGLE_WORD and SINGLE_CHAR cram all the blobs into a
303  // single word, and in SINGLE_CHAR mode, all the outlines
304  // go in a single blob.
305  TO_BLOCK* to_block = to_block_it.data();
306  make_single_word(pageseg_mode == PSM_SINGLE_CHAR,
307  to_block->get_rows(), to_block->block->row_list());
308  }
309  // Remove empties.
310  cleanup_blocks(PSM_WORD_FIND_ENABLED(pageseg_mode), blocks);
311  TransferDiacriticsToBlockGroups(diacritic_blobs, blocks);
312  // Compute the margins for each row in the block, to be used later for
313  // paragraph detection.
314  BLOCK_IT b_it(blocks);
315  for (b_it.mark_cycle_pt(); !b_it.cycled_list(); b_it.forward()) {
316  b_it.data()->compute_row_margins();
317  }
318 #ifndef GRAPHICS_DISABLED
319  close_to_win();
320 #endif
321 }
bool PSM_SPARSE(int pageseg_mode)
Definition: publictypes.h:188
Definition: points.h:189
float make_rows(ICOORD page_tr, TO_BLOCK_LIST *port_blocks)
Definition: makerow.cpp:201
void bounding_box(ICOORD &bottom_left, ICOORD &top_right) const
get box
Definition: pdblock.h:59
void set_x(inT16 xin)
rewrite function
Definition: points.h:61
bool textord_heavy_nr
Definition: makerow.cpp:44
void filter_blobs(ICOORD page_tr, TO_BLOCK_LIST *blocks, BOOL8 testing_on)
Definition: tordmain.cpp:236
void set_poly_block(POLY_BLOCK *blk)
set the poly block
Definition: pdblock.h:57
Treat the image as a single character.
Definition: publictypes.h:164
#define ASSERT_HOST(x)
Definition: errcode.h:84
void ComputeEdgeOffsets(Pix *thresholds, Pix *grey)
Definition: blobbox.cpp:1051
bool PSM_LINE_FIND_ENABLED(int pageseg_mode)
Definition: publictypes.h:194
int textord_baseline_debug
Definition: textord.h:377
float make_single_row(ICOORD page_tr, bool allow_sub_blobs, TO_BLOCK *block, TO_BLOCK_LIST *blocks)
Definition: makerow.cpp:164
void set_classify_rotation(const FCOORD &rotation)
Definition: ocrblock.h:147
void rotate(const FCOORD &rotation)
Definition: blobbox.h:706
bool textord_show_final_rows
Definition: makerow.cpp:48
void find_components(Pix *pix, BLOCK_LIST *blocks, TO_BLOCK_LIST *to_blocks)
Definition: tordmain.cpp:205
bool PSM_WORD_FIND_ENABLED(int pageseg_mode)
Definition: publictypes.h:197
void set_re_rotation(const FCOORD &rotation)
Definition: ocrblock.h:141
void make_single_word(bool one_blob, TO_ROW_LIST *rows, ROW_LIST *real_rows)
Definition: wordseg.cpp:60
BLOCK * block
Definition: blobbox.h:773
TO_ROW_LIST * get_rows()
Definition: blobbox.h:700
void close_to_win()
Definition: drawtord.cpp:56
ROW_LIST * row_list()
get rows
Definition: ocrblock.h:120
Definition: ocrblock.h:30
void set_y(inT16 yin)
rewrite function
Definition: points.h:65
void make_words(tesseract::Textord *textord, ICOORD page_tr, float gradient, BLOCK_LIST *blocks, TO_BLOCK_LIST *port_blocks)
Definition: wordseg.cpp:104

◆ to_spacing()

void tesseract::Textord::to_spacing ( ICOORD  page_tr,
TO_BLOCK_LIST *  blocks 
)

Definition at line 42 of file tospace.cpp.

45  {
46  TO_BLOCK_IT block_it; //iterator
47  TO_BLOCK *block; //current block;
48  TO_ROW_IT row_it; //row iterator
49  TO_ROW *row; //current row
50  int block_index; //block number
51  int row_index; //row number
52  //estimated width of real spaces for whole block
53  inT16 block_space_gap_width;
54  //estimated width of non space gaps for whole block
55  inT16 block_non_space_gap_width;
56  BOOL8 old_text_ord_proportional;//old fixed/prop result
57  GAPMAP *gapmap = NULL; //map of big vert gaps in blk
58 
59  block_it.set_to_list (blocks);
60  block_index = 1;
61  for (block_it.mark_cycle_pt (); !block_it.cycled_list ();
62  block_it.forward ()) {
63  block = block_it.data ();
64  gapmap = new GAPMAP (block);
65  block_spacing_stats(block,
66  gapmap,
67  old_text_ord_proportional,
68  block_space_gap_width,
69  block_non_space_gap_width);
70  // Make sure relative values of block-level space and non-space gap
71  // widths are reasonable. The ratio of 1:3 is also used in
72  // block_spacing_stats, to corrrect the block_space_gap_width
73  // Useful for arabic and hindi, when the non-space gap width is
74  // often over-estimated and should not be trusted. A similar ratio
75  // is found in block_spacing_stats.
76  if (!tosp_old_to_method && tosp_old_to_constrain_sp_kn &&
77  (float) block_space_gap_width / block_non_space_gap_width < 3.0) {
78  block_non_space_gap_width = (inT16) floor (block_space_gap_width / 3.0);
79  }
80  row_it.set_to_list (block->get_rows ());
81  row_index = 1;
82  for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
83  row = row_it.data ();
84  if ((row->pitch_decision == PITCH_DEF_PROP) ||
85  (row->pitch_decision == PITCH_CORR_PROP)) {
86  if ((tosp_debug_level > 0) && !old_text_ord_proportional)
87  tprintf ("Block %d Row %d: Now Proportional\n",
88  block_index, row_index);
89  row_spacing_stats(row,
90  gapmap,
91  block_index,
92  row_index,
93  block_space_gap_width,
94  block_non_space_gap_width);
95  }
96  else {
97  if ((tosp_debug_level > 0) && old_text_ord_proportional)
98  tprintf
99  ("Block %d Row %d: Now Fixed Pitch Decision:%d fp flag:%f\n",
100  block_index, row_index, row->pitch_decision,
101  row->fixed_pitch);
102  }
103 #ifndef GRAPHICS_DISABLED
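104  if (textord_show_initial_words)
105  plot_word_decisions (to_win, (inT16) row->fixed_pitch, row);
106 #endif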
107  row_index++;
108  }
109  delete gapmap;
110  block_index++;
111  }
112 }
EXTERN bool textord_show_initial_words
Definition: tovars.cpp:25
bool tosp_old_to_method
Definition: textord.h:263
PITCH_TYPE pitch_decision
Definition: blobbox.h:646
bool tosp_old_to_constrain_sp_kn
Definition: textord.h:266
#define tprintf(...)
Definition: tprintf.h:31
float fixed_pitch
Definition: blobbox.h:647
int16_t inT16
Definition: host.h:36
unsigned char BOOL8
Definition: host.h:44
EXTERN ScrollView * to_win
Definition: drawtord.cpp:38
void plot_word_decisions(ScrollView *win, inT16 pitch, TO_ROW *row)
Definition: drawtord.cpp:250
TO_ROW_LIST * get_rows()
Definition: blobbox.h:700
Definition: gap_map.h:15

◆ use_cjk_fp_model()

bool tesseract::Textord::use_cjk_fp_model ( ) const
inline

Definition at line 92 of file textord.h.

92  {
93  return use_cjk_fp_model_;
94  }

Member Data Documentation

◆ textord_baseline_debug

int tesseract::Textord::textord_baseline_debug = 0

"Baseline debug level"

Definition at line 377 of file textord.h.

◆ textord_blob_size_bigile

double tesseract::Textord::textord_blob_size_bigile = 95

"Percentile for large blobs"

Definition at line 378 of file textord.h.

◆ textord_blob_size_smallile

double tesseract::Textord::textord_blob_size_smallile = 20

"Percentile for small blobs"

Definition at line 381 of file textord.h.

◆ textord_blshift_maxshift

double tesseract::Textord::textord_blshift_maxshift = 0.00

"Max baseline shift"

Definition at line 398 of file textord.h.

◆ textord_blshift_xfraction

double tesseract::Textord::textord_blshift_xfraction = 9.99

"Min size of baseline shift"

Definition at line 399 of file textord.h.

◆ textord_initialasc_ile

double tesseract::Textord::textord_initialasc_ile = 0.90

"Ile of sizes for xheight guess"

Definition at line 383 of file textord.h.

◆ textord_initialx_ile

double tesseract::Textord::textord_initialx_ile = 0.75

"Ile of sizes for xheight guess"

Definition at line 382 of file textord.h.

◆ textord_max_noise_size

int tesseract::Textord::textord_max_noise_size = 7

"Pixel size of noise"

Definition at line 376 of file textord.h.

◆ textord_no_rejects

bool tesseract::Textord::textord_no_rejects = false

"Don't remove noise blobs"

Definition at line 373 of file textord.h.

◆ textord_noise_area_ratio

double tesseract::Textord::textord_noise_area_ratio = 0.7

"Fraction of bounding box for noise"

Definition at line 380 of file textord.h.

◆ textord_noise_debug

bool tesseract::Textord::textord_noise_debug = FALSE

"Debug row garbage detector"

Definition at line 397 of file textord.h.

◆ textord_noise_hfract

double tesseract::Textord::textord_noise_hfract = 1.0/64

"Height fraction to discard outlines as speckle noise"

Definition at line 394 of file textord.h.

◆ textord_noise_normratio

double tesseract::Textord::textord_noise_normratio = 2.0

"Dot to norm ratio for deletion"

Definition at line 387 of file textord.h.

◆ textord_noise_rejrows

bool tesseract::Textord::textord_noise_rejrows = true

"Reject noise-like rows"

Definition at line 389 of file textord.h.

◆ textord_noise_rejwords

bool tesseract::Textord::textord_noise_rejwords = true

"Reject noise-like words"

Definition at line 388 of file textord.h.

◆ textord_noise_rowratio

double tesseract::Textord::textord_noise_rowratio = 6.0

"Dot to norm ratio for deletion"

Definition at line 396 of file textord.h.

◆ textord_noise_sizefraction

int tesseract::Textord::textord_noise_sizefraction = 10

"Fraction of size for maxima"

Definition at line 384 of file textord.h.

◆ textord_noise_sizelimit

double tesseract::Textord::textord_noise_sizelimit = 0.5

"Fraction of x for big t count"

Definition at line 385 of file textord.h.

◆ textord_noise_sncount

int tesseract::Textord::textord_noise_sncount = 1

"super norm blobs to save row"

Definition at line 395 of file textord.h.

◆ textord_noise_sxfract

double tesseract::Textord::textord_noise_sxfract = 0.4

"xh fract width error for norm blobs"

Definition at line 392 of file textord.h.

◆ textord_noise_syfract

double tesseract::Textord::textord_noise_syfract = 0.2

"xh fract error for norm blobs"

Definition at line 390 of file textord.h.

◆ textord_noise_translimit

int tesseract::Textord::textord_noise_translimit = 16

"Transitions for normal blob"

Definition at line 386 of file textord.h.

◆ textord_show_blobs

bool tesseract::Textord::textord_show_blobs = false

"Display unsorted blobs"

Definition at line 374 of file textord.h.

◆ textord_show_boxes

bool tesseract::Textord::textord_show_boxes = false

"Display boxes"

Definition at line 375 of file textord.h.

◆ textord_single_height_mode

bool tesseract::Textord::textord_single_height_mode = false

"Script has no xheight, so use a single mode for horizontal text"

Definition at line 261 of file textord.h.

◆ tosp_all_flips_fuzzy

bool tesseract::Textord::tosp_all_flips_fuzzy = false

"Pass ANY flip to context?"

Definition at line 287 of file textord.h.

◆ tosp_block_use_cert_spaces

bool tesseract::Textord::tosp_block_use_cert_spaces = true

"Only stat OBVIOUS spaces"

Definition at line 277 of file textord.h.

◆ tosp_debug_level

int tesseract::Textord::tosp_debug_level = 0

"Debug data"

Definition at line 302 of file textord.h.

◆ tosp_dont_fool_with_small_kerns

double tesseract::Textord::tosp_dont_fool_with_small_kerns = -1

"Limit use of xht gap with odd small kns"

Definition at line 365 of file textord.h.

◆ tosp_enough_small_gaps

double tesseract::Textord::tosp_enough_small_gaps = 0.65

"Fract of kerns reqd for isolated row stats"

Definition at line 343 of file textord.h.

◆ tosp_enough_space_samples_for_median

int tesseract::Textord::tosp_enough_space_samples_for_median = 3

"or should we use mean"

Definition at line 304 of file textord.h.

◆ tosp_few_samples

int tesseract::Textord::tosp_few_samples = 40

"No.gaps reqd with 1 large gap to treat as a table"

Definition at line 308 of file textord.h.

◆ tosp_flip_caution

double tesseract::Textord::tosp_flip_caution = 0.0

"Don't autoflip kn to sp when large separation"

Definition at line 361 of file textord.h.

◆ tosp_flip_fuzz_kn_to_sp

bool tesseract::Textord::tosp_flip_fuzz_kn_to_sp = true

"Default flip"

Definition at line 298 of file textord.h.

◆ tosp_flip_fuzz_sp_to_kn

bool tesseract::Textord::tosp_flip_fuzz_sp_to_kn = true

"Default flip"

Definition at line 299 of file textord.h.

◆ tosp_force_wordbreak_on_punct

bool tesseract::Textord::tosp_force_wordbreak_on_punct = false

"Force word breaks on punct to break long lines in non-space " "delimited langs"

Definition at line 271 of file textord.h.

◆ tosp_fuzzy_kn_fraction

double tesseract::Textord::tosp_fuzzy_kn_fraction = 0.5

"New fuzzy kn alg"

Definition at line 350 of file textord.h.

◆ tosp_fuzzy_limit_all

bool tesseract::Textord::tosp_fuzzy_limit_all = true

"Don't restrict kn->sp fuzzy limit to tables"

Definition at line 289 of file textord.h.

◆ tosp_fuzzy_sp_fraction

double tesseract::Textord::tosp_fuzzy_sp_fraction = 0.5

"New fuzzy sp alg"

Definition at line 351 of file textord.h.

◆ tosp_fuzzy_space_factor

double tesseract::Textord::tosp_fuzzy_space_factor = 0.6

"Fract of xheight for fuzz sp"

Definition at line 327 of file textord.h.

◆ tosp_fuzzy_space_factor1

double tesseract::Textord::tosp_fuzzy_space_factor1 = 0.5

"Fract of xheight for fuzz sp"

Definition at line 329 of file textord.h.

◆ tosp_fuzzy_space_factor2

double tesseract::Textord::tosp_fuzzy_space_factor2 = 0.72

"Fract of xheight for fuzz sp"

Definition at line 331 of file textord.h.

◆ tosp_gap_factor

double tesseract::Textord::tosp_gap_factor = 0.83

"gap ratio to flip sp->kern"

Definition at line 332 of file textord.h.

◆ tosp_ignore_big_gaps

double tesseract::Textord::tosp_ignore_big_gaps = -1

"xht multiplier"

Definition at line 339 of file textord.h.

◆ tosp_ignore_very_big_gaps

double tesseract::Textord::tosp_ignore_very_big_gaps = 3.5

"xht multiplier"

Definition at line 340 of file textord.h.

◆ tosp_improve_thresh

bool tesseract::Textord::tosp_improve_thresh = false

"Enable improvement heuristic"

Definition at line 301 of file textord.h.

◆ tosp_init_guess_kn_mult

double tesseract::Textord::tosp_init_guess_kn_mult = 2.2

"Thresh guess - mult kn by this"

Definition at line 355 of file textord.h.

◆ tosp_init_guess_xht_mult

double tesseract::Textord::tosp_init_guess_xht_mult = 0.28

"Thresh guess - mult xht by this"

Definition at line 357 of file textord.h.

◆ tosp_kern_gap_factor1

double tesseract::Textord::tosp_kern_gap_factor1 = 2.0

"gap ratio to flip kern->sp"

Definition at line 334 of file textord.h.

◆ tosp_kern_gap_factor2

double tesseract::Textord::tosp_kern_gap_factor2 = 1.3

"gap ratio to flip kern->sp"

Definition at line 336 of file textord.h.

◆ tosp_kern_gap_factor3

double tesseract::Textord::tosp_kern_gap_factor3 = 2.5

"gap ratio to flip kern->sp"

Definition at line 338 of file textord.h.

◆ tosp_large_kerning

double tesseract::Textord::tosp_large_kerning = 0.19

"Limit use of xht gap with large kns"

Definition at line 363 of file textord.h.

◆ tosp_max_sane_kn_thresh

double tesseract::Textord::tosp_max_sane_kn_thresh = 5.0

"Multiplier on kn to limit thresh"

Definition at line 359 of file textord.h.

◆ tosp_min_sane_kn_sp

double tesseract::Textord::tosp_min_sane_kn_sp = 1.5

"Don't trust spaces less than this times kn"

Definition at line 353 of file textord.h.

◆ tosp_narrow_aspect_ratio

double tesseract::Textord::tosp_narrow_aspect_ratio = 0.48

"narrow if w/h less than this"

Definition at line 322 of file textord.h.

◆ tosp_narrow_blobs_not_cert

bool tesseract::Textord::tosp_narrow_blobs_not_cert = true

"Only stat OBVIOUS spaces"

Definition at line 281 of file textord.h.

◆ tosp_narrow_fraction

double tesseract::Textord::tosp_narrow_fraction = 0.3

"Fract of xheight for narrow"

Definition at line 320 of file textord.h.

◆ tosp_near_lh_edge

double tesseract::Textord::tosp_near_lh_edge = 0

"Don't reduce box if the top left is non blank"

Definition at line 367 of file textord.h.

◆ tosp_old_sp_kn_th_factor

double tesseract::Textord::tosp_old_sp_kn_th_factor = 2.0

"Factor for defining space threshold in terms of space and kern sizes"

Definition at line 314 of file textord.h.

◆ tosp_old_to_bug_fix

bool tesseract::Textord::tosp_old_to_bug_fix = false

"Fix suspected bug in old code"

Definition at line 275 of file textord.h.

◆ tosp_old_to_constrain_sp_kn

bool tesseract::Textord::tosp_old_to_constrain_sp_kn = false

"Constrain relative values of inter and intra-word gaps for old_to_method."

Definition at line 266 of file textord.h.

◆ tosp_old_to_method

bool tesseract::Textord::tosp_old_to_method = false

"Space stats use prechopping?"

Definition at line 263 of file textord.h.

◆ tosp_only_small_gaps_for_kern

bool tesseract::Textord::tosp_only_small_gaps_for_kern = false

"Better guess"

Definition at line 286 of file textord.h.

◆ tosp_only_use_prop_rows

bool tesseract::Textord::tosp_only_use_prop_rows = true

"Block stats to use fixed pitch rows?"

Definition at line 268 of file textord.h.

◆ tosp_only_use_xht_gaps

bool tesseract::Textord::tosp_only_use_xht_gaps = false

"Only use within xht gap for wd breaks"

Definition at line 295 of file textord.h.

◆ tosp_pass_wide_fuzz_sp_to_context

double tesseract::Textord::tosp_pass_wide_fuzz_sp_to_context = 0.75

"How wide fuzzies need context"

Definition at line 371 of file textord.h.

◆ tosp_recovery_isolated_row_stats

bool tesseract::Textord::tosp_recovery_isolated_row_stats = true

"Use row alone when inadequate cert spaces"

Definition at line 285 of file textord.h.

◆ tosp_redo_kern_limit

int tesseract::Textord::tosp_redo_kern_limit = 10

"No.samples reqd to reestimate for row"

Definition at line 306 of file textord.h.

◆ tosp_rep_space

double tesseract::Textord::tosp_rep_space = 1.6

"rep gap multiplier for space"

Definition at line 341 of file textord.h.

◆ tosp_row_use_cert_spaces

bool tesseract::Textord::tosp_row_use_cert_spaces = true

"Only stat OBVIOUS spaces"

Definition at line 279 of file textord.h.

◆ tosp_row_use_cert_spaces1

bool tesseract::Textord::tosp_row_use_cert_spaces1 = true

"Only stat OBVIOUS spaces"

Definition at line 283 of file textord.h.

◆ tosp_rule_9_test_punct

bool tesseract::Textord::tosp_rule_9_test_punct = false

"Don't chng kn to space next to punct"

Definition at line 297 of file textord.h.

◆ tosp_sanity_method

int tesseract::Textord::tosp_sanity_method = 1

"How to avoid being silly"

Definition at line 311 of file textord.h.

◆ tosp_short_row

int tesseract::Textord::tosp_short_row = 20

"No.gaps reqd with few cert spaces to use certs"

Definition at line 310 of file textord.h.

◆ tosp_silly_kn_sp_gap

double tesseract::Textord::tosp_silly_kn_sp_gap = 0.2

"Don't let sp minus kn get too small"

Definition at line 369 of file textord.h.

◆ tosp_stats_use_xht_gaps

bool tesseract::Textord::tosp_stats_use_xht_gaps = true

"Use within xht gap for wd breaks"

Definition at line 291 of file textord.h.

◆ tosp_table_fuzzy_kn_sp_ratio

double tesseract::Textord::tosp_table_fuzzy_kn_sp_ratio = 3.0

"Fuzzy if less than this"

Definition at line 349 of file textord.h.

◆ tosp_table_kn_sp_ratio

double tesseract::Textord::tosp_table_kn_sp_ratio = 2.25

"Min difference of kn & sp in table"

Definition at line 345 of file textord.h.

◆ tosp_table_xht_sp_ratio

double tesseract::Textord::tosp_table_xht_sp_ratio = 0.33

"Expect spaces bigger than this"

Definition at line 347 of file textord.h.

◆ tosp_threshold_bias1

double tesseract::Textord::tosp_threshold_bias1 = 0

"how far between kern and space?"

Definition at line 316 of file textord.h.

◆ tosp_threshold_bias2

double tesseract::Textord::tosp_threshold_bias2 = 0

"how far between kern and space?"

Definition at line 318 of file textord.h.

◆ tosp_use_pre_chopping

bool tesseract::Textord::tosp_use_pre_chopping = false

"Space stats use prechopping?"

Definition at line 273 of file textord.h.

◆ tosp_use_xht_gaps

bool tesseract::Textord::tosp_use_xht_gaps = true

"Use within xht gap for wd breaks"

Definition at line 293 of file textord.h.

◆ tosp_wide_aspect_ratio

double tesseract::Textord::tosp_wide_aspect_ratio = 0.0

"wide if w/h less than this"

Definition at line 325 of file textord.h.

◆ tosp_wide_fraction

double tesseract::Textord::tosp_wide_fraction = 0.52

"Fract of xheight for wide"

Definition at line 323 of file textord.h.


The documentation for this class was generated from the following files: