tesseract  4.00.00dev
tabfind.cpp
Go to the documentation of this file.
1 // File: TabFind.cpp
3 // Description: Subclass of BBGrid to find vertically aligned blobs.
4 // Author: Ray Smith
5 // Created: Fri Mar 21 15:03:01 PST 2008
6 //
7 // (C) Copyright 2008, Google Inc.
8 // Licensed under the Apache License, Version 2.0 (the "License");
9 // you may not use this file except in compliance with the License.
10 // You may obtain a copy of the License at
11 // http://www.apache.org/licenses/LICENSE-2.0
12 // Unless required by applicable law or agreed to in writing, software
13 // distributed under the License is distributed on an "AS IS" BASIS,
14 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 // See the License for the specific language governing permissions and
16 // limitations under the License.
17 //
19 
20 #ifdef HAVE_CONFIG_H
21 #include "config_auto.h"
22 #endif
23 
24 #include "tabfind.h"
25 #include "alignedblob.h"
26 #include "blobbox.h"
27 #include "colpartitiongrid.h"
28 #include "detlinefit.h"
29 #include "linefind.h"
30 #include "ndminx.h"
31 
32 namespace tesseract {
33 
// Multiple of box size to search for initial gaps.
const int kTabRadiusFactor = 5;
// Min and Max multiple of height to search vertically when extrapolating.
const int kMinVerticalSearch = 3;
const int kMaxVerticalSearch = 12;
// NOTE(review): presumably the corresponding search limit for ragged tabs;
// it is not used in the visible part of this file - confirm against its users.
const int kMaxRaggedSearch = 25;
// Minimum number of lines in a column width to make it interesting.
const int kMinLinesInColumn = 10;
// Minimum width of a column to be interesting.
const int kMinColumnWidth = 200;
// Minimum fraction of total column lines for a column to be interesting.
const double kMinFractionalLinesInColumn = 0.125;
// Fraction of height used as alignment tolerance for aligned tabs.
const double kAlignedFraction = 0.03125;
// Maximum gutter width (in absolute inch) that we care about
const double kMaxGutterWidthAbsolute = 2.00;
// Multiplier of gridsize for min gutter width of TT_MAYBE_RAGGED blobs.
const int kRaggedGutterMultiple = 5;
// Min aspect ratio of tall objects to be considered a separator line.
// (These will be ignored in searching the gutter for obstructions.)
const double kLineFragmentAspectRatio = 10.0;
// Min number of points to accept after evaluation.
const int kMinEvaluatedTabs = 3;
// Up to 30 degrees is allowed for rotations of diacritic blobs.
// Keep this value slightly larger than kCosSmallAngle in blobbox.cpp
// so that the assert there never fails.
const double kCosMaxSkewAngle = 0.866025;
61 
62 BOOL_VAR(textord_tabfind_show_initialtabs, false, "Show tab candidates");
63 BOOL_VAR(textord_tabfind_show_finaltabs, false, "Show tab vectors");
64 
// Constructor. Passes gridsize, bleft and tright on to the AlignedBlob base,
// stores the resolution, sets image_origin_ to (0, tright.y() - 1), points
// v_it_ at vectors_, adds the vlines list to it and sets the vertical skew
// from (vertical_x, vertical_y).
// NOTE(review): the numbers fused onto each line are residue of a
// line-numbered listing. They jump from 74 to 76 here, so one statement of
// this constructor appears to be missing from this copy - restore it from the
// upstream file before relying on this definition.
65 TabFind::TabFind(int gridsize, const ICOORD& bleft, const ICOORD& tright,
66  TabVector_LIST* vlines, int vertical_x, int vertical_y,
67  int resolution)
68  : AlignedBlob(gridsize, bleft, tright),
69  resolution_(resolution),
70  image_origin_(0, tright.y() - 1) {
71  width_cb_ = NULL;
72  v_it_.set_to_list(&vectors_);
73  v_it_.add_list_after(vlines);
74  SetVerticalSkewAndParellelize(vertical_x, vertical_y);
76 }
77 
// NOTE(review): the header line of this definition is missing from this copy
// (the fused listing numbers jump from 77 to 79). The body only deletes
// width_cb_, so this is presumably the TabFind destructor - confirm against
// the upstream file.
79  if (width_cb_ != NULL)
80  delete width_cb_;
81 }
82 
84 
85 // Insert a list of blobs into the given grid (not necessarily this).
86 // If take_ownership is true, then the blobs are removed from the source list.
87 // See InsertBlob for the other arguments.
88 // It would seem to make more sense to swap this and grid, but this way
89 // around allows grid to not be derived from TabFind, eg a ColPartitionGrid,
90 // while the grid that provides the tab stops(this) has to be derived from
91 // TabFind.
92 void TabFind::InsertBlobsToGrid(bool h_spread, bool v_spread,
93  BLOBNBOX_LIST* blobs,
94  BBGrid<BLOBNBOX, BLOBNBOX_CLIST,
95  BLOBNBOX_C_IT>* grid) {
96  BLOBNBOX_IT blob_it(blobs);
97  int b_count = 0;
98  int reject_count = 0;
99  for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
100  BLOBNBOX* blob = blob_it.data();
101 // if (InsertBlob(true, true, blob, grid)) {
102  if (InsertBlob(h_spread, v_spread, blob, grid)) {
103  ++b_count;
104  } else {
105  ++reject_count;
106  }
107  }
108  if (textord_debug_tabfind) {
109  tprintf("Inserted %d blobs into grid, %d rejected.\n",
110  b_count, reject_count);
111  }
112 }
113 
114 // Insert a single blob into the given grid (not necessarily this).
115 // If h_spread, then all cells covered horizontally by the box are
116 // used, otherwise, just the bottom-left. Similarly for v_spread.
117 // A side effect is that the left and right rule edges of the blob are
118 // set according to the tab vectors in this (not grid).
119 bool TabFind::InsertBlob(bool h_spread, bool v_spread, BLOBNBOX* blob,
120  BBGrid<BLOBNBOX, BLOBNBOX_CLIST,
121  BLOBNBOX_C_IT>* grid) {
122  TBOX box = blob->bounding_box();
123  blob->set_left_rule(LeftEdgeForBox(box, false, false));
124  blob->set_right_rule(RightEdgeForBox(box, false, false));
125  blob->set_left_crossing_rule(LeftEdgeForBox(box, true, false));
126  blob->set_right_crossing_rule(RightEdgeForBox(box, true, false));
127  if (blob->joined_to_prev())
128  return false;
129  grid->InsertBBox(h_spread, v_spread, blob);
130  return true;
131 }
132 
// NOTE(review): the header line of this definition is missing from this copy
// (the fused listing numbers jump from 133 to 135). The body uses a pointer
// named block with blobs, small_blobs, noise_blobs and large_blobs lists -
// restore the signature from the upstream file.
133 // Calls SetBlobRuleEdges for all the blobs in the given block.
135  SetBlobRuleEdges(&block->blobs);
136  SetBlobRuleEdges(&block->small_blobs);
137  SetBlobRuleEdges(&block->noise_blobs);
138  SetBlobRuleEdges(&block->large_blobs);
139 }
140 
141 // Sets the left and right rule and crossing_rules for the blobs in the given
142 // list by fiding the next outermost tabvectors for each blob.
143 void TabFind::SetBlobRuleEdges(BLOBNBOX_LIST* blobs) {
144  BLOBNBOX_IT blob_it(blobs);
145  for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
146  BLOBNBOX* blob = blob_it.data();
147  TBOX box = blob->bounding_box();
148  blob->set_left_rule(LeftEdgeForBox(box, false, false));
149  blob->set_right_rule(RightEdgeForBox(box, false, false));
150  blob->set_left_crossing_rule(LeftEdgeForBox(box, true, false));
151  blob->set_right_crossing_rule(RightEdgeForBox(box, true, false));
152  }
153 }
154 
155 // Returns the gutter width of the given TabVector between the given y limits.
156 // Also returns x-shift to be added to the vector to clear any intersecting
157 // blobs. The shift is deducted from the returned gutter.
158 // If ignore_unmergeables is true, then blobs of UnMergeableType are
159 // ignored as if they don't exist. (Used for text on image.)
160 // max_gutter_width is used as the maximum width worth searching for in case
161 // there is nothing near the TabVector.
162 int TabFind::GutterWidth(int bottom_y, int top_y, const TabVector& v,
163  bool ignore_unmergeables, int max_gutter_width,
164  int* required_shift) {
165  bool right_to_left = v.IsLeftTab();
166  int bottom_x = v.XAtY(bottom_y);
167  int top_x = v.XAtY(top_y);
168  int start_x = right_to_left ? MAX(top_x, bottom_x) : MIN(top_x, bottom_x);
169  BlobGridSearch sidesearch(this);
170  sidesearch.StartSideSearch(start_x, bottom_y, top_y);
171  int min_gap = max_gutter_width;
172  *required_shift = 0;
173  BLOBNBOX* blob = NULL;
174  while ((blob = sidesearch.NextSideSearch(right_to_left)) != NULL) {
175  const TBOX& box = blob->bounding_box();
176  if (box.bottom() >= top_y || box.top() <= bottom_y)
177  continue; // Doesn't overlap enough.
178  if (box.height() >= gridsize() * 2 &&
179  box.height() > box.width() * kLineFragmentAspectRatio) {
180  // Skip likely separator line residue.
181  continue;
182  }
183  if (ignore_unmergeables && BLOBNBOX::UnMergeableType(blob->region_type()))
184  continue; // Skip non-text if required.
185  int mid_y = (box.bottom() + box.top()) / 2;
186  // We use the x at the mid-y so that the required_shift guarantees
187  // to clear all the blobs on the tab-stop. If we use the min/max
188  // of x at top/bottom of the blob, then exactness would be required,
189  // which is not a good thing.
190  int tab_x = v.XAtY(mid_y);
191  int gap;
192  if (right_to_left) {
193  gap = tab_x - box.right();
194  if (gap < 0 && box.left() - tab_x < *required_shift)
195  *required_shift = box.left() - tab_x;
196  } else {
197  gap = box.left() - tab_x;
198  if (gap < 0 && box.right() - tab_x > *required_shift)
199  *required_shift = box.right() - tab_x;
200  }
201  if (gap > 0 && gap < min_gap)
202  min_gap = gap;
203  }
204  // Result may be negative, in which case, this is a really bad tabstop.
205  return min_gap - abs(*required_shift);
206 }
207 
208 // Find the gutter width and distance to inner neighbour for the given blob.
209 void TabFind::GutterWidthAndNeighbourGap(int tab_x, int mean_height,
210  int max_gutter, bool left,
211  BLOBNBOX* bbox, int* gutter_width,
212  int* neighbour_gap ) {
213  const TBOX& box = bbox->bounding_box();
214  // The gutter and internal sides of the box.
215  int gutter_x = left ? box.left() : box.right();
216  int internal_x = left ? box.right() : box.left();
217  // On ragged edges, the gutter side of the box is away from the tabstop.
218  int tab_gap = left ? gutter_x - tab_x : tab_x - gutter_x;
219  *gutter_width = max_gutter;
220  // If the box is away from the tabstop, we need to increase
221  // the allowed gutter width.
222  if (tab_gap > 0)
223  *gutter_width += tab_gap;
224  bool debug = WithinTestRegion(2, box.left(), box.bottom());
225  if (debug)
226  tprintf("Looking in gutter\n");
227  // Find the nearest blob on the outside of the column.
228  BLOBNBOX* gutter_bbox = AdjacentBlob(bbox, left,
229  bbox->flow() == BTFT_TEXT_ON_IMAGE, 0.0,
230  *gutter_width, box.top(), box.bottom());
231  if (gutter_bbox != NULL) {
232  const TBOX& gutter_box = gutter_bbox->bounding_box();
233  *gutter_width = left ? tab_x - gutter_box.right()
234  : gutter_box.left() - tab_x;
235  }
236  if (*gutter_width >= max_gutter) {
237  // If there is no box because a tab was in the way, get the tab coord.
238  TBOX gutter_box(box);
239  if (left) {
240  gutter_box.set_left(tab_x - max_gutter - 1);
241  gutter_box.set_right(tab_x - max_gutter);
242  int tab_gutter = RightEdgeForBox(gutter_box, true, false);
243  if (tab_gutter < tab_x - 1)
244  *gutter_width = tab_x - tab_gutter;
245  } else {
246  gutter_box.set_left(tab_x + max_gutter);
247  gutter_box.set_right(tab_x + max_gutter + 1);
248  int tab_gutter = LeftEdgeForBox(gutter_box, true, false);
249  if (tab_gutter > tab_x + 1)
250  *gutter_width = tab_gutter - tab_x;
251  }
252  }
253  if (*gutter_width > max_gutter)
254  *gutter_width = max_gutter;
255  // Now look for a neighbour on the inside.
256  if (debug)
257  tprintf("Looking for neighbour\n");
258  BLOBNBOX* neighbour = AdjacentBlob(bbox, !left,
259  bbox->flow() == BTFT_TEXT_ON_IMAGE, 0.0,
260  *gutter_width, box.top(), box.bottom());
261  int neighbour_edge = left ? RightEdgeForBox(box, true, false)
262  : LeftEdgeForBox(box, true, false);
263  if (neighbour != NULL) {
264  const TBOX& n_box = neighbour->bounding_box();
265  if (debug) {
266  tprintf("Found neighbour:");
267  n_box.print();
268  }
269  if (left && n_box.left() < neighbour_edge)
270  neighbour_edge = n_box.left();
271  else if (!left && n_box.right() > neighbour_edge)
272  neighbour_edge = n_box.right();
273  }
274  *neighbour_gap = left ? neighbour_edge - internal_x
275  : internal_x - neighbour_edge;
276 }
277 
278 // Return the x-coord that corresponds to the right edge for the given
279 // box. If there is a rule line to the right that vertically overlaps it,
280 // then return the x-coord of the rule line, otherwise return the right
281 // edge of the page. For details see RightTabForBox below.
282 int TabFind::RightEdgeForBox(const TBOX& box, bool crossing, bool extended) {
283  TabVector* v = RightTabForBox(box, crossing, extended);
284  return v == NULL ? tright_.x() : v->XAtY((box.top() + box.bottom()) / 2);
285 }
286 // As RightEdgeForBox, but finds the left Edge instead.
287 int TabFind::LeftEdgeForBox(const TBOX& box, bool crossing, bool extended) {
288  TabVector* v = LeftTabForBox(box, crossing, extended);
289  return v == NULL ? bleft_.x() : v->XAtY((box.top() + box.bottom()) / 2);
290 }
291 
292 // This comment documents how this function works.
293 // For its purpose and arguments, see the comment in tabfind.h.
294 // TabVectors are stored sorted by perpendicular distance of middle from
295 // the global mean vertical vector. Since the individual vectors can have
296 // differing directions, their XAtY for a given y is not necessarily in the
297 // right order. Therefore the search has to be run with a margin.
298 // The middle of a vector that passes through (x,y) cannot be higher than
299 // halfway from y to the top, or lower than halfway from y to the bottom
300 // of the coordinate range; therefore, the search margin is the range of
301 // sort keys between these halfway points. Any vector with a sort key greater
302 // than the upper margin must be to the right of x at y, and likewise any
303 // vector with a sort key less than the lower margin must pass to the left
304 // of x at y.
305 TabVector* TabFind::RightTabForBox(const TBOX& box, bool crossing,
306  bool extended) {
307  if (v_it_.empty())
308  return NULL;
309  int top_y = box.top();
310  int bottom_y = box.bottom();
311  int mid_y = (top_y + bottom_y) / 2;
312  int right = crossing ? (box.left() + box.right()) / 2 : box.right();
313  int min_key, max_key;
314  SetupTabSearch(right, mid_y, &min_key, &max_key);
315  // Position the iterator at the first TabVector with sort_key >= min_key.
316  while (!v_it_.at_first() && v_it_.data()->sort_key() >= min_key)
317  v_it_.backward();
318  while (!v_it_.at_last() && v_it_.data()->sort_key() < min_key)
319  v_it_.forward();
320  // Find the leftmost tab vector that overlaps and has XAtY(mid_y) >= right.
321  TabVector* best_v = NULL;
322  int best_x = -1;
323  int key_limit = -1;
324  do {
325  TabVector* v = v_it_.data();
326  int x = v->XAtY(mid_y);
327  if (x >= right &&
328  (v->VOverlap(top_y, bottom_y) > 0 ||
329  (extended && v->ExtendedOverlap(top_y, bottom_y) > 0))) {
330  if (best_v == NULL || x < best_x) {
331  best_v = v;
332  best_x = x;
333  // We can guarantee that no better vector can be found if the
334  // sort key exceeds that of the best by max_key - min_key.
335  key_limit = v->sort_key() + max_key - min_key;
336  }
337  }
338  // Break when the search is done to avoid wrapping the iterator and
339  // thereby potentially slowing the next search.
340  if (v_it_.at_last() ||
341  (best_v != NULL && v->sort_key() > key_limit))
342  break; // Prevent restarting list for next call.
343  v_it_.forward();
344  } while (!v_it_.at_first());
345  return best_v;
346 }
347 
348 // As RightTabForBox, but finds the left TabVector instead.
349 TabVector* TabFind::LeftTabForBox(const TBOX& box, bool crossing,
350  bool extended) {
351  if (v_it_.empty())
352  return NULL;
353  int top_y = box.top();
354  int bottom_y = box.bottom();
355  int mid_y = (top_y + bottom_y) / 2;
356  int left = crossing ? (box.left() + box.right()) / 2 : box.left();
357  int min_key, max_key;
358  SetupTabSearch(left, mid_y, &min_key, &max_key);
359  // Position the iterator at the last TabVector with sort_key <= max_key.
360  while (!v_it_.at_last() && v_it_.data()->sort_key() <= max_key)
361  v_it_.forward();
362  while (!v_it_.at_first() && v_it_.data()->sort_key() > max_key) {
363  v_it_.backward();
364  }
365  // Find the rightmost tab vector that overlaps and has XAtY(mid_y) <= left.
366  TabVector* best_v = NULL;
367  int best_x = -1;
368  int key_limit = -1;
369  do {
370  TabVector* v = v_it_.data();
371  int x = v->XAtY(mid_y);
372  if (x <= left &&
373  (v->VOverlap(top_y, bottom_y) > 0 ||
374  (extended && v->ExtendedOverlap(top_y, bottom_y) > 0))) {
375  if (best_v == NULL || x > best_x) {
376  best_v = v;
377  best_x = x;
378  // We can guarantee that no better vector can be found if the
379  // sort key is less than that of the best by max_key - min_key.
380  key_limit = v->sort_key() - (max_key - min_key);
381  }
382  }
383  // Break when the search is done to avoid wrapping the iterator and
384  // thereby potentially slowing the next search.
385  if (v_it_.at_first() ||
386  (best_v != NULL && v->sort_key() < key_limit))
387  break; // Prevent restarting list for next call.
388  v_it_.backward();
389  } while (!v_it_.at_last());
390  return best_v;
391 }
392 
393 // Return true if the given width is close to one of the common
394 // widths in column_widths_.
395 bool TabFind::CommonWidth(int width) {
396  width /= kColumnWidthFactor;
397  ICOORDELT_IT it(&column_widths_);
398  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
399  ICOORDELT* w = it.data();
400  if (w->x() - 1 <= width && width <= w->y() + 1)
401  return true;
402  }
403  return false;
404 }
405 
406 // Return true if the sizes are more than a
407 // factor of 2 different.
408 bool TabFind::DifferentSizes(int size1, int size2) {
409  return size1 > size2 * 2 || size2 > size1 * 2;
410 }
411 
412 // Return true if the sizes are more than a
413 // factor of 5 different.
414 bool TabFind::VeryDifferentSizes(int size1, int size2) {
415  return size1 > size2 * 5 || size2 > size1 * 5;
416 }
417 
419 
// NOTE(review): the numbers fused onto each line are residue of a
// line-numbered listing, and two lines of this function are missing from this
// copy (the numbering skips 433 and 441). See the notes at each gap.
420 // Top-level function to find TabVectors in an input page block.
421 // Returns false if the detected skew angle is impossible.
422 // Applies the detected skew angle to deskew the tabs, blobs and part_grid.
423 bool TabFind::FindTabVectors(TabVector_LIST* hlines,
424  BLOBNBOX_LIST* image_blobs, TO_BLOCK* block,
425  int min_gutter_width,
426  double tabfind_aligned_gap_fraction,
427  ColPartitionGrid* part_grid,
428  FCOORD* deskew, FCOORD* reskew) {
429  ScrollView* tab_win = FindInitialTabVectors(image_blobs, min_gutter_width,
430  tabfind_aligned_gap_fraction,
431  block);
432  ComputeColumnWidths(tab_win, part_grid);
// NOTE(review): listing line 433 is missing here - a statement between
// ComputeColumnWidths and SortVectors was dropped; restore from upstream.
434  SortVectors();
435  CleanupTabs();
436  if (!Deskew(hlines, image_blobs, block, deskew, reskew))
437  return false; // Skew angle is too large.
// The part_grid is deskewed with the vector that Deskew just filled in.
438  part_grid->Deskew(*deskew);
439  ApplyTabConstraints();
440  #ifndef GRAPHICS_DISABLED
// NOTE(review): listing line 441 is missing here. The closing brace after
// DisplayTabVectors has no visible opener, so this was presumably the
// condition guarding the debug display - confirm against upstream.
442  tab_win = MakeWindow(640, 50, "FinalTabs");
443  DisplayBoxes(tab_win);
444  DisplayTabs("FinalTabs", tab_win);
445  tab_win = DisplayTabVectors(tab_win);
446  }
447  #endif // GRAPHICS_DISABLED
448  return true;
449 }
450 
451 // Top-level function to not find TabVectors in an input page block,
452 // but setup for single column mode.
453 void TabFind::DontFindTabVectors(BLOBNBOX_LIST* image_blobs, TO_BLOCK* block,
454  FCOORD* deskew, FCOORD* reskew) {
455  InsertBlobsToGrid(false, false, image_blobs, this);
456  InsertBlobsToGrid(true, false, &block->blobs, this);
457  deskew->set_x(1.0f);
458  deskew->set_y(0.0f);
459  reskew->set_x(1.0f);
460  reskew->set_y(0.0f);
461 }
462 
// NOTE(review): the header line of this definition is missing from this copy
// (the fused listing numbers jump from 465 to 467). The body uses a pointer
// named block - restore the signature from the upstream file.
463 // Cleans up the lists of blobs in the block ready for use by TabFind.
464 // Large blobs that look like text are moved to the main blobs list.
465 // Main blobs that are superseded by the image blobs are deleted.
467  BLOBNBOX_IT large_it = &block->large_blobs;
468  BLOBNBOX_IT blob_it = &block->blobs;
469  int b_count = 0;
470  for (large_it.mark_cycle_pt(); !large_it.cycled_list(); large_it.forward()) {
471  BLOBNBOX* large_blob = large_it.data();
// Only large blobs that already have an owner are moved to the main list.
472  if (large_blob->owner() != NULL) {
473  blob_it.add_to_end(large_it.extract());
474  ++b_count;
475  }
476  }
477  if (textord_debug_tabfind) {
478  tprintf("Moved %d large blobs to normal list\n",
479  b_count);
480  #ifndef GRAPHICS_DISABLED
481  ScrollView* rej_win = MakeWindow(500, 300, "Image blobs");
482  block->plot_graded_blobs(rej_win);
483  block->plot_noise_blobs(rej_win);
484  rej_win->Update();
485  #endif // GRAPHICS_DISABLED
486  }
487  block->DeleteUnownedNoise();
488 }
489 
490 // Helper function to setup search limits for *TabForBox.
491 void TabFind::SetupTabSearch(int x, int y, int* min_key, int* max_key) {
492  int key1 = TabVector::SortKey(vertical_skew_, x, (y + tright_.y()) / 2);
493  int key2 = TabVector::SortKey(vertical_skew_, x, (y + bleft_.y()) / 2);
494  *min_key = MIN(key1, key2);
495  *max_key = MAX(key1, key2);
496 }
497 
// NOTE(review): the header line of this definition is missing from this copy
// (the fused listing numbers jump from 497 to 499). The body draws every
// vector in vectors_ on tab_win and returns tab_win; when GRAPHICS_DISABLED
// is defined it only returns tab_win. Restore the signature from upstream.
499 #ifndef GRAPHICS_DISABLED
500  // For every vector, display it.
501  TabVector_IT it(&vectors_);
502  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
503  TabVector* vector = it.data();
504  vector->Display(tab_win);
505  }
506  tab_win->Update();
507 #endif
508  return tab_win;
509 }
510 
511 // PRIVATE CODE.
512 //
// NOTE(review): the numbers fused onto each line are residue of a
// line-numbered listing, and two lines of this function are missing from this
// copy (the numbering skips 519 and 531). See the notes at each gap.
513 // First part of FindTabVectors, which may be used twice if the text
514 // is mostly of vertical alignment.
515 ScrollView* TabFind::FindInitialTabVectors(BLOBNBOX_LIST* image_blobs,
516  int min_gutter_width,
517  double tabfind_aligned_gap_fraction,
518  TO_BLOCK* block) {
// NOTE(review): listing line 519 is missing here. The closing brace after
// DisplayTabVectors has no visible opener, so this was presumably the
// condition guarding the "VerticalLines" debug window - confirm upstream.
520  ScrollView* line_win = MakeWindow(0, 0, "VerticalLines");
521  line_win = DisplayTabVectors(line_win);
522  }
523  // Prepare the grid.
524  if (image_blobs != NULL)
525  InsertBlobsToGrid(true, false, image_blobs, this);
526  InsertBlobsToGrid(true, false, &block->blobs, this);
527  ScrollView* initial_win = FindTabBoxes(min_gutter_width,
528  tabfind_aligned_gap_fraction);
529  FindAllTabVectors(min_gutter_width);
530 
// NOTE(review): listing line 531 is missing here - a statement before
// SortVectors was dropped; restore from upstream.
532  SortVectors();
533  EvaluateTabs();
534  if (textord_tabfind_show_initialtabs && initial_win != NULL)
535  initial_win = DisplayTabVectors(initial_win);
536  MarkVerticalText();
// The window returned by FindTabBoxes (possibly NULL) is handed back.
537  return initial_win;
538 }
539 
540 // Helper displays all the boxes in the given vector on the given window.
541 static void DisplayBoxVector(const GenericVector<BLOBNBOX*>& boxes,
542  ScrollView* win) {
543  #ifndef GRAPHICS_DISABLED
544  for (int i = 0; i < boxes.size(); ++i) {
545  TBOX box = boxes[i]->bounding_box();
546  int left_x = box.left();
547  int right_x = box.right();
548  int top_y = box.top();
549  int bottom_y = box.bottom();
550  ScrollView::Color box_color = boxes[i]->BoxColor();
551  win->Pen(box_color);
552  win->Rectangle(left_x, bottom_y, right_x, top_y);
553  }
554  win->Update();
555  #endif // GRAPHICS_DISABLED
556 }
557 
// NOTE(review): the numbers fused onto each line are residue of a
// line-numbered listing, and two lines of this function are missing from this
// copy (the numbering skips 565 and 583). See the notes at each gap.
558 // For each box in the grid, decide whether it is a candidate tab-stop,
559 // and if so add it to the left/right tab boxes.
// Returns the debug window if one was created, otherwise NULL.
560 ScrollView* TabFind::FindTabBoxes(int min_gutter_width,
561  double tabfind_aligned_gap_fraction) {
562  left_tab_boxes_.clear();
563  right_tab_boxes_.clear();
564  // For every bbox in the grid, determine whether it uses a tab on an edge.
// NOTE(review): listing line 565 is missing here. gsearch is used below but
// not declared in any visible line, so this was presumably its declaration -
// confirm against upstream.
566  gsearch.StartFullSearch();
567  BLOBNBOX* bbox;
568  while ((bbox = gsearch.NextFullSearch()) != NULL) {
569  if (TestBoxForTabs(bbox, min_gutter_width, tabfind_aligned_gap_fraction)) {
570  // If it is any kind of tab, insert it into the vectors.
571  if (bbox->left_tab_type() != TT_NONE)
572  left_tab_boxes_.push_back(bbox);
573  if (bbox->right_tab_type() != TT_NONE)
574  right_tab_boxes_.push_back(bbox);
575  }
576  }
577  // Sort left tabs by left and right by right to see the outermost one first
578  // on a ragged tab.
579  left_tab_boxes_.sort(SortByBoxLeft<BLOBNBOX>);
580  right_tab_boxes_.sort(SortRightToLeft<BLOBNBOX>);
581  ScrollView* tab_win = NULL;
582  #ifndef GRAPHICS_DISABLED
// NOTE(review): listing line 583 is missing here. The closing brace after
// DisplayTabs has no visible opener, so this was presumably the condition
// guarding the "InitialTabs" debug window - confirm against upstream.
584  tab_win = MakeWindow(0, 100, "InitialTabs");
585  tab_win->Pen(ScrollView::BLUE);
586  tab_win->Brush(ScrollView::NONE);
587  // Display the left and right tab boxes.
588  DisplayBoxVector(left_tab_boxes_, tab_win);
589  DisplayBoxVector(right_tab_boxes_, tab_win);
590  tab_win = DisplayTabs("Tabs", tab_win);
591  }
592  #endif // GRAPHICS_DISABLED
593  return tab_win;
594 }
595 
// Decides whether the left and/or right edge of bbox is a candidate tab stop
// by examining its neighbours in a radial search, and returns true if either
// tab type ends up different from TT_NONE.
// min_gutter_width is a lower bound on both the aligned and the ragged gutter;
// tabfind_aligned_gap_fraction scales the blob height to give the aligned gap.
// NOTE(review): the numbers fused onto each line are residue of a
// line-numbered listing, and several lines of this function are missing from
// this copy (the numbering skips 598, 747, 749, 754, 757 and 759). See the
// notes at each gap; restore the lines from upstream before compiling.
596 bool TabFind::TestBoxForTabs(BLOBNBOX* bbox, int min_gutter_width,
597  double tabfind_aligned_gap_fraction) {
// NOTE(review): listing line 598 is missing here. radsearch is used below but
// not declared in any visible line, so this was presumably its declaration.
599  TBOX box = bbox->bounding_box();
600  // If there are separator lines, get the column edges.
601  int left_column_edge = bbox->left_rule();
602  int right_column_edge = bbox->right_rule();
603  // The edges of the bounding box of the blob being processed.
604  int left_x = box.left();
605  int right_x = box.right();
606  int top_y = box.top();
607  int bottom_y = box.bottom();
608  int height = box.height();
609  bool debug = WithinTestRegion(3, left_x, top_y);
610  if (debug) {
611  tprintf("Column edges for blob at (%d,%d)->(%d,%d) are [%d, %d]\n",
612  left_x, top_y, right_x, bottom_y,
613  left_column_edge, right_column_edge);
614  }
615  // Compute a search radius based on a multiple of the height.
// The radius is expressed in grid cells, rounded up.
616  int radius = (height * kTabRadiusFactor + gridsize_ - 1) / gridsize_;
617  radsearch.StartRadSearch((left_x + right_x)/2, (top_y + bottom_y)/2, radius);
618  // In Vertical Page mode, once we have an estimate of the vertical line
619  // spacing, the minimum amount of gutter space before a possible tab is
620  // increased under the assumption that column partition is always larger
621  // than line spacing.
622  int min_spacing =
623  static_cast<int>(height * tabfind_aligned_gap_fraction);
624  if (min_gutter_width > min_spacing)
625  min_spacing = min_gutter_width;
626  int min_ragged_gutter = kRaggedGutterMultiple * gridsize();
627  if (min_gutter_width > min_ragged_gutter)
628  min_ragged_gutter = min_gutter_width;
// A neighbour reaching inside these x limits intrudes on the gutter.
629  int target_right = left_x - min_spacing;
630  int target_left = right_x + min_spacing;
631  // We will be evaluating whether the left edge could be a left tab, and
632  // whether the right edge could be a right tab.
633  // A box can be a tab if its bool is_(left/right)_tab remains true, meaning
634  // that no blobs have been found in the gutter during the radial search.
635  // A box can also be a tab if there are objects in the gutter only above
636  // or only below, and there are aligned objects on the opposite side, but
637  // not too many unaligned objects. The maybe_(left/right)_tab_up counts
638  // aligned objects above and negatively counts unaligned objects above,
639  // and is set to -MAX_INT32 if a gutter object is found above.
640  // The other 3 maybe ints work similarly for the other sides.
641  // These conditions are very strict, to minimize false positives, and really
642  // only aligned tabs and outermost ragged tab blobs will qualify, so we
643  // also have maybe_ragged_left/right with less stringent rules.
644  // A blob that is maybe_ragged_left/right will be further qualified later,
645  // using the min_ragged_gutter.
646  bool is_left_tab = true;
647  bool is_right_tab = true;
648  bool maybe_ragged_left = true;
649  bool maybe_ragged_right = true;
650  int maybe_left_tab_up = 0;
651  int maybe_right_tab_up = 0;
652  int maybe_left_tab_down = 0;
653  int maybe_right_tab_down = 0;
// A leader on either side rules that side out completely.
654  if (bbox->leader_on_left()) {
655  is_left_tab = false;
656  maybe_ragged_left = false;
657  maybe_left_tab_up = -MAX_INT32;
658  maybe_left_tab_down = -MAX_INT32;
659  }
660  if (bbox->leader_on_right()) {
661  is_right_tab = false;
662  maybe_ragged_right = false;
663  maybe_right_tab_up = -MAX_INT32;
664  maybe_right_tab_down = -MAX_INT32;
665  }
// Unlike the gutter sizes, the alignment tolerance scales with resolution_.
666  int alignment_tolerance = static_cast<int>(resolution_ * kAlignedFraction);
667  BLOBNBOX* neighbour = NULL;
668  while ((neighbour = radsearch.NextRadSearch()) != NULL) {
669  if (neighbour == bbox)
670  continue;
671  TBOX nbox = neighbour->bounding_box();
672  int n_left = nbox.left();
673  int n_right = nbox.right();
674  if (debug)
675  tprintf("Neighbour at (%d,%d)->(%d,%d)\n",
676  n_left, nbox.bottom(), n_right, nbox.top());
677  // If the neighbouring blob is the wrong side of a separator line, then it
678  // "doesn't exist" as far as we are concerned.
679  if (n_right > right_column_edge || n_left < left_column_edge ||
680  left_x < neighbour->left_rule() || right_x > neighbour->right_rule())
681  continue; // Separator line in the way.
682  int n_mid_x = (n_left + n_right) / 2;
683  int n_mid_y = (nbox.top() + nbox.bottom()) / 2;
// Left edge: gutter intruder, aligned neighbour, or unaligned overlap.
684  if (n_mid_x <= left_x && n_right >= target_right) {
685  if (debug)
686  tprintf("Not a left tab\n");
687  is_left_tab = false;
688  if (n_mid_y < top_y)
689  maybe_left_tab_down = -MAX_INT32;
690  if (n_mid_y > bottom_y)
691  maybe_left_tab_up = -MAX_INT32;
692  } else if (NearlyEqual(left_x, n_left, alignment_tolerance)) {
693  if (debug)
694  tprintf("Maybe a left tab\n");
695  if (n_mid_y > top_y && maybe_left_tab_up > -MAX_INT32)
696  ++maybe_left_tab_up;
697  if (n_mid_y < bottom_y && maybe_left_tab_down > -MAX_INT32)
698  ++maybe_left_tab_down;
699  } else if (n_left < left_x && n_right >= left_x) {
700  // Overlaps but not aligned so negative points on a maybe.
701  if (debug)
702  tprintf("Maybe Not a left tab\n");
703  if (n_mid_y > top_y && maybe_left_tab_up > -MAX_INT32)
704  --maybe_left_tab_up;
705  if (n_mid_y < bottom_y && maybe_left_tab_down > -MAX_INT32)
706  --maybe_left_tab_down;
707  }
708  if (n_left < left_x && nbox.y_overlap(box) && n_right >= target_right) {
709  maybe_ragged_left = false;
710  if (debug)
711  tprintf("Not a ragged left\n");
712  }
// Right edge: the mirror image of the left-edge tests above.
713  if (n_mid_x >= right_x && n_left <= target_left) {
714  if (debug)
715  tprintf("Not a right tab\n");
716  is_right_tab = false;
717  if (n_mid_y < top_y)
718  maybe_right_tab_down = -MAX_INT32;
719  if (n_mid_y > bottom_y)
720  maybe_right_tab_up = -MAX_INT32;
721  } else if (NearlyEqual(right_x, n_right, alignment_tolerance)) {
722  if (debug)
723  tprintf("Maybe a right tab\n");
724  if (n_mid_y > top_y && maybe_right_tab_up > -MAX_INT32)
725  ++maybe_right_tab_up;
726  if (n_mid_y < bottom_y && maybe_right_tab_down > -MAX_INT32)
727  ++maybe_right_tab_down;
728  } else if (n_right > right_x && n_left <= right_x) {
729  // Overlaps but not aligned so negative points on a maybe.
730  if (debug)
731  tprintf("Maybe Not a right tab\n");
732  if (n_mid_y > top_y && maybe_right_tab_up > -MAX_INT32)
733  --maybe_right_tab_up;
734  if (n_mid_y < bottom_y && maybe_right_tab_down > -MAX_INT32)
735  --maybe_right_tab_down;
736  }
737  if (n_right > right_x && nbox.y_overlap(box) && n_left <= target_left) {
738  maybe_ragged_right = false;
739  if (debug)
740  tprintf("Not a ragged right\n");
741  }
// Stop early once all four maybe counters have been ruled out.
742  if (maybe_left_tab_down == -MAX_INT32 && maybe_left_tab_up == -MAX_INT32 &&
743  maybe_right_tab_down == -MAX_INT32 && maybe_right_tab_up == -MAX_INT32)
744  break;
745  }
746  if (is_left_tab || maybe_left_tab_up > 1 || maybe_left_tab_down > 1) {
// NOTE(review): listing line 747 is missing - this branch has no statement
// here. The debug print below suggests it set the left tab type to
// TT_MAYBE_ALIGNED; confirm against upstream.
748  } else if (maybe_ragged_left && ConfirmRaggedLeft(bbox, min_ragged_gutter)) {
// NOTE(review): listing line 749 is missing - presumably it set the left tab
// type to TT_MAYBE_RAGGED; confirm against upstream.
750  } else {
751  bbox->set_left_tab_type(TT_NONE);
752  }
753  if (is_right_tab || maybe_right_tab_up > 1 || maybe_right_tab_down > 1) {
// NOTE(review): listing line 754 is missing - presumably the right-side
// counterpart of the aligned case above; confirm against upstream.
755  } else if (maybe_ragged_right &&
756  ConfirmRaggedRight(bbox, min_ragged_gutter)) {
// NOTE(review): listing line 757 is missing - presumably the right-side
// counterpart of the ragged case above; confirm against upstream.
758  } else {
// NOTE(review): listing line 759 is missing - presumably it set the right tab
// type to TT_NONE, mirroring the left side; confirm against upstream.
760  }
761  if (debug) {
762  tprintf("Left result = %s, Right result=%s\n",
763  bbox->left_tab_type() == TT_MAYBE_ALIGNED ? "Aligned" :
764  (bbox->left_tab_type() == TT_MAYBE_RAGGED ? "Ragged" : "None"),
765  bbox->right_tab_type() == TT_MAYBE_ALIGNED ? "Aligned" :
766  (bbox->right_tab_type() == TT_MAYBE_RAGGED ? "Ragged" : "None"));
767  }
768  return bbox->left_tab_type() != TT_NONE || bbox->right_tab_type() != TT_NONE;
769 }
770 
771 // Returns true if there is nothing in the rectangle of width min_gutter to
772 // the left of bbox.
773 bool TabFind::ConfirmRaggedLeft(BLOBNBOX* bbox, int min_gutter) {
774  TBOX search_box(bbox->bounding_box());
775  search_box.set_right(search_box.left());
776  search_box.set_left(search_box.left() - min_gutter);
777  return NothingYOverlapsInBox(search_box, bbox->bounding_box());
778 }
779 
780 // Returns true if there is nothing in the rectangle of width min_gutter to
781 // the right of bbox.
782 bool TabFind::ConfirmRaggedRight(BLOBNBOX* bbox, int min_gutter) {
783  TBOX search_box(bbox->bounding_box());
784  search_box.set_left(search_box.right());
785  search_box.set_right(search_box.right() + min_gutter);
786  return NothingYOverlapsInBox(search_box, bbox->bounding_box());
787 }
788 
789 // Returns true if there is nothing in the given search_box that vertically
790 // overlaps target_box other than target_box itself.
791 bool TabFind::NothingYOverlapsInBox(const TBOX& search_box,
792  const TBOX& target_box) {
793  BlobGridSearch rsearch(this);
794  rsearch.StartRectSearch(search_box);
795  BLOBNBOX* blob;
796  while ((blob = rsearch.NextRectSearch()) != NULL) {
797  const TBOX& box = blob->bounding_box();
798  if (box.y_overlap(target_box) && !(box == target_box))
799  return false;
800  }
801  return true;
802 }
803 
// Finds tab vectors of all four alignment types and adds them to vectors_.
// A first trial pass searches for left- and right-aligned vectors with
// increasing search sizes purely to estimate the vertical direction; its
// vectors are discarded. The real pass then collects aligned and ragged
// vectors in a temporary list, appends them to vectors_, and finally calls
// SetVerticalSkewAndParellelize with the accumulated vertical estimate.
// min_gutter_width is passed through unchanged to every FindTabVectors call.
804 void TabFind::FindAllTabVectors(int min_gutter_width) {
805  // A list of vectors that will be created in estimating the skew.
806  TabVector_LIST dummy_vectors;
807  // An estimate of the vertical direction, revised as more lines are added.
808  int vertical_x = 0;
809  int vertical_y = 1;
810  // Find an estimate of the vertical direction by finding some tab vectors.
811  // Slowly increase the search size until we get some vectors.
812  for (int search_size = kMinVerticalSearch; search_size < kMaxVerticalSearch;
813  search_size += kMinVerticalSearch) {
814  int vector_count = FindTabVectors(search_size, TA_LEFT_ALIGNED,
815  min_gutter_width,
816  &dummy_vectors,
817  &vertical_x, &vertical_y);
818  vector_count += FindTabVectors(search_size, TA_RIGHT_ALIGNED,
819  min_gutter_width,
820  &dummy_vectors,
821  &vertical_x, &vertical_y);
822  if (vector_count > 0)
823  break;
824  }
825  // Get rid of the test vectors and reset the types of the tabs.
826  dummy_vectors.clear();
827  for (int i = 0; i < left_tab_boxes_.size(); ++i) {
828  BLOBNBOX* bbox = left_tab_boxes_[i];
829  if (bbox->left_tab_type() == TT_CONFIRMED)
// NOTE(review): listing number 830 is absent, so the statement controlled by
// the if above appears to have been lost in extraction. Going by the comment
// at 825 it presumably resets the left tab type - restore from upstream.
831  }
832  for (int i = 0; i < right_tab_boxes_.size(); ++i) {
833  BLOBNBOX* bbox = right_tab_boxes_[i];
834  if (bbox->right_tab_type() == TT_CONFIRMED)
// NOTE(review): listing number 835 is absent; the statement controlled by the
// if above (presumably the right-hand counterpart of 830) needs restoring.
836  }
837  if (textord_debug_tabfind) {
838  tprintf("Beginning real tab search with vertical = %d,%d...\n",
839  vertical_x, vertical_y);
840  }
841  // Now do the real thing, but keep the vectors in the dummy_vectors list
842  // until they are all done, so we don't get the tab vectors confused with
843  // the rule line vectors.
844  FindTabVectors(kMaxVerticalSearch, TA_LEFT_ALIGNED, min_gutter_width,
845  &dummy_vectors, &vertical_x, &vertical_y);
846  FindTabVectors(kMaxVerticalSearch, TA_RIGHT_ALIGNED, min_gutter_width,
847  &dummy_vectors, &vertical_x, &vertical_y);
848  FindTabVectors(kMaxRaggedSearch, TA_LEFT_RAGGED, min_gutter_width,
849  &dummy_vectors, &vertical_x, &vertical_y);
850  FindTabVectors(kMaxRaggedSearch, TA_RIGHT_RAGGED, min_gutter_width,
851  &dummy_vectors, &vertical_x, &vertical_y);
852  // Now add the vectors to the vectors_ list.
853  TabVector_IT v_it(&vectors_);
854  v_it.add_list_after(&dummy_vectors);
855  // Now use the summed (mean) vertical vector as the direction for everything.
856  SetVerticalSkewAndParellelize(vertical_x, vertical_y);
857 }
858 
859 // Helper for FindAllTabVectors finds the vectors of a particular type.
860 int TabFind::FindTabVectors(int search_size_multiple, TabAlignment alignment,
861  int min_gutter_width, TabVector_LIST* vectors,
862  int* vertical_x, int* vertical_y) {
863  TabVector_IT vector_it(vectors);
864  int vector_count = 0;
865  // Search the right or left tab boxes, looking for tab vectors.
866  bool right = alignment == TA_RIGHT_ALIGNED || alignment == TA_RIGHT_RAGGED;
867  const GenericVector<BLOBNBOX*>& boxes = right ? right_tab_boxes_
868  : left_tab_boxes_;
869  for (int i = 0; i < boxes.size(); ++i) {
870  BLOBNBOX* bbox = boxes[i];
871  if ((!right && bbox->left_tab_type() == TT_MAYBE_ALIGNED) ||
872  (right && bbox->right_tab_type() == TT_MAYBE_ALIGNED)) {
873  TabVector* vector = FindTabVector(search_size_multiple, min_gutter_width,
874  alignment,
875  bbox, vertical_x, vertical_y);
876  if (vector != NULL) {
877  ++vector_count;
878  vector_it.add_to_end(vector);
879  }
880  }
881  }
882  return vector_count;
883 }
884 
885 // Finds a vector corresponding to a tabstop running through the
886 // given box of the given alignment type.
887 // search_size_multiple is a multiple of height used to control
888 // the size of the search.
889 // vertical_x and y are updated with an estimate of the real
890 // vertical direction. (skew finding.)
891 // Returns NULL if no decent tabstop can be found.
892 TabVector* TabFind::FindTabVector(int search_size_multiple,
893  int min_gutter_width,
894  TabAlignment alignment,
895  BLOBNBOX* bbox,
896  int* vertical_x, int* vertical_y) {
897  int height = MAX(bbox->bounding_box().height(), gridsize());
898  AlignedBlobParams align_params(*vertical_x, *vertical_y,
899  height,
900  search_size_multiple, min_gutter_width,
901  resolution_, alignment);
902  // FindVerticalAlignment is in the parent (AlignedBlob) class.
903  return FindVerticalAlignment(align_params, bbox, vertical_x, vertical_y);
904 }
905 
906 // Set the vertical_skew_ member from the given vector and refit
907 // all vectors parallel to the skew vector.
// Every vector in vectors_ is refitted with Fit(vertical_skew_, true) and the
// list is then re-sorted via SortVectors(). v_it_ is re-attached to vectors_.
908 void TabFind::SetVerticalSkewAndParellelize(int vertical_x, int vertical_y) {
909  // Fit the vertical vector into an ICOORD, which is 16 bit.
910  vertical_skew_.set_with_shrink(vertical_x, vertical_y);
// NOTE(review): listing numbers 911 and 913 are absent. The tprintf call
// below is therefore truncated (its two %d arguments and closing parenthesis
// are missing) and any guard that preceded it is lost - restore from upstream.
912  tprintf("Vertical skew vector=(%d,%d)\n",
914  v_it_.set_to_list(&vectors_);
915  for (v_it_.mark_cycle_pt(); !v_it_.cycled_list(); v_it_.forward()) {
916  TabVector* v = v_it_.data();
917  v->Fit(vertical_skew_, true);
918  }
919  // Now sort the vectors as their direction has potentially changed.
920  SortVectors();
921 }
922 
923 // Sort all the current vectors in vectors_ with TabVector::SortVectorsByKey.
// Takes no arguments. Afterwards the member iterator v_it_ is re-attached to
// the sorted list.
924 void TabFind::SortVectors() {
925  vectors_.sort(TabVector::SortVectorsByKey);
926  v_it_.set_to_list(&vectors_);
927 }
928 
929 // Evaluate all the current tab vectors.
930 void TabFind::EvaluateTabs() {
931  TabVector_IT rule_it(&vectors_);
932  for (rule_it.mark_cycle_pt(); !rule_it.cycled_list(); rule_it.forward()) {
933  TabVector* tab = rule_it.data();
934  if (!tab->IsSeparator()) {
935  tab->Evaluate(vertical_skew_, this);
936  if (tab->BoxCount() < kMinEvaluatedTabs) {
937  if (textord_debug_tabfind > 2)
938  tab->Print("Too few boxes");
939  delete rule_it.extract();
940  v_it_.set_to_list(&vectors_);
941  } else if (WithinTestRegion(3, tab->startpt().x(), tab->startpt().y())) {
942  tab->Print("Evaluated tab");
943  }
944  }
945  }
946 }
947 
948 // Trace textlines from one side to the other of each tab vector, saving
949 // the most frequent column widths found in a list so that a given width
950 // can be tested for being a common width with a simple callback function.
951 void TabFind::ComputeColumnWidths(ScrollView* tab_win,
952  ColPartitionGrid* part_grid) {
953  #ifndef GRAPHICS_DISABLED
954  if (tab_win != NULL)
955  tab_win->Pen(ScrollView::WHITE);
956  #endif // GRAPHICS_DISABLED
957  // Accumulate column sections into a STATS
958  int col_widths_size = (tright_.x() - bleft_.x()) / kColumnWidthFactor;
959  STATS col_widths(0, col_widths_size + 1);
960  ApplyPartitionsToColumnWidths(part_grid, &col_widths);
961  #ifndef GRAPHICS_DISABLED
962  if (tab_win != NULL) {
963  tab_win->Update();
964  }
965  #endif // GRAPHICS_DISABLED
966  if (textord_debug_tabfind > 1)
967  col_widths.print();
968  // Now make a list of column widths.
969  MakeColumnWidths(col_widths_size, &col_widths);
970  // Turn the column width into a range.
971  ApplyPartitionsToColumnWidths(part_grid, NULL);
972 }
973 
974 // Finds column width and:
975 // if col_widths is not null (pass1):
976 // pair-up tab vectors with existing ColPartitions and accumulate widths.
977 // else (pass2):
978 // find the largest real partition width for each recorded column width,
979 // to be used as the minimum acceptable width.
980 void TabFind::ApplyPartitionsToColumnWidths(ColPartitionGrid* part_grid,
981  STATS* col_widths) {
982  // For every ColPartition in the part_grid, add partners to the tabvectors
983  // and accumulate the column widths.
984  ColPartitionGridSearch gsearch(part_grid);
985  gsearch.StartFullSearch();
986  ColPartition* part;
987  while ((part = gsearch.NextFullSearch()) != NULL) {
988  BLOBNBOX_C_IT blob_it(part->boxes());
989  if (blob_it.empty())
990  continue;
991  BLOBNBOX* left_blob = blob_it.data();
992  blob_it.move_to_last();
993  BLOBNBOX* right_blob = blob_it.data();
994  TabVector* left_vector = LeftTabForBox(left_blob->bounding_box(),
995  true, false);
996  if (left_vector == NULL || left_vector->IsRightTab())
997  continue;
998  TabVector* right_vector = RightTabForBox(right_blob->bounding_box(),
999  true, false);
1000  if (right_vector == NULL || right_vector->IsLeftTab())
1001  continue;
1002 
1003  int line_left = left_vector->XAtY(left_blob->bounding_box().bottom());
1004  int line_right = right_vector->XAtY(right_blob->bounding_box().bottom());
1005  // Add to STATS of measurements if the width is significant.
1006  int width = line_right - line_left;
1007  if (col_widths != NULL) {
1008  AddPartnerVector(left_blob, right_blob, left_vector, right_vector);
1009  if (width >= kMinColumnWidth)
1010  col_widths->add(width / kColumnWidthFactor, 1);
1011  } else {
1012  width /= kColumnWidthFactor;
1013  ICOORDELT_IT it(&column_widths_);
1014  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
1015  ICOORDELT* w = it.data();
1016  if (NearlyEqual<int>(width, w->y(), 1)) {
1017  int true_width = part->bounding_box().width() / kColumnWidthFactor;
1018  if (true_width <= w->y() && true_width > w->x())
1019  w->set_x(true_width);
1020  break;
1021  }
1022  }
1023  }
1024  }
1025 }
1026 
1027 // Helper makes the list of common column widths in column_widths_ from the
1028 // input col_widths. Destroys the content of col_widths by repeatedly
1029 // finding the mode and erasing the peak.
// col_widths_size bounds the rightward scan of each peak. A peak is recorded
// as ICOORDELT(0, width) only when its total count exceeds both
// kMinLinesInColumn and kMinFractionalLinesInColumn of the initial total.
1030 void TabFind::MakeColumnWidths(int col_widths_size, STATS* col_widths) {
1031  ICOORDELT_IT w_it(&column_widths_);
1032  int total_col_count = col_widths->get_total();
1033  while (col_widths->get_total() > 0) {
1034  int width = col_widths->mode();
1035  int col_count = col_widths->pile_count(width);
1036  col_widths->add(width, -col_count);
1037  // Get the entire peak.
// Absorb adjacent non-empty buckets below the mode (stops before bucket 0).
1038  for (int left = width - 1; left > 0 &&
1039  col_widths->pile_count(left) > 0;
1040  --left) {
1041  int new_count = col_widths->pile_count(left);
1042  col_count += new_count;
1043  col_widths->add(left, -new_count);
1044  }
// Absorb adjacent non-empty buckets above the mode, below col_widths_size.
1045  for (int right = width + 1; right < col_widths_size &&
1046  col_widths->pile_count(right) > 0;
1047  ++right) {
1048  int new_count = col_widths->pile_count(right);
1049  col_count += new_count;
1050  col_widths->add(right, -new_count);
1051  }
1052  if (col_count > kMinLinesInColumn &&
1053  col_count > kMinFractionalLinesInColumn * total_col_count) {
1054  ICOORDELT* w = new ICOORDELT(0, width);
1055  w_it.add_after_then_move(w);
// NOTE(review): listing number 1056 is absent; a line (possibly a debug guard
// for the tprintf below) appears to have been lost - confirm against upstream.
1057  tprintf("Column of width %d has %d = %.2f%% lines\n",
1058  width * kColumnWidthFactor, col_count,
1059  100.0 * col_count / total_col_count);
1060  }
1061  }
1062 }
1063 
1064 // Mark blobs as being in a vertical text line where that is the case.
1065 // The function is declared void, so it does not report whether the majority
// of the image is vertical text. Blobs with region_type() < BRT_UNKNOWN are
// skipped; the rest are tested with UniquelyVertical().
1066 void TabFind::MarkVerticalText() {
// NOTE(review): listing number 1067 is absent; a line (possibly a debug guard
// for the tprintf below) appears to have been lost - confirm against upstream.
1068  tprintf("Checking for vertical lines\n");
1069  BlobGridSearch gsearch(this);
1070  gsearch.StartFullSearch();
1071  BLOBNBOX* blob = NULL;
1072  while ((blob = gsearch.NextFullSearch()) != NULL) {
1073  if (blob->region_type() < BRT_UNKNOWN)
1074  continue;
1075  if (blob->UniquelyVertical()) {
// NOTE(review): listing number 1076 is absent, so the statement that actually
// marks the blob is missing from this branch - restore from upstream.
1077  }
1078  }
1079 }
1080 
// Estimates the median gap between partnered tab vectors in lines.
// Only left tabs that are not separators and have a single partner are used.
// For each one, a gap is recorded when its start x lies to the right of the
// previous partner's start x. Returns 0 when fewer than kMinLinesInColumn
// gaps were collected, otherwise the median gap truncated to int.
1081 int TabFind::FindMedianGutterWidth(TabVector_LIST *lines) {
1082  TabVector_IT it(lines);
1083  int prev_right = -1;
// Both histograms are sized from kMaxGutterWidthAbsolute (documented in this
// file as inches) times resolution_.
1084  int max_gap = static_cast<int>(kMaxGutterWidthAbsolute * resolution_);
1085  STATS gaps(0, max_gap);
1086  STATS heights(0, max_gap);
1087  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
1088  TabVector* v = it.data();
1089  TabVector* partner = v->GetSinglePartner();
1090  if (!v->IsLeftTab() || v->IsSeparator() || !partner) continue;
// Despite its name, "heights" accumulates the difference of the x
// coordinates of the partner's and this vector's start points.
1091  heights.add(partner->startpt().x() - v->startpt().x(), 1);
1092  if (prev_right > 0 && v->startpt().x() > prev_right) {
1093  gaps.add(v->startpt().x() - prev_right, 1);
1094  }
1095  prev_right = partner->startpt().x();
1096  }
// NOTE(review): listing number 1097 is absent; a line (possibly a debug guard
// for the tprintf below) appears to have been lost - confirm against upstream.
1098  tprintf("TabGutter total %d median_gap %.2f median_hgt %.2f\n",
1099  gaps.get_total(), gaps.median(), heights.median());
1100  if (gaps.get_total() < kMinLinesInColumn) return 0;
1101  return static_cast<int>(gaps.median());
1102 }
1103 
1104 // Find the next adjacent (looking to the left or right) blob on this text
1105 // line, with the constraint that it must vertically significantly overlap
1106 // the [top_y, bottom_y] range.
1107 // If ignore_images is true, then blobs with region_type() < BRT_UNKNOWN are
1108 // treated as if they do not exist.
// A neighbour qualifies when its vertical overlap with [bottom_y, top_y]
// exceeds min_overlap_fraction of the smaller of the two heights and, unless
// min_overlap_fraction is 0.0, DifferentSizes() does not reject the heights.
// The search stops at the first qualifying neighbour whose horizontal gap
// exceeds gap_limit, or that has a facing tab of type >= TT_CONFIRMED beyond
// a positive gap. Returns the qualifying neighbour with the smallest gap
// found before stopping, or NULL if there was none.
1109 BLOBNBOX* TabFind::AdjacentBlob(const BLOBNBOX* bbox,
1110  bool look_left, bool ignore_images,
1111  double min_overlap_fraction,
1112  int gap_limit, int top_y, int bottom_y) {
// NOTE(review): listing number 1113 is absent; the declaration of sidesearch,
// which is used below, appears to have been lost - restore from upstream.
1114  const TBOX& box = bbox->bounding_box();
1115  int left = box.left();
1116  int right = box.right();
1117  int mid_x = (left + right) / 2;
1118  sidesearch.StartSideSearch(mid_x, bottom_y, top_y);
1119  int best_gap = 0;
1120  bool debug = WithinTestRegion(3, left, bottom_y);
1121  BLOBNBOX* result = NULL;
1122  BLOBNBOX* neighbour = NULL;
1123  while ((neighbour = sidesearch.NextSideSearch(look_left)) != NULL) {
1124  if (debug) {
1125  tprintf("Adjacent blob: considering box:");
1126  neighbour->bounding_box().print();
1127  }
1128  if (neighbour == bbox ||
1129  (ignore_images && neighbour->region_type() < BRT_UNKNOWN))
1130  continue;
1131  const TBOX& nbox = neighbour->bounding_box();
1132  int n_top_y = nbox.top();
1133  int n_bottom_y = nbox.bottom();
1134  int v_overlap = MIN(n_top_y, top_y) - MAX(n_bottom_y, bottom_y);
1135  int height = top_y - bottom_y;
1136  int n_height = n_top_y - n_bottom_y;
1137  if (v_overlap > min_overlap_fraction * MIN(height, n_height) &&
1138  (min_overlap_fraction == 0.0 || !DifferentSizes(height, n_height))) {
1139  int n_left = nbox.left();
1140  int n_right = nbox.right();
// h_gap is the horizontal distance between the boxes; it is not positive
// when they overlap in x.
1141  int h_gap = MAX(n_left, left) - MIN(n_right, right);
1142  int n_mid_x = (n_left + n_right) / 2;
// Only accept neighbours whose centre lies strictly on the requested side.
1143  if (look_left == (n_mid_x < mid_x) && n_mid_x != mid_x) {
1144  if (h_gap > gap_limit) {
1145  // Hit a big gap before next tab so don't return anything.
1146  if (debug)
1147  tprintf("Giving up due to big gap = %d vs %d\n",
1148  h_gap, gap_limit);
1149  return result;
1150  }
1151  if (h_gap > 0 && (look_left ? neighbour->right_tab_type()
1152  : neighbour->left_tab_type()) >= TT_CONFIRMED) {
1153  // Hit a tab facing the wrong way. Stop in case we are crossing
1154  // the column boundary.
1155  if (debug)
1156  tprintf("Collision with like tab of type %d at %d,%d\n",
1157  look_left ? neighbour->right_tab_type()
1158  : neighbour->left_tab_type(),
1159  n_left, nbox.bottom());
1160  return result;
1161  }
1162  // This is a good fit to the line. Continue with this
1163  // neighbour as the bbox if the best gap.
1164  if (result == NULL || h_gap < best_gap) {
1165  if (debug)
1166  tprintf("Good result\n");
1167  result = neighbour;
1168  best_gap = h_gap;
1169  } else {
1170  // The new one is worse, so we probably already have the best result.
1171  return result;
1172  }
1173  } else if (debug) {
1174  tprintf("Wrong way\n");
1175  }
1176  } else if (debug) {
1177  tprintf("Insufficient overlap\n");
1178  }
1179  }
1180  if (WithinTestRegion(3, left, box.top()))
1181  tprintf("Giving up due to end of search\n");
1182  return result; // Hit the edge; result is the best found so far, if any.
1183 }
1184 
1185 // Add a bi-directional partner relationship between the left
1186 // and the right. If one (or both) of the vectors is a separator,
1187 // extend a nearby extendable vector or create a new one of the
1188 // correct type, using the given left or right blob as a guide.
1189 void TabFind::AddPartnerVector(BLOBNBOX* left_blob, BLOBNBOX* right_blob,
1190  TabVector* left, TabVector* right) {
1191  const TBOX& left_box = left_blob->bounding_box();
1192  const TBOX& right_box = right_blob->bounding_box();
1193  if (left->IsSeparator()) {
1194  // Try to find a nearby left edge to extend.
1195  TabVector* v = LeftTabForBox(left_box, true, true);
1196  if (v != NULL && v != left && v->IsLeftTab() &&
1197  v->XAtY(left_box.top()) > left->XAtY(left_box.top())) {
1198  left = v; // Found a good replacement.
1199  left->ExtendToBox(left_blob);
1200  } else {
1201  // Fake a vector.
1202  left = new TabVector(*left, TA_LEFT_RAGGED, vertical_skew_, left_blob);
1203  vectors_.add_sorted(TabVector::SortVectorsByKey, left);
1204  v_it_.move_to_first();
1205  }
1206  }
1207  if (right->IsSeparator()) {
1208  // Try to find a nearby left edge to extend.
1209  if (WithinTestRegion(3, right_box.right(), right_box.bottom())) {
1210  tprintf("Box edge (%d,%d-%d)",
1211  right_box.right(), right_box.bottom(), right_box.top());
1212  right->Print(" looking for improvement for");
1213  }
1214  TabVector* v = RightTabForBox(right_box, true, true);
1215  if (v != NULL && v != right && v->IsRightTab() &&
1216  v->XAtY(right_box.top()) < right->XAtY(right_box.top())) {
1217  right = v; // Found a good replacement.
1218  right->ExtendToBox(right_blob);
1219  if (WithinTestRegion(3, right_box.right(), right_box.bottom())) {
1220  right->Print("Extended vector");
1221  }
1222  } else {
1223  // Fake a vector.
1224  right = new TabVector(*right, TA_RIGHT_RAGGED, vertical_skew_,
1225  right_blob);
1226  vectors_.add_sorted(TabVector::SortVectorsByKey, right);
1227  v_it_.move_to_first();
1228  if (WithinTestRegion(3, right_box.right(), right_box.bottom())) {
1229  right->Print("Created new vector");
1230  }
1231  }
1232  }
1233  left->AddPartner(right);
1234  right->AddPartner(left);
1235 }
1236 
1237 // Remove separators and unused tabs from the main vectors_ list
1238 // to the dead_vectors_ list.
// "Unused" here means Partnerless(). The member iterator v_it_ is re-attached
// to vectors_ after each removal.
1239 void TabFind::CleanupTabs() {
1240  // TODO(rays) Before getting rid of separators and unused vectors, it
1241  // would be useful to try moving ragged vectors outwards to see if this
1242  // allows useful extension. Could be combined with checking ends of partners.
1243  TabVector_IT it(&vectors_);
1244  TabVector_IT dead_it(&dead_vectors_);
1245  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
1246  TabVector* v = it.data();
1247  if (v->IsSeparator() || v->Partnerless()) {
1248  dead_it.add_after_then_move(it.extract());
1249  v_it_.set_to_list(&vectors_);
1250  } else {
// NOTE(review): listing number 1251 is absent, leaving this else branch
// empty; the statement applied to vectors that are kept appears to have been
// lost in extraction - restore from upstream.
1252  }
1253  }
1254 }
1255 
1256 // Apply the given rotation to the given list of blobs.
1257 void TabFind::RotateBlobList(const FCOORD& rotation, BLOBNBOX_LIST* blobs) {
1258  BLOBNBOX_IT it(blobs);
1259  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
1260  it.data()->rotate_box(rotation);
1261  }
1262 }
1263 
1264 // Recreate the grid with deskewed BLOBNBOXes.
1265 // Returns false if the detected skew angle is impossible.
1266 bool TabFind::Deskew(TabVector_LIST* hlines, BLOBNBOX_LIST* image_blobs,
1267  TO_BLOCK* block, FCOORD* deskew, FCOORD* reskew) {
1268  ComputeDeskewVectors(deskew, reskew);
1269  if (deskew->x() < kCosMaxSkewAngle)
1270  return false;
1271  RotateBlobList(*deskew, image_blobs);
1272  RotateBlobList(*deskew, &block->blobs);
1273  RotateBlobList(*deskew, &block->small_blobs);
1274  RotateBlobList(*deskew, &block->noise_blobs);
1275 
1276  // Rotate the horizontal vectors. The vertical vectors don't need
1277  // rotating as they can just be refitted.
1278  TabVector_IT h_it(hlines);
1279  for (h_it.mark_cycle_pt(); !h_it.cycled_list(); h_it.forward()) {
1280  TabVector* h = h_it.data();
1281  h->Rotate(*deskew);
1282  }
1283  TabVector_IT d_it(&dead_vectors_);
1284  for (d_it.mark_cycle_pt(); !d_it.cycled_list(); d_it.forward()) {
1285  TabVector* d = d_it.data();
1286  d->Rotate(*deskew);
1287  }
1288  SetVerticalSkewAndParellelize(0, 1);
1289  // Rebuild the grid to the new size.
1290  TBOX grid_box(bleft_, tright_);
1291  grid_box.rotate_large(*deskew);
1292  Init(gridsize(), grid_box.botleft(), grid_box.topright());
1293  InsertBlobsToGrid(false, false, image_blobs, this);
1294  InsertBlobsToGrid(true, false, &block->blobs, this);
1295  return true;
1296 }
1297 
1298 // Flip the vertical and horizontal lines and rotate the grid ready
1299 // for working on the rotated image.
1300 // This also makes parameter adjustments for FindInitialTabVectors().
1301 void TabFind::ResetForVerticalText(const FCOORD& rotate, const FCOORD& rerotate,
1302  TabVector_LIST* horizontal_lines,
1303  int* min_gutter_width) {
1304  // Rotate the horizontal and vertical vectors and swap them over.
1305  // Only the separators are kept and rotated; other tabs are used
1306  // to estimate the gutter width then thrown away.
1307  TabVector_LIST ex_verticals;
1308  TabVector_IT ex_v_it(&ex_verticals);
1309  TabVector_LIST vlines;
1310  TabVector_IT v_it(&vlines);
1311  while (!v_it_.empty()) {
1312  TabVector* v = v_it_.extract();
1313  if (v->IsSeparator()) {
1314  v->Rotate(rotate);
1315  ex_v_it.add_after_then_move(v);
1316  } else {
1317  v_it.add_after_then_move(v);
1318  }
1319  v_it_.forward();
1320  }
1321 
1322  // Adjust the min gutter width for better tabbox selection
1323  // in 2nd call to FindInitialTabVectors().
1324  int median_gutter = FindMedianGutterWidth(&vlines);
1325  if (median_gutter > *min_gutter_width)
1326  *min_gutter_width = median_gutter;
1327 
1328  TabVector_IT h_it(horizontal_lines);
1329  for (h_it.mark_cycle_pt(); !h_it.cycled_list(); h_it.forward()) {
1330  TabVector* h = h_it.data();
1331  h->Rotate(rotate);
1332  }
1333  v_it_.add_list_after(horizontal_lines);
1334  v_it_.move_to_first();
1335  h_it.set_to_list(horizontal_lines);
1336  h_it.add_list_after(&ex_verticals);
1337 
1338  // Rebuild the grid to the new size.
1339  TBOX grid_box(bleft(), tright());
1340  grid_box.rotate_large(rotate);
1341  Init(gridsize(), grid_box.botleft(), grid_box.topright());
1342 }
1343 
1344 // Clear the grid and get rid of the tab vectors, but not separators,
1345 // ready to start again.
// NOTE(review): listing number 1346 is absent, so the signature line that
// opens this function body has been lost in extraction. It is presumably
// TabFind::Reset, going by the "after Reset" remark on the next function -
// restore from upstream.
// Every non-separator vector reached through v_it_ is extracted and deleted;
// Clear() is then called.
1347  v_it_.move_to_first();
1348  for (v_it_.mark_cycle_pt(); !v_it_.cycled_list(); v_it_.forward()) {
1349  if (!v_it_.data()->IsSeparator())
1350  delete v_it_.extract();
1351  }
1352  Clear();
1353 }
1354 
1355 // Reflect the separator tab vectors and the grids in the y-axis.
1356 // Can only be called after Reset!
// NOTE(review): listing number 1357 is absent, so the signature line that
// opens this function body has been lost in extraction - restore from
// upstream.
1358  TabVector_LIST temp_list;
1359  TabVector_IT temp_it(&temp_list);
1360  v_it_.move_to_first();
1361  // The TabVector list only contains vertical lines, but they need to be
1362  // reflected and the list needs to be reversed, so they are still in
1363  // sort_key order.
1364  while (!v_it_.empty()) {
1365  TabVector* v = v_it_.extract();
1366  v_it_.forward();
1367  v->ReflectInYAxis();
// Adding each vector before the current position reverses the list order.
1368  temp_it.add_before_then_move(v);
1369  }
1370  v_it_.add_list_after(&temp_list);
1371  v_it_.move_to_first();
1372  // Reset this grid with reflected bounding boxes.
1373  TBOX grid_box(bleft(), tright());
// Swap and negate left/right; tmp holds the old left while it is overwritten.
1374  int tmp = grid_box.left();
1375  grid_box.set_left(-grid_box.right());
1376  grid_box.set_right(-tmp);
1377  Init(gridsize(), grid_box.botleft(), grid_box.topright());
1378 }
1379 
1380 // Compute the rotation required to deskew, and its inverse rotation.
1381 void TabFind::ComputeDeskewVectors(FCOORD* deskew, FCOORD* reskew) {
1382  double length = vertical_skew_ % vertical_skew_;
1383  length = sqrt(length);
1384  deskew->set_x(static_cast<float>(vertical_skew_.y() / length));
1385  deskew->set_y(static_cast<float>(vertical_skew_.x() / length));
1386  reskew->set_x(deskew->x());
1387  reskew->set_y(-deskew->y());
1388 }
1389 
1390 // Compute and apply constraints to the end positions of TabVectors so
1391 // that where possible partners end at the same y coordinate.
// Runs four passes over vectors_: set up per-vector constraints, handle each
// vector's own partners, pair each right tab with the vertically overlapping
// left tabs that follow it in the list, then apply the constraints to every
// non-separator vector.
1392 void TabFind::ApplyTabConstraints() {
1393  TabVector_IT it(&vectors_);
1394  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
1395  TabVector* v = it.data();
1396  v->SetupConstraints();
1397  }
1398  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
1399  TabVector* v = it.data();
1400  // With the first and last partner, we want a common bottom and top,
1401  // respectively, and for each change of partner, we want a common
1402  // top of first with bottom of next.
// NOTE(review): listing number 1403 is absent, so the call on v that the
// comment above describes has been lost and this loop currently does nothing
// with v - restore from upstream.
1404  }
1405  // TODO(rays) The back-to-back pairs should really be done like the
1406  // front-to-front pairs, but there is no convenient way of producing the
1407  // list of partners like there is with the front-to-front.
1408  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
1409  TabVector* v = it.data();
1410  if (!v->IsRightTab())
1411  continue;
1412  // For each back-to-back pair of vectors, try for common top and bottom.
// partner_it starts just after v and stops on wrapping to the list start.
1413  TabVector_IT partner_it(it);
1414  for (partner_it.forward(); !partner_it.at_first(); partner_it.forward()) {
1415  TabVector* partner = partner_it.data();
1416  if (!partner->IsLeftTab() || !v->VOverlap(*partner))
1417  continue;
1418  v->SetupPartnerConstraints(partner);
1419  }
1420  }
1421  // Now actually apply the constraints to get common start/end points.
1422  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
1423  TabVector* v = it.data();
1424  if (!v->IsSeparator())
1425  v->ApplyConstraints();
1426  }
1427  // TODO(rays) Where constraint application fails, it would be good to try
1428  // checking the ends to see if they really should be moved.
1429 }
1431 } // namespace tesseract.
TabVector * GetSinglePartner()
Definition: tabvector.cpp:874
void SetupTabSearch(int x, int y, int *min_key, int *max_key)
Definition: tabfind.cpp:491
void rotate_large(const FCOORD &vec)
Definition: rect.cpp:72
bool y_overlap(const TBOX &box) const
Definition: rect.h:418
const int kMinVerticalSearch
Definition: tabfind.cpp:37
bool InsertBlob(bool h_spread, bool v_spread, BLOBNBOX *blob, BBGrid< BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT > *grid)
Definition: tabfind.cpp:119
static int SortVectorsByKey(const void *v1, const void *v2)
Definition: tabvector.h:294
Definition: points.h:189
inT32 get_total() const
Definition: statistc.h:86
_ConstTessMemberResultCallback_0_0< false, R, T1 >::base * NewPermanentTessCallback(const T1 *obj, R(T2::*member)() const)
Definition: tesscallback.h:116
void SetBlockRuleEdges(TO_BLOCK *block)
Definition: tabfind.cpp:134
void InsertBlobsToGrid(bool h_spread, bool v_spread, BLOBNBOX_LIST *blobs, BBGrid< BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT > *grid)
Definition: tabfind.cpp:92
bool textord_tabfind_show_finaltabs
Definition: tabfind.cpp:63
ICOORD tright_
Definition: bbgrid.h:92
int GutterWidth(int bottom_y, int top_y, const TabVector &v, bool ignore_unmergeables, int max_gutter_width, int *required_shift)
Definition: tabfind.cpp:162
TabVector_LIST * vectors()
Definition: tabfind.h:173
void ExtendToBox(BLOBNBOX *blob)
Definition: tabvector.cpp:246
void set_right_tab_type(TabType new_type)
Definition: blobbox.h:265
void plot_graded_blobs(ScrollView *to_win)
Definition: blobbox.cpp:1067
const double kMinFractionalLinesInColumn
Definition: tabfind.cpp:45
BLOBNBOX_LIST large_blobs
Definition: blobbox.h:772
void Brush(Color color)
Definition: scrollview.cpp:732
#define MAX_INT32
Definition: host.h:62
void set_x(inT16 xin)
rewrite function
Definition: points.h:61
const int kTabRadiusFactor
Definition: tabfind.cpp:35
ScrollView * DisplayTabVectors(ScrollView *tab_win)
Definition: tabfind.cpp:498
void Init(int gridsize, const ICOORD &bleft, const ICOORD &tright)
Definition: bbgrid.h:448
BLOBNBOX_CLIST * boxes()
Definition: colpartition.h:187
ScrollView * MakeWindow(int x, int y, const char *window_name)
Definition: bbgrid.h:593
int left_rule() const
Definition: blobbox.h:298
const ICOORD & bleft() const
Definition: bbgrid.h:73
int push_back(T object)
inT16 x() const
access function
Definition: points.h:52
int RightEdgeForBox(const TBOX &box, bool crossing, bool extended)
Definition: tabfind.cpp:282
#define tprintf(...)
Definition: tprintf.h:31
void plot_noise_blobs(ScrollView *to_win)
Definition: blobbox.cpp:1059
const int kRaggedGutterMultiple
Definition: tabfind.cpp:51
int gridsize() const
Definition: bbgrid.h:64
bool UniquelyVertical() const
Definition: blobbox.h:395
bool Fit(ICOORD vertical, bool force_parallel)
Definition: tabvector.cpp:788
bool joined_to_prev() const
Definition: blobbox.h:241
void set_x(float xin)
rewrite function
Definition: points.h:216
void set_left_rule(int new_left)
Definition: blobbox.h:301
void Deskew(const FCOORD &deskew)
tesseract::ColPartition * owner() const
Definition: blobbox.h:337
#define BOOL_VAR(name, val, comment)
Definition: params.h:279
bool NearlyEqual(T x, T y, T tolerance)
Definition: host.h:87
const int kMaxRaggedSearch
Definition: tabfind.cpp:39
void set_right_crossing_rule(int new_right)
Definition: blobbox.h:319
inT32 pile_count(inT32 value) const
Definition: statistc.h:78
int ExtendedOverlap(int top_y, int bottom_y) const
Definition: tabvector.h:208
int size() const
Definition: genericvector.h:72
void set_y(float yin)
rewrite function
Definition: points.h:220
BlobRegionType region_type() const
Definition: blobbox.h:268
void DontFindTabVectors(BLOBNBOX_LIST *image_blobs, TO_BLOCK *block, FCOORD *deskew, FCOORD *reskew)
Definition: tabfind.cpp:453
void set_region_type(BlobRegionType new_type)
Definition: blobbox.h:271
TabFind(int gridsize, const ICOORD &bleft, const ICOORD &tright, TabVector_LIST *vlines, int vertical_x, int vertical_y, int resolution)
Definition: tabfind.cpp:65
void Rotate(const FCOORD &rotation)
Definition: tabvector.cpp:281
inT16 left() const
Definition: rect.h:68
int VOverlap(const TabVector &other) const
Definition: tabvector.h:199
TabType left_tab_type() const
Definition: blobbox.h:256
const double kAlignedFraction
Definition: alignedblob.cpp:38
void SetBlobRuleEdges(BLOBNBOX_LIST *blobs)
Definition: tabfind.cpp:143
const ICOORD & startpt() const
Definition: tabvector.h:146
BBC * NextFullSearch()
Definition: bbgrid.h:679
static void RotateBlobList(const FCOORD &rotation, BLOBNBOX_LIST *blobs)
Definition: tabfind.cpp:1257
BLOBNBOX_LIST small_blobs
Definition: blobbox.h:771
bool CommonWidth(int width)
Definition: tabfind.cpp:395
void Rectangle(int x1, int y1, int x2, int y2)
Definition: scrollview.cpp:606
int textord_debug_tabfind
Definition: alignedblob.cpp:27
ICOORD vertical_skew_
Definition: tabfind.h:367
int sort_key() const
Definition: tabvector.h:158
double median() const
Definition: statistc.cpp:239
void Evaluate(const ICOORD &vertical, TabFind *finder)
Definition: tabvector.cpp:587
const int kMinLinesInColumn
Definition: tabfind.cpp:41
bool leader_on_left() const
Definition: blobbox.h:343
TabType right_tab_type() const
Definition: blobbox.h:262
const TBOX & bounding_box() const
Definition: colpartition.h:109
static void Update()
Definition: scrollview.cpp:715
inT16 y() const
access function
Definition: points.h:56
const ICOORD & tright() const
Definition: bbgrid.h:76
BLOBNBOX_LIST blobs
Definition: blobbox.h:768
void DeleteUnownedNoise()
Definition: blobbox.cpp:1033
void StartSideSearch(int x, int ymin, int ymax)
Definition: bbgrid.h:750
const double kCosMaxSkewAngle
Definition: tabfind.cpp:60
void StartRadSearch(int x, int y, int max_radius)
Definition: bbgrid.h:702
bool leader_on_right() const
Definition: blobbox.h:349
void SetupPartnerConstraints()
Definition: tabvector.cpp:302
static bool WithinTestRegion(int detail_level, int x, int y)
void add(inT32 value, inT32 count)
Definition: statistc.cpp:101
const double kMinColumnWidth
inT16 top() const
Definition: rect.h:54
const ICOORD & topright() const
Definition: rect.h:100
bool textord_tabfind_show_initialtabs
Definition: tabfind.cpp:62
#define MAX(x, y)
Definition: ndminx.h:24
int right_rule() const
Definition: blobbox.h:304
inT32 mode() const
Definition: statistc.cpp:115
void FitAndEvaluateIfNeeded(const ICOORD &vertical, TabFind *finder)
Definition: tabvector.cpp:573
const int kMinEvaluatedTabs
Definition: tabfind.cpp:56
const double kLineFragmentAspectRatio
Definition: tabfind.cpp:54
Definition: rect.h:30
bool IsLeftTab() const
Definition: tabvector.h:213
static bool DifferentSizes(int size1, int size2)
Definition: tabfind.cpp:408
ScrollView * DisplayTabs(const char *window_name, ScrollView *tab_win)
#define MIN(x, y)
Definition: ndminx.h:28
void TidyBlobs(TO_BLOCK *block)
Definition: tabfind.cpp:466
static bool UnMergeableType(BlobRegionType type)
Definition: blobbox.h:415
void ResetForVerticalText(const FCOORD &rotate, const FCOORD &rerotate, TabVector_LIST *horizontal_lines, int *min_gutter_width)
Definition: tabfind.cpp:1301
ScrollView * FindInitialTabVectors(BLOBNBOX_LIST *image_blobs, int min_gutter_width, double tabfind_aligned_gap_fraction, TO_BLOCK *block)
Definition: tabfind.cpp:515
int XAtY(int y) const
Definition: tabvector.h:189
BBC * NextSideSearch(bool right_to_left)
Definition: bbgrid.h:765
inT16 height() const
Definition: rect.h:104
BBC * NextRectSearch()
Definition: bbgrid.h:846
float y() const
Definition: points.h:212
inT16 right() const
Definition: rect.h:75
static bool VeryDifferentSizes(int size1, int size2)
Definition: tabfind.cpp:414
void set_left_crossing_rule(int new_left)
Definition: blobbox.h:313
void GutterWidthAndNeighbourGap(int tab_x, int mean_height, int max_gutter, bool left, BLOBNBOX *bbox, int *gutter_width, int *neighbour_gap)
Definition: tabfind.cpp:209
inT16 width() const
Definition: rect.h:111
void set_right(int x)
Definition: rect.h:78
void set_left(int x)
Definition: rect.h:71
Definition: statistc.h:33
void print() const
Definition: rect.h:270
void StartFullSearch()
Definition: bbgrid.h:669
inT16 bottom() const
Definition: rect.h:61
virtual ~TabFind()
Definition: tabfind.cpp:78
void Print(const char *prefix)
Definition: tabvector.cpp:525
bool FindTabVectors(TabVector_LIST *hlines, BLOBNBOX_LIST *image_blobs, TO_BLOCK *block, int min_gutter_width, double tabfind_aligned_gap_fraction, ColPartitionGrid *part_grid, FCOORD *deskew, FCOORD *reskew)
Definition: tabfind.cpp:423
void AddPartner(TabVector *partner)
Definition: tabvector.cpp:492
void StartRectSearch(const TBOX &rect)
Definition: bbgrid.h:834
void set_with_shrink(int x, int y)
Set from the given x,y, shrinking the vector to fit if needed.
Definition: points.cpp:43
void set_right_rule(int new_right)
Definition: blobbox.h:307
void print() const
Definition: statistc.cpp:534
const ICOORD & botleft() const
Definition: rect.h:88
TabVector * RightTabForBox(const TBOX &box, bool crossing, bool extended)
Definition: tabfind.cpp:305
const int kMaxVerticalSearch
Definition: tabfind.cpp:38
TabVector * LeftTabForBox(const TBOX &box, bool crossing, bool extended)
Definition: tabfind.cpp:349
BLOBNBOX_LIST noise_blobs
Definition: blobbox.h:770
BBC * NextRadSearch()
Definition: bbgrid.h:717
void set_left_tab_type(TabType new_type)
Definition: blobbox.h:259
bool IsSeparator() const
Definition: tabvector.h:221
double v[max]
float x() const
Definition: points.h:209
void Display(ScrollView *tab_win)
Definition: tabvector.cpp:547
const TBOX & bounding_box() const
Definition: blobbox.h:215
static void MergeSimilarTabVectors(const ICOORD &vertical, TabVector_LIST *vectors, BlobGrid *grid)
Definition: tabvector.cpp:361
static int SortKey(const ICOORD &vertical, int x, int y)
Definition: tabvector.h:280
bool IsRightTab() const
Definition: tabvector.h:217
BlobTextFlowType flow() const
Definition: blobbox.h:280
const int kColumnWidthFactor
Definition: tabfind.h:42
void Pen(Color color)
Definition: scrollview.cpp:726
int LeftEdgeForBox(const TBOX &box, bool crossing, bool extended)
Definition: tabfind.cpp:287
integer coordinate
Definition: points.h:30
const double kMaxGutterWidthAbsolute
Definition: tabfind.cpp:49
TabVector * FindVerticalAlignment(AlignedBlobParams align_params, BLOBNBOX *bbox, int *vertical_x, int *vertical_y)