tesseract  4.00.00dev
stepblob.cpp
Go to the documentation of this file.
1 /**********************************************************************
2  * File: stepblob.cpp (Formerly cblob.c)
3  * Description: Code for C_BLOB class.
4  * Author: Ray Smith
5  * Created: Tue Oct 08 10:41:13 BST 1991
6  *
7  * (C) Copyright 1991, Hewlett-Packard Ltd.
8  ** Licensed under the Apache License, Version 2.0 (the "License");
9  ** you may not use this file except in compliance with the License.
10  ** You may obtain a copy of the License at
11  ** http://www.apache.org/licenses/LICENSE-2.0
12  ** Unless required by applicable law or agreed to in writing, software
13  ** distributed under the License is distributed on an "AS IS" BASIS,
14  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15  ** See the License for the specific language governing permissions and
16  ** limitations under the License.
17  *
18  **********************************************************************/
19 
20 #include "stepblob.h"
21 #include "allheaders.h"
22 
23 // Include automatically generated configuration file if running autoconf.
24 #ifdef HAVE_CONFIG_H
25 #include "config_auto.h"
26 #endif
27 
28 // Max perimeter to width ratio for a baseline position above box bottom.
29 const double kMaxPerimeterWidthRatio = 8.0;
30 
32 /**********************************************************************
33  * position_outline
34  *
35  * Position the outline in the given list at the relevant place
36  * according to its nesting.
37  **********************************************************************/
38 static void position_outline( //put in place
39  C_OUTLINE *outline, //thing to place
40  C_OUTLINE_LIST *destlist //desstination list
41  ) {
42  C_OUTLINE *dest_outline; //outline from dest list
43  C_OUTLINE_IT it = destlist; //iterator
44  //iterator on children
45  C_OUTLINE_IT child_it = outline->child ();
46 
47  if (!it.empty ()) {
48  do {
49  dest_outline = it.data (); //get destination
50  //encloses dest
51  if (*dest_outline < *outline) {
52  //take off list
53  dest_outline = it.extract ();
54  //put this in place
55  it.add_after_then_move (outline);
56  //make it a child
57  child_it.add_to_end (dest_outline);
58  while (!it.at_last ()) {
59  it.forward (); //do rest of list
60  //check for other children
61  dest_outline = it.data ();
62  if (*dest_outline < *outline) {
63  //take off list
64  dest_outline = it.extract ();
65  child_it.add_to_end (dest_outline);
66  //make it a child
67  if (it.empty ())
68  break;
69  }
70  }
71  return; //finished
72  }
73  //enclosed by dest
74  else if (*outline < *dest_outline) {
75  position_outline (outline, dest_outline->child ());
76  //place in child list
77  return; //finished
78  }
79  it.forward ();
80  }
81  while (!it.at_first ());
82  }
83  it.add_to_end (outline); //at outer level
84 }
85 
86 
87 /**********************************************************************
88  * plot_outline_list
89  *
90  * Draw a list of outlines in the given colour and their children
91  * in the child colour.
92  **********************************************************************/
93 
94 #ifndef GRAPHICS_DISABLED
95 static void plot_outline_list( //draw outlines
96  C_OUTLINE_LIST *list, //outline to draw
97  ScrollView* window, //window to draw in
98  ScrollView::Color colour, //colour to use
99  ScrollView::Color child_colour //colour of children
100  ) {
101  C_OUTLINE *outline; //current outline
102  C_OUTLINE_IT it = list; //iterator
103 
104  for (it.mark_cycle_pt (); !it.cycled_list (); it.forward ()) {
105  outline = it.data ();
106  //draw it
107  outline->plot (window, colour);
108  if (!outline->child ()->empty ())
109  plot_outline_list (outline->child (), window,
110  child_colour, child_colour);
111  }
112 }
113 // Draws the outlines in the given colour, and child_colour, normalized
114 // using the given denorm, making use of sub-pixel accurate information
115 // if available.
116 static void plot_normed_outline_list(const DENORM& denorm,
117  C_OUTLINE_LIST *list,
118  ScrollView::Color colour,
119  ScrollView::Color child_colour,
120  ScrollView* window) {
121  C_OUTLINE_IT it(list);
122  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
123  C_OUTLINE* outline = it.data();
124  outline->plot_normed(denorm, colour, window);
125  if (!outline->child()->empty())
126  plot_normed_outline_list(denorm, outline->child(), child_colour,
127  child_colour, window);
128  }
129 }
130 #endif
131 
132 
133 /**********************************************************************
134  * reverse_outline_list
135  *
136  * Reverse a list of outlines and their children.
137  **********************************************************************/
138 
139 static void reverse_outline_list(C_OUTLINE_LIST *list) {
140  C_OUTLINE_IT it = list; // iterator
141 
142  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
143  C_OUTLINE* outline = it.data();
144  outline->reverse(); // reverse it
145  outline->set_flag(COUT_INVERSE, TRUE);
146  if (!outline->child()->empty())
147  reverse_outline_list(outline->child());
148  }
149 }
150 
151 
152 /**********************************************************************
153  * C_BLOB::C_BLOB
154  *
155  * Constructor to build a C_BLOB from a list of C_OUTLINEs.
156  * The C_OUTLINEs are not copied so the source list is emptied.
157  * The C_OUTLINEs are nested correctly in the blob.
158  **********************************************************************/
159 
160 C_BLOB::C_BLOB(C_OUTLINE_LIST *outline_list) {
161  for (C_OUTLINE_IT ol_it(outline_list); !ol_it.empty(); ol_it.forward()) {
162  C_OUTLINE* outline = ol_it.extract();
163  // Position this outline in appropriate position in the hierarchy.
164  position_outline(outline, &outlines);
165  }
167 }
168 
169 // Simpler constructor to build a blob from a single outline that has
170 // already been fully initialized.
172  C_OUTLINE_IT it(&outlines);
173  it.add_to_end(outline);
174 }
175 
176 // Builds a set of one or more blobs from a list of outlines.
177 // Input: one outline on outline_list contains all the others, but the
178 // nesting and order are undefined.
179 // If good_blob is true, the blob is added to good_blobs_it, unless
180 // an illegal (generation-skipping) parent-child relationship is found.
181 // If so, the parent blob goes to bad_blobs_it, and the immediate children
182 // are promoted to the top level, recursively being sent to good_blobs_it.
183 // If good_blob is false, all created blobs will go to the bad_blobs_it.
184 // Output: outline_list is empty. One or more blobs are added to
185 // good_blobs_it and/or bad_blobs_it.
187  C_OUTLINE_LIST* outline_list,
188  C_BLOB_IT* good_blobs_it,
189  C_BLOB_IT* bad_blobs_it) {
190  // List of top-level outlines with correctly nested children.
191  C_OUTLINE_LIST nested_outlines;
192  for (C_OUTLINE_IT ol_it(outline_list); !ol_it.empty(); ol_it.forward()) {
193  C_OUTLINE* outline = ol_it.extract();
194  // Position this outline in appropriate position in the hierarchy.
195  position_outline(outline, &nested_outlines);
196  }
197  // Check for legal nesting and reassign as required.
198  for (C_OUTLINE_IT ol_it(&nested_outlines); !ol_it.empty(); ol_it.forward()) {
199  C_OUTLINE* outline = ol_it.extract();
200  bool blob_is_good = good_blob;
201  if (!outline->IsLegallyNested()) {
202  // The blob is illegally nested.
203  // Mark it bad, and add all its children to the top-level list.
204  blob_is_good = false;
205  ol_it.add_list_after(outline->child());
206  }
207  C_BLOB* blob = new C_BLOB(outline);
208  // Set inverse flag and reverse if needed.
210  // Put on appropriate list.
211  if (!blob_is_good && bad_blobs_it != NULL)
212  bad_blobs_it->add_after_then_move(blob);
213  else
214  good_blobs_it->add_after_then_move(blob);
215  }
216 }
217 
218 // Sets the COUT_INVERSE flag appropriately on the outlines and their
219 // children recursively, reversing the outlines if needed so that
220 // everything has an anticlockwise top-level.
222  C_OUTLINE_IT ol_it(&outlines);
223  for (ol_it.mark_cycle_pt(); !ol_it.cycled_list(); ol_it.forward()) {
224  C_OUTLINE* outline = ol_it.data();
225  if (outline->turn_direction() < 0) {
226  outline->reverse();
227  reverse_outline_list(outline->child());
228  outline->set_flag(COUT_INVERSE, TRUE);
229  } else {
230  outline->set_flag(COUT_INVERSE, FALSE);
231  }
232  }
233 }
234 
235 
236 // Build and return a fake blob containing a single fake outline with no
237 // steps.
239  C_OUTLINE_LIST outlines;
240  C_OUTLINE::FakeOutline(box, &outlines);
241  return new C_BLOB(&outlines);
242 }
243 
244 /**********************************************************************
245  * C_BLOB::bounding_box
246  *
247  * Return the bounding box of the blob.
248  **********************************************************************/
249 
250 TBOX C_BLOB::bounding_box() const { // bounding box
251  C_OUTLINE *outline; // current outline
252  // This is a read-only iteration of the outlines.
253  C_OUTLINE_IT it = const_cast<C_OUTLINE_LIST*>(&outlines);
254  TBOX box; // bounding box
255 
256  for (it.mark_cycle_pt (); !it.cycled_list (); it.forward ()) {
257  outline = it.data ();
258  box += outline->bounding_box ();
259  }
260  return box;
261 }
262 
263 
264 /**********************************************************************
265  * C_BLOB::area
266  *
267  * Return the area of the blob.
268  **********************************************************************/
269 
270 inT32 C_BLOB::area() { //area
271  C_OUTLINE *outline; //current outline
272  C_OUTLINE_IT it = &outlines; //outlines of blob
273  inT32 total; //total area
274 
275  total = 0;
276  for (it.mark_cycle_pt (); !it.cycled_list (); it.forward ()) {
277  outline = it.data ();
278  total += outline->area ();
279  }
280  return total;
281 }
282 
283 /**********************************************************************
284  * C_BLOB::perimeter
285  *
286  * Return the perimeter of the top and 2nd level outlines.
287  **********************************************************************/
288 
290  C_OUTLINE *outline; // current outline
291  C_OUTLINE_IT it = &outlines; // outlines of blob
292  inT32 total; // total perimeter
293 
294  total = 0;
295  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
296  outline = it.data();
297  total += outline->perimeter();
298  }
299  return total;
300 }
301 
302 
303 /**********************************************************************
304  * C_BLOB::outer_area
305  *
306  * Return the area of the blob.
307  **********************************************************************/
308 
310  C_OUTLINE *outline; //current outline
311  C_OUTLINE_IT it = &outlines; //outlines of blob
312  inT32 total; //total area
313 
314  total = 0;
315  for (it.mark_cycle_pt (); !it.cycled_list (); it.forward ()) {
316  outline = it.data ();
317  total += outline->outer_area ();
318  }
319  return total;
320 }
321 
322 
323 /**********************************************************************
324  * C_BLOB::count_transitions
325  *
326  * Return the total x and y maxes and mins in the blob.
327  * Chlid outlines are not counted.
328  **********************************************************************/
329 
331  inT32 threshold //on size
332  ) {
333  C_OUTLINE *outline; //current outline
334  C_OUTLINE_IT it = &outlines; //outlines of blob
335  inT32 total; //total area
336 
337  total = 0;
338  for (it.mark_cycle_pt (); !it.cycled_list (); it.forward ()) {
339  outline = it.data ();
340  total += outline->count_transitions (threshold);
341  }
342  return total;
343 }
344 
345 
346 /**********************************************************************
347  * C_BLOB::move
348  *
349  * Move C_BLOB by vector
350  **********************************************************************/
351 
352 void C_BLOB::move( // reposition blob
353  const ICOORD vec // by vector
354  ) {
355  C_OUTLINE_IT it(&outlines); // iterator
356 
357  for (it.mark_cycle_pt (); !it.cycled_list (); it.forward ())
358  it.data ()->move (vec); // move each outline
359 }
360 
361 // Static helper for C_BLOB::rotate to allow recursion of child outlines.
362 void RotateOutlineList(const FCOORD& rotation, C_OUTLINE_LIST* outlines) {
363  C_OUTLINE_LIST new_outlines;
364  C_OUTLINE_IT src_it(outlines);
365  C_OUTLINE_IT dest_it(&new_outlines);
366  while (!src_it.empty()) {
367  C_OUTLINE* old_outline = src_it.extract();
368  src_it.forward();
369  C_OUTLINE* new_outline = new C_OUTLINE(old_outline, rotation);
370  if (!old_outline->child()->empty()) {
371  RotateOutlineList(rotation, old_outline->child());
372  C_OUTLINE_IT child_it(new_outline->child());
373  child_it.add_list_after(old_outline->child());
374  }
375  delete old_outline;
376  dest_it.add_to_end(new_outline);
377  }
378  src_it.add_list_after(&new_outlines);
379 }
380 
381 /**********************************************************************
382  * C_BLOB::rotate
383  *
384  * Rotate C_BLOB by rotation.
385  * Warning! has to rebuild all the C_OUTLINEs.
386  **********************************************************************/
387 void C_BLOB::rotate(const FCOORD& rotation) {
388  RotateOutlineList(rotation, &outlines);
389 }
390 
391 // Helper calls ComputeEdgeOffsets or ComputeBinaryOffsets recursively on the
392 // outline list and its children.
393 static void ComputeEdgeOffsetsOutlineList(int threshold, Pix* pix,
394  C_OUTLINE_LIST *list) {
395  C_OUTLINE_IT it(list);
396  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
397  C_OUTLINE* outline = it.data();
398  if (pix != NULL && pixGetDepth(pix) == 8)
399  outline->ComputeEdgeOffsets(threshold, pix);
400  else
401  outline->ComputeBinaryOffsets();
402  if (!outline->child()->empty())
403  ComputeEdgeOffsetsOutlineList(threshold, pix, outline->child());
404  }
405 }
406 
407 // Adds sub-pixel resolution EdgeOffsets for the outlines using greyscale
408 // if the supplied pix is 8-bit or the binary edges if NULL.
409 void C_BLOB::ComputeEdgeOffsets(int threshold, Pix* pix) {
410  ComputeEdgeOffsetsOutlineList(threshold, pix, &outlines);
411 }
412 
413 // Estimates and returns the baseline position based on the shape of the
414 // outlines.
415 // We first find the minimum y-coord (y_mins) at each x-coord within the blob.
416 // If there is a run of some y or y+1 in y_mins that is longer than the total
417 // number of positions at bottom or bottom+1, subject to the additional
418 // condition that at least one side of the y/y+1 run is higher than y+1, so it
419 // is not a local minimum, then y, not the bottom, makes a good candidate
420 // baseline position for this blob. Eg
421 // | ---|
422 // | |
423 // |- -----------| <= Good candidate baseline position.
424 // |- -|
425 // | -|
426 // |---| <= Bottom of blob
428  TBOX box = bounding_box();
429  int left = box.left();
430  int width = box.width();
431  int bottom = box.bottom();
432  if (outlines.empty() || perimeter() > width * kMaxPerimeterWidthRatio)
433  return bottom; // This is only for non-CJK blobs.
434  // Get the minimum y coordinate at each x-coordinate.
435  GenericVector<int> y_mins;
436  y_mins.init_to_size(width + 1, box.top());
437  C_OUTLINE_IT it(&outlines);
438  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
439  C_OUTLINE* outline = it.data();
440  ICOORD pos = outline->start_pos();
441  for (int s = 0; s < outline->pathlength(); ++s) {
442  if (pos.y() < y_mins[pos.x() - left])
443  y_mins[pos.x() - left] = pos.y();
444  pos += outline->step(s);
445  }
446  }
447  // Find the total extent of the bottom or bottom + 1.
448  int bottom_extent = 0;
449  for (int x = 0; x <= width; ++x) {
450  if (y_mins[x] == bottom || y_mins[x] == bottom + 1)
451  ++bottom_extent;
452  }
453  // Find the lowest run longer than the bottom extent that is not the bottom.
454  int best_min = box.top();
455  int prev_run = 0;
456  int prev_y = box.top();
457  int prev_prev_y = box.top();
458  for (int x = 0; x < width; x += prev_run) {
459  // Find the length of the current run.
460  int y_at_x = y_mins[x];
461  int run = 1;
462  while (x + run <= width && y_mins[x + run] == y_at_x) ++run;
463  if (y_at_x > bottom + 1) {
464  // Possible contender.
465  int total_run = run;
466  // Find extent of current value or +1 to the right of x.
467  while (x + total_run <= width &&
468  (y_mins[x + total_run] == y_at_x ||
469  y_mins[x + total_run] == y_at_x + 1)) ++total_run;
470  // At least one end has to be higher so it is not a local max.
471  if (prev_prev_y > y_at_x + 1 || x + total_run > width ||
472  y_mins[x + total_run] > y_at_x + 1) {
473  // If the prev_run is at y + 1, then we can add that too. There cannot
474  // be a suitable run at y before that or we would have found it already.
475  if (prev_run > 0 && prev_y == y_at_x + 1) total_run += prev_run;
476  if (total_run > bottom_extent && y_at_x < best_min) {
477  best_min = y_at_x;
478  }
479  }
480  }
481  prev_run = run;
482  prev_prev_y = prev_y;
483  prev_y = y_at_x;
484  }
485  return best_min == box.top() ? bottom : best_min;
486 }
487 
488 static void render_outline_list(C_OUTLINE_LIST *list,
489  int left, int top, Pix* pix) {
490  C_OUTLINE_IT it(list);
491  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
492  C_OUTLINE* outline = it.data();
493  outline->render(left, top, pix);
494  if (!outline->child()->empty())
495  render_outline_list(outline->child(), left, top, pix);
496  }
497 }
498 
499 static void render_outline_list_outline(C_OUTLINE_LIST *list,
500  int left, int top, Pix* pix) {
501  C_OUTLINE_IT it(list);
502  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
503  C_OUTLINE* outline = it.data();
504  outline->render_outline(left, top, pix);
505  }
506 }
507 
508 // Returns a Pix rendering of the blob. pixDestroy after use.
510  TBOX box = bounding_box();
511  Pix* pix = pixCreate(box.width(), box.height(), 1);
512  render_outline_list(&outlines, box.left(), box.top(), pix);
513  return pix;
514 }
515 
516 // Returns a Pix rendering of the outline of the blob. (no fill).
517 // pixDestroy after use.
519  TBOX box = bounding_box();
520  Pix* pix = pixCreate(box.width(), box.height(), 1);
521  render_outline_list_outline(&outlines, box.left(), box.top(), pix);
522  return pix;
523 }
524 
525 /**********************************************************************
526  * C_BLOB::plot
527  *
528  * Draw the C_BLOB in the given colour.
529  **********************************************************************/
530 
531 #ifndef GRAPHICS_DISABLED
532 void C_BLOB::plot(ScrollView* window, // window to draw in
533  ScrollView::Color blob_colour, // main colour
534  ScrollView::Color child_colour) { // for holes
535  plot_outline_list(&outlines, window, blob_colour, child_colour);
536 }
537 // Draws the blob in the given colour, and child_colour, normalized
538 // using the given denorm, making use of sub-pixel accurate information
539 // if available.
540 void C_BLOB::plot_normed(const DENORM& denorm,
541  ScrollView::Color blob_colour,
542  ScrollView::Color child_colour,
543  ScrollView* window) {
544  plot_normed_outline_list(denorm, &outlines, blob_colour, child_colour,
545  window);
546 }
547 #endif
static void FakeOutline(const TBOX &box, C_OUTLINE_LIST *outlines)
Definition: coutln.cpp:239
void ComputeBinaryOffsets()
Definition: coutln.cpp:850
C_OUTLINE_LIST * child()
Definition: coutln.h:106
inT32 outer_area()
Definition: stepblob.cpp:309
#define TRUE
Definition: capi.h:45
static C_BLOB * FakeBlob(const TBOX &box)
Definition: stepblob.cpp:238
Definition: points.h:189
void render(int left, int top, Pix *pix) const
Definition: coutln.cpp:908
int32_t inT32
Definition: host.h:38
void init_to_size(int size, T t)
const double kMaxPerimeterWidthRatio
Definition: stepblob.cpp:29
void plot_normed(const DENORM &denorm, ScrollView::Color colour, ScrollView *window) const
Definition: coutln.cpp:988
Pix * render_outline()
Definition: stepblob.cpp:518
inT32 area()
Definition: stepblob.cpp:270
inT16 x() const
access function
Definition: points.h:52
inT16 turn_direction() const
Definition: coutln.cpp:537
void ComputeEdgeOffsets(int threshold, Pix *pix)
Definition: coutln.cpp:733
void plot(ScrollView *window, ScrollView::Color colour) const
Definition: coutln.cpp:956
int16_t inT16
Definition: host.h:36
void reverse()
Definition: coutln.cpp:565
inT16 left() const
Definition: rect.h:68
void RotateOutlineList(const FCOORD &rotation, C_OUTLINE_LIST *outlines)
Definition: stepblob.cpp:362
static void ConstructBlobsFromOutlines(bool good_blob, C_OUTLINE_LIST *outline_list, C_BLOB_IT *good_blobs_it, C_BLOB_IT *bad_blobs_it)
Definition: stepblob.cpp:186
#define FALSE
Definition: capi.h:46
const TBOX & bounding_box() const
Definition: coutln.h:111
inT16 y() const
access_function
Definition: points.h:56
void CheckInverseFlagAndDirection()
Definition: stepblob.cpp:221
void ComputeEdgeOffsets(int threshold, Pix *pix)
Definition: stepblob.cpp:409
void render_outline(int left, int top, Pix *pix) const
Definition: coutln.cpp:930
void plot_normed(const DENORM &denorm, ScrollView::Color blob_colour, ScrollView::Color child_colour, ScrollView *window)
Definition: stepblob.cpp:540
inT32 outer_area() const
Definition: coutln.cpp:308
inT16 top() const
Definition: rect.h:54
void plot(ScrollView *window, ScrollView::Color blob_colour, ScrollView::Color child_colour)
Definition: stepblob.cpp:532
inT32 area() const
Definition: coutln.cpp:255
Definition: rect.h:30
inT32 count_transitions(inT32 threshold)
Definition: stepblob.cpp:330
bool IsLegallyNested() const
Definition: coutln.cpp:604
ICOORD step(int index) const
Definition: coutln.h:142
void move(const ICOORD vec)
Definition: stepblob.cpp:352
TBOX bounding_box() const
Definition: stepblob.cpp:250
const ICOORD & start_pos() const
Definition: coutln.h:146
inT16 height() const
Definition: rect.h:104
Pix * render()
Definition: stepblob.cpp:509
inT16 width() const
Definition: rect.h:111
inT32 perimeter() const
Definition: coutln.cpp:289
void set_flag(C_OUTLINE_FLAGS mask, BOOL8 value)
Definition: coutln.h:100
inT16 bottom() const
Definition: rect.h:61
inT16 EstimateBaselinePosition()
Definition: stepblob.cpp:427
#define ELISTIZE(CLASSNAME)
Definition: elst.h:961
void rotate(const FCOORD &rotation)
Definition: stepblob.cpp:387
inT32 perimeter()
Definition: stepblob.cpp:289
class DLLSYM C_OUTLINE
Definition: coutln.h:65
inT32 pathlength() const
Definition: coutln.h:133
C_BLOB()
Definition: stepblob.h:33
inT32 count_transitions(inT32 threshold)
Definition: coutln.cpp:340
integer coordinate
Definition: points.h:30