tesseract  4.00.00dev
split.cpp
Go to the documentation of this file.
1 /* -*-C-*-
2  ********************************************************************************
3  *
4  * File: split.c (Formerly split.c)
5  * Description:
6  * Author: Mark Seaman, OCR Technology
7  * Created: Fri Oct 16 14:37:00 1987
8  * Modified: Fri May 17 16:27:49 1991 (Mark Seaman) marks@hpgrlt
9  * Language: C
10  * Package: N/A
11  * Status: Reusable Software Component
12  *
13  * (c) Copyright 1987, Hewlett-Packard Company.
14  ** Licensed under the Apache License, Version 2.0 (the "License");
15  ** you may not use this file except in compliance with the License.
16  ** You may obtain a copy of the License at
17  ** http://www.apache.org/licenses/LICENSE-2.0
18  ** Unless required by applicable law or agreed to in writing, software
19  ** distributed under the License is distributed on an "AS IS" BASIS,
20  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
21  ** See the License for the specific language governing permissions and
22  ** limitations under the License.
23  *
24  *************************************************************************/
25 /*----------------------------------------------------------------------
26  I n c l u d e s
27 ----------------------------------------------------------------------*/
28 // Include automatically generated configuration file if running autoconf.
29 #ifdef HAVE_CONFIG_H
30 #include "config_auto.h"
31 #endif
32 
33 #include "split.h"
34 #include "coutln.h"
35 #include "tprintf.h"
36 
37 #ifdef __UNIX__
38 #include <assert.h>
39 #endif
40 
41 /*----------------------------------------------------------------------
42  V a r i a b l e s
43 ----------------------------------------------------------------------*/
44 // Limit on the amount of penalty for the chop being off-center.
45 const int kCenterGradeCap = 25;
46 // Ridiculously large priority for splits that are no use.
47 const double kBadPriority = 999.0;
48 
49 BOOL_VAR(wordrec_display_splits, 0, "Display splits");
50 
51 // Returns the bounding box of all the points in the split.
53  return TBOX(
56 }
57 
58 // Hides the SPLIT so the outlines appear not to be cut by it.
59 void SPLIT::Hide() const {
60  EDGEPT* edgept = point1;
61  do {
62  edgept->Hide();
63  edgept = edgept->next;
64  } while (!edgept->EqualPos(*point2) && edgept != point1);
65  edgept = point2;
66  do {
67  edgept->Hide();
68  edgept = edgept->next;
69  } while (!edgept->EqualPos(*point1) && edgept != point2);
70 }
71 
72 // Undoes hide, so the outlines are cut by the SPLIT.
73 void SPLIT::Reveal() const {
74  EDGEPT* edgept = point1;
75  do {
76  edgept->Reveal();
77  edgept = edgept->next;
78  } while (!edgept->EqualPos(*point2) && edgept != point1);
79  edgept = point2;
80  do {
81  edgept->Reveal();
82  edgept = edgept->next;
83  } while (!edgept->EqualPos(*point1) && edgept != point2);
84 }
85 
86 // Compute a split priority based on the bounding boxes of the parts.
87 // The arguments here are config parameters defined in Wordrec. Add chop_
88 // to the beginning of the name.
89 float SPLIT::FullPriority(int xmin, int xmax, double overlap_knob,
90  int centered_maxwidth, double center_knob,
91  double width_change_knob) const {
92  TBOX box1 = Box12();
93  TBOX box2 = Box21();
94  int min_left = MIN(box1.left(), box2.left());
95  int max_right = MAX(box1.right(), box2.right());
96  if (xmin < min_left && xmax > max_right) return kBadPriority;
97 
98  float grade = 0.0f;
99  // grade_overlap.
100  int width1 = box1.width();
101  int width2 = box2.width();
102  int min_width = MIN(width1, width2);
103  int overlap = -box1.x_gap(box2);
104  if (overlap == min_width) {
105  grade += 100.0f; // Total overlap.
106  } else {
107  if (2 * overlap > min_width) overlap += 2 * overlap - min_width;
108  if (overlap > 0) grade += overlap_knob * overlap;
109  }
110  // grade_center_of_blob.
111  if (width1 <= centered_maxwidth || width2 <= centered_maxwidth) {
112  grade += MIN(kCenterGradeCap, center_knob * abs(width1 - width2));
113  }
114  // grade_width_change.
115  float width_change_grade = 20 - (max_right - min_left - MAX(width1, width2));
116  if (width_change_grade > 0.0f)
117  grade += width_change_grade * width_change_knob;
118  return grade;
119 }
120 
121 // Returns true if *this SPLIT appears OK in the sense that it does not cross
122 // any outlines and does not chop off any ridiculously small pieces.
123 bool SPLIT::IsHealthy(const TBLOB& blob, int min_points, int min_area) const {
124  return !IsLittleChunk(min_points, min_area) &&
126 }
127 
128 // Returns true if the split generates a small chunk in terms of either area
129 // or number of points.
130 bool SPLIT::IsLittleChunk(int min_points, int min_area) const {
131  if (point1->ShortNonCircularSegment(min_points, point2) &&
132  point1->SegmentArea(point2) < min_area) {
133  return true;
134  }
135  if (point2->ShortNonCircularSegment(min_points, point1) &&
136  point2->SegmentArea(point1) < min_area) {
137  return true;
138  }
139  return false;
140 }
141 
142 /**********************************************************************
143  * make_edgept
144  *
145  * Create an EDGEPT and hook it into an existing list of edge points.
146  **********************************************************************/
147 EDGEPT *make_edgept(int x, int y, EDGEPT *next, EDGEPT *prev) {
148  EDGEPT *this_edgept;
149  /* Create point */
150  this_edgept = new EDGEPT;
151  this_edgept->pos.x = x;
152  this_edgept->pos.y = y;
153  // Now deal with the src_outline steps.
154  C_OUTLINE* prev_ol = prev->src_outline;
155  if (prev_ol != NULL && prev->next == next) {
156  // Compute the fraction of the segment that is being cut.
157  FCOORD segment_vec(next->pos.x - prev->pos.x, next->pos.y - prev->pos.y);
158  FCOORD target_vec(x - prev->pos.x, y - prev->pos.y);
159  double cut_fraction = target_vec.length() / segment_vec.length();
160  // Get the start and end at the step level.
161  ICOORD step_start = prev_ol->position_at_index(prev->start_step);
162  int end_step = prev->start_step + prev->step_count;
163  int step_length = prev_ol->pathlength();
164  ICOORD step_end = prev_ol->position_at_index(end_step % step_length);
165  ICOORD step_vec = step_end - step_start;
166  double target_length = step_vec.length() * cut_fraction;
167  // Find the point on the segment that gives the length nearest to target.
168  int best_step = prev->start_step;
169  ICOORD total_step(0, 0);
170  double best_dist = target_length;
171  for (int s = prev->start_step; s < end_step; ++s) {
172  total_step += prev_ol->step(s % step_length);
173  double dist = fabs(target_length - total_step.length());
174  if (dist < best_dist) {
175  best_dist = dist;
176  best_step = s + 1;
177  }
178  }
179  // The new point is an intermediate point.
180  this_edgept->src_outline = prev_ol;
181  this_edgept->step_count = end_step - best_step;
182  this_edgept->start_step = best_step % step_length;
183  prev->step_count = best_step - prev->start_step;
184  } else {
185  // The new point is poly only.
186  this_edgept->src_outline = NULL;
187  this_edgept->step_count = 0;
188  this_edgept->start_step = 0;
189  }
190  /* Hook it up */
191  this_edgept->next = next;
192  this_edgept->prev = prev;
193  prev->next = this_edgept;
194  next->prev = this_edgept;
195  /* Set up vec entries */
196  this_edgept->vec.x = this_edgept->next->pos.x - x;
197  this_edgept->vec.y = this_edgept->next->pos.y - y;
198  this_edgept->prev->vec.x = x - this_edgept->prev->pos.x;
199  this_edgept->prev->vec.y = y - this_edgept->prev->pos.y;
200  return this_edgept;
201 }
202 
203 /**********************************************************************
204  * remove_edgept
205  *
206  * Remove a given EDGEPT from its list and delete it.
207  **********************************************************************/
208 void remove_edgept(EDGEPT *point) {
209  EDGEPT *prev = point->prev;
210  EDGEPT *next = point->next;
211  // Add point's steps onto prev's steps if they are from the same outline.
212  if (prev->src_outline == point->src_outline && prev->src_outline != NULL) {
213  prev->step_count += point->step_count;
214  }
215  prev->next = next;
216  next->prev = prev;
217  prev->vec.x = next->pos.x - prev->pos.x;
218  prev->vec.y = next->pos.y - prev->pos.y;
219  delete point;
220 }
221 
222 /**********************************************************************
223  * Print
224  *
225  * Shows the coordinates of both points in a split.
226  **********************************************************************/
227 void SPLIT::Print() const {
228  tprintf("(%d,%d)--(%d,%d)", point1->pos.x, point1->pos.y, point2->pos.x,
229  point2->pos.y);
230 }
231 
232 #ifndef GRAPHICS_DISABLED
233 // Draws the split in the given window.
234 void SPLIT::Mark(ScrollView* window) const {
235  window->Pen(ScrollView::GREEN);
236  window->Line(point1->pos.x, point1->pos.y, point2->pos.x, point2->pos.y);
237  window->UpdateWindow();
238 }
239 #endif
240 
241 // Creates two outlines out of one by splitting the original one in half.
242 // Inserts the resulting outlines into the given list.
243 void SPLIT::SplitOutlineList(TESSLINE* outlines) const {
244  SplitOutline();
245  while (outlines->next != NULL) outlines = outlines->next;
246 
247  outlines->next = new TESSLINE;
248  outlines->next->loop = point1;
249  outlines->next->ComputeBoundingBox();
250 
251  outlines = outlines->next;
252 
253  outlines->next = new TESSLINE;
254  outlines->next->loop = point2;
255  outlines->next->ComputeBoundingBox();
256 
257  outlines->next->next = NULL;
258 }
259 
260 // Makes a split between these two edge points, but does not affect the
261 // outlines to which they belong.
262 void SPLIT::SplitOutline() const {
263  EDGEPT* temp2 = point2->next;
264  EDGEPT* temp1 = point1->next;
265  /* Create two new points */
266  EDGEPT* new_point1 = make_edgept(point1->pos.x, point1->pos.y, temp1, point2);
267  EDGEPT* new_point2 = make_edgept(point2->pos.x, point2->pos.y, temp2, point1);
268  // point1 and 2 are now cross-over points, so they must have NULL
269  // src_outlines and give their src_outline information their new
270  // replacements.
271  new_point1->src_outline = point1->src_outline;
272  new_point1->start_step = point1->start_step;
273  new_point1->step_count = point1->step_count;
274  new_point2->src_outline = point2->src_outline;
275  new_point2->start_step = point2->start_step;
276  new_point2->step_count = point2->step_count;
277  point1->src_outline = NULL;
278  point1->start_step = 0;
279  point1->step_count = 0;
280  point2->src_outline = NULL;
281  point2->start_step = 0;
282  point2->step_count = 0;
283 }
284 
285 // Undoes the effect of SplitOutlineList, correcting the outlines for undoing
286 // the split, but possibly leaving some duplicate outlines.
287 void SPLIT::UnsplitOutlineList(TBLOB* blob) const {
288  /* Modify edge points */
289  UnsplitOutlines();
290 
291  TESSLINE* outline1 = new TESSLINE;
292  outline1->next = blob->outlines;
293  blob->outlines = outline1;
294  outline1->loop = point1;
295 
296  TESSLINE* outline2 = new TESSLINE;
297  outline2->next = blob->outlines;
298  blob->outlines = outline2;
299  outline2->loop = point2;
300 }
301 
302 // Removes the split that was put between these two points.
304  EDGEPT* tmp1 = point1->next;
305  EDGEPT* tmp2 = point2->next;
306 
307  tmp1->next->prev = point2;
308  tmp2->next->prev = point1;
309 
310  // tmp2 is coincident with point1. point1 takes tmp2's place as tmp2 is
311  // deleted.
312  point1->next = tmp2->next;
313  point1->src_outline = tmp2->src_outline;
314  point1->start_step = tmp2->start_step;
315  point1->step_count = tmp2->step_count;
316  // Likewise point2 takes tmp1's place.
317  point2->next = tmp1->next;
318  point2->src_outline = tmp1->src_outline;
319  point2->start_step = tmp1->start_step;
320  point2->step_count = tmp1->step_count;
321 
322  delete tmp1;
323  delete tmp2;
324 
325  point1->vec.x = point1->next->pos.x - point1->pos.x;
326  point1->vec.y = point1->next->pos.y - point1->pos.y;
327 
328  point2->vec.x = point2->next->pos.x - point2->pos.x;
329  point2->vec.y = point2->next->pos.y - point2->pos.y;
330 }
int start_step
Definition: blobs.h:173
EDGEPT * make_edgept(int x, int y, EDGEPT *next, EDGEPT *prev)
Definition: split.cpp:147
void Line(int x1, int y1, int x2, int y2)
Definition: scrollview.cpp:538
ICOORD position_at_index(int index) const
Definition: coutln.h:151
TESSLINE * next
Definition: blobs.h:258
float length() const
find length
Definition: points.h:78
TPOINT pos
Definition: blobs.h:163
void Mark(ScrollView *window) const
Definition: split.cpp:234
Definition: points.h:189
const int kCenterGradeCap
Definition: split.cpp:45
C_OUTLINE * src_outline
Definition: blobs.h:171
bool wordrec_display_splits
Definition: split.cpp:49
void Reveal() const
Definition: split.cpp:73
EDGEPT * prev
Definition: blobs.h:170
TESSLINE * outlines
Definition: blobs.h:377
#define tprintf(...)
Definition: tprintf.h:31
float FullPriority(int xmin, int xmax, double overlap_knob, int centered_maxwidth, double center_knob, double width_change_knob) const
Definition: split.cpp:89
void remove_edgept(EDGEPT *point)
Definition: split.cpp:208
void Hide()
Definition: blobs.h:147
void UnsplitOutlineList(TBLOB *blob) const
Definition: split.cpp:287
#define BOOL_VAR(name, val, comment)
Definition: params.h:279
bool SegmentCrossesOutline(const TPOINT &pt1, const TPOINT &pt2) const
Definition: blobs.h:316
void UnsplitOutlines() const
Definition: split.cpp:303
void Print() const
Definition: split.cpp:227
inT16 left() const
Definition: rect.h:68
void Reveal()
Definition: blobs.h:150
void UpdateWindow()
Definition: scrollview.cpp:710
TBOX Box21() const
Definition: split.h:46
EDGEPT * point2
Definition: split.h:104
VECTOR vec
Definition: blobs.h:164
EDGEPT * loop
Definition: blobs.h:257
void SplitOutline() const
Definition: split.cpp:262
inT16 x
Definition: blobs.h:71
int step_count
Definition: blobs.h:174
bool IsLittleChunk(int min_points, int min_area) const
Definition: split.cpp:130
EDGEPT * next
Definition: blobs.h:169
Definition: blobs.h:76
bool EqualPos(const EDGEPT &other) const
Definition: blobs.h:105
void ComputeBoundingBox()
Definition: blobs.cpp:225
#define MAX(x, y)
Definition: ndminx.h:24
inT16 y
Definition: blobs.h:72
Definition: rect.h:30
#define MIN(x, y)
Definition: ndminx.h:28
ICOORD step(int index) const
Definition: coutln.h:142
Definition: blobs.h:261
bool ShortNonCircularSegment(int min_points, const EDGEPT *end) const
Definition: blobs.h:135
inT16 right() const
Definition: rect.h:75
inT16 width() const
Definition: rect.h:111
TBOX Box12() const
Definition: split.h:44
EDGEPT * point1
Definition: split.h:103
int SegmentArea(const EDGEPT *end) const
Definition: blobs.h:122
void SplitOutlineList(TESSLINE *outlines) const
Definition: split.cpp:243
float length() const
find length
Definition: points.h:230
TBOX bounding_box() const
Definition: split.cpp:52
inT32 pathlength() const
Definition: coutln.h:133
void Hide() const
Definition: split.cpp:59
const double kBadPriority
Definition: split.cpp:47
void Pen(Color color)
Definition: scrollview.cpp:726
integer coordinate
Definition: points.h:30
bool IsHealthy(const TBLOB &blob, int min_points, int min_area) const
Definition: split.cpp:123
int x_gap(const TBOX &box) const
Definition: rect.h:217