tesseract  4.00.00dev
textlineprojection.cpp
Go to the documentation of this file.
1 // Copyright 2011 Google Inc. All Rights Reserved.
2 // Author: rays@google.com (Ray Smith)
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 // http://www.apache.org/licenses/LICENSE-2.0
8 // Unless required by applicable law or agreed to in writing, software
9 // distributed under the License is distributed on an "AS IS" BASIS,
10 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 // See the License for the specific language governing permissions and
12 // limitations under the License.
13 
14 #ifdef HAVE_CONFIG_H
15 #include "config_auto.h"
16 #endif
17 
18 #include "textlineprojection.h"
19 #include "allheaders.h"
20 #include "bbgrid.h" // Base class.
21 #include "blobbox.h" // BlobNeighourDir.
22 #include "blobs.h"
23 #include "colpartition.h"
24 #include "normalis.h"
25 
26 // Padding factor to use on definitely oriented blobs
27 const int kOrientedPadFactor = 8;
28 // Padding factor to use on not definitely oriented blobs.
29 const int kDefaultPadFactor = 2;
30 // Penalty factor for going away from the line center.
31 const int kWrongWayPenalty = 4;
32 // Ratio between parallel gap and perpendicular gap used to measure total
33 // distance of a box from a target box in curved textline space.
34 // parallel-gap is treated more favorably by this factor to allow catching
35 // quotes and ellipses at the end of textlines.
36 const int kParaPerpDistRatio = 4;
37 // Multiple of scale_factor_ that the inter-line gap must be before we start
38 // padding the increment box perpendicular to the text line.
39 const int kMinLineSpacingFactor = 4;
40 // Maximum tab-stop overrun for horizontal padding, in projection pixels.
41 const int kMaxTabStopOverrun = 6;
42 
43 namespace tesseract {
44 
46  : x_origin_(0), y_origin_(0), pix_(NULL) {
47  // The projection map should be about 100 ppi, whatever the input.
48  scale_factor_ = IntCastRounded(resolution / 100.0);
49  if (scale_factor_ < 1) scale_factor_ = 1;
50 }
52  pixDestroy(&pix_);
53 }
54 
55 // Build the projection profile given the input_block containing lists of
56 // blobs, a rotation to convert to image coords,
57 // and a full-resolution nontext_map, marking out areas to avoid.
58 // During construction, we have the following assumptions:
59 // The rotation is a multiple of 90 degrees, ie no deskew yet.
60 // The blobs have had their left and right rules set to also limit
61 // the range of projection.
63  const FCOORD& rotation,
64  Pix* nontext_map) {
65  pixDestroy(&pix_);
66  TBOX image_box(0, 0, pixGetWidth(nontext_map), pixGetHeight(nontext_map));
67  x_origin_ = 0;
68  y_origin_ = image_box.height();
69  int width = (image_box.width() + scale_factor_ - 1) / scale_factor_;
70  int height = (image_box.height() + scale_factor_ - 1) / scale_factor_;
71 
72  pix_ = pixCreate(width, height, 8);
73  ProjectBlobs(&input_block->blobs, rotation, image_box, nontext_map);
74  ProjectBlobs(&input_block->large_blobs, rotation, image_box, nontext_map);
75  Pix* final_pix = pixBlockconv(pix_, 1, 1);
76 // Pix* final_pix = pixBlockconv(pix_, 2, 2);
77  pixDestroy(&pix_);
78  pix_ = final_pix;
79 }
80 
81 // Display the blobs in the window colored according to textline quality.
82 void TextlineProjection::PlotGradedBlobs(BLOBNBOX_LIST* blobs,
83  ScrollView* win) {
84 #ifndef GRAPHICS_DISABLED
85  BLOBNBOX_IT it(blobs);
86  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
87  BLOBNBOX* blob = it.data();
88  const TBOX& box = blob->bounding_box();
89  bool bad_box = BoxOutOfHTextline(box, NULL, false);
90  if (blob->UniquelyVertical())
91  win->Pen(ScrollView::YELLOW);
92  else
93  win->Pen(bad_box ? ScrollView::RED : ScrollView::BLUE);
94  win->Rectangle(box.left(), box.bottom(), box.right(), box.top());
95  }
96  win->Update();
97 #endif // GRAPHICS_DISABLED
98 }
99 
100 // Moves blobs that look like they don't sit well on a textline from the
101 // input blobs list to the output small_blobs list.
102 // This gets them away from initial textline finding to stop diacritics
103 // from forming incorrect textlines. (Introduced mainly to fix Thai.)
105  BLOBNBOX_LIST* blobs, BLOBNBOX_LIST* small_blobs) const {
106  BLOBNBOX_IT it(blobs);
107  BLOBNBOX_IT small_it(small_blobs);
108  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
109  BLOBNBOX* blob = it.data();
110  const TBOX& box = blob->bounding_box();
111  bool debug = AlignedBlob::WithinTestRegion(2, box.left(),
112  box.bottom());
113  if (BoxOutOfHTextline(box, NULL, debug) && !blob->UniquelyVertical()) {
114  blob->ClearNeighbours();
115  small_it.add_to_end(it.extract());
116  }
117  }
118 }
119 
120 // Create a window and display the projection in it.
122 #ifndef GRAPHICS_DISABLED
123  int width = pixGetWidth(pix_);
124  int height = pixGetHeight(pix_);
125  Pix* pixc = pixCreate(width, height, 32);
126  int src_wpl = pixGetWpl(pix_);
127  int col_wpl = pixGetWpl(pixc);
128  uinT32* src_data = pixGetData(pix_);
129  uinT32* col_data = pixGetData(pixc);
130  for (int y = 0; y < height; ++y, src_data += src_wpl, col_data += col_wpl) {
131  for (int x = 0; x < width; ++x) {
132  int pixel = GET_DATA_BYTE(src_data, x);
133  l_uint32 result;
134  if (pixel <= 17)
135  composeRGBPixel(0, 0, pixel * 15, &result);
136  else if (pixel <= 145)
137  composeRGBPixel(0, (pixel - 17) * 2, 255, &result);
138  else
139  composeRGBPixel((pixel - 145) * 2, 255, 255, &result);
140  col_data[x] = result;
141  }
142  }
143  ScrollView* win = new ScrollView("Projection", 0, 0,
144  width, height, width, height);
145  win->Image(pixc, 0, 0);
146  win->Update();
147  pixDestroy(&pixc);
148 #endif // GRAPHICS_DISABLED
149 }
150 
151 // Compute the distance of the box from the partition using curved projection
152 // space. As DistanceOfBoxFromBox, except that the direction is taken from
153 // the ColPartition and the median bounds of the ColPartition are used as
154 // the to_box.
156  const ColPartition& part,
157  const DENORM* denorm,
158  bool debug) const {
159  // Compute a partition box that uses the median top/bottom of the blobs
160  // within and median left/right for vertical.
161  TBOX part_box = part.bounding_box();
162  if (part.IsHorizontalType()) {
163  part_box.set_top(part.median_top());
164  part_box.set_bottom(part.median_bottom());
165  } else {
166  part_box.set_left(part.median_left());
167  part_box.set_right(part.median_right());
168  }
169  // Now use DistanceOfBoxFromBox to make the actual calculation.
170  return DistanceOfBoxFromBox(box, part_box, part.IsHorizontalType(),
171  denorm, debug);
172 }
173 
174 // Compute the distance from the from_box to the to_box using curved
175 // projection space. Separation that involves a decrease in projection
176 // density (moving from the from_box to the to_box) is weighted more heavily
177 // than constant density, and an increase is weighted less.
178 // If horizontal_textline is true, then curved space is used vertically,
179 // as for a diacritic on the edge of a textline.
180 // The projection uses original image coords, so denorm is used to get
181 // back to the image coords from box/part space.
182 // How the calculation works: Think of a diacritic near a textline.
183 // Distance is measured from the far side of the from_box to the near side of
184 // the to_box. Shown is the horizontal textline case.
185 // |------^-----|
186 // | from | box |
187 // |------|-----|
188 // perpendicular |
189 // <------v-------->|--------------------|
190 // parallel | to box |
191 // |--------------------|
192 // Perpendicular distance uses "curved space" See VerticalDistance below.
193 // Parallel distance is linear.
194 // Result is perpendicular_gap + parallel_gap / kParaPerpDistRatio.
196  const TBOX& to_box,
197  bool horizontal_textline,
198  const DENORM* denorm,
199  bool debug) const {
200  // The parallel_gap is the horizontal gap between a horizontal textline and
201  // the box. Analogous for vertical.
202  int parallel_gap = 0;
203  // start_pt is the box end of the line to be modified for curved space.
204  TPOINT start_pt;
205  // end_pt is the partition end of the line to be modified for curved space.
206  TPOINT end_pt;
207  if (horizontal_textline) {
208  parallel_gap = from_box.x_gap(to_box) + from_box.width();
209  start_pt.x = (from_box.left() + from_box.right()) / 2;
210  end_pt.x = start_pt.x;
211  if (from_box.top() - to_box.top() >= to_box.bottom() - from_box.bottom()) {
212  start_pt.y = from_box.top();
213  end_pt.y = MIN(to_box.top(), start_pt.y);
214  } else {
215  start_pt.y = from_box.bottom();
216  end_pt.y = MAX(to_box.bottom(), start_pt.y);
217  }
218  } else {
219  parallel_gap = from_box.y_gap(to_box) + from_box.height();
220  if (from_box.right() - to_box.right() >= to_box.left() - from_box.left()) {
221  start_pt.x = from_box.right();
222  end_pt.x = MIN(to_box.right(), start_pt.x);
223  } else {
224  start_pt.x = from_box.left();
225  end_pt.x = MAX(to_box.left(), start_pt.x);
226  }
227  start_pt.y = (from_box.bottom() + from_box.top()) / 2;
228  end_pt.y = start_pt.y;
229  }
230  // The perpendicular gap is the max vertical distance gap out of:
231  // top of from_box to to_box top and bottom of from_box to to_box bottom.
232  // This value is then modified for curved projection space.
233  // Analogous for vertical.
234  int perpendicular_gap = 0;
235  // If start_pt == end_pt, then the from_box lies entirely within the to_box
236  // (in the perpendicular direction), so we don't need to calculate the
237  // perpendicular_gap.
238  if (start_pt.x != end_pt.x || start_pt.y != end_pt.y) {
239  if (denorm != NULL) {
240  // Denormalize the start and end.
241  denorm->DenormTransform(NULL, start_pt, &start_pt);
242  denorm->DenormTransform(NULL, end_pt, &end_pt);
243  }
244  if (abs(start_pt.y - end_pt.y) >= abs(start_pt.x - end_pt.x)) {
245  perpendicular_gap = VerticalDistance(debug, start_pt.x, start_pt.y,
246  end_pt.y);
247  } else {
248  perpendicular_gap = HorizontalDistance(debug, start_pt.x, end_pt.x,
249  start_pt.y);
250  }
251  }
252  // The parallel_gap weighs less than the perpendicular_gap.
253  return perpendicular_gap + parallel_gap / kParaPerpDistRatio;
254 }
255 
256 // Compute the distance between (x, y1) and (x, y2) using the rule that
257 // a decrease in textline density is weighted more heavily than an increase.
258 // The coordinates are in source image space, ie processed by any denorm
259 // already, but not yet scaled by scale_factor_.
260 // Going from the outside of a textline to the inside should measure much
261 // less distance than going from the inside of a textline to the outside.
262 // How it works:
263 // An increase is cheap (getting closer to a textline).
264 // Constant costs unity.
265 // A decrease is expensive (getting further from a textline).
266 // Pixels in projection map Counted distance
267 // 2
268 // 3 1/x
269 // 3 1
270 // 2 x
271 // 5 1/x
272 // 7 1/x
273 // Total: 1 + x + 3/x where x = kWrongWayPenalty.
275  int y1, int y2) const {
276  x = ImageXToProjectionX(x);
277  y1 = ImageYToProjectionY(y1);
278  y2 = ImageYToProjectionY(y2);
279  if (y1 == y2) return 0;
280  int wpl = pixGetWpl(pix_);
281  int step = y1 < y2 ? 1 : -1;
282  uinT32* data = pixGetData(pix_) + y1 * wpl;
283  wpl *= step;
284  int prev_pixel = GET_DATA_BYTE(data, x);
285  int distance = 0;
286  int right_way_steps = 0;
287  for (int y = y1; y != y2; y += step) {
288  data += wpl;
289  int pixel = GET_DATA_BYTE(data, x);
290  if (debug)
291  tprintf("At (%d,%d), pix = %d, prev=%d\n",
292  x, y + step, pixel, prev_pixel);
293  if (pixel < prev_pixel)
294  distance += kWrongWayPenalty;
295  else if (pixel > prev_pixel)
296  ++right_way_steps;
297  else
298  ++distance;
299  prev_pixel = pixel;
300  }
301  return distance * scale_factor_ +
302  right_way_steps * scale_factor_ / kWrongWayPenalty;
303 }
304 
305 // Compute the distance between (x1, y) and (x2, y) using the rule that
306 // a decrease in textline density is weighted more heavily than an increase.
307 int TextlineProjection::HorizontalDistance(bool debug, int x1, int x2,
308  int y) const {
309  x1 = ImageXToProjectionX(x1);
310  x2 = ImageXToProjectionX(x2);
311  y = ImageYToProjectionY(y);
312  if (x1 == x2) return 0;
313  int wpl = pixGetWpl(pix_);
314  int step = x1 < x2 ? 1 : -1;
315  uinT32* data = pixGetData(pix_) + y * wpl;
316  int prev_pixel = GET_DATA_BYTE(data, x1);
317  int distance = 0;
318  int right_way_steps = 0;
319  for (int x = x1; x != x2; x += step) {
320  int pixel = GET_DATA_BYTE(data, x + step);
321  if (debug)
322  tprintf("At (%d,%d), pix = %d, prev=%d\n",
323  x + step, y, pixel, prev_pixel);
324  if (pixel < prev_pixel)
325  distance += kWrongWayPenalty;
326  else if (pixel > prev_pixel)
327  ++right_way_steps;
328  else
329  ++distance;
330  prev_pixel = pixel;
331  }
332  return distance * scale_factor_ +
333  right_way_steps * scale_factor_ / kWrongWayPenalty;
334 }
335 
336 // Returns true if the blob appears to be outside of a textline.
337 // Such blobs are potentially diacritics (even if large in Thai) and should
338 // be kept away from initial textline finding.
340  const DENORM* denorm,
341  bool debug) const {
342  int grad1 = 0;
343  int grad2 = 0;
344  EvaluateBoxInternal(box, denorm, debug, &grad1, &grad2, NULL, NULL);
345  int worst_result = MIN(grad1, grad2);
346  int total_result = grad1 + grad2;
347  if (total_result >= 6) return false; // Strongly in textline.
348  // Medium strength: if either gradient is negative, it is likely outside
349  // the body of the textline.
350  if (worst_result < 0)
351  return true;
352  return false;
353 }
354 
355 // Evaluates the textlineiness of a ColPartition. Uses EvaluateBox below,
356 // but uses the median top/bottom for horizontal and median left/right for
357 // vertical instead of the bounding box edges.
358 // Evaluates for both horizontal and vertical and returns the best result,
359 // with a positive value for horizontal and a negative value for vertical.
361  const DENORM* denorm,
362  bool debug) const {
363  if (part.IsSingleton())
364  return EvaluateBox(part.bounding_box(), denorm, debug);
365  // Test vertical orientation.
366  TBOX box = part.bounding_box();
367  // Use the partition median for left/right.
368  box.set_left(part.median_left());
369  box.set_right(part.median_right());
370  int vresult = EvaluateBox(box, denorm, debug);
371 
372  // Test horizontal orientation.
373  box = part.bounding_box();
374  // Use the partition median for top/bottom.
375  box.set_top(part.median_top());
376  box.set_bottom(part.median_bottom());
377  int hresult = EvaluateBox(box, denorm, debug);
378  if (debug) {
379  tprintf("Partition hresult=%d, vresult=%d from:", hresult, vresult);
380  part.bounding_box().print();
381  part.Print();
382  }
383  return hresult >= -vresult ? hresult : vresult;
384 }
385 
386 // Computes the mean projection gradients over the horizontal and vertical
387 // edges of the box:
388 // -h-h-h-h-h-h
389 // |------------| mean=htop -v|+v--------+v|-v
390 // |+h+h+h+h+h+h| -v|+v +v|-v
391 // | | -v|+v +v|-v
392 // | box | -v|+v box +v|-v
393 // | | -v|+v +v|-v
394 // |+h+h+h+h+h+h| -v|+v +v|-v
395 // |------------| mean=hbot -v|+v--------+v|-v
396 // -h-h-h-h-h-h
397 // mean=vleft mean=vright
398 //
399 // Returns MAX(htop,hbot) - MAX(vleft,vright), which is a positive number
400 // for a horizontal textline, a negative number for a vertical textline,
401 // and near zero for undecided. Undecided is most likely non-text.
402 // All the gradients are truncated to remain non-negative, since negative
403 // horizontal gradients don't give any indication of being vertical and
404 // vice versa.
405 // Additional complexity: The coordinates have to be transformed to original
406 // image coordinates with denorm (if not null), scaled to match the projection
407 // pix, and THEN step out 2 pixels each way from the edge to compute the
408 // gradient, and tries 3 positions, each measuring the gradient over a
409 // 4-pixel spread: (+3/-1), (+2/-2), (+1/-3). This complexity is handled by
410 // several layers of helpers below.
411 int TextlineProjection::EvaluateBox(const TBOX& box, const DENORM* denorm,
412  bool debug) const {
413  return EvaluateBoxInternal(box, denorm, debug, NULL, NULL, NULL, NULL);
414 }
415 
416 // Internal version of EvaluateBox returns the unclipped gradients as well
417 // as the result of EvaluateBox.
418 // hgrad1 and hgrad2 are the gradients for the horizontal textline.
419 int TextlineProjection::EvaluateBoxInternal(const TBOX& box,
420  const DENORM* denorm, bool debug,
421  int* hgrad1, int* hgrad2,
422  int* vgrad1, int* vgrad2) const {
423  int top_gradient = BestMeanGradientInRow(denorm, box.left(), box.right(),
424  box.top(), true);
425  int bottom_gradient = -BestMeanGradientInRow(denorm, box.left(), box.right(),
426  box.bottom(), false);
427  int left_gradient = BestMeanGradientInColumn(denorm, box.left(), box.bottom(),
428  box.top(), true);
429  int right_gradient = -BestMeanGradientInColumn(denorm, box.right(),
430  box.bottom(), box.top(),
431  false);
432  int top_clipped = MAX(top_gradient, 0);
433  int bottom_clipped = MAX(bottom_gradient, 0);
434  int left_clipped = MAX(left_gradient, 0);
435  int right_clipped = MAX(right_gradient, 0);
436  if (debug) {
437  tprintf("Gradients: top = %d, bottom = %d, left= %d, right= %d for box:",
438  top_gradient, bottom_gradient, left_gradient, right_gradient);
439  box.print();
440  }
441  int result = MAX(top_clipped, bottom_clipped) -
442  MAX(left_clipped, right_clipped);
443  if (hgrad1 != NULL && hgrad2 != NULL) {
444  *hgrad1 = top_gradient;
445  *hgrad2 = bottom_gradient;
446  }
447  if (vgrad1 != NULL && vgrad2 != NULL) {
448  *vgrad1 = left_gradient;
449  *vgrad2 = right_gradient;
450  }
451  return result;
452 }
453 
454 // Helper returns the mean gradient value for the horizontal row at the given
455 // y, (in the external coordinates) by subtracting the mean of the transformed
456 // row 2 pixels above from the mean of the transformed row 2 pixels below.
457 // This gives a positive value for a good top edge and negative for bottom.
458 // Returns the best result out of +2/-2, +3/-1, +1/-3 pixels from the edge.
459 int TextlineProjection::BestMeanGradientInRow(const DENORM* denorm,
460  inT16 min_x, inT16 max_x, inT16 y,
461  bool best_is_max) const {
462  TPOINT start_pt(min_x, y);
463  TPOINT end_pt(max_x, y);
464  int upper = MeanPixelsInLineSegment(denorm, -2, start_pt, end_pt);
465  int lower = MeanPixelsInLineSegment(denorm, 2, start_pt, end_pt);
466  int best_gradient = lower - upper;
467  upper = MeanPixelsInLineSegment(denorm, -1, start_pt, end_pt);
468  lower = MeanPixelsInLineSegment(denorm, 3, start_pt, end_pt);
469  int gradient = lower - upper;
470  if ((gradient > best_gradient) == best_is_max)
471  best_gradient = gradient;
472  upper = MeanPixelsInLineSegment(denorm, -3, start_pt, end_pt);
473  lower = MeanPixelsInLineSegment(denorm, 1, start_pt, end_pt);
474  gradient = lower - upper;
475  if ((gradient > best_gradient) == best_is_max)
476  best_gradient = gradient;
477  return best_gradient;
478 }
479 
480 // Helper returns the mean gradient value for the vertical column at the
481 // given x, (in the external coordinates) by subtracting the mean of the
482 // transformed column 2 pixels left from the mean of the transformed column
483 // 2 pixels to the right.
484 // This gives a positive value for a good left edge and negative for right.
485 // Returns the best result out of +2/-2, +3/-1, +1/-3 pixels from the edge.
486 int TextlineProjection::BestMeanGradientInColumn(const DENORM* denorm, inT16 x,
487  inT16 min_y, inT16 max_y,
488  bool best_is_max) const {
489  TPOINT start_pt(x, min_y);
490  TPOINT end_pt(x, max_y);
491  int left = MeanPixelsInLineSegment(denorm, -2, start_pt, end_pt);
492  int right = MeanPixelsInLineSegment(denorm, 2, start_pt, end_pt);
493  int best_gradient = right - left;
494  left = MeanPixelsInLineSegment(denorm, -1, start_pt, end_pt);
495  right = MeanPixelsInLineSegment(denorm, 3, start_pt, end_pt);
496  int gradient = right - left;
497  if ((gradient > best_gradient) == best_is_max)
498  best_gradient = gradient;
499  left = MeanPixelsInLineSegment(denorm, -3, start_pt, end_pt);
500  right = MeanPixelsInLineSegment(denorm, 1, start_pt, end_pt);
501  gradient = right - left;
502  if ((gradient > best_gradient) == best_is_max)
503  best_gradient = gradient;
504  return best_gradient;
505 }
506 
507 // Helper returns the mean pixel value over the line between the start_pt and
508 // end_pt (inclusive), but shifted perpendicular to the line in the projection
509 // image by offset pixels. For simplicity, it is assumed that the vector is
510 // either nearly horizontal or nearly vertical. It works on skewed textlines!
511 // The end points are in external coordinates, and will be denormalized with
512 // the denorm if not NULL before further conversion to pix coordinates.
513 // After all the conversions, the offset is added to the direction
514 // perpendicular to the line direction. The offset is thus in projection image
515 // coordinates, which allows the caller to get a guaranteed displacement
516 // between pixels used to calculate gradients.
517 int TextlineProjection::MeanPixelsInLineSegment(const DENORM* denorm,
518  int offset,
519  TPOINT start_pt,
520  TPOINT end_pt) const {
521  TransformToPixCoords(denorm, &start_pt);
522  TransformToPixCoords(denorm, &end_pt);
523  TruncateToImageBounds(&start_pt);
524  TruncateToImageBounds(&end_pt);
525  int wpl = pixGetWpl(pix_);
526  uinT32* data = pixGetData(pix_);
527  int total = 0;
528  int count = 0;
529  int x_delta = end_pt.x - start_pt.x;
530  int y_delta = end_pt.y - start_pt.y;
531  if (abs(x_delta) >= abs(y_delta)) {
532  if (x_delta == 0)
533  return 0;
534  // Horizontal line. Add the offset vertically.
535  int x_step = x_delta > 0 ? 1 : -1;
536  // Correct offset for rotation, keeping it anti-clockwise of the delta.
537  offset *= x_step;
538  start_pt.y += offset;
539  end_pt.y += offset;
540  TruncateToImageBounds(&start_pt);
541  TruncateToImageBounds(&end_pt);
542  x_delta = end_pt.x - start_pt.x;
543  y_delta = end_pt.y - start_pt.y;
544  count = x_delta * x_step + 1;
545  for (int x = start_pt.x; x != end_pt.x; x += x_step) {
546  int y = start_pt.y + DivRounded(y_delta * (x - start_pt.x), x_delta);
547  total += GET_DATA_BYTE(data + wpl * y, x);
548  }
549  } else {
550  // Vertical line. Add the offset horizontally.
551  int y_step = y_delta > 0 ? 1 : -1;
552  // Correct offset for rotation, keeping it anti-clockwise of the delta.
553  // Pix holds the image with y=0 at the top, so the offset is negated.
554  offset *= -y_step;
555  start_pt.x += offset;
556  end_pt.x += offset;
557  TruncateToImageBounds(&start_pt);
558  TruncateToImageBounds(&end_pt);
559  x_delta = end_pt.x - start_pt.x;
560  y_delta = end_pt.y - start_pt.y;
561  count = y_delta * y_step + 1;
562  for (int y = start_pt.y; y != end_pt.y; y += y_step) {
563  int x = start_pt.x + DivRounded(x_delta * (y - start_pt.y), y_delta);
564  total += GET_DATA_BYTE(data + wpl * y, x);
565  }
566  }
567  return DivRounded(total, count);
568 }
569 
570 // Given an input pix, and a box, the sides of the box are shrunk inwards until
571 // they bound any black pixels found within the original box.
572 // The function converts between tesseract coords and the pix coords assuming
573 // that this pix is full resolution equal in size to the original image.
574 // Returns an empty box if there are no black pixels in the source box.
575 static TBOX BoundsWithinBox(Pix* pix, const TBOX& box) {
576  int im_height = pixGetHeight(pix);
577  Box* input_box = boxCreate(box.left(), im_height - box.top(),
578  box.width(), box.height());
579  Box* output_box = NULL;
580  pixClipBoxToForeground(pix, input_box, NULL, &output_box);
581  TBOX result_box;
582  if (output_box != NULL) {
583  l_int32 x, y, width, height;
584  boxGetGeometry(output_box, &x, &y, &width, &height);
585  result_box.set_left(x);
586  result_box.set_right(x + width);
587  result_box.set_top(im_height - y);
588  result_box.set_bottom(result_box.top() - height);
589  boxDestroy(&output_box);
590  }
591  boxDestroy(&input_box);
592  return result_box;
593 }
594 
595 // Splits the given box in half at x_middle or y_middle according to split_on_x
596 // and checks for nontext_map pixels in each half. Reduces the bbox so that it
597 // still includes the middle point, but does not touch any fg pixels in
598 // nontext_map. An empty box may be returned if there is no such box.
599 static void TruncateBoxToMissNonText(int x_middle, int y_middle,
600  bool split_on_x, Pix* nontext_map,
601  TBOX* bbox) {
602  TBOX box1(*bbox);
603  TBOX box2(*bbox);
604  TBOX im_box;
605  if (split_on_x) {
606  box1.set_right(x_middle);
607  im_box = BoundsWithinBox(nontext_map, box1);
608  if (!im_box.null_box()) box1.set_left(im_box.right());
609  box2.set_left(x_middle);
610  im_box = BoundsWithinBox(nontext_map, box2);
611  if (!im_box.null_box()) box2.set_right(im_box.left());
612  } else {
613  box1.set_bottom(y_middle);
614  im_box = BoundsWithinBox(nontext_map, box1);
615  if (!im_box.null_box()) box1.set_top(im_box.bottom());
616  box2.set_top(y_middle);
617  im_box = BoundsWithinBox(nontext_map, box2);
618  if (!im_box.null_box()) box2.set_bottom(im_box.top());
619  }
620  box1 += box2;
621  *bbox = box1;
622 }
623 
624 
625 // Helper function to add 1 to a rectangle in source image coords to the
626 // internal projection pix_.
627 void TextlineProjection::IncrementRectangle8Bit(const TBOX& box) {
628  int scaled_left = ImageXToProjectionX(box.left());
629  int scaled_top = ImageYToProjectionY(box.top());
630  int scaled_right = ImageXToProjectionX(box.right());
631  int scaled_bottom = ImageYToProjectionY(box.bottom());
632  int wpl = pixGetWpl(pix_);
633  uinT32* data = pixGetData(pix_) + scaled_top * wpl;
634  for (int y = scaled_top; y <= scaled_bottom; ++y) {
635  for (int x = scaled_left; x <= scaled_right; ++x) {
636  int pixel = GET_DATA_BYTE(data, x);
637  if (pixel < 255)
638  SET_DATA_BYTE(data, x, pixel + 1);
639  }
640  data += wpl;
641  }
642 }
643 
644 // Inserts a list of blobs into the projection.
645 // Rotation is a multiple of 90 degrees to get from blob coords to
646 // nontext_map coords, nontext_map_box is the bounds of the nontext_map.
647 // Blobs are spread horizontally or vertically according to their internal
648 // flags, but the spreading is truncated by set pixels in the nontext_map
649 // and also by the horizontal rule line limits on the blobs.
650 void TextlineProjection::ProjectBlobs(BLOBNBOX_LIST* blobs,
651  const FCOORD& rotation,
652  const TBOX& nontext_map_box,
653  Pix* nontext_map) {
654  BLOBNBOX_IT blob_it(blobs);
655  for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
656  BLOBNBOX* blob = blob_it.data();
657  TBOX bbox = blob->bounding_box();
658  ICOORD middle((bbox.left() + bbox.right()) / 2,
659  (bbox.bottom() + bbox.top()) / 2);
660  bool spreading_horizontally = PadBlobBox(blob, &bbox);
661  // Rotate to match the nontext_map.
662  bbox.rotate(rotation);
663  middle.rotate(rotation);
664  if (rotation.x() == 0.0f)
665  spreading_horizontally = !spreading_horizontally;
666  // Clip to the image before applying the increments.
667  bbox &= nontext_map_box; // This is in-place box intersection.
668  // Check for image pixels before spreading.
669  TruncateBoxToMissNonText(middle.x(), middle.y(), spreading_horizontally,
670  nontext_map, &bbox);
671  if (bbox.area() > 0) {
672  IncrementRectangle8Bit(bbox);
673  }
674  }
675 }
676 
677 // Pads the bounding box of the given blob according to whether it is on
678 // a horizontal or vertical text line, taking into account tab-stops near
679 // the blob. Returns true if padding was in the horizontal direction.
680 bool TextlineProjection::PadBlobBox(BLOBNBOX* blob, TBOX* bbox) {
681  // Determine which direction to spread.
682  // If text is well spaced out, it can be useful to pad perpendicular to
683  // the textline direction, so as to ensure diacritics get absorbed
684  // correctly, but if the text is tightly spaced, this will destroy the
685  // blank space between textlines in the projection map, and that would
686  // be very bad.
687  int pad_limit = scale_factor_ * kMinLineSpacingFactor;
688  int xpad = 0;
689  int ypad = 0;
690  bool padding_horizontally = false;
691  if (blob->UniquelyHorizontal()) {
692  xpad = bbox->height() * kOrientedPadFactor;
693  padding_horizontally = true;
694  // If the text appears to be very well spaced, pad the other direction by a
695  // single pixel in the projection profile space to help join diacritics to
696  // the textline.
697  if ((blob->neighbour(BND_ABOVE) == NULL ||
698  bbox->y_gap(blob->neighbour(BND_ABOVE)->bounding_box()) > pad_limit) &&
699  (blob->neighbour(BND_BELOW) == NULL ||
700  bbox->y_gap(blob->neighbour(BND_BELOW)->bounding_box()) > pad_limit)) {
701  ypad = scale_factor_;
702  }
703  } else if (blob->UniquelyVertical()) {
704  ypad = bbox->width() * kOrientedPadFactor;
705  if ((blob->neighbour(BND_LEFT) == NULL ||
706  bbox->x_gap(blob->neighbour(BND_LEFT)->bounding_box()) > pad_limit) &&
707  (blob->neighbour(BND_RIGHT) == NULL ||
708  bbox->x_gap(blob->neighbour(BND_RIGHT)->bounding_box()) > pad_limit)) {
709  xpad = scale_factor_;
710  }
711  } else {
712  if ((blob->neighbour(BND_ABOVE) != NULL &&
713  blob->neighbour(BND_ABOVE)->neighbour(BND_BELOW) == blob) ||
714  (blob->neighbour(BND_BELOW) != NULL &&
715  blob->neighbour(BND_BELOW)->neighbour(BND_ABOVE) == blob)) {
716  ypad = bbox->width() * kDefaultPadFactor;
717  }
718  if ((blob->neighbour(BND_RIGHT) != NULL &&
719  blob->neighbour(BND_RIGHT)->neighbour(BND_LEFT) == blob) ||
720  (blob->neighbour(BND_LEFT) != NULL &&
721  blob->neighbour(BND_LEFT)->neighbour(BND_RIGHT) == blob)) {
722  xpad = bbox->height() * kDefaultPadFactor;
723  padding_horizontally = true;
724  }
725  }
726  bbox->pad(xpad, ypad);
727  pad_limit = scale_factor_ * kMaxTabStopOverrun;
728  // Now shrink horizontally to avoid stepping more than pad_limit over a
729  // tab-stop.
730  if (bbox->left() < blob->left_rule() - pad_limit) {
731  bbox->set_left(blob->left_rule() - pad_limit);
732  }
733  if (bbox->right() > blob->right_rule() + pad_limit) {
734  bbox->set_right(blob->right_rule() + pad_limit);
735  }
736  return padding_horizontally;
737 }
738 
739 // Helper denormalizes the TPOINT with the denorm if not NULL, then
740 // converts to pix_ coordinates.
741 void TextlineProjection::TransformToPixCoords(const DENORM* denorm,
742  TPOINT* pt) const {
743  if (denorm != NULL) {
744  // Denormalize the point.
745  denorm->DenormTransform(NULL, *pt, pt);
746  }
747  pt->x = ImageXToProjectionX(pt->x);
748  pt->y = ImageYToProjectionY(pt->y);
749 }
750 
751 #ifdef _MSC_VER
752 #pragma optimize("g", off)
753 #endif // _MSC_VER
754 // Helper truncates the TPOINT to be within the pix_.
755 void TextlineProjection::TruncateToImageBounds(TPOINT* pt) const {
756  pt->x = ClipToRange<int>(pt->x, 0, pixGetWidth(pix_) - 1);
757  pt->y = ClipToRange<int>(pt->y, 0, pixGetHeight(pix_) - 1);
758 }
759 #ifdef _MSC_VER
760 #pragma optimize("", on)
761 #endif // _MSC_VER
762 
763 // Transform tesseract image coordinates to coordinates used in the projection.
764 int TextlineProjection::ImageXToProjectionX(int x) const {
765  x = ClipToRange((x - x_origin_) / scale_factor_, 0, pixGetWidth(pix_) - 1);
766  return x;
767 }
768 int TextlineProjection::ImageYToProjectionY(int y) const {
769  y = ClipToRange((y_origin_ - y) / scale_factor_, 0, pixGetHeight(pix_) - 1);
770  return y;
771 }
772 
773 } // namespace tesseract.
const int kDefaultPadFactor
void rotate(const FCOORD &vec)
Definition: ipoints.h:241
Definition: points.h:189
void PlotGradedBlobs(BLOBNBOX_LIST *blobs, ScrollView *win)
inT32 area() const
Definition: rect.h:118
BLOBNBOX_LIST large_blobs
Definition: blobbox.h:772
int HorizontalDistance(bool debug, int x1, int x2, int y) const
int EvaluateBox(const TBOX &box, const DENORM *denorm, bool debug) const
int VerticalDistance(bool debug, int x, int y1, int y2) const
int DistanceOfBoxFromBox(const TBOX &from_box, const TBOX &to_box, bool horizontal_textline, const DENORM *denorm, bool debug) const
int left_rule() const
Definition: blobbox.h:298
inT16 x() const
access function
Definition: points.h:52
#define tprintf(...)
Definition: tprintf.h:31
bool UniquelyVertical() const
Definition: blobbox.h:395
voidpf uLong offset
Definition: ioapi.h:42
const int kMinLineSpacingFactor
int IntCastRounded(double x)
Definition: helpers.h:179
const int kWrongWayPenalty
const int kMaxTabStopOverrun
int16_t inT16
Definition: host.h:36
inT16 left() const
Definition: rect.h:68
void set_top(int y)
Definition: rect.h:57
int y_gap(const TBOX &box) const
Definition: rect.h:225
void ConstructProjection(TO_BLOCK *input_block, const FCOORD &rotation, Pix *nontext_map)
uint32_t uinT32
Definition: host.h:39
void DenormTransform(const DENORM *last_denorm, const TPOINT &pt, TPOINT *original) const
Definition: normalis.cpp:389
void Rectangle(int x1, int y1, int x2, int y2)
Definition: scrollview.cpp:606
void ClearNeighbours()
Definition: blobbox.h:494
inT16 x
Definition: blobs.h:71
const TBOX & bounding_box() const
Definition: colpartition.h:109
static void Update()
Definition: scrollview.cpp:715
inT16 y() const
access_function
Definition: points.h:56
BLOBNBOX_LIST blobs
Definition: blobbox.h:768
T ClipToRange(const T &x, const T &lower_bound, const T &upper_bound)
Definition: helpers.h:122
int DistanceOfBoxFromPartition(const TBOX &box, const ColPartition &part, const DENORM *denorm, bool debug) const
bool null_box() const
Definition: rect.h:46
void pad(int xpad, int ypad)
Definition: rect.h:127
BLOBNBOX * neighbour(BlobNeighbourDir n) const
Definition: blobbox.h:355
void MoveNonTextlineBlobs(BLOBNBOX_LIST *blobs, BLOBNBOX_LIST *small_blobs) const
static bool WithinTestRegion(int detail_level, int x, int y)
bool IsSingleton() const
Definition: colpartition.h:361
inT16 top() const
Definition: rect.h:54
#define MAX(x, y)
Definition: ndminx.h:24
int EvaluateColPartition(const ColPartition &part, const DENORM *denorm, bool debug) const
int right_rule() const
Definition: blobbox.h:304
inT16 y
Definition: blobs.h:72
Definition: rect.h:30
const int kParaPerpDistRatio
const int kOrientedPadFactor
#define MIN(x, y)
Definition: ndminx.h:28
Definition: blobs.h:50
bool UniquelyHorizontal() const
Definition: blobbox.h:398
inT16 height() const
Definition: rect.h:104
inT16 right() const
Definition: rect.h:75
inT16 width() const
Definition: rect.h:111
void set_right(int x)
Definition: rect.h:78
void set_left(int x)
Definition: rect.h:71
void print() const
Definition: rect.h:270
void Image(struct Pix *image, int x_pos, int y_pos)
Definition: scrollview.cpp:773
inT16 bottom() const
Definition: rect.h:61
bool BoxOutOfHTextline(const TBOX &box, const DENORM *denorm, bool debug) const
void set_bottom(int y)
Definition: rect.h:64
bool IsHorizontalType() const
Definition: colpartition.h:439
float x() const
Definition: points.h:209
const TBOX & bounding_box() const
Definition: blobbox.h:215
void rotate(const FCOORD &vec)
Definition: rect.h:189
int count(LIST var_list)
Definition: oldlist.cpp:103
int DivRounded(int a, int b)
Definition: helpers.h:173
void Pen(Color color)
Definition: scrollview.cpp:726
integer coordinate
Definition: points.h:30
int x_gap(const TBOX &box) const
Definition: rect.h:217