tesseract  4.00.00dev
tesseract::ColPartitionGrid Class Reference

#include <colpartitiongrid.h>

Inheritance diagram for tesseract::ColPartitionGrid:
tesseract::BBGrid< ColPartition, ColPartition_CLIST, ColPartition_C_IT > tesseract::GridBase

Public Member Functions

 ColPartitionGrid ()
 
 ColPartitionGrid (int gridsize, const ICOORD &bleft, const ICOORD &tright)
 
virtual ~ColPartitionGrid ()
 
void HandleClick (int x, int y)
 
void Merges (TessResultCallback2< bool, ColPartition *, TBOX *> *box_cb, TessResultCallback2< bool, const ColPartition *, const ColPartition *> *confirm_cb)
 
bool MergePart (TessResultCallback2< bool, ColPartition *, TBOX *> *box_cb, TessResultCallback2< bool, const ColPartition *, const ColPartition *> *confirm_cb, ColPartition *part)
 
int ComputeTotalOverlap (ColPartitionGrid **overlap_grid)
 
void FindOverlappingPartitions (const TBOX &box, const ColPartition *not_this, ColPartition_CLIST *parts)
 
ColPartition * BestMergeCandidate (const ColPartition *part, ColPartition_CLIST *candidates, bool debug, TessResultCallback2< bool, const ColPartition *, const ColPartition *> *confirm_cb, int *overlap_increase)
 
void SplitOverlappingPartitions (ColPartition_LIST *big_parts)
 
bool GridSmoothNeighbours (BlobTextFlowType source_type, Pix *nontext_map, const TBOX &im_box, const FCOORD &rerotation)
 
void ReflectInYAxis ()
 
void Deskew (const FCOORD &deskew)
 
void ExtractPartitionsAsBlocks (BLOCK_LIST *blocks, TO_BLOCK_LIST *to_blocks)
 
void SetTabStops (TabFind *tabgrid)
 
bool MakeColPartSets (PartSetVector *part_sets)
 
ColPartitionSet * MakeSingleColumnSet (WidthCallback *cb)
 
void ClaimBoxes ()
 
void ReTypeBlobs (BLOBNBOX_LIST *im_blobs)
 
void RecomputeBounds (int gridsize, const ICOORD &bleft, const ICOORD &tright, const ICOORD &vertical)
 
void GridFindMargins (ColPartitionSet **best_columns)
 
void ListFindMargins (ColPartitionSet **best_columns, ColPartition_LIST *parts)
 
void DeleteParts ()
 
void DeleteUnknownParts (TO_BLOCK *block)
 
void DeleteNonLeaderParts ()
 
void FindFigureCaptions ()
 
void FindPartitionPartners ()
 
void FindPartitionPartners (bool upper, ColPartition *part)
 
void FindVPartitionPartners (bool to_the_left, ColPartition *part)
 
void RefinePartitionPartners (bool get_desperate)
 
- Public Member Functions inherited from tesseract::BBGrid< ColPartition, ColPartition_CLIST, ColPartition_C_IT >
 BBGrid ()
 
 BBGrid (int gridsize, const ICOORD &bleft, const ICOORD &tright)
 
virtual ~BBGrid ()
 
void Init (int gridsize, const ICOORD &bleft, const ICOORD &tright)
 
void Clear ()
 
void ClearGridData (void(*free_method)(ColPartition *))
 
void InsertBBox (bool h_spread, bool v_spread, ColPartition *bbox)
 
void InsertPixPtBBox (int left, int bottom, Pix *pix, ColPartition *bbox)
 
void RemoveBBox (ColPartition *bbox)
 
bool RectangleEmpty (const TBOX &rect)
 
IntGrid * CountCellElements ()
 
ScrollView * MakeWindow (int x, int y, const char *window_name)
 
void DisplayBoxes (ScrollView *window)
 
void AssertNoDuplicates ()
 
- Public Member Functions inherited from tesseract::GridBase
 GridBase ()
 
 GridBase (int gridsize, const ICOORD &bleft, const ICOORD &tright)
 
virtual ~GridBase ()
 
void Init (int gridsize, const ICOORD &bleft, const ICOORD &tright)
 
int gridsize () const
 
int gridwidth () const
 
int gridheight () const
 
const ICOORD & bleft () const
 
const ICOORD & tright () const
 
void GridCoords (int x, int y, int *grid_x, int *grid_y) const
 
void ClipGridCoords (int *x, int *y) const
 

Additional Inherited Members

- Protected Attributes inherited from tesseract::BBGrid< ColPartition, ColPartition_CLIST, ColPartition_C_IT >
ColPartition_CLIST * grid_
 
- Protected Attributes inherited from tesseract::GridBase
int gridsize_
 
int gridwidth_
 
int gridheight_
 
int gridbuckets_
 
ICOORD bleft_
 
ICOORD tright_
 

Detailed Description

Definition at line 33 of file colpartitiongrid.h.

Constructor & Destructor Documentation

◆ ColPartitionGrid() [1/2]

tesseract::ColPartitionGrid::ColPartitionGrid ( )

Definition at line 67 of file colpartitiongrid.cpp.

67  {
68 }

◆ ColPartitionGrid() [2/2]

tesseract::ColPartitionGrid::ColPartitionGrid ( int  gridsize,
const ICOORD &  bleft,
const ICOORD &  tright 
)

Definition at line 69 of file colpartitiongrid.cpp.

71  : BBGrid<ColPartition, ColPartition_CLIST, ColPartition_C_IT>(gridsize,
72  bleft, tright) {
73 }
const ICOORD & bleft() const
Definition: bbgrid.h:73
int gridsize() const
Definition: bbgrid.h:64
const ICOORD & tright() const
Definition: bbgrid.h:76

◆ ~ColPartitionGrid()

tesseract::ColPartitionGrid::~ColPartitionGrid ( )
virtual

Definition at line 75 of file colpartitiongrid.cpp.

75  {
76 }

Member Function Documentation

◆ BestMergeCandidate()

ColPartition * tesseract::ColPartitionGrid::BestMergeCandidate ( const ColPartition *  part,
ColPartition_CLIST *  candidates,
bool  debug,
TessResultCallback2< bool, const ColPartition *, const ColPartition *> *  confirm_cb,
int *  overlap_increase 
)

Definition at line 409 of file colpartitiongrid.cpp.

412  {
413  if (overlap_increase != NULL)
414  *overlap_increase = 0;
415  if (candidates->empty())
416  return NULL;
417  int ok_overlap =
418  static_cast<int>(kTinyEnoughTextlineOverlapFraction * gridsize() + 0.5);
419  // The best neighbour to merge with is the one that causes least
420  // total pairwise overlap among all the neighbours.
421  // If more than one offers the same total overlap, choose the one
422  // with the least total area.
423  const TBOX& part_box = part->bounding_box();
424  ColPartition_C_IT it(candidates);
425  ColPartition* best_candidate = NULL;
426  // Find the total combined box of all candidates and the original.
427  TBOX full_box(part_box);
428  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
429  ColPartition* candidate = it.data();
430  full_box += candidate->bounding_box();
431  }
432  // Keep valid neighbours in a list.
433  ColPartition_CLIST neighbours;
434  // Now run a rect search of the merged box for overlapping neighbours, as
435  // we need anything that might be overlapped by the merged box.
436  FindOverlappingPartitions(full_box, part, &neighbours);
437  if (debug) {
438  tprintf("Finding best merge candidate from %d, %d neighbours for box:",
439  candidates->length(), neighbours.length());
440  part_box.print();
441  }
442  // If the best increase in overlap is positive, then we also check the
443  // worst non-candidate overlap. This catches the case of multiple good
444  // candidates that overlap each other when merged. If the worst
445  // non-candidate overlap is better than the best overlap, then return
446  // the worst non-candidate overlap instead.
447  ColPartition_CLIST non_candidate_neighbours;
448  non_candidate_neighbours.set_subtract(SortByBoxLeft<ColPartition>, true,
449  &neighbours, candidates);
450  int worst_nc_increase = 0;
451  int best_increase = MAX_INT32;
452  int best_area = 0;
453  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
454  ColPartition* candidate = it.data();
455  if (confirm_cb != NULL && !confirm_cb->Run(part, candidate)) {
456  if (debug) {
457  tprintf("Candidate not confirmed:");
458  candidate->bounding_box().print();
459  }
460  continue;
461  }
462  int increase = IncreaseInOverlap(part, candidate, ok_overlap, &neighbours);
463  const TBOX& cand_box = candidate->bounding_box();
464  if (best_candidate == NULL || increase < best_increase) {
465  best_candidate = candidate;
466  best_increase = increase;
467  best_area = cand_box.bounding_union(part_box).area() - cand_box.area();
468  if (debug) {
469  tprintf("New best merge candidate has increase %d, area %d, over box:",
470  increase, best_area);
471  full_box.print();
472  candidate->Print();
473  }
474  } else if (increase == best_increase) {
475  int area = cand_box.bounding_union(part_box).area() - cand_box.area();
476  if (area < best_area) {
477  best_area = area;
478  best_candidate = candidate;
479  }
480  }
481  increase = IncreaseInOverlap(part, candidate, ok_overlap,
482  &non_candidate_neighbours);
483  if (increase > worst_nc_increase)
484  worst_nc_increase = increase;
485  }
486  if (best_increase > 0) {
487  // If the worst non-candidate increase is less than the best increase
488  // including the candidates, then all the candidates can merge together
489  // and the increase in outside overlap would be less, so use that result,
490  // but only if each candidate is either a good diacritic merge with part,
491  // or an ok merge candidate with all the others.
492  // See TestCompatibleCandidates for more explanation and a picture.
493  if (worst_nc_increase < best_increase &&
494  TestCompatibleCandidates(*part, debug, candidates)) {
495  best_increase = worst_nc_increase;
496  }
497  }
498  if (overlap_increase != NULL)
499  *overlap_increase = best_increase;
500  return best_candidate;
501 }
const double kTinyEnoughTextlineOverlapFraction
inT32 area() const
Definition: rect.h:118
#define MAX_INT32
Definition: host.h:62
TBOX bounding_union(const TBOX &box) const
Definition: rect.cpp:129
virtual R Run(A1, A2)=0
#define tprintf(...)
Definition: tprintf.h:31
int gridsize() const
Definition: bbgrid.h:64
void FindOverlappingPartitions(const TBOX &box, const ColPartition *not_this, ColPartition_CLIST *parts)
TBOX
Definition: rect.h:30
void print() const
Definition: rect.h:270

◆ ClaimBoxes()

void tesseract::ColPartitionGrid::ClaimBoxes ( )

Definition at line 866 of file colpartitiongrid.cpp.

866  {
867  // Iterate the ColPartitions in the grid.
868  ColPartitionGridSearch gsearch(this);
869  gsearch.StartFullSearch();
870  ColPartition* part;
871  while ((part = gsearch.NextFullSearch()) != NULL) {
872  part->ClaimBoxes();
873  }
874 }
GridSearch< ColPartition, ColPartition_CLIST, ColPartition_C_IT > ColPartitionGridSearch
Definition: colpartition.h:932

◆ ComputeTotalOverlap()

int tesseract::ColPartitionGrid::ComputeTotalOverlap ( ColPartitionGrid **  overlap_grid)

Definition at line 322 of file colpartitiongrid.cpp.

322  {
323  int total_overlap = 0;
324  // Iterate the ColPartitions in the grid.
325  ColPartitionGridSearch gsearch(this);
326  gsearch.StartFullSearch();
327  ColPartition* part;
328  while ((part = gsearch.NextFullSearch()) != NULL) {
329  ColPartition_CLIST neighbors;
330  const TBOX& part_box = part->bounding_box();
331  FindOverlappingPartitions(part_box, part, &neighbors);
332  ColPartition_C_IT n_it(&neighbors);
333  bool any_part_overlap = false;
334  for (n_it.mark_cycle_pt(); !n_it.cycled_list(); n_it.forward()) {
335  const TBOX& n_box = n_it.data()->bounding_box();
336  int overlap = n_box.intersection(part_box).area();
337  if (overlap > 0 && overlap_grid != NULL) {
338  if (*overlap_grid == NULL) {
339  *overlap_grid = new ColPartitionGrid(gridsize(), bleft(), tright());
340  }
341  (*overlap_grid)->InsertBBox(true, true, n_it.data()->ShallowCopy());
342  if (!any_part_overlap) {
343  (*overlap_grid)->InsertBBox(true, true, part->ShallowCopy());
344  }
345  }
346  any_part_overlap = true;
347  total_overlap += overlap;
348  }
349  }
350  return total_overlap;
351 }
TBOX intersection(const TBOX &box) const
Definition: rect.cpp:87
GridSearch< ColPartition, ColPartition_CLIST, ColPartition_C_IT > ColPartitionGridSearch
Definition: colpartition.h:932
inT32 area() const
Definition: rect.h:118
const ICOORD & bleft() const
Definition: bbgrid.h:73
int gridsize() const
Definition: bbgrid.h:64
void FindOverlappingPartitions(const TBOX &box, const ColPartition *not_this, ColPartition_CLIST *parts)
const ICOORD & tright() const
Definition: bbgrid.h:76
Definition: rect.h:30

◆ DeleteNonLeaderParts()

void tesseract::ColPartitionGrid::DeleteNonLeaderParts ( )

Definition at line 1045 of file colpartitiongrid.cpp.

1045  {
1046  ColPartitionGridSearch gsearch(this);
1047  gsearch.StartFullSearch();
1048  ColPartition* part;
1049  while ((part = gsearch.NextFullSearch()) != NULL) {
1050  if (part->flow() != BTFT_LEADER) {
1051  gsearch.RemoveBBox();
1052  if (part->ReleaseNonLeaderBoxes()) {
1053  InsertBBox(true, true, part);
1054  gsearch.RepositionIterator();
1055  } else {
1056  delete part;
1057  }
1058  }
1059  }
1060 }
GridSearch< ColPartition, ColPartition_CLIST, ColPartition_C_IT > ColPartitionGridSearch
Definition: colpartition.h:932
void InsertBBox(bool h_spread, bool v_spread, ColPartition *bbox)
Definition: bbgrid.h:490

◆ DeleteParts()

void tesseract::ColPartitionGrid::DeleteParts ( )

Definition at line 1011 of file colpartitiongrid.cpp.

1011  {
1012  ColPartition_LIST dead_parts;
1013  ColPartition_IT dead_it(&dead_parts);
1014  ColPartitionGridSearch gsearch(this);
1015  gsearch.StartFullSearch();
1016  ColPartition* part;
1017  while ((part = gsearch.NextFullSearch()) != NULL) {
1018  part->DisownBoxes();
1019  dead_it.add_to_end(part); // Parts will be deleted on return.
1020  }
1021  Clear();
1022 }
GridSearch< ColPartition, ColPartition_CLIST, ColPartition_C_IT > ColPartitionGridSearch
Definition: colpartition.h:932

◆ DeleteUnknownParts()

void tesseract::ColPartitionGrid::DeleteUnknownParts ( TO_BLOCK *  block)

Definition at line 1026 of file colpartitiongrid.cpp.

1026  {
1027  ColPartitionGridSearch gsearch(this);
1028  gsearch.StartFullSearch();
1029  ColPartition* part;
1030  while ((part = gsearch.NextFullSearch()) != NULL) {
1031  if (part->blob_type() == BRT_UNKNOWN) {
1032  gsearch.RemoveBBox();
1033  // Once marked, the blobs will be swept up by DeleteUnownedNoise.
1034  part->set_flow(BTFT_NONTEXT);
1035  part->set_blob_type(BRT_NOISE);
1036  part->SetBlobTypes();
1037  part->DisownBoxes();
1038  delete part;
1039  }
1040  }
1041  block->DeleteUnownedNoise();
1042 }
GridSearch< ColPartition, ColPartition_CLIST, ColPartition_C_IT > ColPartitionGridSearch
Definition: colpartition.h:932
void DeleteUnownedNoise()
Definition: blobbox.cpp:1033

◆ Deskew()

void tesseract::ColPartitionGrid::Deskew ( const FCOORD &  deskew)

Definition at line 738 of file colpartitiongrid.cpp.

738  {
739  ColPartition_LIST parts;
740  ColPartition_IT part_it(&parts);
741  // Iterate the ColPartitions in the grid to extract them.
742  ColPartitionGridSearch gsearch(this);
743  gsearch.StartFullSearch();
744  ColPartition* part;
745  while ((part = gsearch.NextFullSearch()) != NULL) {
746  part_it.add_after_then_move(part);
747  }
748  // Rebuild the grid to the new size.
749  TBOX grid_box(bleft_, tright_);
750  grid_box.rotate_large(deskew);
751  Init(gridsize(), grid_box.botleft(), grid_box.topright());
752  // Reinitializing the grid with rotated coords also clears all the
753  // pointers, so parts will now own the ColPartitions. (Briefly).
754  for (part_it.move_to_first(); !part_it.empty(); part_it.forward()) {
755  part = part_it.extract();
756  part->ComputeLimits();
757  InsertBBox(true, true, part);
758  }
759 }
ICOORD tright_
Definition: bbgrid.h:92
GridSearch< ColPartition, ColPartition_CLIST, ColPartition_C_IT > ColPartitionGridSearch
Definition: colpartition.h:932
void Init(int gridsize, const ICOORD &bleft, const ICOORD &tright)
Definition: bbgrid.h:448
int gridsize() const
Definition: bbgrid.h:64
void InsertBBox(bool h_spread, bool v_spread, ColPartition *bbox)
Definition: bbgrid.h:490
Definition: rect.h:30

◆ ExtractPartitionsAsBlocks()

void tesseract::ColPartitionGrid::ExtractPartitionsAsBlocks ( BLOCK_LIST *  blocks,
TO_BLOCK_LIST *  to_blocks 
)

Definition at line 678 of file colpartitiongrid.cpp.

679  {
680  TO_BLOCK_IT to_block_it(to_blocks);
681  BLOCK_IT block_it(blocks);
682  // All partitions will be put on this list and deleted on return.
683  ColPartition_LIST parts;
684  ColPartition_IT part_it(&parts);
685  // Iterate the ColPartitions in the grid to extract them.
686  ColPartitionGridSearch gsearch(this);
687  gsearch.StartFullSearch();
688  ColPartition* part;
689  while ((part = gsearch.NextFullSearch()) != NULL) {
690  part_it.add_after_then_move(part);
691  // The partition has to be at least vaguely like text.
692  BlobRegionType blob_type = part->blob_type();
693  if (BLOBNBOX::IsTextType(blob_type) ||
694  (blob_type == BRT_UNKNOWN && part->boxes_count() > 1)) {
695  PolyBlockType type = blob_type == BRT_VERT_TEXT ? PT_VERTICAL_TEXT
696  : PT_FLOWING_TEXT;
697  // Get metrics from the row that will be used for the block.
698  TBOX box = part->bounding_box();
699  int median_width = part->median_width();
700  int median_height = part->median_size();
701  // Turn the partition into a TO_ROW.
702  TO_ROW* row = part->MakeToRow();
703  if (row == NULL) {
704  // This partition is dead.
705  part->DeleteBoxes();
706  continue;
707  }
708  BLOCK* block = new BLOCK("", true, 0, 0, box.left(), box.bottom(),
709  box.right(), box.top());
710  block->set_poly_block(new POLY_BLOCK(box, type));
711  TO_BLOCK* to_block = new TO_BLOCK(block);
712  TO_ROW_IT row_it(to_block->get_rows());
713  row_it.add_after_then_move(row);
714  // We haven't differentially rotated vertical and horizontal text at
715  // this point, so use width or height as appropriate.
716  if (blob_type == BRT_VERT_TEXT) {
717  to_block->line_size = static_cast<float>(median_width);
718  to_block->line_spacing = static_cast<float>(box.width());
719  to_block->max_blob_size = static_cast<float>(box.width() + 1);
720  } else {
721  to_block->line_size = static_cast<float>(median_height);
722  to_block->line_spacing = static_cast<float>(box.height());
723  to_block->max_blob_size = static_cast<float>(box.height() + 1);
724  }
725  block_it.add_to_end(block);
726  to_block_it.add_to_end(to_block);
727  } else {
728  // This partition is dead.
729  part->DeleteBoxes();
730  }
731  }
732  Clear();
733  // Now it is safe to delete the ColPartitions as parts goes out of scope.
734 }
GridSearch< ColPartition, ColPartition_CLIST, ColPartition_C_IT > ColPartitionGridSearch
Definition: colpartition.h:932
void set_poly_block(POLY_BLOCK *blk)
set the poly block
Definition: pdblock.h:57
PolyBlockType
Definition: publictypes.h:41
inT16 left() const
Definition: rect.h:68
float line_size
Definition: blobbox.h:781
inT16 top() const
Definition: rect.h:54
Definition: rect.h:30
inT16 height() const
Definition: rect.h:104
inT16 right() const
Definition: rect.h:75
inT16 width() const
Definition: rect.h:111
float max_blob_size
Definition: blobbox.h:782
inT16 bottom() const
Definition: rect.h:61
static bool IsTextType(BlobRegionType type)
Definition: blobbox.h:403
TO_ROW_LIST * get_rows()
Definition: blobbox.h:700
BlobRegionType
Definition: blobbox.h:57
float line_spacing
Definition: blobbox.h:775
Definition: ocrblock.h:30

◆ FindFigureCaptions()

void tesseract::ColPartitionGrid::FindFigureCaptions ( )

Definition at line 1063 of file colpartitiongrid.cpp.

1063  {
1064  // For each image region find its best candidate text caption region,
1065  // if any and mark it as such.
1066  ColPartitionGridSearch gsearch(this);
1067  gsearch.StartFullSearch();
1068  ColPartition* part;
1069  while ((part = gsearch.NextFullSearch()) != NULL) {
1070  if (part->IsImageType()) {
1071  const TBOX& part_box = part->bounding_box();
1072  bool debug = AlignedBlob::WithinTestRegion(2, part_box.left(),
1073  part_box.bottom());
1074  ColPartition* best_caption = NULL;
1075  int best_dist = 0; // Distance to best_caption.
1076  int best_upper = 0; // Direction of best_caption.
1077  // Handle both lower and upper directions.
1078  for (int upper = 0; upper < 2; ++upper) {
1079  ColPartition_C_IT partner_it(upper ? part->upper_partners()
1080  : part->lower_partners());
1081  // If there are no image partners, then this direction is ok.
1082  for (partner_it.mark_cycle_pt(); !partner_it.cycled_list();
1083  partner_it.forward()) {
1084  ColPartition* partner = partner_it.data();
1085  if (partner->IsImageType()) {
1086  break;
1087  }
1088  }
1089  if (!partner_it.cycled_list()) continue;
1090  // Find the nearest totally overlapping text partner.
1091  for (partner_it.mark_cycle_pt(); !partner_it.cycled_list();
1092  partner_it.forward()) {
1093  ColPartition* partner = partner_it.data();
1094  if (!partner->IsTextType() || partner->type() == PT_TABLE) continue;
1095  const TBOX& partner_box = partner->bounding_box();
1096  if (debug) {
1097  tprintf("Finding figure captions for image part:");
1098  part_box.print();
1099  tprintf("Considering partner:");
1100  partner_box.print();
1101  }
1102  if (partner_box.left() >= part_box.left() &&
1103  partner_box.right() <= part_box.right()) {
1104  int dist = partner_box.y_gap(part_box);
1105  if (best_caption == NULL || dist < best_dist) {
1106  best_dist = dist;
1107  best_caption = partner;
1108  best_upper = upper;
1109  }
1110  }
1111  }
1112  }
1113  if (best_caption != NULL) {
1114  if (debug) {
1115  tprintf("Best caption candidate:");
1116  best_caption->bounding_box().print();
1117  }
1118  // We have a candidate caption. Qualify it as being separable from
1119  // any body text. We are looking for either a small number of lines
1120  // or a big gap that indicates a separation from the body text.
1121  int line_count = 0;
1122  int biggest_gap = 0;
1123  int smallest_gap = MAX_INT16;
1124  int total_height = 0;
1125  int mean_height = 0;
1126  ColPartition* end_partner = NULL;
1127  ColPartition* next_partner = NULL;
1128  for (ColPartition* partner = best_caption; partner != NULL &&
1129  line_count <= kMaxCaptionLines;
1130  partner = next_partner) {
1131  if (!partner->IsTextType()) {
1132  end_partner = partner;
1133  break;
1134  }
1135  ++line_count;
1136  total_height += partner->bounding_box().height();
1137  next_partner = partner->SingletonPartner(best_upper);
1138  if (next_partner != NULL) {
1139  int gap = partner->bounding_box().y_gap(
1140  next_partner->bounding_box());
1141  if (gap > biggest_gap) {
1142  biggest_gap = gap;
1143  end_partner = next_partner;
1144  mean_height = total_height / line_count;
1145  } else if (gap < smallest_gap) {
1146  smallest_gap = gap;
1147  }
1148  // If the gap looks big compared to the text size and the smallest
1149  // gap seen so far, then we can stop.
1150  if (biggest_gap > mean_height * kMinCaptionGapHeightRatio &&
1151  biggest_gap > smallest_gap * kMinCaptionGapRatio)
1152  break;
1153  }
1154  }
1155  if (debug) {
1156  tprintf("Line count=%d, biggest gap %d, smallest%d, mean height %d\n",
1157  line_count, biggest_gap, smallest_gap, mean_height);
1158  if (end_partner != NULL) {
1159  tprintf("End partner:");
1160  end_partner->bounding_box().print();
1161  }
1162  }
1163  if (next_partner == NULL && line_count <= kMaxCaptionLines)
1164  end_partner = NULL; // No gap, but line count is small.
1165  if (line_count <= kMaxCaptionLines) {
1166  // This is a qualified caption. Mark the text as caption.
1167  for (ColPartition* partner = best_caption; partner != NULL &&
1168  partner != end_partner;
1169  partner = next_partner) {
1170  partner->set_type(PT_CAPTION_TEXT);
1171  partner->SetBlobTypes();
1172  if (debug) {
1173  tprintf("Set caption type for partition:");
1174  partner->bounding_box().print();
1175  }
1176  next_partner = partner->SingletonPartner(best_upper);
1177  }
1178  }
1179  }
1180  }
1181  }
1182 }
GridSearch< ColPartition, ColPartition_CLIST, ColPartition_C_IT > ColPartitionGridSearch
Definition: colpartition.h:932
const double kMinCaptionGapRatio
const double kMinCaptionGapHeightRatio
#define MAX_INT16
Definition: host.h:61
#define tprintf(...)
Definition: tprintf.h:31
Definition: capi.h:94
inT16 left() const
Definition: rect.h:68
int y_gap(const TBOX &box) const
Definition: rect.h:225
const int kMaxCaptionLines
static bool WithinTestRegion(int detail_level, int x, int y)
Definition: rect.h:30
inT16 right() const
Definition: rect.h:75
void print() const
Definition: rect.h:270
inT16 bottom() const
Definition: rect.h:61

◆ FindOverlappingPartitions()

void tesseract::ColPartitionGrid::FindOverlappingPartitions ( const TBOX &  box,
const ColPartition *  not_this,
ColPartition_CLIST *  parts 
)

Definition at line 356 of file colpartitiongrid.cpp.

358  {
359  ColPartitionGridSearch rsearch(this);
360  rsearch.StartRectSearch(box);
361  ColPartition* part;
362  while ((part = rsearch.NextRectSearch()) != NULL) {
363  if (part != not_this)
364  parts->add_sorted(SortByBoxLeft<ColPartition>, true, part);
365  }
366 }
GridSearch< ColPartition, ColPartition_CLIST, ColPartition_C_IT > ColPartitionGridSearch
Definition: colpartition.h:932

◆ FindPartitionPartners() [1/2]

void tesseract::ColPartitionGrid::FindPartitionPartners ( )

Definition at line 1188 of file colpartitiongrid.cpp.

1188  {
1189  ColPartitionGridSearch gsearch(this);
1190  gsearch.StartFullSearch();
1191  ColPartition* part;
1192  while ((part = gsearch.NextFullSearch()) != NULL) {
1193  if (part->IsVerticalType()) {
1194  FindVPartitionPartners(true, part);
1195  FindVPartitionPartners(false, part);
1196  } else {
1197  FindPartitionPartners(true, part);
1198  FindPartitionPartners(false, part);
1199  }
1200  }
1201 }
GridSearch< ColPartition, ColPartition_CLIST, ColPartition_C_IT > ColPartitionGridSearch
Definition: colpartition.h:932
void FindVPartitionPartners(bool to_the_left, ColPartition *part)

◆ FindPartitionPartners() [2/2]

void tesseract::ColPartitionGrid::FindPartitionPartners ( bool  upper,
ColPartition *  part 
)

Definition at line 1205 of file colpartitiongrid.cpp.

1205  {
1206  if (part->type() == PT_NOISE)
1207  return; // Noise is not allowed to partner anything.
1208  const TBOX& box = part->bounding_box();
1209  int top = part->median_top();
1210  int bottom = part->median_bottom();
1211  int height = top - bottom;
1212  int mid_y = (bottom + top) / 2;
1213  ColPartitionGridSearch vsearch(this);
1214  // Search down for neighbour below
1215  vsearch.StartVerticalSearch(box.left(), box.right(), part->MidY());
1216  ColPartition* neighbour;
1217  ColPartition* best_neighbour = NULL;
1218  int best_dist = MAX_INT32;
1219  while ((neighbour = vsearch.NextVerticalSearch(!upper)) != NULL) {
1220  if (neighbour == part || neighbour->type() == PT_NOISE)
1221  continue; // Noise is not allowed to partner anything.
1222  int neighbour_bottom = neighbour->median_bottom();
1223  int neighbour_top = neighbour->median_top();
1224  int neighbour_y = (neighbour_bottom + neighbour_top) / 2;
1225  if (upper != (neighbour_y > mid_y))
1226  continue;
1227  if (!part->HOverlaps(*neighbour) && !part->WithinSameMargins(*neighbour))
1228  continue;
1229  if (!part->TypesMatch(*neighbour)) {
1230  if (best_neighbour == NULL)
1231  best_neighbour = neighbour;
1232  continue;
1233  }
1234  int dist = upper ? neighbour_bottom - top : bottom - neighbour_top;
1235  if (dist <= kMaxPartitionSpacing * height) {
1236  if (dist < best_dist) {
1237  best_dist = dist;
1238  best_neighbour = neighbour;
1239  }
1240  } else {
1241  break;
1242  }
1243  }
1244  if (best_neighbour != NULL)
1245  part->AddPartner(upper, best_neighbour);
1246 }
Definition: capi.h:95
GridSearch< ColPartition, ColPartition_CLIST, ColPartition_C_IT > ColPartitionGridSearch
Definition: colpartition.h:932
#define MAX_INT32
Definition: host.h:62
inT16 left() const
Definition: rect.h:68
const double kMaxPartitionSpacing
Definition: rect.h:30
inT16 right() const
Definition: rect.h:75

◆ FindVPartitionPartners()

void tesseract::ColPartitionGrid::FindVPartitionPartners ( bool  to_the_left,
ColPartition *  part 
)

Definition at line 1250 of file colpartitiongrid.cpp.

1251  {
1252  if (part->type() == PT_NOISE)
1253  return; // Noise is not allowed to partner anything.
1254  const TBOX& box = part->bounding_box();
1255  int left = part->median_left();
1256  int right = part->median_right();
1257  int width = right - left;
1258  int mid_x = (left + right) / 2;
1259  ColPartitionGridSearch hsearch(this);
1260  // Search left for neighbour to_the_left
1261  hsearch.StartSideSearch(mid_x, box.bottom(), box.top());
1262  ColPartition* neighbour;
1263  ColPartition* best_neighbour = NULL;
1264  int best_dist = MAX_INT32;
1265  while ((neighbour = hsearch.NextSideSearch(to_the_left)) != NULL) {
1266  if (neighbour == part || neighbour->type() == PT_NOISE)
1267  continue; // Noise is not allowed to partner anything.
1268  int neighbour_left = neighbour->median_left();
1269  int neighbour_right = neighbour->median_right();
1270  int neighbour_x = (neighbour_left + neighbour_right) / 2;
1271  if (to_the_left != (neighbour_x < mid_x))
1272  continue;
1273  if (!part->VOverlaps(*neighbour))
1274  continue;
1275  if (!part->TypesMatch(*neighbour))
1276  continue; // Only match to other vertical text.
1277  int dist = to_the_left ? left - neighbour_right : neighbour_left - right;
1278  if (dist <= kMaxPartitionSpacing * width) {
1279  if (dist < best_dist || best_neighbour == NULL) {
1280  best_dist = dist;
1281  best_neighbour = neighbour;
1282  }
1283  } else {
1284  break;
1285  }
1286  }
1287  // For vertical partitions, the upper partner is to the left, and lower is
1288  // to the right.
1289  if (best_neighbour != NULL)
1290  part->AddPartner(to_the_left, best_neighbour);
1291 }
Definition: capi.h:95
GridSearch< ColPartition, ColPartition_CLIST, ColPartition_C_IT > ColPartitionGridSearch
Definition: colpartition.h:932
#define MAX_INT32
Definition: host.h:62
const double kMaxPartitionSpacing
inT16 top() const
Definition: rect.h:54
Definition: rect.h:30
inT16 bottom() const
Definition: rect.h:61

◆ GridFindMargins()

void tesseract::ColPartitionGrid::GridFindMargins ( ColPartitionSet **  best_columns)

Definition at line 969 of file colpartitiongrid.cpp.

969  {
970  // Iterate the ColPartitions in the grid.
971  ColPartitionGridSearch gsearch(this);
972  gsearch.StartFullSearch();
973  ColPartition* part;
974  while ((part = gsearch.NextFullSearch()) != NULL) {
975  // Set up a rectangle search x-bounded by the column and y by the part.
976  ColPartitionSet* columns = best_columns != NULL
977  ? best_columns[gsearch.GridY()]
978  : NULL;
979  FindPartitionMargins(columns, part);
980  const TBOX& box = part->bounding_box();
981  if (AlignedBlob::WithinTestRegion(2, box.left(), box.bottom())) {
982  tprintf("Computed margins for part:");
983  part->Print();
984  }
985  }
986 }
GridSearch< ColPartition, ColPartition_CLIST, ColPartition_C_IT > ColPartitionGridSearch
Definition: colpartition.h:932
#define tprintf(...)
Definition: tprintf.h:31
inT16 left() const
Definition: rect.h:68
static bool WithinTestRegion(int detail_level, int x, int y)
Definition: rect.h:30
inT16 bottom() const
Definition: rect.h:61

◆ GridSmoothNeighbours()

bool tesseract::ColPartitionGrid::GridSmoothNeighbours ( BlobTextFlowType  source_type,
Pix *  nontext_map,
const TBOX &  im_box,
const FCOORD &  rerotation 
)

Definition at line 628 of file colpartitiongrid.cpp.

631  {
632  // Iterate the ColPartitions in the grid.
633  ColPartitionGridSearch gsearch(this);
634  gsearch.StartFullSearch();
635  ColPartition* part;
636  bool any_changed = false;
637  while ((part = gsearch.NextFullSearch()) != NULL) {
638  if (part->flow() != source_type || BLOBNBOX::IsLineType(part->blob_type()))
639  continue;
640  const TBOX& box = part->bounding_box();
641  bool debug = AlignedBlob::WithinTestRegion(2, box.left(), box.bottom());
642  if (SmoothRegionType(nontext_map, im_box, rotation, debug, part))
643  any_changed = true;
644  }
645  return any_changed;
646 }
GridSearch< ColPartition, ColPartition_CLIST, ColPartition_C_IT > ColPartitionGridSearch
Definition: colpartition.h:932
inT16 left() const
Definition: rect.h:68
static bool IsLineType(BlobRegionType type)
Definition: blobbox.h:411
static bool WithinTestRegion(int detail_level, int x, int y)
Definition: rect.h:30
inT16 bottom() const
Definition: rect.h:61

◆ HandleClick()

void tesseract::ColPartitionGrid::HandleClick ( int  x,
int  y 
)
virtual

Reimplemented from tesseract::BBGrid< ColPartition, ColPartition_CLIST, ColPartition_C_IT >.

Definition at line 79 of file colpartitiongrid.cpp.

79  {
80  BBGrid<ColPartition,
81  ColPartition_CLIST, ColPartition_C_IT>::HandleClick(x, y);
82  // Run a radial search for partitions that overlap.
83  ColPartitionGridSearch radsearch(this);
84  radsearch.SetUniqueMode(true);
85  radsearch.StartRadSearch(x, y, 1);
86  ColPartition* neighbour;
87  FCOORD click(x, y);
88  while ((neighbour = radsearch.NextRadSearch()) != NULL) {
89  const TBOX& nbox = neighbour->bounding_box();
90  if (nbox.contains(click)) {
91  tprintf("Block box:");
92  neighbour->bounding_box().print();
93  neighbour->Print();
94  }
95  }
96 }
Definition: points.h:189
GridSearch< ColPartition, ColPartition_CLIST, ColPartition_C_IT > ColPartitionGridSearch
Definition: colpartition.h:932
void HandleClick(int x, int y)
#define tprintf(...)
Definition: tprintf.h:31
bool contains(const FCOORD pt) const
Definition: rect.h:323
Definition: rect.h:30

◆ ListFindMargins()

void tesseract::ColPartitionGrid::ListFindMargins ( ColPartitionSet **  best_columns,
ColPartition_LIST *  parts 
)

Definition at line 993 of file colpartitiongrid.cpp.

994  {
995  ColPartition_IT part_it(parts);
996  for (part_it.mark_cycle_pt(); !part_it.cycled_list(); part_it.forward()) {
997  ColPartition* part = part_it.data();
998  ColPartitionSet* columns = NULL;
999  if (best_columns != NULL) {
1000  const TBOX& part_box = part->bounding_box();
1001  // Get the columns from the y grid coord.
1002  int grid_x, grid_y;
1003  GridCoords(part_box.left(), part_box.bottom(), &grid_x, &grid_y);
1004  columns = best_columns[grid_y];
1005  }
1006  FindPartitionMargins(columns, part);
1007  }
1008 }
inT16 left() const
Definition: rect.h:68
Definition: rect.h:30
inT16 bottom() const
Definition: rect.h:61
void GridCoords(int x, int y, int *grid_x, int *grid_y) const
Definition: bbgrid.cpp:54

◆ MakeColPartSets()

bool tesseract::ColPartitionGrid::MakeColPartSets ( PartSetVector *  part_sets)

Definition at line 786 of file colpartitiongrid.cpp.

786  {
787  ColPartition_LIST* part_lists = new ColPartition_LIST[gridheight()];
788  part_sets->reserve(gridheight());
789  // Iterate the ColPartitions in the grid to get parts onto lists for the
790  // y bottom of each.
791  ColPartitionGridSearch gsearch(this);
792  gsearch.StartFullSearch();
793  ColPartition* part;
794  bool any_parts_found = false;
795  while ((part = gsearch.NextFullSearch()) != NULL) {
796  BlobRegionType blob_type = part->blob_type();
797  if (blob_type != BRT_NOISE &&
798  (blob_type != BRT_UNKNOWN || !part->boxes()->singleton())) {
799  int grid_x, grid_y;
800  const TBOX& part_box = part->bounding_box();
801  GridCoords(part_box.left(), part_box.bottom(), &grid_x, &grid_y);
802  ColPartition_IT part_it(&part_lists[grid_y]);
803  part_it.add_to_end(part);
804  any_parts_found = true;
805  }
806  }
807  if (any_parts_found) {
808  for (int grid_y = 0; grid_y < gridheight(); ++grid_y) {
809  ColPartitionSet* line_set = NULL;
810  if (!part_lists[grid_y].empty()) {
811  line_set = new ColPartitionSet(&part_lists[grid_y]);
812  }
813  part_sets->push_back(line_set);
814  }
815  }
816  delete [] part_lists;
817  return any_parts_found;
818 }
GridSearch< ColPartition, ColPartition_CLIST, ColPartition_C_IT > ColPartitionGridSearch
Definition: colpartition.h:932
int gridheight() const
Definition: bbgrid.h:70
inT16 left() const
Definition: rect.h:68
Definition: rect.h:30
inT16 bottom() const
Definition: rect.h:61
BlobRegionType
Definition: blobbox.h:57
void GridCoords(int x, int y, int *grid_x, int *grid_y) const
Definition: bbgrid.cpp:54

◆ MakeSingleColumnSet()

ColPartitionSet * tesseract::ColPartitionGrid::MakeSingleColumnSet ( WidthCallback *  cb)

Definition at line 824 of file colpartitiongrid.cpp.

824  {
825  ColPartition* single_column_part = NULL;
826  // Iterate the ColPartitions in the grid to get parts onto lists for the
827  // y bottom of each.
828  ColPartitionGridSearch gsearch(this);
829  gsearch.StartFullSearch();
830  ColPartition* part;
831  while ((part = gsearch.NextFullSearch()) != NULL) {
832  BlobRegionType blob_type = part->blob_type();
833  if (blob_type != BRT_NOISE &&
834  (blob_type != BRT_UNKNOWN || !part->boxes()->singleton())) {
835  // Consider for single column.
836  BlobTextFlowType flow = part->flow();
837  if ((blob_type == BRT_TEXT &&
838  (flow == BTFT_STRONG_CHAIN || flow == BTFT_CHAIN ||
839  flow == BTFT_LEADER || flow == BTFT_TEXT_ON_IMAGE)) ||
840  blob_type == BRT_RECTIMAGE || blob_type == BRT_POLYIMAGE) {
841  if (single_column_part == NULL) {
842  single_column_part = part->ShallowCopy();
843  single_column_part->set_blob_type(BRT_TEXT);
844  // Copy the tabs from itself to properly setup the margins.
845  single_column_part->CopyLeftTab(*single_column_part, false);
846  single_column_part->CopyRightTab(*single_column_part, false);
847  } else {
848  if (part->left_key() < single_column_part->left_key())
849  single_column_part->CopyLeftTab(*part, false);
850  if (part->right_key() > single_column_part->right_key())
851  single_column_part->CopyRightTab(*part, false);
852  }
853  }
854  }
855  }
856  if (single_column_part != NULL) {
857  // Make a ColPartitionSet out of the single_column_part as a candidate
858  // for the single column case.
859  single_column_part->SetColumnGoodness(cb);
860  return new ColPartitionSet(single_column_part);
861  }
862  return NULL;
863 }
GridSearch< ColPartition, ColPartition_CLIST, ColPartition_C_IT > ColPartitionGridSearch
Definition: colpartition.h:932
BlobTextFlowType
Definition: blobbox.h:99
BlobRegionType
Definition: blobbox.h:57

◆ MergePart()

bool tesseract::ColPartitionGrid::MergePart ( TessResultCallback2< bool, ColPartition *, TBOX *> *  box_cb,
TessResultCallback2< bool, const ColPartition *, const ColPartition *> *  confirm_cb,
ColPartition *  part 
)

Definition at line 126 of file colpartitiongrid.cpp.

130  {
131  if (part->IsUnMergeableType())
132  return false;
133  bool any_done = false;
134  // Repeatedly merge part while we find a best merge candidate that works.
135  bool merge_done = false;
136  do {
137  merge_done = false;
138  TBOX box = part->bounding_box();
139  bool debug = AlignedBlob::WithinTestRegion(2, box.left(), box.bottom());
140  if (debug) {
141  tprintf("Merge candidate:");
142  box.print();
143  }
144  // Set up a rectangle search bounded by the part.
145  if (!box_cb->Run(part, &box))
146  continue;
147  // Create a list of merge candidates.
148  ColPartition_CLIST merge_candidates;
149  FindMergeCandidates(part, box, debug, &merge_candidates);
150  // Find the best merge candidate based on minimal overlap increase.
151  int overlap_increase;
152  ColPartition* neighbour = BestMergeCandidate(part, &merge_candidates, debug,
153  confirm_cb,
154  &overlap_increase);
155  if (neighbour != NULL && overlap_increase <= 0) {
156  if (debug) {
157  tprintf("Merging:hoverlap=%d, voverlap=%d, OLI=%d\n",
158  part->HCoreOverlap(*neighbour), part->VCoreOverlap(*neighbour),
159  overlap_increase);
160  }
161  // Looks like a good candidate so merge it.
162  RemoveBBox(neighbour);
163  // We will modify the box of part, so remove it from the grid, merge
164  // it and then re-insert it into the grid.
165  RemoveBBox(part);
166  part->Absorb(neighbour, NULL);
167  InsertBBox(true, true, part);
168  merge_done = true;
169  any_done = true;
170  } else if (neighbour != NULL) {
171  if (debug) {
172  tprintf("Overlapped when merged with increase %d: ", overlap_increase);
173  neighbour->bounding_box().print();
174  }
175  } else if (debug) {
176  tprintf("No candidate neighbour returned\n");
177  }
178  } while (merge_done);
179  return any_done;
180 }
virtual R Run(A1, A2)=0
#define tprintf(...)
Definition: tprintf.h:31
inT16 left() const
Definition: rect.h:68
void InsertBBox(bool h_spread, bool v_spread, ColPartition *bbox)
Definition: bbgrid.h:490
static bool WithinTestRegion(int detail_level, int x, int y)
ColPartition * BestMergeCandidate(const ColPartition *part, ColPartition_CLIST *candidates, bool debug, TessResultCallback2< bool, const ColPartition *, const ColPartition *> *confirm_cb, int *overlap_increase)
Definition: rect.h:30
void print() const
Definition: rect.h:270
inT16 bottom() const
Definition: rect.h:61

◆ Merges()

void tesseract::ColPartitionGrid::Merges ( TessResultCallback2< bool, ColPartition *, TBOX *> *  box_cb,
TessResultCallback2< bool, const ColPartition *, const ColPartition *> *  confirm_cb 
)

Definition at line 105 of file colpartitiongrid.cpp.

108  {
109  // Iterate the ColPartitions in the grid.
110  ColPartitionGridSearch gsearch(this);
111  gsearch.StartFullSearch();
112  ColPartition* part;
113  while ((part = gsearch.NextFullSearch()) != NULL) {
114  if (MergePart(box_cb, confirm_cb, part))
115  gsearch.RepositionIterator();
116  }
117  delete box_cb;
118  delete confirm_cb;
119 }
GridSearch< ColPartition, ColPartition_CLIST, ColPartition_C_IT > ColPartitionGridSearch
Definition: colpartition.h:932
bool MergePart(TessResultCallback2< bool, ColPartition *, TBOX *> *box_cb, TessResultCallback2< bool, const ColPartition *, const ColPartition *> *confirm_cb, ColPartition *part)

◆ RecomputeBounds()

void tesseract::ColPartitionGrid::RecomputeBounds ( int  gridsize,
const ICOORD &  bleft,
const ICOORD &  tright,
const ICOORD &  vertical 
)

Definition at line 940 of file colpartitiongrid.cpp.

943  {
944  ColPartition_LIST saved_parts;
945  ColPartition_IT part_it(&saved_parts);
946  // Iterate the ColPartitions in the grid to get parts onto a list.
947  ColPartitionGridSearch gsearch(this);
948  gsearch.StartFullSearch();
949  ColPartition* part;
950  while ((part = gsearch.NextFullSearch()) != NULL) {
951  part_it.add_to_end(part);
952  }
953  // Reinitialize grid to the new size.
954  Init(gridsize, bleft, tright);
955  // Recompute the bounds of the parts and put them back in the new grid.
956  for (part_it.move_to_first(); !part_it.empty(); part_it.forward()) {
957  part = part_it.extract();
958  part->set_vertical(vertical);
959  part->ComputeLimits();
960  InsertBBox(true, true, part);
961  }
962 }
GridSearch< ColPartition, ColPartition_CLIST, ColPartition_C_IT > ColPartitionGridSearch
Definition: colpartition.h:932
void Init(int gridsize, const ICOORD &bleft, const ICOORD &tright)
Definition: bbgrid.h:448
int gridsize() const
Definition: bbgrid.h:64
void InsertBBox(bool h_spread, bool v_spread, ColPartition *bbox)
Definition: bbgrid.h:490

◆ RefinePartitionPartners()

void tesseract::ColPartitionGrid::RefinePartitionPartners ( bool  get_desperate)

Definition at line 1296 of file colpartitiongrid.cpp.

1296  {
1297  ColPartitionGridSearch gsearch(this);
1298  // Refine in type order so that chasing multiple partners can be done
1299  // before eliminating type mis-matching partners.
1300  for (int type = PT_UNKNOWN + 1; type <= PT_COUNT; type++) {
1301  // Iterate the ColPartitions in the grid.
1302  gsearch.StartFullSearch();
1303  ColPartition* part;
1304  while ((part = gsearch.NextFullSearch()) != NULL) {
1305  part->RefinePartners(static_cast<PolyBlockType>(type),
1306  get_desperate, this);
1307  // Iterator may have been messed up by a merge.
1308  gsearch.RepositionIterator();
1309  }
1310  }
1311 }
GridSearch< ColPartition, ColPartition_CLIST, ColPartition_C_IT > ColPartitionGridSearch
Definition: colpartition.h:932
Definition: capi.h:95

◆ ReflectInYAxis()

void tesseract::ColPartitionGrid::ReflectInYAxis ( )

Definition at line 650 of file colpartitiongrid.cpp.

650  {
651  ColPartition_LIST parts;
652  ColPartition_IT part_it(&parts);
653  // Iterate the ColPartitions in the grid to extract them.
654  ColPartitionGridSearch gsearch(this);
655  gsearch.StartFullSearch();
656  ColPartition* part;
657  while ((part = gsearch.NextFullSearch()) != NULL) {
658  part_it.add_after_then_move(part);
659  }
660  ICOORD bot_left(-tright().x(), bleft().y());
661  ICOORD top_right(-bleft().x(), tright().y());
662  // Reinitializing the grid with reflected coords also clears all the
663  // pointers, so parts will now own the ColPartitions. (Briefly).
664  Init(gridsize(), bot_left, top_right);
665  for (part_it.move_to_first(); !part_it.empty(); part_it.forward()) {
666  part = part_it.extract();
667  part->ReflectInYAxis();
668  InsertBBox(true, true, part);
669  }
670 }
GridSearch< ColPartition, ColPartition_CLIST, ColPartition_C_IT > ColPartitionGridSearch
Definition: colpartition.h:932
void Init(int gridsize, const ICOORD &bleft, const ICOORD &tright)
Definition: bbgrid.h:448
const ICOORD & bleft() const
Definition: bbgrid.h:73
int gridsize() const
Definition: bbgrid.h:64
void InsertBBox(bool h_spread, bool v_spread, ColPartition *bbox)
Definition: bbgrid.h:490
const ICOORD & tright() const
Definition: bbgrid.h:76
integer coordinate
Definition: points.h:30

◆ ReTypeBlobs()

void tesseract::ColPartitionGrid::ReTypeBlobs ( BLOBNBOX_LIST *  im_blobs)

Definition at line 879 of file colpartitiongrid.cpp.

879  {
880  BLOBNBOX_IT im_blob_it(im_blobs);
881  ColPartition_LIST dead_parts;
882  ColPartition_IT dead_part_it(&dead_parts);
883  // Iterate the ColPartitions in the grid.
884  ColPartitionGridSearch gsearch(this);
885  gsearch.StartFullSearch();
886  ColPartition* part;
887  while ((part = gsearch.NextFullSearch()) != NULL) {
888  BlobRegionType blob_type = part->blob_type();
889  BlobTextFlowType flow = part->flow();
890  bool any_blobs_moved = false;
891  if (blob_type == BRT_POLYIMAGE || blob_type == BRT_RECTIMAGE) {
892  BLOBNBOX_C_IT blob_it(part->boxes());
893  for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
894  BLOBNBOX* blob = blob_it.data();
895  im_blob_it.add_after_then_move(blob);
896  }
897  } else if (blob_type != BRT_NOISE) {
898  // Make sure the blobs are marked with the correct type and flow.
899  BLOBNBOX_C_IT blob_it(part->boxes());
900  for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
901  BLOBNBOX* blob = blob_it.data();
902  if (blob->region_type() == BRT_NOISE) {
903  // TODO(rays) Deprecated. Change this section to an assert to verify
904  // and then delete.
905  ASSERT_HOST(blob->cblob()->area() != 0);
906  blob->set_owner(NULL);
907  blob_it.extract();
908  any_blobs_moved = true;
909  } else {
910  blob->set_region_type(blob_type);
911  if (blob->flow() != BTFT_LEADER)
912  blob->set_flow(flow);
913  }
914  }
915  }
916  if (blob_type == BRT_NOISE || part->boxes()->empty()) {
917  BLOBNBOX_C_IT blob_it(part->boxes());
918  part->DisownBoxes();
919  dead_part_it.add_to_end(part);
920  gsearch.RemoveBBox();
921  for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
922  BLOBNBOX* blob = blob_it.data();
923  if (blob->cblob()->area() == 0) {
924  // Any blob with zero area is a fake image blob and should be deleted.
925  delete blob->cblob();
926  delete blob;
927  }
928  }
929  } else if (any_blobs_moved) {
930  gsearch.RemoveBBox();
931  part->ComputeLimits();
932  InsertBBox(true, true, part);
933  gsearch.RepositionIterator();
934  }
935  }
936 }
GridSearch< ColPartition, ColPartition_CLIST, ColPartition_C_IT > ColPartitionGridSearch
Definition: colpartition.h:932
inT32 area()
Definition: stepblob.cpp:270
C_BLOB * cblob() const
Definition: blobbox.h:253
BlobRegionType region_type() const
Definition: blobbox.h:268
void set_region_type(BlobRegionType new_type)
Definition: blobbox.h:271
#define ASSERT_HOST(x)
Definition: errcode.h:84
void set_owner(tesseract::ColPartition *new_owner)
Definition: blobbox.h:340
void InsertBBox(bool h_spread, bool v_spread, ColPartition *bbox)
Definition: bbgrid.h:490
void set_flow(BlobTextFlowType value)
Definition: blobbox.h:283
BlobTextFlowType
Definition: blobbox.h:99
BlobRegionType
Definition: blobbox.h:57
BlobTextFlowType flow() const
Definition: blobbox.h:280

◆ SetTabStops()

void tesseract::ColPartitionGrid::SetTabStops ( TabFind *  tabgrid)

Definition at line 762 of file colpartitiongrid.cpp.

762  {
763  // Iterate the ColPartitions in the grid.
764  ColPartitionGridSearch gsearch(this);
765  gsearch.StartFullSearch();
766  ColPartition* part;
767  while ((part = gsearch.NextFullSearch()) != NULL) {
768  const TBOX& part_box = part->bounding_box();
769  TabVector* left_line = tabgrid->LeftTabForBox(part_box, true, false);
770  // If the overlapping line is not a left tab, try for non-overlapping.
771  if (left_line != NULL && !left_line->IsLeftTab())
772  left_line = tabgrid->LeftTabForBox(part_box, false, false);
773  if (left_line != NULL && left_line->IsLeftTab())
774  part->SetLeftTab(left_line);
775  TabVector* right_line = tabgrid->RightTabForBox(part_box, true, false);
776  if (right_line != NULL && !right_line->IsRightTab())
777  right_line = tabgrid->RightTabForBox(part_box, false, false);
778  if (right_line != NULL && right_line->IsRightTab())
779  part->SetRightTab(right_line);
780  part->SetColumnGoodness(tabgrid->WidthCB());
781  }
782 }
GridSearch< ColPartition, ColPartition_CLIST, ColPartition_C_IT > ColPartitionGridSearch
Definition: colpartition.h:932
Definition: rect.h:30

◆ SplitOverlappingPartitions()

void tesseract::ColPartitionGrid::SplitOverlappingPartitions ( ColPartition_LIST *  big_parts)

Definition at line 518 of file colpartitiongrid.cpp.

519  {
520  int ok_overlap =
521  static_cast<int>(kTinyEnoughTextlineOverlapFraction * gridsize() + 0.5);
522  // Iterate the ColPartitions in the grid.
523  ColPartitionGridSearch gsearch(this);
524  gsearch.StartFullSearch();
525  ColPartition* part;
526  while ((part = gsearch.NextFullSearch()) != NULL) {
527  // Set up a rectangle search bounded by the part.
528  const TBOX& box = part->bounding_box();
529  ColPartitionGridSearch rsearch(this);
530  rsearch.SetUniqueMode(true);
531  rsearch.StartRectSearch(box);
532  int unresolved_overlaps = 0;
533 
534  ColPartition* neighbour;
535  while ((neighbour = rsearch.NextRectSearch()) != NULL) {
536  if (neighbour == part)
537  continue;
538  const TBOX& neighbour_box = neighbour->bounding_box();
539  if (neighbour->OKMergeOverlap(*part, *part, ok_overlap, false) &&
540  part->OKMergeOverlap(*neighbour, *neighbour, ok_overlap, false))
541  continue; // The overlap is OK both ways.
542 
543  // If removal of the biggest box from either partition eliminates the
544  // overlap, and it is much bigger than the box left behind, then
545  // it is either a drop-cap, an inter-line join, or some junk that
546  // we don't want anyway, so put it in the big_parts list.
547  if (!part->IsSingleton()) {
548  BLOBNBOX* excluded = part->BiggestBox();
549  TBOX shrunken = part->BoundsWithoutBox(excluded);
550  if (!shrunken.overlap(neighbour_box) &&
551  excluded->bounding_box().height() >
552  kBigPartSizeRatio * shrunken.height()) {
553  // Removing the biggest box fixes the overlap, so do it!
554  gsearch.RemoveBBox();
555  RemoveBadBox(excluded, part, big_parts);
556  InsertBBox(true, true, part);
557  gsearch.RepositionIterator();
558  break;
559  }
560  } else if (box.contains(neighbour_box)) {
561  ++unresolved_overlaps;
562  continue; // No amount of splitting will fix it.
563  }
564  if (!neighbour->IsSingleton()) {
565  BLOBNBOX* excluded = neighbour->BiggestBox();
566  TBOX shrunken = neighbour->BoundsWithoutBox(excluded);
567  if (!shrunken.overlap(box) &&
568  excluded->bounding_box().height() >
569  kBigPartSizeRatio * shrunken.height()) {
570  // Removing the biggest box fixes the overlap, so do it!
571  rsearch.RemoveBBox();
572  RemoveBadBox(excluded, neighbour, big_parts);
573  InsertBBox(true, true, neighbour);
574  gsearch.RepositionIterator();
575  break;
576  }
577  }
578  int part_overlap_count = part->CountOverlappingBoxes(neighbour_box);
579  int neighbour_overlap_count = neighbour->CountOverlappingBoxes(box);
580  ColPartition* right_part = NULL;
581  if (neighbour_overlap_count <= part_overlap_count ||
582  part->IsSingleton()) {
583  // Try to split the neighbour to reduce overlap.
584  BLOBNBOX* split_blob = neighbour->OverlapSplitBlob(box);
585  if (split_blob != NULL) {
586  rsearch.RemoveBBox();
587  right_part = neighbour->SplitAtBlob(split_blob);
588  InsertBBox(true, true, neighbour);
589  ASSERT_HOST(right_part != NULL);
590  }
591  } else {
592  // Try to split part to reduce overlap.
593  BLOBNBOX* split_blob = part->OverlapSplitBlob(neighbour_box);
594  if (split_blob != NULL) {
595  gsearch.RemoveBBox();
596  right_part = part->SplitAtBlob(split_blob);
597  InsertBBox(true, true, part);
598  ASSERT_HOST(right_part != NULL);
599  }
600  }
601  if (right_part != NULL) {
602  InsertBBox(true, true, right_part);
603  gsearch.RepositionIterator();
604  rsearch.RepositionIterator();
605  break;
606  }
607  }
608  if (unresolved_overlaps > 2 && part->IsSingleton()) {
609  // This part is no good so just add to big_parts.
610  RemoveBBox(part);
611  ColPartition_IT big_it(big_parts);
612  part->set_block_owned(true);
613  big_it.add_to_end(part);
614  gsearch.RepositionIterator();
615  }
616  }
617 }
bool overlap(const TBOX &box) const
Definition: rect.h:345
GridSearch< ColPartition, ColPartition_CLIST, ColPartition_C_IT > ColPartitionGridSearch
Definition: colpartition.h:932
const double kTinyEnoughTextlineOverlapFraction
const double kBigPartSizeRatio
int gridsize() const
Definition: bbgrid.h:64
#define ASSERT_HOST(x)
Definition: errcode.h:84
void InsertBBox(bool h_spread, bool v_spread, ColPartition *bbox)
Definition: bbgrid.h:490
bool contains(const FCOORD pt) const
Definition: rect.h:323
Definition: rect.h:30
inT16 height() const
Definition: rect.h:104
const TBOX & bounding_box() const
Definition: blobbox.h:215

The documentation for this class was generated from the following files: