22 #pragma warning(disable:4244) // Conversion warnings 27 #include "config_auto.h" 60 false,
"Show partition bounds");
62 false,
"Show blobs rejected as noise");
64 "Show partition bounds, waiting if >1");
79 int resolution,
bool cjk_script,
80 double aligned_gap_fraction,
81 TabVector_LIST* vlines, TabVector_LIST* hlines,
82 int vertical_x,
int vertical_y)
83 :
TabFind(gridsize, bleft, tright, vlines, vertical_x, vertical_y,
85 cjk_script_(cjk_script),
86 min_gutter_width_(static_cast<
int>(kMinGutterWidthGrid * gridsize)),
87 mean_column_gap_(tright.x() - bleft.x()),
88 tabfind_aligned_gap_fraction_(aligned_gap_fraction),
89 reskew_(1.0f, 0.0f), rotation_(1.0f, 0.0f), rerotate_(1.0f, 0.0f),
90 best_columns_(NULL), stroke_width_(NULL),
91 part_grid_(gridsize, bleft, tright), nontext_map_(NULL),
92 projection_(resolution),
93 denorm_(NULL), input_blobs_win_(NULL), equation_detect_(NULL) {
94 TabVector_IT h_it(&horizontal_lines_);
95 h_it.add_list_after(hlines);
100 if (best_columns_ != NULL) {
101 delete [] best_columns_;
103 if (stroke_width_ != NULL)
104 delete stroke_width_;
105 delete input_blobs_win_;
106 pixDestroy(&nontext_map_);
107 while (denorm_ != NULL) {
108 DENORM* dead_denorm = denorm_;
115 ColPartition_IT part_it(&noise_parts_);
116 for (part_it.mark_cycle_pt(); !part_it.cycled_list(); part_it.forward()) {
123 part_it.set_to_list(&good_parts_);
124 for (part_it.mark_cycle_pt(); !part_it.cycled_list(); part_it.forward()) {
132 BLOBNBOX_IT bb_it(&image_bblobs_);
133 for (bb_it.mark_cycle_pt(); !bb_it.cycled_list(); bb_it.forward()) {
135 delete bblob->
cblob();
150 if (stroke_width_ != NULL)
151 delete stroke_width_;
153 min_gutter_width_ =
static_cast<int>(kMinGutterWidthGrid *
gridsize());
155 #ifndef GRAPHICS_DISABLED 157 input_blobs_win_ =
MakeWindow(0, 0,
"Filtered Input Blobs");
160 #endif // GRAPHICS_DISABLED 162 pixDestroy(&nontext_map_);
168 photo_mask_pix, input_block);
172 stroke_width_->
Clear();
186 BLOBNBOX_CLIST* osd_blobs) {
203 bool vertical_text_lines,
204 int recognition_rotation) {
205 const FCOORD anticlockwise90(0.0f, 1.0f);
206 const FCOORD clockwise90(0.0f, -1.0f);
207 const FCOORD rotation180(-1.0f, 0.0f);
208 const FCOORD norotation(1.0f, 0.0f);
210 text_rotation_ = norotation;
213 rotation_ = norotation;
214 if (recognition_rotation == 1) {
215 rotation_ = anticlockwise90;
216 }
else if (recognition_rotation == 2) {
217 rotation_ = rotation180;
218 }
else if (recognition_rotation == 3) {
219 rotation_ = clockwise90;
225 if (recognition_rotation & 1) {
226 vertical_text_lines = !vertical_text_lines;
232 if (vertical_text_lines) {
233 rotation_.
rotate(anticlockwise90);
234 text_rotation_.
rotate(clockwise90);
237 rerotate_ =
FCOORD(rotation_.
x(), -rotation_.
y());
238 if (rotation_.
x() != 1.0f || rotation_.
y() != 0.0f) {
254 tprintf(
"Vertical=%d, orientation=%d, final rotation=(%f, %f)+(%f,%f)\n",
255 vertical_text_lines, recognition_rotation,
256 rotation_.
x(), rotation_.
y(),
257 text_rotation_.
x(), text_rotation_.
y());
263 0.0f, 0.0f, 1.0f, 1.0f, 0.0f, 0.0f);
291 int scaled_factor,
TO_BLOCK* input_block,
292 Pix* photo_mask_pix, Pix* thresholds_pix,
294 BLOCK_LIST* blocks, BLOBNBOX_LIST* diacritic_blobs,
295 TO_BLOCK_LIST* to_blocks) {
296 pixOr(photo_mask_pix, photo_mask_pix, nontext_map_);
303 pageseg_mode, rerotate_, input_block, nontext_map_, denorm_, cjk_script_,
304 &projection_, diacritic_blobs, &part_grid_, &big_parts_);
307 input_block,
this, pixa_debug, &part_grid_,
312 input_block,
this, pixa_debug, &part_grid_,
319 ColPartition_IT p_it(&big_parts_);
320 for (p_it.mark_cycle_pt(); !p_it.cycled_list(); p_it.forward())
321 p_it.data()->DisownBoxesNoAssert();
323 delete stroke_width_;
324 stroke_width_ = NULL;
350 ReflectForRtl(input_block, &image_bblobs_);
363 min_gutter_width_, tabfind_aligned_gap_fraction_,
364 &part_grid_, &deskew_, &reskew_);
368 0.0f, 0.0f, 1.0f, 1.0f, 0.0f, 0.0f);
369 denorm_ = new_denorm;
375 if (!MakeColumns(
false)) {
384 #ifndef GRAPHICS_DISABLED 389 #endif // GRAPHICS_DISABLED 395 GridSplitPartitions();
399 GridMergePartitions();
402 InsertRemainingNoise(input_block);
404 GridInsertHLinePartitions();
405 GridInsertVLinePartitions();
417 if (equation_detect_) {
432 GridRemoveUnderlinePartitions();
442 #ifndef GRAPHICS_DISABLED 445 if (window != NULL) {
454 #endif // GRAPHICS_DISABLED 460 ReleaseBlobsAndCleanupUnused(input_block);
468 TransformToBlocks(blocks, to_blocks);
470 tprintf(
"Found %d blocks, %d to_blocks\n",
471 blocks->length(), to_blocks->length());
474 DisplayBlocks(blocks);
475 RotateAndReskewBlocks(input_is_rtl, to_blocks);
477 #ifndef GRAPHICS_DISABLED 478 if (blocks_win_ != NULL) {
479 bool waiting =
false;
483 if (event->type ==
SVET_INPUT && event->parameter != NULL) {
484 if (*event->parameter ==
'd')
496 #endif // GRAPHICS_DISABLED 504 deskew->
set_y(-deskew->
y());
508 equation_detect_ = detect;
514 void ColumnFinder::DisplayBlocks(BLOCK_LIST* blocks) {
515 #ifndef GRAPHICS_DISABLED 517 if (blocks_win_ == NULL)
520 blocks_win_->
Clear();
522 BLOCK_IT block_it(blocks);
524 for (block_it.mark_cycle_pt(); !block_it.cycled_list();
525 block_it.forward()) {
526 BLOCK* block = block_it.data();
527 block->
plot(blocks_win_, serial++,
538 void ColumnFinder::DisplayColumnBounds(
PartSetVector* sets) {
539 #ifndef GRAPHICS_DISABLED 553 bool ColumnFinder::MakeColumns(
bool single_column) {
558 if (!single_column) {
563 bool good_only =
true;
569 if (column_candidate != NULL)
573 good_only = !good_only;
574 }
while (column_sets_.
empty() && !good_only);
576 PrintColumnCandidates(
"Column candidates");
578 ImproveColumnCandidates(&column_sets_, &column_sets_);
580 PrintColumnCandidates(
"Improved columns");
582 ImproveColumnCandidates(&part_sets, &column_sets_);
586 if (single_column_set != NULL) {
592 PrintColumnCandidates(
"Final Columns");
593 bool has_columns = !column_sets_.
empty();
596 bool any_multi_column = AssignColumns(part_sets);
598 DisplayColumnBounds(&part_sets);
600 ComputeMeanColumnGap(any_multi_column);
602 for (
int i = 0; i < part_sets.
size(); ++i) {
604 if (line_set != NULL) {
616 void ColumnFinder::ImproveColumnCandidates(
PartSetVector* src_sets,
619 temp_cols.
move(column_sets);
620 if (src_sets == column_sets)
621 src_sets = &temp_cols;
622 int set_size = temp_cols.
size();
624 bool good_only =
true;
626 for (
int i = 0; i < set_size; ++i) {
630 if (improved != NULL) {
635 good_only = !good_only;
636 }
while (column_sets->
empty() && !good_only);
637 if (column_sets->
empty())
638 column_sets->
move(&temp_cols);
644 void ColumnFinder::PrintColumnCandidates(
const char* title) {
645 int set_size = column_sets_.
size();
646 tprintf(
"Found %d %s:\n", set_size, title);
648 for (
int i = 0; i < set_size; ++i) {
664 bool ColumnFinder::AssignColumns(
const PartSetVector& part_sets) {
665 int set_count = part_sets.
size();
669 for (
int y = 0; y < set_count; ++y)
670 best_columns_[y] = NULL;
671 int column_count = column_sets_.
size();
681 bool* any_columns_possible =
new bool[set_count];
682 int* assigned_costs =
new int[set_count];
683 int** column_set_costs =
new int*[set_count];
686 for (
int part_i = 0; part_i < set_count; ++part_i) {
688 bool debug = line_set != NULL &&
691 column_set_costs[part_i] =
new int[column_count];
692 any_columns_possible[part_i] =
false;
694 for (
int col_i = 0; col_i < column_count; ++col_i) {
695 if (line_set != NULL &&
696 column_sets_.
get(col_i)->CompatibleColumns(debug, line_set,
698 column_set_costs[part_i][col_i] =
699 column_sets_.
get(col_i)->UnmatchedWidth(line_set);
700 any_columns_possible[part_i] =
true;
702 column_set_costs[part_i][col_i] =
MAX_INT32;
704 tprintf(
"Set id %d did not match at y=%d, lineset =%p\n",
705 col_i, part_i, line_set);
709 bool any_multi_column =
false;
713 while (BiggestUnassignedRange(set_count, any_columns_possible,
716 tprintf(
"Biggest unassigned range = %d- %d\n", start, end);
718 int column_set_id = RangeModalColumnSet(column_set_costs,
719 assigned_costs, start, end);
721 tprintf(
"Range modal column id = %d\n", column_set_id);
722 column_sets_.
get(column_set_id)->Print();
725 ShrinkRangeToLongestRun(column_set_costs, assigned_costs,
726 any_columns_possible,
727 column_set_id, &start, &end);
729 tprintf(
"Shrunk range = %d- %d\n", start, end);
733 ExtendRangePastSmallGaps(column_set_costs, assigned_costs,
734 any_columns_possible,
735 column_set_id, -1, -1, &start);
737 ExtendRangePastSmallGaps(column_set_costs, assigned_costs,
738 any_columns_possible,
739 column_set_id, 1, set_count, &end);
742 tprintf(
"Column id %d applies to range = %d - %d\n",
743 column_set_id, start, end);
745 AssignColumnToRange(column_set_id, start, end, column_set_costs,
747 if (column_sets_.
get(column_set_id)->GoodColumnCount() > 1)
748 any_multi_column =
true;
752 if (best_columns_[0] == NULL) {
753 AssignColumnToRange(0, 0,
gridheight_, column_set_costs, assigned_costs);
756 for (
int i = 0; i < set_count; ++i) {
757 delete [] column_set_costs[i];
759 delete [] assigned_costs;
760 delete [] any_columns_possible;
761 delete [] column_set_costs;
762 return any_multi_column;
767 bool ColumnFinder::BiggestUnassignedRange(
int set_count,
768 const bool* any_columns_possible,
769 int* best_start,
int* best_end) {
770 int best_range_size = 0;
771 *best_start = set_count;
772 *best_end = set_count;
774 for (
int start = 0; start <
gridheight_; start = end) {
776 while (start < set_count) {
777 if (best_columns_[start] == NULL && any_columns_possible[start])
784 while (end < set_count) {
785 if (best_columns_[end] != NULL)
787 if (any_columns_possible[end])
791 if (start < set_count && range_size > best_range_size) {
792 best_range_size = range_size;
797 return *best_start < *best_end;
801 int ColumnFinder::RangeModalColumnSet(
int** column_set_costs,
802 const int* assigned_costs,
803 int start,
int end) {
804 int column_count = column_sets_.
size();
805 STATS column_stats(0, column_count);
806 for (
int part_i = start; part_i < end; ++part_i) {
807 for (
int col_j = 0; col_j < column_count; ++col_j) {
808 if (column_set_costs[part_i][col_j] < assigned_costs[part_i])
809 column_stats.
add(col_j, 1);
813 return column_stats.
mode();
820 void ColumnFinder::ShrinkRangeToLongestRun(
int** column_set_costs,
821 const int* assigned_costs,
822 const bool* any_columns_possible,
824 int* best_start,
int* best_end) {
826 int orig_start = *best_start;
827 int orig_end = *best_end;
828 int best_range_size = 0;
829 *best_start = orig_end;
830 *best_end = orig_end;
832 for (
int start = orig_start; start < orig_end; start = end) {
834 while (start < orig_end) {
835 if (column_set_costs[start][column_set_id] < assigned_costs[start] ||
836 !any_columns_possible[start])
842 while (end < orig_end) {
843 if (column_set_costs[end][column_set_id] >= assigned_costs[start] &&
844 any_columns_possible[end])
848 if (start < orig_end && end - start > best_range_size) {
849 best_range_size = end - start;
859 void ColumnFinder::ExtendRangePastSmallGaps(
int** column_set_costs,
860 const int* assigned_costs,
861 const bool* any_columns_possible,
863 int step,
int end,
int* start) {
865 tprintf(
"Starting expansion at %d, step=%d, limit=%d\n",
870 int barrier_size = 0;
876 for (i = *start + step; i != end; i += step) {
877 if (column_set_costs[i][column_set_id] < assigned_costs[i])
880 if (any_columns_possible[i])
884 tprintf(
"At %d, Barrier size=%d\n", i, barrier_size);
885 if (barrier_size > kMaxIncompatibleColumnCount)
894 for (i += step; i != end; i += step) {
895 if (column_set_costs[i][column_set_id] < assigned_costs[i])
897 else if (any_columns_possible[i])
901 tprintf(
"At %d, good size = %d\n", i, good_size);
903 if (good_size >= barrier_size)
905 }
while (good_size >= barrier_size);
909 void ColumnFinder::AssignColumnToRange(
int column_set_id,
int start,
int end,
910 int** column_set_costs,
911 int* assigned_costs) {
913 for (
int i = start; i < end; ++i) {
914 assigned_costs[i] = column_set_costs[i][column_set_id];
915 best_columns_[i] = column_set;
920 void ColumnFinder::ComputeMeanColumnGap(
bool any_multi_column) {
924 int width_samples = 0;
932 mean_column_gap_ = any_multi_column && gap_samples > 0
933 ? total_gap / gap_samples : total_width / width_samples;
941 static void ReleaseAllBlobsAndDeleteUnused(BLOBNBOX_LIST* blobs) {
942 for (BLOBNBOX_IT blob_it(blobs); !blob_it.empty(); blob_it.forward()) {
944 if (blob->
owner() == NULL) {
945 delete blob->
cblob();
954 void ColumnFinder::ReleaseBlobsAndCleanupUnused(
TO_BLOCK* block) {
955 ReleaseAllBlobsAndDeleteUnused(&block->
blobs);
956 ReleaseAllBlobsAndDeleteUnused(&block->
small_blobs);
957 ReleaseAllBlobsAndDeleteUnused(&block->
noise_blobs);
958 ReleaseAllBlobsAndDeleteUnused(&block->
large_blobs);
959 ReleaseAllBlobsAndDeleteUnused(&image_bblobs_);
963 void ColumnFinder::GridSplitPartitions() {
966 gsearch(&part_grid_);
986 if (last_col != first_col + 1)
989 int y = part->
MidY();
994 tprintf(
"Considering partition for GridSplit:");
1010 tprintf(
"Searching box (%d,%d)->(%d,%d)\n",
1012 margin_box.
right(), margin_box.
top());
1024 int x_middle = (margin_box.
left() + margin_box.
right()) / 2;
1026 tprintf(
"Splitting part at %d:", x_middle);
1030 if (split_part != NULL) {
1034 split_part->
Print();
1036 part_grid_.
InsertBBox(
true,
true, split_part);
1040 tprintf(
"Split had no effect\n");
1046 tprintf(
"Part cannot be split: blob (%d,%d)->(%d,%d) in column gap\n",
1055 void ColumnFinder::GridMergePartitions() {
1058 gsearch(&part_grid_);
1069 tprintf(
"Considering part for merge at:");
1072 int y = part->
MidY();
1075 if (left_column == NULL || right_column != left_column) {
1077 tprintf(
"In different columns\n");
1083 bool modified_box =
false;
1085 rsearch(&part_grid_);
1095 tprintf(
"Considering merge with neighbour at:");
1098 if (neighbour_box.
right() < box.
left() ||
1117 int h_gap =
MAX(part_box.
left(), neighbour_box.
left()) -
1119 if (h_gap < mean_column_gap_ * kHorizontalGapMergeFraction ||
1120 part_box.
width() < mean_column_gap_ ||
1121 neighbour_box.
width() < mean_column_gap_) {
1123 tprintf(
"Running grid-based merge between:\n");
1128 if (!modified_box) {
1132 modified_box =
true;
1136 tprintf(
"Neighbour failed hgap test\n");
1139 tprintf(
"Neighbour failed overlap or typesmatch test\n");
1157 void ColumnFinder::InsertRemainingNoise(
TO_BLOCK* block) {
1159 for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
1161 if (blob->
owner() != NULL)
continue;
1171 int best_distance = 0;
1177 if (best_part == NULL || distance < best_distance) {
1179 best_distance = distance;
1182 if (best_part != NULL &&
1183 best_distance < kMaxDistToPartSizeRatio * best_part->median_size()) {
1186 tprintf(
"Adding noise blob with distance %d, thr=%g:box:",
1188 kMaxDistToPartSizeRatio * best_part->
median_size());
1195 part_grid_.
InsertBBox(
true,
true, best_part);
1213 if (top == bottom) {
1224 void ColumnFinder::GridRemoveUnderlinePartitions() {
1225 TabVector_IT hline_it(&horizontal_lines_);
1226 for (hline_it.mark_cycle_pt(); !hline_it.cycled_list(); hline_it.forward()) {
1230 TBOX line_box = BoxFromHLine(hline);
1231 TBOX search_box = line_box;
1237 bool touched_table =
false;
1238 bool touched_text =
false;
1242 touched_table =
true;
1247 if (line_box.
bottom() <= text_bottom && text_bottom <= search_box.
top())
1248 touched_text =
true;
1251 line_part = covered;
1254 if (line_part != NULL && !touched_table && touched_text) {
1262 void ColumnFinder::GridInsertHLinePartitions() {
1263 TabVector_IT hline_it(&horizontal_lines_);
1264 for (hline_it.mark_cycle_pt(); !hline_it.cycled_list(); hline_it.forward()) {
1266 TBOX line_box = BoxFromHLine(hline);
1271 bool any_image =
false;
1290 void ColumnFinder::GridInsertVLinePartitions() {
1292 for (vline_it.mark_cycle_pt(); !vline_it.cycled_list(); vline_it.forward()) {
1299 if (left == right) {
1309 bool any_image =
false;
1329 void ColumnFinder::SetPartitionTypes() {
1331 gsearch(&part_grid_);
1341 void ColumnFinder::SmoothPartnerRuns() {
1344 gsearch(&part_grid_);
1349 if (partner != NULL) {
1351 tprintf(
"Ooops! Partition:(%d partners)",
1354 tprintf(
"has singleton partner:(%d partners",
1357 tprintf(
"but its singleton partner is:");
1374 void ColumnFinder::AddToTempPartList(
ColPartition* part,
1375 ColPartition_CLIST* temp_list) {
1376 int mid_y = part->
MidY();
1377 ColPartition_C_IT it(temp_list);
1378 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
1386 int neighbour_y = (neighbour_bottom + neighbour_top) / 2;
1387 if (neighbour_y < mid_y)
1392 if (it.cycled_list()) {
1393 it.add_to_end(part);
1395 it.add_before_stay_put(part);
1400 void ColumnFinder::EmptyTempPartList(ColPartition_CLIST* temp_list,
1401 WorkingPartSet_LIST* work_set) {
1402 ColPartition_C_IT it(temp_list);
1403 while (!it.empty()) {
1405 &good_parts_, work_set);
1411 void ColumnFinder::TransformToBlocks(BLOCK_LIST* blocks,
1412 TO_BLOCK_LIST* to_blocks) {
1413 WorkingPartSet_LIST work_set;
1415 ColPartition_IT noise_it(&noise_parts_);
1419 ColPartition_CLIST temp_part_list;
1422 gsearch(&part_grid_);
1424 int prev_grid_y = -1;
1427 int grid_y = gsearch.
GridY();
1428 if (grid_y != prev_grid_y) {
1429 EmptyTempPartList(&temp_part_list, &work_set);
1430 prev_grid_y = grid_y;
1432 if (best_columns_[grid_y] != column_set) {
1433 column_set = best_columns_[grid_y];
1437 &good_parts_, &work_set);
1439 tprintf(
"Changed column groups at grid index %d, y=%d\n",
1443 noise_it.add_to_end(part);
1445 AddToTempPartList(part, &temp_part_list);
1448 EmptyTempPartList(&temp_part_list, &work_set);
1450 WorkingPartSet_IT work_it(&work_set);
1451 while (!work_it.empty()) {
1454 &good_parts_, blocks, to_blocks);
1462 static void ReflectBlobList(BLOBNBOX_LIST* bblobs) {
1463 BLOBNBOX_IT it(bblobs);
1464 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
1465 it.data()->reflect_box_in_y_axis();
1475 void ColumnFinder::ReflectForRtl(
TO_BLOCK* input_block, BLOBNBOX_LIST* bblobs) {
1476 ReflectBlobList(bblobs);
1477 ReflectBlobList(&input_block->
blobs);
1484 0.0f, 0.0f, -1.0f, 1.0f, 0.0f, 0.0f);
1485 denorm_ = new_denorm;
1491 static void RotateAndExplodeBlobList(
const FCOORD& blob_rotation,
1492 BLOBNBOX_LIST* bblobs,
1495 BLOBNBOX_IT it(bblobs);
1496 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
1499 C_OUTLINE_LIST* outlines = cblob->
out_list();
1500 C_OUTLINE_IT ol_it(outlines);
1501 if (!outlines->singleton()) {
1504 for (;!ol_it.empty(); ol_it.forward()) {
1510 it.add_after_stay_put(new_blob);
1516 if (blob_rotation.
x() != 1.0f || blob_rotation.
y() != 0.0f) {
1517 cblob->
rotate(blob_rotation);
1538 void ColumnFinder::RotateAndReskewBlocks(
bool input_is_rtl,
1539 TO_BLOCK_LIST* blocks) {
1546 TO_BLOCK_IT it(blocks);
1547 int block_index = 1;
1548 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
1562 FCOORD blob_rotation = ComputeBlockAndClassifyRotation(block);
1567 RotateAndExplodeBlobList(blob_rotation, &to_block->
blobs,
1569 TO_ROW_IT row_it(to_block->
get_rows());
1570 for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
1571 TO_ROW* row = row_it.data();
1572 RotateAndExplodeBlobList(blob_rotation, row->
blob_list(),
1576 static_cast<int>(heights.
median() + 0.5));
1578 tprintf(
"Block median size = (%d, %d)\n",
1588 FCOORD ColumnFinder::ComputeBlockAndClassifyRotation(
BLOCK* block) {
1597 FCOORD classify_rotation(text_rotation_);
1598 FCOORD block_rotation(1.0f, 0.0f);
1604 if (rerotate_.
x() == 0.0f)
1605 block_rotation = rerotate_;
1607 block_rotation =
FCOORD(0.0f, -1.0f);
1608 block->
rotate(block_rotation);
1609 classify_rotation =
FCOORD(1.0f, 0.0f);
1611 block_rotation.
rotate(rotation_);
1615 FCOORD blob_rotation(block_rotation);
1616 block_rotation.
set_y(-block_rotation.
y());
1620 tprintf(
"Blk %d, type %d rerotation(%.2f, %.2f), char(%.2f,%.2f), box:",
1623 classify_rotation.
x(), classify_rotation.
y());
1626 return blob_rotation;
void ExtractCompletedBlocks(const ICOORD &bleft, const ICOORD &tright, int resolution, ColPartition_LIST *used_parts, BLOCK_LIST *blocks, TO_BLOCK_LIST *to_blocks)
ColPartition * ColumnContaining(int x, int y)
bool PSM_SPARSE(int pageseg_mode)
bool textord_debug_printable
C_OUTLINE_LIST * out_list()
void rotate(const FCOORD &rotation)
void ColumnRange(int resolution, ColPartitionSet *columns, int *first_col, int *last_col)
bool textord_tabfind_show_columns
bool overlap(const TBOX &box) const
bool MakeColPartSets(PartSetVector *part_sets)
const TBOX & bounding_box() const
void AccumulateColumnWidthsAndGaps(int *total_width, int *width_samples, int *total_gap, int *gap_samples)
void DisplayBoxes(ScrollView *window)
void ReSetAndReFilterBlobs()
ColPartition_CLIST * upper_partners()
Pix * ComputeNonTextMask(bool debug, Pix *photo_map, TO_BLOCK *blob_block)
const DENORM * predecessor() const
void set_right_to_left(bool value)
void SetBlockRuleEdges(TO_BLOCK *block)
void InsertBlobsToGrid(bool h_spread, bool v_spread, BLOBNBOX_LIST *blobs, BBGrid< BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT > *grid)
bool VSignificantCoreOverlap(const ColPartition &other) const
static BLOBNBOX * RealBlob(C_OUTLINE *outline)
void SmoothPartnerRun(int working_set_count)
void rotate(const FCOORD vec)
void plot_graded_blobs(ScrollView *to_win)
void bounding_box(ICOORD &bottom_left, ICOORD &top_right) const
get box
bool textord_tabfind_find_tables
BLOBNBOX_LIST large_blobs
void LocateTables(ColPartitionGrid *grid, ColPartitionSet **columns, WidthCallback *width_cb, const FCOORD &reskew)
void DeleteUnknownParts(TO_BLOCK *block)
ScrollView * DisplayTabVectors(ScrollView *tab_win)
void Init(int gridsize, const ICOORD &bleft, const ICOORD &tright)
bool IsUnMergeableType() const
ScrollView * MakeWindow(int x, int y, const char *window_name)
void SetEquationDetect(EquationDetectBase *detect)
void ExtractPartitionsAsBlocks(BLOCK_LIST *blocks, TO_BLOCK_LIST *to_blocks)
const ICOORD & bleft() const
ColPartition * GetColumnByIndex(int index)
inT16 x() const
access function
bool intersects_other_lines() const
void move(GenericVector< T > *from)
const ICOORD & median_size() const
void SetupNormalization(const BLOCK *block, const FCOORD *rotation, const DENORM *predecessor, float x_origin, float y_origin, float x_scale, float y_scale, float final_xshift, float final_yshift)
tesseract::ColPartition * owner() const
ColPartition_CLIST * lower_partners()
bool textord_tabfind_show_blocks
#define BOOL_VAR(name, val, comment)
void AddBox(BLOBNBOX *box)
bool IsVerticallyAlignedText(double find_vertical_text_ratio, TO_BLOCK *block, BLOBNBOX_CLIST *osd_blobs)
bool textord_tabfind_show_initial_partitions
int RightAtY(int y) const
void set_y(float yin)
rewrite function
ColPartitionSet * Copy(bool good_only)
void FindFigureCaptions()
void DontFindTabVectors(BLOBNBOX_LIST *image_blobs, TO_BLOCK *block, FCOORD *deskew, FCOORD *reskew)
void AssertNoDuplicates()
void RepositionIterator()
void set_region_type(BlobRegionType new_type)
virtual int FindEquationParts(ColPartitionGrid *part_grid, ColPartitionSet **best_columns)=0
void ComputeEdgeOffsets(Pix *thresholds, Pix *grey)
static void FindImagePartitions(Pix *image_pix, const FCOORD &rotation, const FCOORD &rerotation, TO_BLOCK *block, TabFind *tab_grid, DebugPixa *pixa_debug, ColPartitionGrid *part_grid, ColPartition_LIST *big_parts)
void SetUniqueMode(bool mode)
void set_classify_rotation(const FCOORD &rotation)
void set_owner(tesseract::ColPartition *new_owner)
const double kMinGutterWidthGrid
void GetDeskewVectors(FCOORD *deskew, FCOORD *reskew)
void SetupAndFilterNoise(PageSegMode pageseg_mode, Pix *photo_mask_pix, TO_BLOCK *input_block)
void SetPartitionType(int resolution, ColPartitionSet *columns)
const ICOORD & startpt() const
void CorrectForRotation(const FCOORD &rerotation, ColPartitionGrid *part_grid)
void DisplayColumnEdges(int y_bottom, int y_top, ScrollView *win)
static void RotateBlobList(const FCOORD &rotation, BLOBNBOX_LIST *blobs)
ColPartition * SplitAt(int split_x)
BLOBNBOX_LIST small_blobs
int textord_debug_tabfind
void set_type(PolyBlockType t)
void ChangeWorkColumns(const ICOORD &bleft, const ICOORD &tright, int resolution, ColPartition_LIST *used_parts, WorkingPartSet_LIST *working_set)
void InsertBBox(bool h_spread, bool v_spread, BBC *bbox)
int textord_tabfind_show_partitions
const TBOX & bounding_box() const
const int kMaxIncompatibleColumnCount
inT16 y() const
access_function
bool WithinSameMargins(const ColPartition &other) const
const ICOORD & tright() const
int DistanceOfBoxFromPartition(const TBOX &box, const ColPartition &part, const DENORM *denorm, bool debug) const
#define INT_VAR(name, val, comment)
void ImproveColumnCandidate(WidthCallback *cb, PartSetVector *src_sets)
void InsertCleanPartitions(ColPartitionGrid *grid, TO_BLOCK *block)
void DeleteUnownedNoise()
const double kHorizontalGapMergeFraction
void pad(int xpad, int ypad)
bool contains(const FCOORD pt) const
PolyBlockType type() const
void RefinePartitionPartners(bool get_desperate)
void set_re_rotation(const FCOORD &rotation)
static bool WithinTestRegion(int detail_level, int x, int y)
bool HOverlaps(const ColPartition &other) const
ColPartitionSet * MakeSingleColumnSet(WidthCallback *cb)
void add(inT32 value, inT32 count)
void set_index(int value)
const ICOORD & endpt() const
void set_flow(BlobTextFlowType value)
POLY_BLOCK * poly_block() const
void TidyBlobs(TO_BLOCK *block)
void ResetForVerticalText(const FCOORD &rotate, const FCOORD &rerotate, TabVector_LIST *horizontal_lines, int *min_gutter_width)
void reflect_polygon_in_y_axis()
void SetNeighboursOnMediumBlobs(TO_BLOCK *block)
ScrollView * FindInitialTabVectors(BLOBNBOX_LIST *image_blobs, int min_gutter_width, double tabfind_aligned_gap_fraction, TO_BLOCK *block)
void FindPartitionPartners()
void plot(ScrollView *window, inT32 serial, ScrollView::Color colour)
static void TransferImagePartsToImageMask(const FCOORD &rerotation, ColPartitionGrid *part_grid, Pix *image_mask)
void FindLeaderPartitions(TO_BLOCK *block, ColPartitionGrid *part_grid)
void Init(int grid_size, const ICOORD &bottom_left, const ICOORD &top_right)
typedef int(ZCALLBACK *close_file_func) OF((voidpf opaque
bool PSM_COL_FIND_ENABLED(int pageseg_mode)
ColPartition * SingletonPartner(bool upper)
void RemoveBBox(BBC *bbox)
PolyBlockType isA() const
FCOORD re_rotation() const
const double kMaxDistToPartSizeRatio
void Absorb(ColPartition *other, WidthCallback *cb)
ColumnFinder(int gridsize, const ICOORD &bleft, const ICOORD &tright, int resolution, bool cjk_script, double aligned_gap_fraction, TabVector_LIST *vlines, TabVector_LIST *hlines, int vertical_x, int vertical_y)
void CorrectOrientation(TO_BLOCK *block, bool vertical_text_lines, int recognition_rotation)
int median_bottom() const
bool FindTabVectors(TabVector_LIST *hlines, BLOBNBOX_LIST *image_blobs, TO_BLOCK *block, int min_gutter_width, double tabfind_aligned_gap_fraction, ColPartitionGrid *part_grid, FCOORD *deskew, FCOORD *reskew)
void rotate(const FCOORD &rotation)
bool textord_tabfind_show_reject_blobs
void StartRectSearch(const TBOX &rect)
void set_left_to_right_language(bool order)
bool right_to_left() const
BLOBNBOX_LIST * blob_list()
void FindTextlineDirectionAndFixBrokenCJK(PageSegMode pageseg_mode, bool cjk_merge, TO_BLOCK *input_block)
BLOBNBOX_LIST noise_blobs
bool TestVerticalTextDirection(double find_vertical_text_ratio, TO_BLOCK *block, BLOBNBOX_CLIST *osd_blobs)
void delete_data_pointers()
TabVector_LIST * dead_vectors()
WidthCallback * WidthCB()
const TBOX & bounding_box() const
void set_resolution(int resolution)
void ReTypeBlobs(BLOBNBOX_LIST *im_blobs)
void SetTabStops(TabFind *tabgrid)
void compute_bounding_box()
void set_median_size(int x, int y)
BlobTextFlowType flow() const
BlobRegionType blob_type() const
bool TypesMatch(const ColPartition &other) const
int FindBlocks(PageSegMode pageseg_mode, Pix *scaled_color, int scaled_factor, TO_BLOCK *block, Pix *photo_mask_pix, Pix *thresholds_pix, Pix *grey_pix, DebugPixa *pixa_debug, BLOCK_LIST *blocks, BLOBNBOX_LIST *diacritic_blobs, TO_BLOCK_LIST *to_blocks)
static ColPartition * MakeLinePartition(BlobRegionType blob_type, const ICOORD &vertical, int left, int bottom, int right, int top)
void GradeBlobsIntoPartitions(PageSegMode pageseg_mode, const FCOORD &rerotation, TO_BLOCK *block, Pix *nontext_pix, const DENORM *denorm, bool cjk_script, TextlineProjection *projection, BLOBNBOX_LIST *diacritic_blobs, ColPartitionGrid *part_grid, ColPartition_LIST *big_parts)
bool LegalColumnCandidate()
void GridFindMargins(ColPartitionSet **best_columns)
SVEvent * AwaitEvent(SVEventType type)
void set_skew(const FCOORD &skew)
void RemoveLineResidue(ColPartition_LIST *big_part_list)
void AddToColumnSetsIfUnique(PartSetVector *column_sets, WidthCallback *cb)