28 #pragma warning(disable:4244)  // Conversion warnings    33 #include "config_auto.h"    36 #include "allheaders.h"    63 static Pix* RemoveEnclosingCircle(Pix* pixs) {
    64   Pix* pixsi = pixInvert(NULL, pixs);
    65   Pix* pixc = pixCreateTemplate(pixs);
    66   pixSetOrClearBorder(pixc, 1, 1, 1, 1, PIX_SET);
    67   pixSeedfillBinary(pixc, pixc, pixsi, 4);
    68   pixInvert(pixc, pixc);
    70   Pix* pixt = pixAnd(NULL, pixs, pixc);
    72   pixCountConnComp(pixt, 8, &max_count);
    78     pixErodeBrick(pixc, pixc, 3, 3);
    79     pixt = pixAnd(NULL, pixs, pixc);
    81     pixCountConnComp(pixt, 8, &count);
    82     if (i == 1 || count > max_count) {
    85     } 
else if (i > 1 && count < min_count) {
    88       pixout = pixCopy(NULL, pixt);  
    89     } 
else if (count >= min_count) {
   106   int width = pixGetWidth(pix_binary_);
   107   int height = pixGetHeight(pix_binary_);
   113       input_file != NULL && input_file->
length() > 0) {
   114     STRING name = *input_file;
   115     const char* lastdot = strrchr(name.
string(), 
'.');
   117       name[lastdot - name.
string()] = 
'\0';
   120   if (blocks->empty()) {
   123     BLOCK_IT block_it(blocks);
   126     block_it.add_to_end(block);
   137   BLOBNBOX_LIST diacritic_blobs;
   138   int auto_page_seg_ret_val = 0;
   139   TO_BLOCK_LIST to_blocks;
   143         pageseg_mode, blocks, &to_blocks,
   146       return auto_page_seg_ret_val;
   150     deskew_ = 
FCOORD(1.0f, 0.0f);
   151     reskew_ = 
FCOORD(1.0f, 0.0f);
   153       Pix* pixcleaned = RemoveEnclosingCircle(pix_binary_);
   154       if (pixcleaned != NULL) {
   155         pixDestroy(&pix_binary_);
   156         pix_binary_ = pixcleaned;
   161   if (auto_page_seg_ret_val < 0) {
   165   if (blocks->empty()) {
   174   textord_.
TextordPage(pageseg_mode, reskew_, width, height, pix_binary_,
   175                        pix_thresholds_, pix_grey_, splitting || cjk_mode,
   176                        &diacritic_blobs, blocks, &to_blocks);
   177   return auto_page_seg_ret_val;
   205                            TO_BLOCK_LIST* to_blocks,
   206                            BLOBNBOX_LIST* diacritic_blobs, 
Tesseract* osd_tess,
   208   Pix* photomask_pix = NULL;
   209   Pix* musicmask_pix = NULL;
   211   BLOCK_LIST found_blocks;
   212   TO_BLOCK_LIST temp_blocks;
   215       pageseg_mode, blocks, osd_tess, osr, &temp_blocks, &photomask_pix,
   218   if (finder != NULL) {
   219     TO_BLOCK_IT to_block_it(&temp_blocks);
   220     TO_BLOCK* to_block = to_block_it.data();
   221     if (musicmask_pix != NULL) {
   224       pixOr(photomask_pix, photomask_pix, musicmask_pix);
   229     result = finder->
FindBlocks(pageseg_mode, scaled_color_, scaled_factor_,
   230                                 to_block, photomask_pix, pix_thresholds_,
   231                                 pix_grey_, &pixa_debug_, &found_blocks,
   232                                 diacritic_blobs, to_blocks);
   237   pixDestroy(&photomask_pix);
   238   pixDestroy(&musicmask_pix);
   239   if (result < 0) 
return result;
   242   BLOCK_IT block_it(blocks);
   244   block_it.add_list_after(&found_blocks);
   250 static void AddAllScriptsConverted(
const UNICHARSET& sid_set,
   276     OSResults* osr, TO_BLOCK_LIST* to_blocks, Pix** photo_mask_pix,
   277     Pix** music_mask_pix) {
   280   TabVector_LIST v_lines;
   281   TabVector_LIST h_lines;
   286     pixa_debug_.
AddPix(pix_binary_, 
"PageSegInput");
   291                                  &vertical_x, &vertical_y, music_mask_pix,
   294     pixa_debug_.
AddPix(pix_binary_, 
"NoLines");
   299     pixa_debug_.
AddPix(pix_binary_, 
"NoImages");
   306   TO_BLOCK_IT to_block_it(to_blocks);
   310   TO_BLOCK* to_block = to_block_it.data();
   311   TBOX blkbox = to_block->block->bounding_box();
   314   if (to_block->line_size >= 2) {
   315     finder = 
new ColumnFinder(static_cast<int>(to_block->line_size),
   319                               &v_lines, &h_lines, vertical_x, vertical_y);
   327     BLOBNBOX_CLIST osd_blobs;
   332     int osd_orientation = 0;
   339                                           to_block, &osd_blobs);
   341     if (
PSM_OSD_ENABLED(pageseg_mode) && osd_tess != NULL && osr != NULL) {
   343       if (osd_tess != 
this) {
   347         for (
int s = 0; s < sub_langs_.size(); ++s) {
   348           AddAllScriptsConverted(sub_langs_[s]->
unicharset,
   360       for (
int i = 0; i < 4; ++i) {
   361         if (i != osd_orientation &&
   367       const char* best_script_str =
   372           strcmp(
"Japanese", best_script_str) == 0 ||
   373           strcmp(
"Korean", best_script_str) == 0 ||
   374           strcmp(
"Hangul", best_script_str) == 0;
   380         if (!cjk && !vertical_text && osd_orientation == 2) {
   382           tprintf(
"OSD: Weak margin (%.2f), horiz textlines, not CJK: "   383                   "Don't rotate.\n", osd_margin);
   387               "OSD: Weak margin (%.2f) for %d blob text block, "   388               "but using orientation anyway: %d\n",
   389               osd_margin, osd_blobs.length(), osd_orientation);
   393     osd_blobs.shallow_clear();
 double min_orientation_margin
 
bool PSM_SPARSE(int pageseg_mode)
 
bool read_unlv_file(STRING name, inT32 xsize, inT32 ysize, BLOCK_LIST *blocks)
 
double textord_tabfind_vertical_text_ratio
 
void set_right_to_left(bool value)
 
bool PSM_BLOCK_FIND_ENABLED(int pageseg_mode)
 
int pageseg_devanagari_split_strategy
 
void TextordPage(PageSegMode pageseg_mode, const FCOORD &reskew, int width, int height, Pix *binary_pix, Pix *thresholds_pix, Pix *grey_pix, bool use_box_bottoms, BLOBNBOX_LIST *diacritic_blobs, BLOCK_LIST *blocks, TO_BLOCK_LIST *to_blocks)
 
bool PSM_OSD_ENABLED(int pageseg_mode)
 
int AutoPageSeg(PageSegMode pageseg_mode, BLOCK_LIST *blocks, TO_BLOCK_LIST *to_blocks, BLOBNBOX_LIST *diacritic_blobs, Tesseract *osd_tess, OSResults *osr)
 
void SetEquationDetect(EquationDetectBase *detect)
 
ColumnFinder * SetupPageSegAndDetectOrientation(PageSegMode pageseg_mode, BLOCK_LIST *blocks, Tesseract *osd_tess, OSResults *osr, TO_BLOCK_LIST *to_blocks, Pix **photo_mask_pix, Pix **music_mask_pix)
 
const char * get_script_from_script_id(int id) const
 
void set_cjk_script(bool is_cjk)
 
bool textord_tabfind_show_vlines
 
Assume a single uniform block of text. (Default.) 
 
int LabelSpecialText(TO_BLOCK *to_block)
 
const char * string() const
 
int tessedit_pageseg_mode
 
bool tessedit_dump_pageseg_images
 
bool textord_use_cjk_fp_model
 
bool IsVerticallyAlignedText(double find_vertical_text_ratio, TO_BLOCK *block, BLOBNBOX_CLIST *osd_blobs)
 
static Pix * FindImages(Pix *pix, DebugPixa *pixa_debug)
 
bool textord_tabfind_vertical_text
 
void GetDeskewVectors(FCOORD *deskew, FCOORD *reskew)
 
void SetupAndFilterNoise(PageSegMode pageseg_mode, Pix *photo_mask_pix, TO_BLOCK *input_block)
 
int get_script_table_size() const
 
int textord_debug_tabfind
 
bool enable_noise_removal
 
Treat the image as a single word in a circle. 
 
void find_components(Pix *pix, BLOCK_LIST *blocks, TO_BLOCK_LIST *to_blocks)
 
const ICOORD & topright() const
 
bool PSM_COL_FIND_ENABLED(int pageseg_mode)
 
const int kMaxCircleErosions
 
int get_script_id_from_name(const char *script_name) const
 
void CorrectOrientation(TO_BLOCK *block, bool vertical_text_lines, int recognition_rotation)
 
void AddPix(const Pix *pix, const char *caption)
 
int SegmentPage(const STRING *input_file, BLOCK_LIST *blocks, Tesseract *osd_tess, OSResults *osr)
 
const ICOORD & botleft() const
 
int os_detect_blobs(const GenericVector< int > *allowed_scripts, BLOBNBOX_CLIST *blob_list, OSResults *osr, tesseract::Tesseract *tess)
 
static void FindAndRemoveLines(int resolution, bool debug, Pix *pix, int *vertical_x, int *vertical_y, Pix **pix_music_mask, TabVector_LIST *v_lines, TabVector_LIST *h_lines)
 
bool right_to_left() const
 
Orientation and script detection only. 
 
double textord_tabfind_aligned_gap_fraction
 
bool PSM_ORIENTATION_ENABLED(int pageseg_mode)
 
int FindBlocks(PageSegMode pageseg_mode, Pix *scaled_color, int scaled_factor, TO_BLOCK *block, Pix *photo_mask_pix, Pix *thresholds_pix, Pix *grey_pix, DebugPixa *pixa_debug, BLOCK_LIST *blocks, BLOBNBOX_LIST *diacritic_blobs, TO_BLOCK_LIST *to_blocks)
 
bool textord_tabfind_force_vertical_text