tesseract  4.00.00dev
text2image.cpp File Reference
#include <stdlib.h>
#include <string.h>
#include <algorithm>
#include <iostream>
#include <map>
#include <string>
#include <utility>
#include <vector>
#include "allheaders.h"
#include "boxchar.h"
#include "commandlineflags.h"
#include "degradeimage.h"
#include "errcode.h"
#include "fileio.h"
#include "helpers.h"
#include "normstrngs.h"
#include "stringrenderer.h"
#include "tlog.h"
#include "unicharset.h"
#include "util.h"

Go to the source code of this file.

Classes

struct  tesseract::SpacingProperties
 

Namespaces

 tesseract
 

Functions

 STRING_PARAM_FLAG (text, "", "File name of text input to process")
 
 STRING_PARAM_FLAG (outputbase, "", "Basename for output image/box file")
 
 BOOL_PARAM_FLAG (degrade_image, true, "Degrade rendered image with speckle noise, dilation/erosion " "and rotation")
 
 BOOL_PARAM_FLAG (rotate_image, true, "Rotate the image in a random way.")
 
 INT_PARAM_FLAG (exposure, 0, "Exposure level in photocopier")
 
 INT_PARAM_FLAG (resolution, 300, "Pixels per inch")
 
 INT_PARAM_FLAG (xsize, 3600, "Width of output image")
 
 INT_PARAM_FLAG (ysize, 4800, "Height of output image")
 
 INT_PARAM_FLAG (margin, 100, "Margin round edges of image")
 
 INT_PARAM_FLAG (ptsize, 12, "Size of printed text")
 
 DOUBLE_PARAM_FLAG (char_spacing, 0, "Inter-character space in ems")
 
 DOUBLE_PARAM_FLAG (underline_start_prob, 0, "Fraction of words to underline (value in [0,1])")
 
 DOUBLE_PARAM_FLAG (underline_continuation_prob, 0, "Fraction of words to underline (value in [0,1])")
 
 INT_PARAM_FLAG (leading, 12, "Inter-line space (in pixels)")
 
 STRING_PARAM_FLAG (writing_mode, "horizontal", "Specify one of the following writing" " modes.\n" "'horizontal' :Render regular horizontal text.(default)\n" "'vertical' :Render vertical text. Glyph orientation is" " selected by Pango.\n" "'vertical-upright' :Render vertical text. Glyph " " orientation is set to be upright.")
 
 INT_PARAM_FLAG (box_padding, 0, "Padding around produced bounding boxes")
 
 BOOL_PARAM_FLAG (strip_unrenderable_words, true, "Remove unrenderable words from source text")
 
 STRING_PARAM_FLAG (font, "Arial", "Font description name to use")
 
 BOOL_PARAM_FLAG (ligatures, false, "Rebuild and render ligatures")
 
 BOOL_PARAM_FLAG (find_fonts, false, "Search for all fonts that can render the text")
 
 BOOL_PARAM_FLAG (render_per_font, true, "If find_fonts==true, render each font to its own image. " "Image filenames are of the form output_name.font_name.tif")
 
 DOUBLE_PARAM_FLAG (min_coverage, 1.0, "If find_fonts==true, the minimum coverage the font has of " "the characters in the text file to include it, between " "0 and 1.")
 
 BOOL_PARAM_FLAG (list_available_fonts, false, "List available fonts and quit.")
 
 BOOL_PARAM_FLAG (render_ngrams, false, "Put each space-separated entity from the" " input file into one bounding box. The ngrams in the input" " file will be randomly permuted before rendering (so that" " there is sufficient variety of characters on each line).")
 
 BOOL_PARAM_FLAG (output_word_boxes, false, "Output word bounding boxes instead of character boxes. " "This is used for Cube training, and implied by " "--render_ngrams.")
 
 STRING_PARAM_FLAG (unicharset_file, "", "File with characters in the unicharset. If --render_ngrams" " is true and --unicharset_file is specified, ngrams with" " characters that are not in unicharset will be omitted")
 
 BOOL_PARAM_FLAG (bidirectional_rotation, false, "Rotate the generated characters both ways.")
 
 BOOL_PARAM_FLAG (only_extract_font_properties, false, "Assumes that the input file contains a list of ngrams. Renders" " each ngram, extracts spacing properties and records them in" " output_base/[font_name].fontinfo file.")
 
 BOOL_PARAM_FLAG (output_individual_glyph_images, false, "If true also outputs individual character images")
 
 INT_PARAM_FLAG (glyph_resized_size, 0, "Each glyph is square with this side length in pixels")
 
 INT_PARAM_FLAG (glyph_num_border_pixels_to_pad, 0, "Final_size=glyph_resized_size+2*glyph_num_border_pixels_to_pad")
 
void tesseract::ExtractFontProperties (const string &utf8_text, StringRenderer *render, const string &output_base)
 
bool tesseract::MakeIndividualGlyphs (Pix *pix, const std::vector< BoxChar *> &vbox, const int input_tiff_page)
 
int main (int argc, char **argv)
 

Variables

const int kRandomSeed = 0x18273645
 

Function Documentation

◆ BOOL_PARAM_FLAG() [1/12]

BOOL_PARAM_FLAG ( degrade_image  ,
true  ,
"Degrade rendered image with speckle noise, dilation/erosion " "and rotation"   
)

◆ BOOL_PARAM_FLAG() [2/12]

BOOL_PARAM_FLAG ( rotate_image  ,
true  ,
"Rotate the image in a random way."   
)

◆ BOOL_PARAM_FLAG() [3/12]

BOOL_PARAM_FLAG ( strip_unrenderable_words  ,
true  ,
"Remove unrenderable words from source text"   
)

◆ BOOL_PARAM_FLAG() [4/12]

BOOL_PARAM_FLAG ( ligatures  ,
false  ,
"Rebuild and render ligatures"   
)

◆ BOOL_PARAM_FLAG() [5/12]

BOOL_PARAM_FLAG ( find_fonts  ,
false  ,
"Search for all fonts that can render the text"   
)

◆ BOOL_PARAM_FLAG() [6/12]

BOOL_PARAM_FLAG ( render_per_font  ,
true  ,
"If find_fonts==true, render each font to its own image. " "Image filenames are of the form output_name.font_name.tif"   
)

◆ BOOL_PARAM_FLAG() [7/12]

BOOL_PARAM_FLAG ( list_available_fonts  ,
false  ,
"List available fonts and quit."   
)

◆ BOOL_PARAM_FLAG() [8/12]

BOOL_PARAM_FLAG ( render_ngrams  ,
false  ,
"Put each space-separated entity from the" " input file into one bounding box. The ngrams in the input" " file will be randomly permuted before rendering (so that" " there is sufficient variety of characters on each line)."   
)

◆ BOOL_PARAM_FLAG() [9/12]

BOOL_PARAM_FLAG ( output_word_boxes  ,
false  ,
"Output word bounding boxes instead of character boxes. " "This is used for Cube training, and implied by " "--render_ngrams."   
)

◆ BOOL_PARAM_FLAG() [10/12]

BOOL_PARAM_FLAG ( bidirectional_rotation  ,
false  ,
"Rotate the generated characters both ways."   
)

◆ BOOL_PARAM_FLAG() [11/12]

BOOL_PARAM_FLAG ( only_extract_font_properties  ,
false  ,
"Assumes that the input file contains a list of ngrams. Renders" " each ngram, extracts spacing properties and records them in" " output_base/[font_name].fontinfo file."   
)

◆ BOOL_PARAM_FLAG() [12/12]

BOOL_PARAM_FLAG ( output_individual_glyph_images  ,
false  ,
"If true also outputs individual character images"   
)

◆ DOUBLE_PARAM_FLAG() [1/4]

DOUBLE_PARAM_FLAG ( char_spacing  ,
0  ,
"Inter-character space in ems"   
)

◆ DOUBLE_PARAM_FLAG() [2/4]

DOUBLE_PARAM_FLAG ( underline_start_prob  ,
0  ,
"Fraction of words to underline (value in [0,1])"   
)

◆ DOUBLE_PARAM_FLAG() [3/4]

DOUBLE_PARAM_FLAG ( underline_continuation_prob  ,
0  ,
"Fraction of words to underline (value in [0,1])"   
)

◆ DOUBLE_PARAM_FLAG() [4/4]

DOUBLE_PARAM_FLAG ( min_coverage  ,
1.0  ,
"If find_fonts==true, the minimum coverage the font has of " "the characters in the text file to include it, between " "0 and 1."   
)

◆ INT_PARAM_FLAG() [1/10]

INT_PARAM_FLAG ( exposure  ,
0  ,
"Exposure level in photocopier"   
)

◆ INT_PARAM_FLAG() [2/10]

INT_PARAM_FLAG ( resolution  ,
300  ,
"Pixels per inch"   
)

◆ INT_PARAM_FLAG() [3/10]

INT_PARAM_FLAG ( xsize  ,
3600  ,
"Width of output image"   
)

◆ INT_PARAM_FLAG() [4/10]

INT_PARAM_FLAG ( ysize  ,
4800  ,
"Height of output image"   
)

◆ INT_PARAM_FLAG() [5/10]

INT_PARAM_FLAG ( margin  ,
100  ,
"Margin round edges of image"   
)

◆ INT_PARAM_FLAG() [6/10]

INT_PARAM_FLAG ( ptsize  ,
12  ,
"Size of printed text"   
)

◆ INT_PARAM_FLAG() [7/10]

INT_PARAM_FLAG ( leading  ,
12  ,
"Inter-line space (in pixels)"   
)

◆ INT_PARAM_FLAG() [8/10]

INT_PARAM_FLAG ( box_padding  ,
0  ,
"Padding around produced bounding boxes"   
)

◆ INT_PARAM_FLAG() [9/10]

INT_PARAM_FLAG ( glyph_resized_size  ,
0  ,
"Each glyph is square with this side length in pixels"   
)

◆ INT_PARAM_FLAG() [10/10]

INT_PARAM_FLAG ( glyph_num_border_pixels_to_pad  ,
0  ,
"Final_size=glyph_resized_size+2*glyph_num_border_pixels_to_pad"   
)

◆ main()

int main ( int  argc,
char **  argv 
)

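Note (review): the description below does not match the source listing that follows, which parses command-line flags, renders the input text to TIFF images and writes box files; it appears to have been picked up from the main() of a different training tool and should be confirmed against the source.

This program reads in a text file consisting of feature samples from a training page in the following format: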

   FontName UTF8-char-str xmin ymin xmax ymax page-number
    NumberOfFeatureTypes(N)
      FeatureTypeName1 NumberOfFeatures(M)
         Feature1
         ...
         FeatureM
      FeatureTypeName2 NumberOfFeatures(M)
         Feature1
         ...
         FeatureM
      ...
      FeatureTypeNameN NumberOfFeatures(M)
         Feature1
         ...
         FeatureM
   FontName CharName ...

The result of this program is a binary inttemp file used by the OCR engine.

Parameters
argc: number of command line arguments
argv: array of command line arguments
Returns
none
Note
Exceptions: none
History: Fri Aug 18 08:56:17 1989, DSJ, Created.
History: Mon May 18 1998, Christy Russson, Revision started.

Definition at line 418 of file text2image.cpp.

418  {
419  tesseract::ParseCommandLineFlags(argv[0], &argc, &argv, true);
420 
421  if (FLAGS_list_available_fonts) {
422  const std::vector<string>& all_fonts = FontUtils::ListAvailableFonts();
423  for (unsigned int i = 0; i < all_fonts.size(); ++i) {
424  printf("%3u: %s\n", i, all_fonts[i].c_str());
425  ASSERT_HOST_MSG(FontUtils::IsAvailableFont(all_fonts[i].c_str()),
426  "Font %s is unrecognized.\n", all_fonts[i].c_str());
427  }
428  return EXIT_SUCCESS;
429  }
430 
431  // Check validity of input flags.
432  if (FLAGS_text.empty()) {
433  tprintf("'--text' option is missing!\n");
434  exit(1);
435  }
436  if (FLAGS_outputbase.empty()) {
437  tprintf("'--outputbase' option is missing!\n");
438  exit(1);
439  }
440  if (!FLAGS_unicharset_file.empty() && FLAGS_render_ngrams) {
441  tprintf("Use '--unicharset_file' only if '--render_ngrams' is set.\n");
442  exit(1);
443  }
444 
445  if (!FLAGS_find_fonts && !FontUtils::IsAvailableFont(FLAGS_font.c_str())) {
446  string pango_name;
447  if (!FontUtils::IsAvailableFont(FLAGS_font.c_str(), &pango_name)) {
448  tprintf("Could not find font named %s.\n", FLAGS_font.c_str());
449  if (!pango_name.empty()) {
450  tprintf("Pango suggested font %s.\n", pango_name.c_str());
451  }
452  tprintf("Please correct --font arg.\n");
453  exit(1);
454  }
455  }
456 
457  if (FLAGS_render_ngrams)
458  FLAGS_output_word_boxes = true;
459 
460  char font_desc_name[1024];
461  snprintf(font_desc_name, 1024, "%s %d", FLAGS_font.c_str(),
462  static_cast<int>(FLAGS_ptsize));
463  StringRenderer render(font_desc_name, FLAGS_xsize, FLAGS_ysize);
464  render.set_add_ligatures(FLAGS_ligatures);
465  render.set_leading(FLAGS_leading);
466  render.set_resolution(FLAGS_resolution);
467  render.set_char_spacing(FLAGS_char_spacing * FLAGS_ptsize);
468  render.set_h_margin(FLAGS_margin);
469  render.set_v_margin(FLAGS_margin);
470  render.set_output_word_boxes(FLAGS_output_word_boxes);
471  render.set_box_padding(FLAGS_box_padding);
472  render.set_strip_unrenderable_words(FLAGS_strip_unrenderable_words);
473  render.set_underline_start_prob(FLAGS_underline_start_prob);
474  render.set_underline_continuation_prob(FLAGS_underline_continuation_prob);
475 
476  // Set text rendering orientation and their forms.
477  if (FLAGS_writing_mode == "horizontal") {
478  // Render regular horizontal text (default).
479  render.set_vertical_text(false);
480  render.set_gravity_hint_strong(false);
481  render.set_render_fullwidth_latin(false);
482  } else if (FLAGS_writing_mode == "vertical") {
483  // Render vertical text. Glyph orientation is selected by Pango.
484  render.set_vertical_text(true);
485  render.set_gravity_hint_strong(false);
486  render.set_render_fullwidth_latin(false);
487  } else if (FLAGS_writing_mode == "vertical-upright") {
488  // Render vertical text. Glyph orientation is set to be upright.
489  // Also Basic Latin characters are converted to their fullwidth forms
490  // on rendering, since fullwidth Latin characters are well designed to fit
491  // vertical text lines, while .box files store halfwidth Basic Latin
492  // unichars.
493  render.set_vertical_text(true);
494  render.set_gravity_hint_strong(true);
495  render.set_render_fullwidth_latin(true);
496  } else {
497  tprintf("Invalid writing mode: %s\n", FLAGS_writing_mode.c_str());
498  exit(1);
499  }
500 
501  string src_utf8;
502  // This c_str is NOT redundant!
503  if (!File::ReadFileToString(FLAGS_text.c_str(), &src_utf8)) {
504  tprintf("Failed to read file: %s\n", FLAGS_text.c_str());
505  exit(1);
506  }
507 
508  // Remove the unicode mark if present.
509  if (strncmp(src_utf8.c_str(), "\xef\xbb\xbf", 3) == 0) {
510  src_utf8.erase(0, 3);
511  }
512  tlog(1, "Render string of size %d\n", src_utf8.length());
513 
514  if (FLAGS_render_ngrams || FLAGS_only_extract_font_properties) {
515  // Try to preserve behavior of old text2image by expanding inter-word
516  // spaces by a factor of 4.
517  const string kSeparator = FLAGS_render_ngrams ? " " : " ";
518  // Also restrict the number of characters per line to try and avoid
519  // line-breaking in the middle of words like "-A", "R$" etc. which are
520  // otherwise allowed by the standard unicode line-breaking rules.
521  const unsigned int kCharsPerLine = (FLAGS_ptsize > 20) ? 50 : 100;
522  string rand_utf8;
523  UNICHARSET unicharset;
524  if (FLAGS_render_ngrams && !FLAGS_unicharset_file.empty() &&
525  !unicharset.load_from_file(FLAGS_unicharset_file.c_str())) {
526  tprintf("Failed to load unicharset from file %s\n",
527  FLAGS_unicharset_file.c_str());
528  exit(1);
529  }
530 
531  // If we are rendering ngrams that will be OCRed later, shuffle them so that
532  // tesseract does not have difficulties finding correct baseline, word
533  // spaces, etc.
534  const char *str8 = src_utf8.c_str();
535  int len = src_utf8.length();
536  int step;
537  std::vector<std::pair<int, int> > offsets;
538  int offset = SpanUTF8Whitespace(str8);
539  while (offset < len) {
540  step = SpanUTF8NotWhitespace(str8 + offset);
541  offsets.push_back(std::make_pair(offset, step));
542  offset += step;
543  offset += SpanUTF8Whitespace(str8 + offset);
544  }
545  if (FLAGS_render_ngrams)
546  std::random_shuffle(offsets.begin(), offsets.end());
547 
548  for (size_t i = 0, line = 1; i < offsets.size(); ++i) {
549  const char *curr_pos = str8 + offsets[i].first;
550  int ngram_len = offsets[i].second;
551  // Skip words that contain characters not in found in unicharset.
552  if (!FLAGS_unicharset_file.empty() &&
553  !unicharset.encodable_string(curr_pos, nullptr)) {
554  continue;
555  }
556  rand_utf8.append(curr_pos, ngram_len);
557  if (rand_utf8.length() > line * kCharsPerLine) {
558  rand_utf8.append(" \n");
559  ++line;
560  if (line & 0x1) rand_utf8.append(kSeparator);
561  } else {
562  rand_utf8.append(kSeparator);
563  }
564  }
565  tlog(1, "Rendered ngram string of size %d\n", rand_utf8.length());
566  src_utf8.swap(rand_utf8);
567  }
568  if (FLAGS_only_extract_font_properties) {
569  tprintf("Extracting font properties only\n");
570  ExtractFontProperties(src_utf8, &render, FLAGS_outputbase.c_str());
571  tprintf("Done!\n");
572  return 0;
573  }
574 
575  int im = 0;
576  std::vector<float> page_rotation;
577  const char* to_render_utf8 = src_utf8.c_str();
578 
579  tesseract::TRand randomizer;
580  randomizer.set_seed(kRandomSeed);
581  std::vector<string> font_names;
582  // We use a two pass mechanism to rotate images in both direction.
583  // The first pass(0) will rotate the images in random directions and
584  // the second pass(1) will mirror those rotations.
585  int num_pass = FLAGS_bidirectional_rotation ? 2 : 1;
586  for (int pass = 0; pass < num_pass; ++pass) {
587  int page_num = 0;
588  string font_used;
589  for (size_t offset = 0; offset < strlen(to_render_utf8); ++im, ++page_num) {
590  tlog(1, "Starting page %d\n", im);
591  Pix* pix = nullptr;
592  if (FLAGS_find_fonts) {
593  offset += render.RenderAllFontsToImage(FLAGS_min_coverage,
594  to_render_utf8 + offset,
595  strlen(to_render_utf8 + offset),
596  &font_used, &pix);
597  } else {
598  offset += render.RenderToImage(to_render_utf8 + offset,
599  strlen(to_render_utf8 + offset), &pix);
600  }
601  if (pix != nullptr) {
602  float rotation = 0;
603  if (pass == 1) {
604  // Pass 2, do mirror rotation.
605  rotation = -1 * page_rotation[page_num];
606  }
607  if (FLAGS_degrade_image) {
608  pix = DegradeImage(pix, FLAGS_exposure, &randomizer,
609  FLAGS_rotate_image ? &rotation : nullptr);
610  }
611  render.RotatePageBoxes(rotation);
612 
613  if (pass == 0) {
614  // Pass 1, rotate randomly and store the rotation..
615  page_rotation.push_back(rotation);
616  }
617 
618  Pix* gray_pix = pixConvertTo8(pix, false);
619  pixDestroy(&pix);
620  Pix* binary = pixThresholdToBinary(gray_pix, 128);
621  pixDestroy(&gray_pix);
622  char tiff_name[1024];
623  if (FLAGS_find_fonts) {
624  if (FLAGS_render_per_font) {
625  string fontname_for_file = tesseract::StringReplace(
626  font_used, " ", "_");
627  snprintf(tiff_name, 1024, "%s.%s.tif", FLAGS_outputbase.c_str(),
628  fontname_for_file.c_str());
629  pixWriteTiff(tiff_name, binary, IFF_TIFF_G4, "w");
630  tprintf("Rendered page %d to file %s\n", im, tiff_name);
631  } else {
632  font_names.push_back(font_used);
633  }
634  } else {
635  snprintf(tiff_name, 1024, "%s.tif", FLAGS_outputbase.c_str());
636  pixWriteTiff(tiff_name, binary, IFF_TIFF_G4, im == 0 ? "w" : "a");
637  tprintf("Rendered page %d to file %s\n", im, tiff_name);
638  }
639  // Make individual glyphs
640  if (FLAGS_output_individual_glyph_images) {
641  if (!MakeIndividualGlyphs(binary, render.GetBoxes(), im)) {
642  tprintf("ERROR: Individual glyphs not saved\n");
643  }
644  }
645  pixDestroy(&binary);
646  }
647  if (FLAGS_find_fonts && offset != 0) {
648  // We just want a list of names, or some sample images so we don't need
649  // to render more than the first page of the text.
650  break;
651  }
652  }
653  }
654  if (!FLAGS_find_fonts) {
655  string box_name = FLAGS_outputbase.c_str();
656  box_name += ".box";
657  render.WriteAllBoxes(box_name);
658  } else if (!FLAGS_render_per_font && !font_names.empty()) {
659  string filename = FLAGS_outputbase.c_str();
660  filename += ".fontlist.txt";
661  FILE* fp = fopen(filename.c_str(), "wb");
662  if (fp == nullptr) {
663  tprintf("Failed to create output font list %s\n", filename.c_str());
664  } else {
665  for (size_t i = 0; i < font_names.size(); ++i) {
666  fprintf(fp, "%s\n", font_names[i].c_str());
667  }
668  fclose(fp);
669  }
670  }
671 
672  return 0;
673 }
int SpanUTF8NotWhitespace(const char *text)
Definition: normstrngs.cpp:205
#define tlog(level,...)
Definition: tlog.h:33
#define ASSERT_HOST_MSG(x,...)
Definition: errcode.h:90
void ExtractFontProperties(const string &utf8_text, StringRenderer *render, const string &output_base)
Definition: text2image.cpp:213
bool MakeIndividualGlyphs(Pix *pix, const std::vector< BoxChar *> &vbox, const int input_tiff_page)
Definition: text2image.cpp:313
#define tprintf(...)
Definition: tprintf.h:31
voidpf uLong offset
Definition: ioapi.h:42
bool encodable_string(const char *str, int *first_bad_position) const
Definition: unicharset.cpp:222
bool load_from_file(const char *const filename, bool skip_fragments)
Definition: unicharset.h:348
Pix * DegradeImage(Pix *input, int exposure, TRand *randomizer, float *rotation)
const char * filename
Definition: ioapi.h:38
const int kRandomSeed
Definition: text2image.cpp:60
void set_seed(uinT64 seed)
Definition: helpers.h:45
int SpanUTF8Whitespace(const char *text)
Definition: normstrngs.cpp:194
void ParseCommandLineFlags(const char *usage, int *argc, char ***argv, const bool remove_flags)

◆ STRING_PARAM_FLAG() [1/5]

STRING_PARAM_FLAG ( text  ,
""  ,
"File name of text input to process"   
)

◆ STRING_PARAM_FLAG() [2/5]

STRING_PARAM_FLAG ( outputbase  ,
""  ,
"Basename for output image/box file"   
)

◆ STRING_PARAM_FLAG() [3/5]

STRING_PARAM_FLAG ( writing_mode  ,
"horizontal"  ,
"Specify one of the following writing" " modes.\n" "'horizontal' :Render regular horizontal text.(default)\n" "'vertical' :Render vertical text. Glyph orientation is" " selected by Pango.\n" "'vertical-upright' :Render vertical text. Glyph " " orientation is set to be upright."   
)

◆ STRING_PARAM_FLAG() [4/5]

STRING_PARAM_FLAG ( font  ,
"Arial"  ,
"Font description name to use"   
)

◆ STRING_PARAM_FLAG() [5/5]

STRING_PARAM_FLAG ( unicharset_file  ,
""  ,
"File with characters in the unicharset. If --render_ngrams" " is true and --unicharset_file is specified, ngrams with" " characters that are not in unicharset will be omitted"   
)

Variable Documentation

◆ kRandomSeed

const int kRandomSeed = 0x18273645

Definition at line 60 of file text2image.cpp.