tesseract  4.00.00dev
tesseractmain.cpp
Go to the documentation of this file.
1 /**********************************************************************
2 * File: tessedit.cpp (Formerly tessedit.c)
3 * Description: Main program for merge of tess and editor.
4 * Author: Ray Smith
5 * Created: Tue Jan 07 15:21:46 GMT 1992
6 *
7 * (C) Copyright 1992, Hewlett-Packard Ltd.
8 ** Licensed under the Apache License, Version 2.0 (the "License");
9 ** you may not use this file except in compliance with the License.
10 ** You may obtain a copy of the License at
11 ** http://www.apache.org/licenses/LICENSE-2.0
12 ** Unless required by applicable law or agreed to in writing, software
13 ** distributed under the License is distributed on an "AS IS" BASIS,
14 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 ** See the License for the specific language governing permissions and
16 ** limitations under the License.
17 *
18 **********************************************************************/
19 
20 // Include automatically generated configuration file if running autoconf
21 #ifdef HAVE_CONFIG_H
22 #include "config_auto.h"
23 #endif
24 
25 #include <iostream>
26 
27 #include "allheaders.h"
28 #include "baseapi.h"
29 #include "basedir.h"
30 #include "dict.h"
31 #include "openclwrapper.h"
32 #include "osdetect.h"
33 #include "renderer.h"
34 #include "simddetect.h"
35 #include "strngs.h"
36 #include "tprintf.h"
37 
38 #if defined(HAVE_TIFFIO_H) && defined(_WIN32)
39 
40 #include <tiffio.h>
41 
/**
 * Warning callback installed with TIFFSetWarningHandler() on Windows builds.
 *
 * Writes the warning to stderr as "<module>: Warning, <message>.\n", where
 * <message> is fmt expanded with the arguments in ap. The "<module>: "
 * prefix is left out when module is NULL.
 */
static void Win32WarningHandler(const char* module, const char* fmt,
                                va_list ap) {
  if (module != NULL) fprintf(stderr, "%s: ", module);
  fputs("Warning, ", stderr);
  vfprintf(stderr, fmt, ap);
  fputs(".\n", stderr);
}
51 
52 #endif /* HAVE_TIFFIO_H && _WIN32 */
53 
55  char* versionStrP;
56 
57  printf("tesseract %s\n", tesseract::TessBaseAPI::Version());
58 
59  versionStrP = getLeptonicaVersion();
60  printf(" %s\n", versionStrP);
61  lept_free(versionStrP);
62 
63  versionStrP = getImagelibVersions();
64  printf(" %s\n", versionStrP);
65  lept_free(versionStrP);
66 
67 #ifdef USE_OPENCL
68  cl_platform_id platform[4];
69  cl_uint num_platforms;
70 
71  printf(" OpenCL info:\n");
72  if (clGetPlatformIDs(4, platform, &num_platforms) == CL_SUCCESS) {
73  printf(" Found %u platform(s).\n", num_platforms);
74  for (unsigned n = 0; n < num_platforms; n++) {
75  char info[256];
76  if (clGetPlatformInfo(platform[n], CL_PLATFORM_NAME, 256, info, 0) ==
77  CL_SUCCESS) {
78  printf(" Platform %u name: %s.\n", n + 1, info);
79  }
80  if (clGetPlatformInfo(platform[n], CL_PLATFORM_VERSION, 256, info, 0) ==
81  CL_SUCCESS) {
82  printf(" Version: %s.\n", info);
83  }
84  cl_device_id devices[2];
85  cl_uint num_devices;
86  if (clGetDeviceIDs(platform[n], CL_DEVICE_TYPE_ALL, 2, devices,
87  &num_devices) == CL_SUCCESS) {
88  printf(" Found %u device(s).\n", num_devices);
89  for (unsigned i = 0; i < num_devices; ++i) {
90  if (clGetDeviceInfo(devices[i], CL_DEVICE_NAME, 256, info, 0) ==
91  CL_SUCCESS) {
92  printf(" Device %u name: %s.\n", i + 1, info);
93  }
94  }
95  }
96  }
97  }
98 #endif
99  if (SIMDDetect::IsAVXAvailable()) printf(" Found AVX\n");
100  if (SIMDDetect::IsSSEAvailable()) printf(" Found SSE\n");
101 }
102 
/**
 * Prints the four supported command-line forms to stdout.
 *
 * @param program  Name shown as the executable in each usage line.
 */
void PrintUsage(const char* program) {
  printf("Usage:\n");
  printf(" %s --help | --help-psm | --help-oem | --version\n", program);
  printf(" %s --list-langs [--tessdata-dir PATH]\n", program);
  printf(" %s --print-parameters [options...] [configfile...]\n", program);
  printf(" %s imagename|stdin outputbase|stdout [options...] [configfile...]\n",
         program);
}
112 
/**
 * Prints the list of page segmentation modes (the values accepted by
 * --psm) to stdout.
 */
void PrintHelpForPSM() {
  const char* msg =
      "Page segmentation modes:\n"
      " 0 Orientation and script detection (OSD) only.\n"
      " 1 Automatic page segmentation with OSD.\n"
      " 2 Automatic page segmentation, but no OSD, or OCR.\n"
      " 3 Fully automatic page segmentation, but no OSD. (Default)\n"
      " 4 Assume a single column of text of variable sizes.\n"
      " 5 Assume a single uniform block of vertically aligned text.\n"
      " 6 Assume a single uniform block of text.\n"
      " 7 Treat the image as a single text line.\n"
      " 8 Treat the image as a single word.\n"
      " 9 Treat the image as a single word in a circle.\n"
      " 10 Treat the image as a single character.\n"
      " 11 Sparse text. Find as much text as possible in no"
      " particular order.\n"
      " 12 Sparse text with OSD.\n"
      " 13 Raw line. Treat the image as a single text line,\n"
      "\t\t\tbypassing hacks that are Tesseract-specific.\n";

  printf("%s", msg);
}
135 
/**
 * Prints the list of OCR engine modes (the values accepted by --oem) to
 * stdout.
 */
void PrintHelpForOEM() {
  const char* msg =
      "OCR Engine modes:\n"
      " 0 Original Tesseract only.\n"
      " 1 Neural nets LSTM only.\n"
      " 2 Tesseract + LSTM.\n"
      " 3 Default, based on what is available.\n";

  printf("%s", msg);
}
146 
147 void PrintHelpMessage(const char* program) {
148  PrintUsage(program);
149 
150  const char* ocr_options =
151  "OCR options:\n"
152  " --tessdata-dir PATH Specify the location of tessdata path.\n"
153  " --user-words PATH Specify the location of user words file.\n"
154  " --user-patterns PATH Specify the location of user patterns file.\n"
155  " -l LANG[+LANG] Specify language(s) used for OCR.\n"
156  " -c VAR=VALUE Set value for config variables.\n"
157  " Multiple -c arguments are allowed.\n"
158  " --psm NUM Specify page segmentation mode.\n"
159  " --oem NUM Specify OCR Engine mode.\n"
160  "NOTE: These options must occur before any configfile.\n";
161 
162  printf("\n%s\n", ocr_options);
163  PrintHelpForPSM();
164  PrintHelpForOEM();
165 
166  const char* single_options =
167  "Single options:\n"
168  " -h, --help Show this help message.\n"
169  " --help-psm Show page segmentation modes.\n"
170  " --help-oem Show OCR Engine modes.\n"
171  " -v, --version Show version information.\n"
172  " --list-langs List available languages for tesseract engine.\n"
173  " --print-parameters Print tesseract parameters.\n";
174 
175  printf("\n%s", single_options);
176 }
177 
179  char** argv) {
180  char opt1[256], opt2[255];
181  for (int i = 0; i < argc; i++) {
182  if (strcmp(argv[i], "-c") == 0 && i + 1 < argc) {
183  strncpy(opt1, argv[i + 1], 255);
184  opt1[255] = '\0';
185  char* p = strchr(opt1, '=');
186  if (!p) {
187  fprintf(stderr, "Missing = in configvar assignment\n");
188  exit(1);
189  }
190  *p = 0;
191  strncpy(opt2, strchr(argv[i + 1], '=') + 1, 255);
192  opt2[254] = 0;
193  ++i;
194 
195  if (!api->SetVariable(opt1, opt2)) {
196  fprintf(stderr, "Could not set option: %s=%s\n", opt1, opt2);
197  }
198  }
199  }
200 }
201 
203  GenericVector<STRING> languages;
204  api->GetAvailableLanguagesAsVector(&languages);
205  printf("List of available languages (%d):\n", languages.size());
206  for (int index = 0; index < languages.size(); ++index) {
207  STRING& string = languages[index];
208  printf("%s\n", string.string());
209  }
210  api->End();
211 }
212 
213 void PrintBanner() {
214  tprintf("Tesseract Open Source OCR Engine v%s with Leptonica\n",
216 }
217 
233  tesseract::PageSegMode pagesegmode) {
235  api->SetPageSegMode(pagesegmode);
236 }
237 
238 // NOTE: arg_i is used here to avoid ugly *i so many times in this function
239 void ParseArgs(const int argc, char** argv, const char** lang,
240  const char** image, const char** outputbase,
241  const char** datapath, bool* list_langs, bool* print_parameters,
242  GenericVector<STRING>* vars_vec,
243  GenericVector<STRING>* vars_values, int* arg_i,
244  tesseract::PageSegMode* pagesegmode,
245  tesseract::OcrEngineMode* enginemode) {
246  if (argc == 1) {
247  PrintHelpMessage(argv[0]);
248  exit(0);
249  }
250 
251  if (argc == 2) {
252  if ((strcmp(argv[1], "-h") == 0) || (strcmp(argv[1], "--help") == 0)) {
253  PrintHelpMessage(argv[0]);
254  exit(0);
255  }
256  if ((strcmp(argv[1], "--help-psm") == 0)) {
257  PrintHelpForPSM();
258  exit(0);
259  }
260  if ((strcmp(argv[1], "--help-oem") == 0)) {
261  PrintHelpForOEM();
262  exit(0);
263  }
264  if ((strcmp(argv[1], "-v") == 0) || (strcmp(argv[1], "--version") == 0)) {
266  exit(0);
267  }
268  }
269 
270  bool noocr = false;
271  int i = 1;
272  while (i < argc && (*outputbase == NULL || argv[i][0] == '-')) {
273  if (strcmp(argv[i], "-l") == 0 && i + 1 < argc) {
274  *lang = argv[i + 1];
275  ++i;
276  } else if (strcmp(argv[i], "--tessdata-dir") == 0 && i + 1 < argc) {
277  *datapath = argv[i + 1];
278  ++i;
279  } else if (strcmp(argv[i], "--user-words") == 0 && i + 1 < argc) {
280  vars_vec->push_back("user_words_file");
281  vars_values->push_back(argv[i + 1]);
282  ++i;
283  } else if (strcmp(argv[i], "--user-patterns") == 0 && i + 1 < argc) {
284  vars_vec->push_back("user_patterns_file");
285  vars_values->push_back(argv[i + 1]);
286  ++i;
287  } else if (strcmp(argv[i], "--list-langs") == 0) {
288  noocr = true;
289  *list_langs = true;
290  } else if (strcmp(argv[i], "-psm") == 0 && i + 1 < argc) {
291  // The parameter -psm is deprecated and was replaced by --psm.
292  // It is still supported for compatibility reasons.
293  *pagesegmode = static_cast<tesseract::PageSegMode>(atoi(argv[i + 1]));
294  ++i;
295  } else if (strcmp(argv[i], "--psm") == 0 && i + 1 < argc) {
296  *pagesegmode = static_cast<tesseract::PageSegMode>(atoi(argv[i + 1]));
297  ++i;
298  } else if (strcmp(argv[i], "--oem") == 0 && i + 1 < argc) {
299  *enginemode = static_cast<tesseract::OcrEngineMode>(atoi(argv[i + 1]));
300  ++i;
301  } else if (strcmp(argv[i], "--print-parameters") == 0) {
302  noocr = true;
303  *print_parameters = true;
304  } else if (strcmp(argv[i], "-c") == 0 && i + 1 < argc) {
305  // handled properly after api init
306  ++i;
307  } else if (*image == NULL) {
308  *image = argv[i];
309  } else if (*outputbase == NULL) {
310  *outputbase = argv[i];
311  }
312  ++i;
313  }
314 
315  *arg_i = i;
316 
317  if (argc == 2 && strcmp(argv[1], "--list-langs") == 0) {
318  *list_langs = true;
319  noocr = true;
320  }
321 
322  if (*outputbase == NULL && noocr == false) {
323  PrintHelpMessage(argv[0]);
324  exit(1);
325  }
326 }
327 
331  tesseract::PageSegMode pagesegmode, const char* outputbase) {
332  if (pagesegmode == tesseract::PSM_OSD_ONLY) {
333  renderers->push_back(new tesseract::TessOsdRenderer(outputbase));
334  } else {
335  bool b;
336  api->GetBoolVariable("tessedit_create_hocr", &b);
337  if (b) {
338  bool font_info;
339  api->GetBoolVariable("hocr_font_info", &font_info);
340  renderers->push_back(
341  new tesseract::TessHOcrRenderer(outputbase, font_info));
342  }
343 
344  api->GetBoolVariable("tessedit_create_tsv", &b);
345  if (b) {
346  bool font_info;
347  api->GetBoolVariable("hocr_font_info", &font_info);
348  renderers->push_back(
349  new tesseract::TessTsvRenderer(outputbase, font_info));
350  }
351 
352  api->GetBoolVariable("tessedit_create_pdf", &b);
353  if (b) {
354  bool textonly;
355  api->GetBoolVariable("textonly_pdf", &textonly);
356  renderers->push_back(new tesseract::TessPDFRenderer(
357  outputbase, api->GetDatapath(), textonly));
358  }
359 
360  api->GetBoolVariable("tessedit_write_unlv", &b);
361  if (b) {
362  renderers->push_back(new tesseract::TessUnlvRenderer(outputbase));
363  }
364 
365  api->GetBoolVariable("tessedit_create_boxfile", &b);
366  if (b) {
367  renderers->push_back(new tesseract::TessBoxTextRenderer(outputbase));
368  }
369 
370  api->GetBoolVariable("tessedit_create_txt", &b);
371  if (b || renderers->empty()) {
372  renderers->push_back(new tesseract::TessTextRenderer(outputbase));
373  }
374  }
375 
376  if (!renderers->empty()) {
377  // Since the PointerVector auto-deletes, null-out the renderers that are
378  // added to the root, and leave the root in the vector.
379  for (int r = 1; r < renderers->size(); ++r) {
380  (*renderers)[0]->insert((*renderers)[r]);
381  (*renderers)[r] = NULL;
382  }
383  }
384 }
385 
386 /**********************************************************************
387  * main()
388  *
389  **********************************************************************/
390 
391 int main(int argc, char** argv) {
392  const char* lang = "eng";
393  const char* image = NULL;
394  const char* outputbase = NULL;
395  const char* datapath = NULL;
396  bool list_langs = false;
397  bool print_parameters = false;
398  int arg_i = 1;
401  /* main() calls functions like ParseArgs which call exit().
402  * This results in memory leaks if vars_vec and vars_values are
403  * declared as auto variables (destructor is not called then). */
404  static GenericVector<STRING> vars_vec;
405  static GenericVector<STRING> vars_values;
406 
407 #ifdef NDEBUG
408  // Disable debugging and informational messages from Leptonica.
409  setMsgSeverity(L_SEVERITY_ERROR);
410 #endif
411 
412 #if defined(HAVE_TIFFIO_H) && defined(_WIN32)
413  /* Show libtiff warnings on console (not in GUI). */
414  TIFFSetWarningHandler(Win32WarningHandler);
415 #endif /* HAVE_TIFFIO_H && _WIN32 */
416 
417  ParseArgs(argc, argv, &lang, &image, &outputbase, &datapath, &list_langs,
418  &print_parameters, &vars_vec, &vars_values, &arg_i, &pagesegmode,
419  &enginemode);
420 
421  bool banner = false;
422  if (outputbase != NULL && strcmp(outputbase, "-") &&
423  strcmp(outputbase, "stdout")) {
424  banner = true;
425  }
426 
427  PERF_COUNT_START("Tesseract:main")
428 
429  // Call GlobalDawgCache here to create the global DawgCache object before
430  // the TessBaseAPI object. This fixes the order of destructor calls:
431  // first TessBaseAPI must be destructed, DawgCache must be the last object.
433 
434  // Avoid memory leak caused by auto variable when exit() is called.
436 
437  api.SetOutputName(outputbase);
438 
439  int init_failed = api.Init(datapath, lang, enginemode, &(argv[arg_i]),
440  argc - arg_i, &vars_vec, &vars_values, false);
441  if (init_failed) {
442  fprintf(stderr, "Could not initialize tesseract.\n");
443  return EXIT_FAILURE;
444  }
445 
446  SetVariablesFromCLArgs(&api, argc, argv);
447 
448  if (list_langs) {
449  PrintLangsList(&api);
450  return EXIT_SUCCESS;
451  }
452 
453  if (print_parameters) {
454  FILE* fout = stdout;
455  fprintf(stdout, "Tesseract parameters:\n");
456  api.PrintVariables(fout);
457  api.End();
458  return EXIT_SUCCESS;
459  }
460 
461  FixPageSegMode(&api, pagesegmode);
462 
463  if (pagesegmode == tesseract::PSM_AUTO_ONLY) {
464  int ret_val = EXIT_SUCCESS;
465 
466  Pix* pixs = pixRead(image);
467  if (!pixs) {
468  fprintf(stderr, "Cannot open input file: %s\n", image);
469  return 2;
470  }
471 
472  api.SetImage(pixs);
473 
474  tesseract::Orientation orientation;
477  float deskew_angle;
478 
480  if (it) {
481  it->Orientation(&orientation, &direction, &order, &deskew_angle);
482  tprintf(
483  "Orientation: %d\nWritingDirection: %d\nTextlineOrder: %d\n"
484  "Deskew angle: %.4f\n",
485  orientation, direction, order, deskew_angle);
486  } else {
487  ret_val = EXIT_FAILURE;
488  }
489 
490  delete it;
491 
492  pixDestroy(&pixs);
493  return ret_val;
494  }
495 
496  // set in_training_mode to true when using one of these configs:
497  // ambigs.train, box.train, box.train.stderr, linebox, rebox
498  bool b = false;
499  bool in_training_mode =
500  (api.GetBoolVariable("tessedit_ambigs_training", &b) && b) ||
501  (api.GetBoolVariable("tessedit_resegment_from_boxes", &b) && b) ||
502  (api.GetBoolVariable("tessedit_make_boxes_from_boxes", &b) && b);
503 
504  // Avoid memory leak caused by auto variable when exit() is called.
506 
507  if (in_training_mode) {
508  renderers.push_back(NULL);
509  } else {
510  PreloadRenderers(&api, &renderers, pagesegmode, outputbase);
511  }
512 
513  if (!renderers.empty()) {
514  if (banner) PrintBanner();
515  bool succeed = api.ProcessPages(image, NULL, 0, renderers[0]);
516  if (!succeed) {
517  fprintf(stderr, "Error during processing.\n");
518  return EXIT_FAILURE;
519  }
520  }
521 
523 
524  return EXIT_SUCCESS;
525 }
void PrintLangsList(tesseract::TessBaseAPI *api)
#define PERF_COUNT_START(FUNCT_NAME)
static const char * Version()
Definition: baseapi.cpp:144
void PreloadRenderers(tesseract::TessBaseAPI *api, tesseract::PointerVector< tesseract::TessResultRenderer > *renderers, tesseract::PageSegMode pagesegmode, const char *outputbase)
void PrintHelpForOEM()
int Init(const char *datapath, const char *language, OcrEngineMode mode, char **configs, int configs_size, const GenericVector< STRING > *vars_vec, const GenericVector< STRING > *vars_values, bool set_only_non_debug_params)
Definition: baseapi.cpp:278
void SetVariablesFromCLArgs(tesseract::TessBaseAPI *api, int argc, char **argv)
bool SetVariable(const char *name, const char *value)
Definition: baseapi.cpp:218
int push_back(T object)
#define tprintf(...)
Definition: tprintf.h:31
Assume a single uniform block of text. (Default.)
Definition: publictypes.h:160
int direction(EDGEPT *point)
Definition: vecfuncs.cpp:43
void GetAvailableLanguagesAsVector(GenericVector< STRING > *langs) const
Definition: baseapi.cpp:391
PageIterator * AnalyseLayout()
Definition: baseapi.cpp:787
bool empty() const
Definition: genericvector.h:90
int size() const
Definition: genericvector.h:72
void PrintHelpForPSM()
static bool IsAVXAvailable()
Definition: simddetect.h:26
bool ProcessPages(const char *filename, const char *retry_config, int timeout_millisec, TessResultRenderer *renderer)
Definition: baseapi.cpp:1036
bool GetBoolVariable(const char *name, bool *value) const
Definition: baseapi.cpp:238
Automatic page segmentation, but no OSD, or OCR.
Definition: publictypes.h:155
void FixPageSegMode(tesseract::TessBaseAPI *api, tesseract::PageSegMode pagesegmode)
Definition: strngs.h:45
void PrintHelpMessage(const char *program)
void SetImage(const unsigned char *imagedata, int width, int height, int bytes_per_pixel, int bytes_per_line)
Definition: baseapi.cpp:545
int main(int argc, char **argv)
void PrintBanner()
#define PERF_COUNT_END
void SetPageSegMode(PageSegMode mode)
Definition: baseapi.cpp:482
void SetOutputName(const char *name)
Definition: baseapi.cpp:211
PageSegMode GetPageSegMode() const
Definition: baseapi.cpp:489
void PrintVariables(FILE *fp) const
Definition: baseapi.cpp:266
const char * GetDatapath()
Definition: baseapi.cpp:930
Fully automatic page segmentation, but no OSD.
Definition: publictypes.h:156
static DawgCache * GlobalDawgCache()
Definition: dict.cpp:198
static bool IsSSEAvailable()
Definition: simddetect.h:28
Orientation and script detection only.
Definition: publictypes.h:152
void PrintVersionInfo()
void Orientation(tesseract::Orientation *orientation, tesseract::WritingDirection *writing_direction, tesseract::TextlineOrder *textline_order, float *deskew_angle) const
void PrintUsage(const char *program)
void ParseArgs(const int argc, char **argv, const char **lang, const char **image, const char **outputbase, const char **datapath, bool *list_langs, bool *print_parameters, GenericVector< STRING > *vars_vec, GenericVector< STRING > *vars_values, int *arg_i, tesseract::PageSegMode *pagesegmode, tesseract::OcrEngineMode *enginemode)