tesseract  4.00.00dev
tesseract-c_api-demo.py
Go to the documentation of this file.
1 #!/usr/bin/python
2 # -*- coding: utf-8 -*-
3 
4 # Copyright 2012 Zdenko Podobný
5 # Author: Zdenko Podobný
6 #
7 # Licensed under the Apache License, Version 2.0 (the "License");
8 # you may not use this file except in compliance with the License.
9 # You may obtain a copy of the License at
10 #
11 # http://www.apache.org/licenses/LICENSE-2.0
12 #
13 # Unless required by applicable law or agreed to in writing, software
14 # distributed under the License is distributed on an "AS IS" BASIS,
15 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 # See the License for the specific language governing permissions and
17 # limitations under the License.
18 
19 """
20 Simple python demo script of tesseract-ocr 3.02 c-api
21 """
22 
23 import os
24 import sys
25 import ctypes
26 
# Demo settings: OCR language, input image, and the directories searched for
# the tesseract shared library on POSIX (libpath) and Windows (libpath_w).
lang = "eng"
filename = "../phototest.tif"
libpath = "/usr/local/lib64/"
libpath_w = "../vs2010/DLL_Release/"

# Take TESSDATA_PREFIX from the environment when it is set and non-empty;
# otherwise fall back to the parent directory.
TESSDATA_PREFIX = os.environ.get('TESSDATA_PREFIX') or "../"

# Pick the library file names for the current platform: `libname` is the
# name under the configured directory, `libname_alt` is the bare fallback name.
if sys.platform != "win32":
    libname = libpath + "libtesseract.so.3.0.2"
    libname_alt = "libtesseract.so.3"
else:
    libname_alt = "libtesseract302.dll"
    libname = libpath_w + libname_alt
    # Append the DLL directory to PATH (presumably so the loader can also
    # resolve the DLL's own dependencies from there).
    os.environ["PATH"] += os.pathsep + libpath_w
43 
# Load the tesseract shared library. The name under the configured directory
# is tried first; if that fails, the bare alternative name is tried so the
# system loader can search its own default locations.
#
# ctypes reports a failed load as OSError (WindowsError is a subclass of it and
# only exists on Windows), so OSError is the portable exception to catch.
try:
    tesseract = ctypes.cdll.LoadLibrary(libname)
except OSError:
    try:
        tesseract = ctypes.cdll.LoadLibrary(libname_alt)
    except OSError as err:
        # Both attempts failed: show what was tried and why, then give up.
        print("Trying to load '%s'..." % libname)
        print("Trying to load '%s'..." % libname_alt)
        print(err)
        sys.exit(1)
54 
def _version_tuple(version_text):
    """Return ``(major, minor)`` as ints parsed from a dotted version string.

    Only the leading digits of the first two dot-separated fields are used,
    so suffixes such as "dev" are ignored ("4.00.00dev" -> (4, 0)). A missing
    or non-numeric field counts as 0.
    """
    numbers = []
    for field in version_text.split(".")[:2]:
        digits = ""
        for char in field:
            if not char.isdigit():
                break
            digits += char
        numbers.append(int(digits) if digits else 0)
    while len(numbers) < 2:
        numbers.append(0)
    return tuple(numbers)


# TessVersion() returns a C string; without this declaration ctypes would
# treat the return value as a C int.
tesseract.TessVersion.restype = ctypes.c_char_p
tesseract_version = tesseract.TessVersion()
if not isinstance(tesseract_version, str):
    # On Python 3 a c_char_p result is bytes; decode it for parsing/printing.
    tesseract_version = tesseract_version.decode("ascii", "replace")

# We need to check library version because libtesseract.so.3 is symlink
# and can point to other version than 3.02.
# Compare (major, minor) as integers: parsing a 4-character prefix as a float
# fails on strings such as "4.0.0" and treats versions as decimal fractions.
if _version_tuple(tesseract_version) < (3, 2):
    print("Found tesseract-ocr library version %s." % tesseract_version)
    print("C-API is present only in version 3.02!")
    sys.exit(2)
64 
def _c_string(value):
    """Return *value* as a byte string usable as a C ``char*`` argument.

    Byte strings are passed through unchanged; text strings are UTF-8 encoded.
    """
    if isinstance(value, bytes):
        return value
    return value.encode("utf-8")


# Declare the C prototypes explicitly. By default ctypes assumes every foreign
# function returns a C int, which can truncate returned pointers on 64-bit
# platforms.
# NOTE(review): these prototypes follow the 3.02 C API that this demo targets
# (ProcessPages returning the recognized text) - confirm against the capi.h of
# the library actually installed.
tesseract.TessBaseAPICreate.restype = ctypes.c_void_p
tesseract.TessBaseAPIDelete.argtypes = [ctypes.c_void_p]
tesseract.TessBaseAPIDelete.restype = None
tesseract.TessBaseAPIInit3.argtypes = [
    ctypes.c_void_p, ctypes.c_char_p, ctypes.c_char_p]
tesseract.TessBaseAPIInit3.restype = ctypes.c_int
tesseract.TessBaseAPIProcessPages.argtypes = [
    ctypes.c_void_p, ctypes.c_char_p, ctypes.c_char_p, ctypes.c_int]
# c_void_p (not c_char_p) keeps the raw address so the buffer can be released.
tesseract.TessBaseAPIProcessPages.restype = ctypes.c_void_p

api = tesseract.TessBaseAPICreate()
rc = tesseract.TessBaseAPIInit3(
    api, _c_string(TESSDATA_PREFIX), _c_string(lang))
if rc:
    tesseract.TessBaseAPIDelete(api)
    print("Could not initialize tesseract.\n")
    sys.exit(3)

text_out = tesseract.TessBaseAPIProcessPages(api, _c_string(filename), None, 0)
if not text_out:
    # A NULL result must not be handed to string_at().
    tesseract.TessBaseAPIDelete(api)
    print("Could not process '%s'." % filename)
    sys.exit(4)

# string_at() copies the C string into a Python byte string, so the library's
# buffer can be released right after the copy.
result_text = ctypes.string_at(text_out)
_delete_text = getattr(tesseract, "TessDeleteText", None)
if _delete_text is not None:
    _delete_text.argtypes = [ctypes.c_void_p]
    _delete_text.restype = None
    _delete_text(text_out)

# On Python 3 result_text is bytes; decode it so the text itself is printed
# rather than its b'...' representation. On Python 2 it is printed unchanged.
if isinstance(result_text, str):
    print(result_text)
else:
    print(result_text.decode("utf-8", "replace"))