tesseract  4.00.00dev
renderer.h
Go to the documentation of this file.
1 // File: renderer.h
3 // Description: Rendering interface to inject into TessBaseAPI
4 //
5 // (C) Copyright 2011, Google Inc.
6 // Licensed under the Apache License, Version 2.0 (the "License");
7 // you may not use this file except in compliance with the License.
8 // You may obtain a copy of the License at
9 // http://www.apache.org/licenses/LICENSE-2.0
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
17 
18 #ifndef TESSERACT_API_RENDERER_H_
19 #define TESSERACT_API_RENDERER_H_
20 
21 // To avoid collision with other typenames include the ABSOLUTE MINIMUM
22 // complexity of includes here. Use forward declarations wherever possible
23 // and hide includes of complex types in baseapi.cpp.
24 #include "genericvector.h"
25 #include "platform.h"
26 #include "publictypes.h"
27 
28 namespace tesseract {
29 
30 class TessBaseAPI;
31 
46  public:
47  virtual ~TessResultRenderer();
48 
49  // Takes ownership of pointer so must be new'd instance.
50  // Renderers aren't ordered, but this appends the sequences of the next
51  // parameter and the existing next(). The renderers should be unique across both lists.
52  void insert(TessResultRenderer* next);
53 
54  // Returns the next renderer or NULL.
55  TessResultRenderer* next() { return next_; }
56 
62  bool BeginDocument(const char* title);
63 
72  bool AddImage(TessBaseAPI* api);
73 
78  bool EndDocument();
79 
80  const char* file_extension() const { return file_extension_; } // standard extension for generated output
81  const char* title() const { return title_.c_str(); } // document title as a C string (view into title_)
82 
92  int imagenum() const { return imagenum_; } // index of last image added
93 
94  protected:
105  TessResultRenderer(const char *outputbase,
106  const char* extension);
107 
108  // Hook for specialized handling in BeginDocument()
109  virtual bool BeginDocumentHandler();
110 
111  // This must be overridden to render the OCR'd results
112  virtual bool AddImageHandler(TessBaseAPI* api) = 0;
113 
114  // Hook for specialized handling in EndDocument()
115  virtual bool EndDocumentHandler();
116 
117  // Renderers can call this to append '\0' terminated strings into
118  // the output string returned by GetOutput.
119  // This method will grow the output buffer if needed.
120  void AppendString(const char* s);
121 
122  // Renderers can call this to append binary byte sequences into
123  // the output string returned by GetOutput. Note that s is not necessarily
124  // '\0' terminated (and can contain '\0' within it).
125  // This method will grow the output buffer if needed.
126  void AppendData(const char* s, int len);
127 
128  private:
129  const char* file_extension_; // standard extension for generated output
130  STRING title_; // title of document being rendered
131  int imagenum_; // index of last image added
132 
133  FILE* fout_; // output file pointer
134  TessResultRenderer* next_; // Can link multiple renderers together
135  bool happy_; // I get grumpy when the disk fills up, etc.
136 };
137 
142  public:
143  explicit TessTextRenderer(const char *outputbase);
144 
145  protected:
146  virtual bool AddImageHandler(TessBaseAPI* api);
147 };
148 
153  public:
154  explicit TessHOcrRenderer(const char *outputbase, bool font_info);
155  explicit TessHOcrRenderer(const char *outputbase);
156 
157  protected:
158  virtual bool BeginDocumentHandler();
159  virtual bool AddImageHandler(TessBaseAPI* api);
160  virtual bool EndDocumentHandler();
161 
162  private:
163  bool font_info_; // whether to print font information
164 };
165 
170  public:
171  explicit TessTsvRenderer(const char* outputbase, bool font_info);
172  explicit TessTsvRenderer(const char* outputbase);
173 
174  protected:
175  virtual bool BeginDocumentHandler();
176  virtual bool AddImageHandler(TessBaseAPI* api);
177  virtual bool EndDocumentHandler();
178 
179  private:
180  bool font_info_; // whether to print font information
181 };
182 
187  public:
188  // datadir is the location of the TESSDATA. We need it because
189  // we load a custom PDF font from this location.
190  TessPDFRenderer(const char* outputbase, const char* datadir, bool textonly);
191 
192  protected:
193  virtual bool BeginDocumentHandler();
194  virtual bool AddImageHandler(TessBaseAPI* api);
195  virtual bool EndDocumentHandler();
196 
197  private:
198  // We don't want to have every image in memory at once,
199  // so we store some metadata as we go along producing
200  // PDFs one page at a time. At the end, that metadata is
201  // used to make everything that isn't easily handled in a
202  // streaming fashion.
203  long int obj_; // counter for PDF objects
204  GenericVector<long int> offsets_; // offset of every PDF object in bytes
205  GenericVector<long int> pages_; // object number for every /Page object
206  const char *datadir_; // where to find the custom font
207  bool textonly_; // skip images if set
208  // Bookkeeping only. DIY = Do It Yourself.
209  void AppendPDFObjectDIY(size_t objectsize);
210  // Bookkeeping + emit data.
211  void AppendPDFObject(const char *data);
212  // Create the /Contents object for an entire page.
213  char* GetPDFTextObjects(TessBaseAPI* api, double width, double height);
214  // Turn an image into a PDF object. Only transcode if we have to.
215  static bool imageToPDFObj(Pix *pix, char *filename, long int objnum,
216  char **pdf_object, long int *pdf_object_size);
217 };
218 
219 
224  public:
225  explicit TessUnlvRenderer(const char *outputbase);
226 
227  protected:
228  virtual bool AddImageHandler(TessBaseAPI* api);
229 };
230 
235  public:
236  explicit TessBoxTextRenderer(const char *outputbase);
237 
238  protected:
239  virtual bool AddImageHandler(TessBaseAPI* api);
240 };
241 
246  public:
247  explicit TessOsdRenderer(const char* outputbase);
248 
249  protected:
250  virtual bool AddImageHandler(TessBaseAPI* api);
251 };
252 
253 } // namespace tesseract.
254 
255 #endif // TESSERACT_API_RENDERER_H_
struct TessUnlvRenderer TessUnlvRenderer
Definition: capi.h:81
struct TessPDFRenderer TessPDFRenderer
Definition: capi.h:80
#define TESS_API
Definition: platform.h:81
struct TessBaseAPI TessBaseAPI
Definition: capi.h:83
struct TessTextRenderer TessTextRenderer
Definition: capi.h:78
struct TessHOcrRenderer TessHOcrRenderer
Definition: capi.h:79
Definition: strngs.h:45
const char * file_extension() const
Definition: renderer.h:80
const char * filename
Definition: ioapi.h:38
void insert(LIST list, void *node)
Definition: oldlist.cpp:215
TessResultRenderer * next()
Definition: renderer.h:55
struct TessResultRenderer TessResultRenderer
Definition: capi.h:77
struct TessBoxTextRenderer TessBoxTextRenderer
Definition: capi.h:82
const char * title() const
Definition: renderer.h:81