tesseract  4.00.00dev
renderer.cpp
Go to the documentation of this file.
1 // File: renderer.cpp
3 // Description: Rendering interface to inject into TessBaseAPI
4 //
5 // (C) Copyright 2011, Google Inc.
6 // Licensed under the Apache License, Version 2.0 (the "License");
7 // you may not use this file except in compliance with the License.
8 // You may obtain a copy of the License at
9 // http://www.apache.org/licenses/LICENSE-2.0
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
17 
18 #ifdef HAVE_CONFIG_H
19 #include "config_auto.h"
20 #endif
21 
22 #include <memory> // std::unique_ptr
23 #include <string.h>
24 #include "baseapi.h"
25 #include "genericvector.h"
26 #include "renderer.h"
27 
28 namespace tesseract {
29 
30 /**********************************************************************
31  * Base Renderer interface implementation
32  **********************************************************************/
34  const char* extension)
35  : file_extension_(extension),
36  title_(""), imagenum_(-1),
37  fout_(stdout),
38  next_(NULL),
39  happy_(true) {
40  if (strcmp(outputbase, "-") && strcmp(outputbase, "stdout")) {
41  STRING outfile = STRING(outputbase) + STRING(".") + STRING(file_extension_);
42  fout_ = fopen(outfile.string(), "wb");
43  if (fout_ == NULL) {
44  happy_ = false;
45  }
46  }
47 }
48 
50  if (fout_ != nullptr) {
51  if (fout_ != stdout)
52  fclose(fout_);
53  else
54  clearerr(fout_);
55  }
56  delete next_;
57 }
58 
60  if (next == NULL) return;
61 
62  TessResultRenderer* remainder = next_;
63  next_ = next;
64  if (remainder) {
65  while (next->next_ != NULL) {
66  next = next->next_;
67  }
68  next->next_ = remainder;
69  }
70 }
71 
73  if (!happy_) return false;
74  title_ = title;
75  imagenum_ = -1;
76  bool ok = BeginDocumentHandler();
77  if (next_) {
78  ok = next_->BeginDocument(title) && ok;
79  }
80  return ok;
81 }
82 
84  if (!happy_) return false;
85  ++imagenum_;
86  bool ok = AddImageHandler(api);
87  if (next_) {
88  ok = next_->AddImage(api) && ok;
89  }
90  return ok;
91 }
92 
94  if (!happy_) return false;
95  bool ok = EndDocumentHandler();
96  if (next_) {
97  ok = next_->EndDocument() && ok;
98  }
99  return ok;
100 }
101 
102 void TessResultRenderer::AppendString(const char* s) {
103  AppendData(s, strlen(s));
104 }
105 
106 void TessResultRenderer::AppendData(const char* s, int len) {
107  int n = fwrite(s, 1, len, fout_);
108  if (n != len) happy_ = false;
109 }
110 
112  return happy_;
113 }
114 
116  return happy_;
117 }
118 
119 
120 /**********************************************************************
121  * UTF8 Text Renderer interface implementation
122  **********************************************************************/
123 TessTextRenderer::TessTextRenderer(const char *outputbase)
124  : TessResultRenderer(outputbase, "txt") {
125 }
126 
128  const std::unique_ptr<const char[]> utf8(api->GetUTF8Text());
129  if (utf8 == NULL) {
130  return false;
131  }
132 
133  AppendString(utf8.get());
134 
135  bool pageBreak = false;
136  api->GetBoolVariable("include_page_breaks", &pageBreak);
137  const char* pageSeparator = api->GetStringVariable("page_separator");
138  if (pageBreak) {
139  AppendString(pageSeparator);
140  }
141 
142  return true;
143 }
144 
145 /**********************************************************************
146  * HOcr Text Renderer interface implementation
147  **********************************************************************/
148 TessHOcrRenderer::TessHOcrRenderer(const char *outputbase)
149  : TessResultRenderer(outputbase, "hocr") {
150  font_info_ = false;
151 }
152 
153 TessHOcrRenderer::TessHOcrRenderer(const char *outputbase, bool font_info)
154  : TessResultRenderer(outputbase, "hocr") {
155  font_info_ = font_info;
156 }
157 
159  AppendString(
160  "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"
161  "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Transitional//EN\"\n"
162  " \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\">\n"
163  "<html xmlns=\"http://www.w3.org/1999/xhtml\" xml:lang=\"en\" "
164  "lang=\"en\">\n <head>\n <title>");
165  AppendString(title());
166  AppendString(
167  "</title>\n"
168  "<meta http-equiv=\"Content-Type\" content=\"text/html;"
169  "charset=utf-8\" />\n"
170  " <meta name='ocr-system' content='tesseract " TESSERACT_VERSION_STR
171  "' />\n"
172  " <meta name='ocr-capabilities' content='ocr_page ocr_carea ocr_par"
173  " ocr_line ocrx_word");
174  if (font_info_)
175  AppendString(
176  " ocrp_lang ocrp_dir ocrp_font ocrp_fsize ocrp_wconf");
177  AppendString(
178  "'/>\n"
179  "</head>\n<body>\n");
180 
181  return true;
182 }
183 
185  AppendString(" </body>\n</html>\n");
186 
187  return true;
188 }
189 
191  const std::unique_ptr<const char[]> hocr(api->GetHOCRText(imagenum()));
192  if (hocr == NULL) return false;
193 
194  AppendString(hocr.get());
195 
196  return true;
197 }
198 
199 /**********************************************************************
200  * TSV Text Renderer interface implementation
201  **********************************************************************/
202 TessTsvRenderer::TessTsvRenderer(const char* outputbase)
203  : TessResultRenderer(outputbase, "tsv") {
204  font_info_ = false;
205 }
206 
207 TessTsvRenderer::TessTsvRenderer(const char* outputbase, bool font_info)
208  : TessResultRenderer(outputbase, "tsv") {
209  font_info_ = font_info;
210 }
211 
213  // Output TSV column headings
214  AppendString(
215  "level\tpage_num\tblock_num\tpar_num\tline_num\tword_"
216  "num\tleft\ttop\twidth\theight\tconf\ttext\n");
217  return true;
218 }
219 
220 bool TessTsvRenderer::EndDocumentHandler() { return true; }
221 
223  const std::unique_ptr<const char[]> tsv(api->GetTSVText(imagenum()));
224  if (tsv == NULL) return false;
225 
226  AppendString(tsv.get());
227 
228  return true;
229 }
230 
231 /**********************************************************************
232  * UNLV Text Renderer interface implementation
233  **********************************************************************/
234 TessUnlvRenderer::TessUnlvRenderer(const char *outputbase)
235  : TessResultRenderer(outputbase, "unlv") {
236 }
237 
239  const std::unique_ptr<const char[]> unlv(api->GetUNLVText());
240  if (unlv == NULL) return false;
241 
242  AppendString(unlv.get());
243 
244  return true;
245 }
246 
247 /**********************************************************************
248  * BoxText Renderer interface implementation
249  **********************************************************************/
251  : TessResultRenderer(outputbase, "box") {
252 }
253 
255  const std::unique_ptr<const char[]> text(api->GetBoxText(imagenum()));
256  if (text == NULL) return false;
257 
258  AppendString(text.get());
259 
260  return true;
261 }
262 
263 /**********************************************************************
264  * Osd Text Renderer interface implementation
265  **********************************************************************/
266 TessOsdRenderer::TessOsdRenderer(const char* outputbase)
267  : TessResultRenderer(outputbase, "osd") {}
268 
270  char* osd = api->GetOsdText(imagenum());
271  if (osd == NULL) return false;
272 
273  AppendString(osd);
274  delete[] osd;
275 
276  return true;
277 }
278 
279 } // namespace tesseract
virtual bool EndDocumentHandler()
Definition: renderer.cpp:184
virtual bool BeginDocumentHandler()
Definition: renderer.cpp:158
virtual bool AddImageHandler(TessBaseAPI *api)
Definition: renderer.cpp:254
TessBoxTextRenderer(const char *outputbase)
Definition: renderer.cpp:250
virtual bool BeginDocumentHandler()
Definition: renderer.cpp:212
virtual bool AddImageHandler(TessBaseAPI *api)
Definition: renderer.cpp:269
virtual bool AddImageHandler(TessBaseAPI *api)
Definition: renderer.cpp:190
char * GetOsdText(int page_number)
Definition: baseapi.cpp:1904
char * GetHOCRText(ETEXT_DESC *monitor, int page_number)
Definition: baseapi.cpp:1411
char * GetTSVText(int page_number)
Definition: baseapi.cpp:1582
virtual bool AddImageHandler(TessBaseAPI *api)
Definition: renderer.cpp:238
const char * string() const
Definition: strngs.cpp:198
virtual bool EndDocumentHandler()
Definition: renderer.cpp:220
TessTsvRenderer(const char *outputbase, bool font_info)
Definition: renderer.cpp:207
void insert(TessResultRenderer *next)
Definition: renderer.cpp:59
bool GetBoolVariable(const char *name, bool *value) const
Definition: baseapi.cpp:238
const char * GetStringVariable(const char *name) const
Definition: baseapi.cpp:246
TessTextRenderer(const char *outputbase)
Definition: renderer.cpp:123
TessResultRenderer(const char *outputbase, const char *extension)
Definition: renderer.cpp:33
Definition: strngs.h:45
virtual bool BeginDocumentHandler()
Definition: renderer.cpp:111
void AppendData(const char *s, int len)
Definition: renderer.cpp:106
virtual bool AddImageHandler(TessBaseAPI *api)
Definition: renderer.cpp:127
virtual bool AddImageHandler(TessBaseAPI *api)
Definition: renderer.cpp:222
char * GetBoxText(int page_number)
Definition: baseapi.cpp:1709
bool AddImage(TessBaseAPI *api)
Definition: renderer.cpp:83
TessUnlvRenderer(const char *outputbase)
Definition: renderer.cpp:234
TessResultRenderer * next()
Definition: renderer.h:55
virtual bool AddImageHandler(TessBaseAPI *api)=0
bool BeginDocument(const char *title)
Definition: renderer.cpp:72
#define TESSERACT_VERSION_STR
Definition: baseapi.h:23
TessOsdRenderer(const char *outputbase)
Definition: renderer.cpp:266
TessHOcrRenderer(const char *outputbase, bool font_info)
Definition: renderer.cpp:153
void AppendString(const char *s)
Definition: renderer.cpp:102
virtual bool EndDocumentHandler()
Definition: renderer.cpp:115
const char * title() const
Definition: renderer.h:81