tesseract  4.00.00dev
boxchar.h
Go to the documentation of this file.
1 /**********************************************************************
2  * File: boxchar.h
3  * Description: Simple class to associate a Tesseract classification unit with
4  * its bounding box so that the boxes can be rotated as the image
5  * is rotated for degradation. Also includes routines to output
6  * the character-tagged boxes to a boxfile.
7  * Author: Ray Smith
8  * Created: Mon Nov 18 2013
9  *
10  * (C) Copyright 2013, Google Inc.
11  * Licensed under the Apache License, Version 2.0 (the "License");
12  * you may not use this file except in compliance with the License.
13  * You may obtain a copy of the License at
14  * http://www.apache.org/licenses/LICENSE-2.0
15  * Unless required by applicable law or agreed to in writing, software
16  * distributed under the License is distributed on an "AS IS" BASIS,
17  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18  * See the License for the specific language governing permissions and
19  * limitations under the License.
20  *
21  **********************************************************************/
22 
23 #ifndef TESSERACT_TRAINING_BOXCHAR_H_
24 #define TESSERACT_TRAINING_BOXCHAR_H_
25 
26 #include <string>
27 #include <vector>
28 
29 #include "allheaders.h" // from Leptonica
30 
31 #ifdef USE_STD_NAMESPACE
32 using std::string;
33 using std::vector;
34 #endif
35 
36 struct Box;
37 
38 namespace tesseract {
39 
40 class BoxChar {
41  public:
42  BoxChar(const char* utf8_str, int len);
43 
44  ~BoxChar();
45 
46  // Accessors.
47  const string& ch() const { return ch_; }
48  const Box* box() const { return box_; }
49  const int& page() const { return page_; }
50 
51 
52  // Set the box_ member.
53  void AddBox(int x, int y, int width, int height);
54 
55  void set_page(int page) { page_ = page; }
56 
57  string* mutable_ch() { return &ch_; }
58  Box* mutable_box() { return box_; }
59 
60  // Sort function for sorting by left edge of box. Note that this will not
61  // work properly until after InsertNewlines and InsertSpaces.
62  bool operator<(const BoxChar& other) const {
63  if (box_ == nullptr) return true;
64  if (other.box_ == nullptr) return false;
65  return box_->x < other.box_->x;
66  }
67 
68  static void TranslateBoxes(int xshift, int yshift,
69  std::vector<BoxChar*>* boxes);
70 
71  // Prepares for writing the boxes to a file by inserting newlines, spaces,
72  // and re-ordering so the boxes are strictly left-to-right.
73  static void PrepareToWrite(std::vector<BoxChar*>* boxes);
74  // Inserts newline (tab) characters into the vector at newline positions.
75  static void InsertNewlines(bool rtl_rules, bool vertical_rules,
76  std::vector<BoxChar*>* boxes);
77  // Converts nullptr boxes to space characters, with appropriate bounding
78  // boxes.
79  static void InsertSpaces(bool rtl_rules, bool vertical_rules,
80  std::vector<BoxChar*>* boxes);
81  // Reorders text in a right-to-left script in left-to-right order.
82  static void ReorderRTLText(std::vector<BoxChar*>* boxes);
83  // Returns true if the vector contains mostly RTL characters.
84  static bool ContainsMostlyRTL(const std::vector<BoxChar*>& boxes);
85  // Returns true if the text is mostly laid out vertically.
86  static bool MostlyVertical(const std::vector<BoxChar*>& boxes);
87 
88  // Returns the total length of all the strings in the boxes.
89  static int TotalByteLength(const std::vector<BoxChar*>& boxes);
90 
91  // Rotate the vector of boxes between start and end by the given rotation.
92  // The rotation is in radians clockwise about the given center.
93  static void RotateBoxes(float rotation,
94  int xcenter,
95  int ycenter,
96  int start_box,
97  int end_box,
98  std::vector<BoxChar*>* boxes);
99 
100  // Create a tesseract box file from the vector of boxes. The image height
101  // is needed to convert to tesseract coordinates.
102  static void WriteTesseractBoxFile(const string& name, int height,
103  const std::vector<BoxChar*>& boxes);
104  // Gets the tesseract box file as a string from the vector of boxes.
105  // The image height is needed to convert to tesseract coordinates.
106  static string GetTesseractBoxStr(int height,
107  const std::vector<BoxChar*>& boxes);
108 
109  private:
110  string ch_;
111  Box* box_;
112  int page_;
113 };
114 
115 // Sort predicate to sort a vector of BoxChar*.
117  bool operator()(const BoxChar* box1, const BoxChar* box2) const {
118  return *box1 < *box2;
119  }
120 };
121 
122 } // namespace tesseract
123 
124 #endif // TESSERACT_TRAINING_BOXCHAR_H_
const int & page() const
Definition: boxchar.h:49
static void ReorderRTLText(std::vector< BoxChar *> *boxes)
Definition: boxchar.cpp:202
static void TranslateBoxes(int xshift, int yshift, std::vector< BoxChar *> *boxes)
Definition: boxchar.cpp:52
static bool MostlyVertical(const std::vector< BoxChar *> &boxes)
Definition: boxchar.cpp:245
static int TotalByteLength(const std::vector< BoxChar *> &boxes)
Definition: boxchar.cpp:264
const string & ch() const
Definition: boxchar.h:47
bool operator()(const BoxChar *box1, const BoxChar *box2) const
Definition: boxchar.h:117
static void WriteTesseractBoxFile(const string &name, int height, const std::vector< BoxChar *> &boxes)
Definition: boxchar.cpp:294
Box * mutable_box()
Definition: boxchar.h:58
static void InsertNewlines(bool rtl_rules, bool vertical_rules, std::vector< BoxChar *> *boxes)
Definition: boxchar.cpp:81
void set_page(int page)
Definition: boxchar.h:55
bool operator<(const BoxChar &other) const
Definition: boxchar.h:62
const Box * box() const
Definition: boxchar.h:48
static bool ContainsMostlyRTL(const std::vector< BoxChar *> &boxes)
Definition: boxchar.cpp:216
static string GetTesseractBoxStr(int height, const std::vector< BoxChar *> &boxes)
Definition: boxchar.cpp:301
static void RotateBoxes(float rotation, int xcenter, int ycenter, int start_box, int end_box, std::vector< BoxChar *> *boxes)
Definition: boxchar.cpp:273
BoxChar(const char *utf8_str, int len)
Definition: boxchar.cpp:41
static void PrepareToWrite(std::vector< BoxChar *> *boxes)
Definition: boxchar.cpp:66
void AddBox(int x, int y, int width, int height)
Definition: boxchar.cpp:47
string * mutable_ch()
Definition: boxchar.h:57
static void InsertSpaces(bool rtl_rules, bool vertical_rules, std::vector< BoxChar *> *boxes)
Definition: boxchar.cpp:145