#include <boxchar.h>

static void TranslateBoxes(int xshift, int yshift, std::vector< BoxChar *> *boxes)
static void PrepareToWrite(std::vector< BoxChar *> *boxes)
static void InsertNewlines(bool rtl_rules, bool vertical_rules, std::vector< BoxChar *> *boxes)
static void InsertSpaces(bool rtl_rules, bool vertical_rules, std::vector< BoxChar *> *boxes)
static void ReorderRTLText(std::vector< BoxChar *> *boxes)
static bool ContainsMostlyRTL(const std::vector< BoxChar *> &boxes)
static bool MostlyVertical(const std::vector< BoxChar *> &boxes)
static int TotalByteLength(const std::vector< BoxChar *> &boxes)
static void RotateBoxes(float rotation, int xcenter, int ycenter, int start_box, int end_box, std::vector< BoxChar *> *boxes)
static void WriteTesseractBoxFile(const string &name, int height, const std::vector< BoxChar *> &boxes)
static string GetTesseractBoxStr(int height, const std::vector< BoxChar *> &boxes)

Definition at line 40 of file boxchar.h.
◆ BoxChar()
tesseract::BoxChar::BoxChar(const char *utf8_str, int len)
Definition at line 41 of file boxchar.cpp.
41 : ch_(utf8_str, len) {
◆ ~BoxChar()
tesseract::BoxChar::~BoxChar()
Definition at line 45 of file boxchar.cpp.
45 { boxDestroy(&box_); }
◆ AddBox()
void tesseract::BoxChar::AddBox(int x, int y, int width, int height)
Definition at line 47 of file boxchar.cpp.
48 box_ = boxCreate(x, y, width, height);
◆ box()
const Box* tesseract::BoxChar::box() const [inline]
◆ ch()
const string& tesseract::BoxChar::ch() const [inline]
◆ ContainsMostlyRTL()
bool tesseract::BoxChar::ContainsMostlyRTL(const std::vector< BoxChar *> &boxes) [static]
Definition at line 216 of file boxchar.cpp.
217 int num_rtl = 0, num_ltr = 0;
218 for (
unsigned int i = 0; i < boxes.size(); ++i) {
222 tprintf(
"Illegal utf8 in boxchar %u string:%s = ", i,
223 boxes[i]->ch_.c_str());
224 for (
size_t c = 0; c < boxes[i]->ch_.size(); ++c) {
225 tprintf(
" 0x%x", boxes[i]->ch_[c]);
230 for (
int j = 0; j < uni_vector.
size(); ++j) {
231 UCharDirection dir = u_charDirection(uni_vector[j]);
232 if (dir == U_RIGHT_TO_LEFT || dir == U_RIGHT_TO_LEFT_ARABIC ||
233 dir == U_ARABIC_NUMBER) {
240 return num_rtl > num_ltr;
static bool UTF8ToUnicode(const char *utf8_str, GenericVector< int > *unicodes)
◆ GetTesseractBoxStr()
string tesseract::BoxChar::GetTesseractBoxStr(int height, const std::vector< BoxChar *> &boxes) [static]
Definition at line 301 of file boxchar.cpp.
305 for (
size_t i = 0; i < boxes.size(); ++i) {
306 const Box*
box = boxes[i]->box_;
307 if (box ==
nullptr) {
308 tprintf(
"Error: Call PrepareToWrite before WriteTesseractBoxFile!!\n");
313 boxes[i]->ch_.c_str(), box->x, height - box->y - box->h,
314 box->x + box->w, height - box->y, boxes[i]->page_);
315 output.append(buffer, nbytes);
◆ InsertNewlines()
void tesseract::BoxChar::InsertNewlines(bool rtl_rules, bool vertical_rules, std::vector< BoxChar *> *boxes) [static]
Definition at line 81 of file boxchar.cpp.
85 for (
int i = 0;
static_cast<unsigned int>(i) < boxes->size(); ++i) {
86 Box*
box = (*boxes)[i]->box_;
88 if (prev_i < 0 || prev_i < i - 1 || static_cast<unsigned int>(i) + 1 == boxes->size()) {
92 boxes->erase(boxes->begin() + i);
94 }
while (i >= 0 && static_cast<unsigned int>(i) + 1 == boxes->size() &&
95 (*boxes)[i]->box_ ==
nullptr);
100 Box* prev_box = (*boxes)[prev_i]->box_;
101 int shift = box->x - prev_box->x;
102 if (vertical_rules) {
103 shift = box->y - prev_box->y;
104 }
else if (rtl_rules) {
107 if (-shift > max_shift) {
109 int width = prev_box->w;
110 int height = prev_box->h;
111 int x = prev_box->x + width;
113 if (vertical_rules) {
115 y = prev_box->y + height;
116 }
else if (rtl_rules) {
117 x = prev_box->x - width;
119 tprintf(
"prev x = %d, width=%d\n", prev_box->x, width);
123 if (prev_i == i - 1) {
126 new_box->AddBox(x, y, width, height);
127 new_box->page_ = (*boxes)[i]->page_;
128 boxes->insert(boxes->begin() + i, new_box);
131 (*boxes)[i - 1]->AddBox(x, y, width, height);
132 (*boxes)[i - 1]->ch_ =
"\t";
135 }
else if (shift > max_shift) {
BoxChar(const char *utf8_str, int len)
◆ InsertSpaces()
void tesseract::BoxChar::InsertSpaces(bool rtl_rules, bool vertical_rules, std::vector< BoxChar *> *boxes) [static]
Definition at line 145 of file boxchar.cpp.
149 for (
int i = 1;
static_cast<unsigned int>(i) + 1 < boxes->size(); ++i) {
150 Box*
box = (*boxes)[i]->box_;
151 if (box ==
nullptr) {
152 Box* prev = (*boxes)[i - 1]->box_;
153 Box* next = (*boxes)[i + 1]->box_;
155 int top =
MIN(prev->y, next->y);
156 int bottom =
MAX(prev->y + prev->h, next->y + next->h);
157 int left = prev->x + prev->w;
159 if (vertical_rules) {
160 top = prev->y + prev->h;
162 left =
MIN(prev->x, next->x);
163 right =
MAX(prev->x + prev->w, next->x + next->w);
164 }
else if (rtl_rules) {
169 left = next->x + next->w;
171 j >= 0 && (*boxes)[j]->ch_ !=
" " && (*boxes)[j]->ch_ !=
"\t";
173 prev = (*boxes)[j]->box_;
175 if (prev->x < right) {
181 for (
size_t j = i + 2; j < boxes->size() && (*boxes)[j]->box_ !=
nullptr &&
182 (*boxes)[j]->ch_ !=
"\t";
184 next = (*boxes)[j]->box_;
185 if (next->x + next->w > left) {
186 left = next->x + next->w;
192 if (right <= left) right = left + 1;
193 if (bottom <= top) bottom = top + 1;
194 (*boxes)[i]->AddBox(left, top, right - left, bottom - top);
195 (*boxes)[i]->ch_ =
" ";
◆ MostlyVertical()
bool tesseract::BoxChar::MostlyVertical(const std::vector< BoxChar *> &boxes) [static]
Definition at line 245 of file boxchar.cpp.
246 inT64 total_dx = 0, total_dy = 0;
247 for (
size_t i = 1; i < boxes.size(); ++i) {
248 if (boxes[i - 1]->box_ !=
nullptr && boxes[i]->box_ !=
nullptr &&
249 boxes[i - 1]->page_ == boxes[i]->page_) {
250 int dx = boxes[i]->box_->x - boxes[i - 1]->box_->x;
251 int dy = boxes[i]->box_->y - boxes[i - 1]->box_->y;
259 return total_dy > total_dx;
const int kMinNewlineRatio
◆ mutable_box()
Box* tesseract::BoxChar::mutable_box() [inline]
◆ mutable_ch()
string* tesseract::BoxChar::mutable_ch() [inline]
◆ operator<()
bool tesseract::BoxChar::operator<(const BoxChar &other) const [inline]
Definition at line 62 of file boxchar.h.
63 if (box_ ==
nullptr)
return true;
64 if (other.box_ ==
nullptr)
return false;
65 return box_->x < other.box_->x;
◆ page()
const int& tesseract::BoxChar::page() const [inline]
◆ PrepareToWrite()
void tesseract::BoxChar::PrepareToWrite(std::vector< BoxChar *> *boxes) [static]
Definition at line 66 of file boxchar.cpp.
71 for (
unsigned int i = 0; i < boxes->size(); ++i) {
72 if ((*boxes)[i]->box_ ==
nullptr)
tprintf(
"Null box at index %u\n", i);
static void ReorderRTLText(std::vector< BoxChar *> *boxes)
static bool MostlyVertical(const std::vector< BoxChar *> &boxes)
static void InsertNewlines(bool rtl_rules, bool vertical_rules, std::vector< BoxChar *> *boxes)
static bool ContainsMostlyRTL(const std::vector< BoxChar *> &boxes)
static void InsertSpaces(bool rtl_rules, bool vertical_rules, std::vector< BoxChar *> *boxes)
◆ ReorderRTLText()
void tesseract::BoxChar::ReorderRTLText(std::vector< BoxChar *> *boxes) [static]
Definition at line 202 of file boxchar.cpp.
205 BoxCharPtrSort sorter;
207 for (
size_t start = 0; start < boxes->size(); start = end + 1) {
209 while (end < boxes->
size() && (*boxes)[end]->ch_ !=
"\t") ++end;
210 std::sort(boxes->begin() + start, boxes->begin() + end, sorter);
◆ RotateBoxes()
void tesseract::BoxChar::RotateBoxes(float rotation, int xcenter, int ycenter, int start_box, int end_box, std::vector< BoxChar *> *boxes) [static]
Definition at line 273 of file boxchar.cpp.
276 Boxa* orig = boxaCreate(0);
277 for (
int i = start_box; i < end_box; ++i) {
278 BOX*
box = (*boxes)[i]->box_;
279 if (box) boxaAddBox(orig, box, L_CLONE);
281 Boxa* rotated = boxaRotate(orig, xcenter, ycenter, rotation);
283 for (
int i = start_box, box_ind = 0; i < end_box; ++i) {
284 if ((*boxes)[i]->box_) {
285 boxDestroy(&((*boxes)[i]->box_));
286 (*boxes)[i]->box_ = boxaGetBox(rotated, box_ind++, L_CLONE);
289 boxaDestroy(&rotated);
◆ set_page()
void tesseract::BoxChar::set_page(int page) [inline]
◆ TotalByteLength()
int tesseract::BoxChar::TotalByteLength(const std::vector< BoxChar *> &boxes) [static]
Definition at line 264 of file boxchar.cpp.
265 int total_length = 0;
266 for (
size_t i = 0; i < boxes.size(); ++i) total_length += boxes[i]->ch_.size();
◆ TranslateBoxes()
void tesseract::BoxChar::TranslateBoxes(int xshift, int yshift, std::vector< BoxChar *> *boxes) [static]
Definition at line 52 of file boxchar.cpp.
54 for (
size_t i = 0; i < boxes->size(); ++i) {
55 BOX*
box = (*boxes)[i]->box_;
◆ WriteTesseractBoxFile()
void tesseract::BoxChar::WriteTesseractBoxFile(const string &name, int height, const std::vector< BoxChar *> &boxes) [static]
Definition at line 294 of file boxchar.cpp.
static void WriteStringToFileOrDie(const string &str, const string &filename)
static string GetTesseractBoxStr(int height, const std::vector< BoxChar *> &boxes)
The documentation for this class was generated from the following files:
- /home/stefan/src/github/tesseract-ocr/tesseract/training/boxchar.h
- /home/stefan/src/github/tesseract-ocr/tesseract/training/boxchar.cpp