21 #include "config_auto.h" 26 #include "allheaders.h" 55 training_iteration_(0),
115 tprintf(
"Space was garbled in recoding!!\n");
137 tprintf(
"Failed to load any lstm-specific dictionaries for lang %s!!\n",
147 bool debug,
double worst_dict_cert,
150 const TBOX& line_box,
float score_ratio,
154 float label_threshold = use_alternates ? 0.75
f : 0.0f;
157 if (!
RecognizeLine(image_data, invert, debug,
false, label_threshold,
158 &scale_factor, &inputs, &outputs))
165 search_->
Decode(outputs, kDictRatio, kCertOffset, worst_dict_cert, NULL);
173 use_alternates, one_word, score_ratio, scale_factor,
174 target_unicharset, words);
188 bool use_alternates,
bool one_word,
float score_ratio,
float scale_factor,
192 float prev_space_cert = 0.0f;
193 for (
int i = 0; i < labels.
size(); i = word_end) {
198 float space_cert = 0.0f;
200 word_end = labels.
size();
206 if (word_end < labels.
size()) {
212 while (word_end > i && labels[word_end - 1] ==
null_char_) --word_end;
217 tprintf(
"Creating word from outputs over [%d,%d)\n", i, word_end);
220 MIN(prev_space_cert, space_cert), debug,
222 labels, label_coords, scale_factor);
223 if (word == NULL && target_unicharset != NULL) {
226 word =
WordFromOutput(line_box, outputs, i, word_end, score_ratio,
227 MIN(prev_space_cert, space_cert), debug,
false,
228 NULL, labels, label_coords, scale_factor);
230 prev_space_cert = space_cert;
237 float* mean_output,
float* sd) {
239 STATS stats(0, kOutputScale + 1);
240 for (
int t = 0; t < outputs.
Width(); ++t) {
241 int best_label = outputs.
BestLabel(t, NULL);
243 float best_output = outputs.
f(t)[best_label];
244 stats.
add(static_cast<int>(kOutputScale * best_output), 1);
247 *min_output =
static_cast<float>(stats.
min_bucket()) / kOutputScale;
248 *mean_output = stats.
mean() / kOutputScale;
249 *sd = stats.
sd() / kOutputScale;
257 bool debug,
bool re_invert,
258 float label_threshold,
float* scale_factor,
261 const int kMaxImageWidth = 2560;
268 tprintf(
"Line cannot be recognized!!\n");
272 tprintf(
"Image too large to learn!! Size = %dx%d\n", pixGetWidth(pix),
278 *scale_factor = min_width / *scale_factor;
284 float pos_min, pos_mean, pos_sd;
285 OutputStats(*outputs, &pos_min, &pos_mean, &pos_sd);
286 if (invert && pos_min < 0.5) {
295 float inv_min, inv_mean, inv_sd;
296 OutputStats(inv_outputs, &inv_min, &inv_mean, &inv_sd);
297 if (inv_min > pos_min && inv_mean > pos_mean && inv_sd < pos_sd) {
300 tprintf(
"Inverting image: old min=%g, mean=%g, sd=%g, inv %g,%g,%g\n",
301 pos_min, pos_mean, pos_sd, inv_min, inv_mean, inv_sd);
303 *outputs = inv_outputs;
304 *inputs = inv_inputs;
305 }
else if (re_invert) {
333 const TBOX& line_box,
const NetworkIO& outputs,
int word_start,
334 int word_end,
float score_ratio,
float space_certainty,
bool debug,
335 bool use_alternates,
const UNICHARSET* target_unicharset,
337 float scale_factor) {
339 line_box, word_start, word_end, space_certainty, use_alternates,
340 target_unicharset, labels, label_coords, scale_factor);
342 for (
int width = 1; width <= max_blob_run; ++width) {
344 for (
int i = word_start; i + width <= word_end; ++i) {
349 while (j - i < width && labels[j] !=
null_char_) ++j;
350 if (j - i == width) {
352 int end_coord = label_coords[j];
354 end_coord = label_coords[j + 1];
356 col, col + width - 1, debug, outputs, target_unicharset,
357 label_coords[i], end_coord, score_ratio);
358 if (choices == NULL) {
362 word_res->
ratings->
put(col, col + width - 1, choices);
368 if (use_alternates) {
371 for (
int i = word_start; i + 2 < word_end; ++i) {
374 (i == word_start || labels[i - 1] ==
null_char_) &&
375 (i + 3 == word_end || labels[i + 3] ==
null_char_)) {
376 int end_coord = label_coords[i + 3];
377 if (i + 3 < word_end && labels[i + 3] ==
null_char_)
378 end_coord = label_coords[i + 4];
379 BLOB_CHOICE_LIST* choices =
381 label_coords[i], end_coord, score_ratio);
382 if (choices == NULL) {
386 word_res->
ratings->
put(col, col + 1, choices);
399 int word_end,
float space_certainty,
404 float scale_factor) {
407 C_BLOB_IT b_it(&blobs);
411 int max_blob_run = 0;
413 for (
int i = word_start; i < word_end; ++i) {
417 TBOX box(label_coords[i], 0, label_coords[i + 1], line_box.
height());
418 box.
scale(scale_factor);
420 box.set_top(line_box.
top());
425 if (labels[i] ==
null_char_ || i + 1 == word_end) {
426 if (blob_run > max_blob_run)
427 max_blob_run = blob_run;
430 if (!use_alternates) max_blob_run = 1;
433 WERD* word =
new WERD(&blobs, word_start > 1 ? 1 : 0, NULL);
437 target_unicharset != NULL ? target_unicharset : &
GetUnicharset();
449 for (
int start = 0; start < labels.
size(); start = end) {
464 const char* window_name,
466 #ifndef GRAPHICS_DISABLED // do nothing if there's no graphics 467 Pix* input_pix = inputs.
ToPix();
469 pixGetHeight(input_pix), window);
472 #endif // GRAPHICS_DISABLED 480 #ifndef GRAPHICS_DISABLED // do nothing if there's no graphics 484 for (
int start = 0; start < labels.
size(); start = end) {
485 int xpos = xcoords[start] * x_scale;
491 const char* str =
DecodeLabel(labels, start, &end, NULL);
492 if (*str ==
'\\') str =
"\\\\";
493 xpos = xcoords[(start + end) / 2] * x_scale;
494 window->
Text(xpos, height, str);
496 window->
Line(xpos, 0, xpos, height * 3 / 2);
499 #endif // GRAPHICS_DISABLED 510 for (
int start = 0; start < labels.
size(); start = end) {
518 const char* label =
DecodeLabel(labels, start, &end, &decoded);
521 for (
int i = start + 1; i < end; ++i) {
523 xcoords[i], xcoords[i + 1]);
532 const char* label,
int best_choice,
533 int x_start,
int x_end) {
534 tprintf(
"%s=%d On [%d, %d), scores=", label, best_choice, x_start, x_end);
535 double max_score = 0.0;
536 double mean_score = 0.0;
537 int width = x_end - x_start;
538 for (
int x = x_start; x < x_end; ++x) {
539 const float* line = outputs.
f(x);
540 double score = line[best_choice] * 100.0;
541 if (score > max_score) max_score = score;
542 mean_score += score / width;
544 double best_score = 0.0;
546 if (c != best_choice && line[c] > best_score) {
548 best_score = line[c];
554 tprintf(
", Mean=%g, max=%g\n", mean_score, max_score);
560 static bool NullIsBest(
const NetworkIO& output,
float null_thr,
562 if (output.
f(t)[
null_char] >= null_thr)
return true;
579 }
else if (null_thr <= 0.0) {
598 int width = output.
Width();
601 while (t < width && NullIsBest(output, null_thr,
null_char_, t)) {
607 int char_start = t++;
608 while (t < width && !NullIsBest(output, null_thr,
null_char_, t) &&
616 while (t < width && NullIsBest(output, null_thr,
null_char_, t)) {
638 int width = output.
Width();
645 while (++t < width && output.
BestLabel(t, NULL) == label) {
672 int width = output.
Width();
673 for (
int t = 0; t < width; ++t) {
690 int col,
int row,
bool debug,
const NetworkIO& output,
691 const UNICHARSET* target_unicharset,
int x_start,
int x_end,
693 float rating = 0.0f, certainty = 0.0f;
698 tprintf(
"Best choice over range %d,%d=unichar%d=%s r = %g, cert=%g\n",
702 BLOB_CHOICE_LIST* choices =
new BLOB_CHOICE_LIST;
703 BLOB_CHOICE_IT bc_it(choices);
705 target_unicharset, &bc_it)) {
710 double best_cert = certainty;
716 if (certainty >= best_cert - score_ratio &&
718 target_unicharset, &bc_it)) {
725 bc_it.move_to_first();
728 while (!bc_it.at_first()) {
729 delete bc_it.extract();
740 float certainty,
int col,
int row,
742 BLOB_CHOICE_IT* bc_it) {
743 int target_id = unichar_id;
744 if (target_unicharset != NULL) {
756 bc_it->add_after_then_move(choice);
763 int start,
int* end,
int* decoded) {
769 if (decoded != NULL) {
776 while (index < labels.
size() &&
778 code.
Set(code.
length(), labels[index++]);
779 while (index < labels.
size() && labels[index] ==
null_char_) ++index;
783 if (uni_id != INVALID_UNICHAR_ID &&
784 (index == labels.
size() ||
788 if (decoded != NULL) *decoded = uni_id;
793 return "<Undecodable>";
795 if (decoded != NULL) *decoded = labels[start];
796 if (labels[start] ==
null_char_)
return "<null>";
811 if (label == INVALID_UNICHAR_ID)
return "..";
virtual int XScaleFactor() const
void Line(int x1, int y1, int x2, int y2)
int EncodeUnichar(int unichar_id, RecodedCharID *code) const
static C_BLOB * FakeBlob(const TBOX &box)
BLOB_CHOICE_LIST * GetBlobChoices(int col, int row, bool debug, const NetworkIO &output, const UNICHARSET *target_unicharset, int x_start, int x_end, float score_ratio)
virtual void SetRandomizer(TRand *randomizer)
bool IsValidFirstCode(int code) const
void RecognizeLine(const ImageData &image_data, bool invert, bool debug, double worst_dict_cert, bool use_alternates, const UNICHARSET *target_unicharset, const TBOX &line_box, float score_ratio, bool one_word, PointerVector< WERD_RES > *words)
bool contains_unichar(const char *const unichar_repr) const
static const float kMinCertainty
int BestLabel(int t, float *score) const
bool DeSerialize(bool swap, FILE *fp)
static void PreparePixInput(const StaticShape &shape, const Pix *pix, TRand *randomizer, NetworkIO *input)
static Pix * PrepareLSTMInputs(const ImageData &image_data, const Network *network, int min_width, TRand *randomizer, float *image_scale)
static int SortByRating(const void *p1, const void *p2)
static const int kMaxCodeLen
WERD_RES * WordFromOutput(const TBOX &line_box, const NetworkIO &outputs, int word_start, int word_end, float score_ratio, float space_certainty, bool debug, bool use_alternates, const UNICHARSET *target_unicharset, const GenericVector< int > &labels, const GenericVector< int > &label_coords, float scale_factor)
const UNICHARSET & GetUnicharset() const
NetworkScratch scratch_space_
static Network * CreateFromFile(TFile *fp)
bool DeSerialize(TFile *fp)
void set_matrix_cell(int col, int row)
int FReadEndian(void *buffer, int size, int count)
void ExtractBestPathAsLabels(GenericVector< int > *labels, GenericVector< int > *xcoords) const
void ScoresOverRange(int t_start, int t_end, int choice, int null_ch, float *rating, float *certainty) const
virtual void Forward(bool debug, const NetworkIO &input, const TransposedArray *input_transpose, NetworkScratch *scratch, NetworkIO *output)
void set_int_mode(bool is_quantized)
bool LoadDictionary(const char *lang, TessdataManager *mgr)
const char * id_to_unichar(UNICHAR_ID id) const
virtual void CacheXScaleFactor(int factor)
void LoadLSTM(const STRING &lang, TessdataManager *data_file)
void ExtractBestPathAsWords(const TBOX &line_box, float scale_factor, bool debug, const UNICHARSET *unicharset, PointerVector< WERD_RES > *words)
void scale(const float f)
void WordsFromOutputs(const NetworkIO &outputs, const GenericVector< int > &labels, const GenericVector< int > label_coords, const TBOX &line_box, bool debug, bool use_alternates, bool one_word, float score_ratio, float scale_factor, const UNICHARSET *target_unicharset, PointerVector< WERD_RES > *words)
bool Serialize(TFile *fp) const
virtual StaticShape InputShape() const
const char * DecodeSingleLabel(int label)
void LabelsViaReEncode(const NetworkIO &output, GenericVector< int > *labels, GenericVector< int > *xcoords)
void FakeWordFromRatings(PermuterType permuter)
bool Serialize(FILE *fp) const
int DecodeUnichar(const RecodedCharID &code) const
static void ClearWindow(bool tess_coords, const char *window_name, int width, int height, ScrollView **window)
void LabelsViaSimpleText(const NetworkIO &output, GenericVector< int > *labels, GenericVector< int > *xcoords)
void LabelsFromOutputs(const NetworkIO &outputs, float null_thr, GenericVector< int > *labels, GenericVector< int > *xcoords)
void DebugActivationRange(const NetworkIO &outputs, const char *label, int best_choice, int x_start, int x_end)
int FWrite(const void *buffer, int size, int count)
void add(inT32 value, inT32 count)
bool load_from_file(const char *const filename, bool skip_fragments)
void Decode(const NetworkIO &output, double dict_ratio, double cert_offset, double worst_dict_cert, const UNICHARSET *charset)
void LabelsViaCTC(const NetworkIO &output, GenericVector< int > *labels, GenericVector< int > *xcoords)
void DisplayForward(const NetworkIO &inputs, const GenericVector< int > &labels, const GenericVector< int > &label_coords, const char *window_name, ScrollView **window)
void put(ICOORD pos, const T &thing)
void DebugActivationPath(const NetworkIO &outputs, const GenericVector< int > &labels, const GenericVector< int > &xcoords)
void Set(int index, int value)
bool save_to_file(const char *const filename) const
void OutputStats(const NetworkIO &outputs, float *min_output, float *mean_output, float *sd)
bool DeSerialize(TFile *fp)
bool AddBlobChoices(int unichar_id, float rating, float certainty, int col, int row, const UNICHARSET *target_unicharset, BLOB_CHOICE_IT *bc_it)
void Text(int x, int y, const char *mystring)
void LabelsViaThreshold(const NetworkIO &output, float null_threshold, GenericVector< int > *labels, GenericVector< int > *xcoords)
static DawgCache * GlobalDawgCache()
const UNICHARSET * uch_set
static int DisplayImage(Pix *pix, ScrollView *window)
STRING DecodeLabels(const GenericVector< int > &labels)
void SetupForLoad(DawgCache *dawg_cache)
virtual bool Serialize(TFile *fp) const
UNICHAR_ID unichar_to_id(const char *const unichar_repr) const
void DisplayLSTMOutput(const GenericVector< int > &labels, const GenericVector< int > &xcoords, int height, ScrollView *window)
inT32 training_iteration_
WERD_RES * InitializeWord(const TBOX &line_box, int word_start, int word_end, float space_certainty, bool use_alternates, const UNICHARSET *target_unicharset, const GenericVector< int > &labels, const GenericVector< int > &label_coords, float scale_factor)
bool SimpleTextOutput() const
bool Serialize(TFile *fp) const
const char * DecodeLabel(const GenericVector< int > &labels, int start, int *end, int *decoded)
const char * get_normed_unichar(UNICHAR_ID unichar_id) const
int BestChoiceOverRange(int t_start, int t_end, int not_this, int null_ch, float *rating, float *certainty) const
RecodeBeamSearch * search_
void TextAttributes(const char *font, int pixel_size, bool bold, bool italic, bool underlined)