tesseract  4.00.00dev
recodebeam.cpp
Go to the documentation of this file.
1 // File: recodebeam.cpp
3 // Description: Beam search to decode from the re-encoded CJK as a sequence of
4 // smaller numbers in place of a single large code.
5 // Author: Ray Smith
6 // Created: Fri Mar 13 09:39:01 PDT 2015
7 //
8 // (C) Copyright 2015, Google Inc.
9 // Licensed under the Apache License, Version 2.0 (the "License");
10 // you may not use this file except in compliance with the License.
11 // You may obtain a copy of the License at
12 // http://www.apache.org/licenses/LICENSE-2.0
13 // Unless required by applicable law or agreed to in writing, software
14 // distributed under the License is distributed on an "AS IS" BASIS,
15 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 // See the License for the specific language governing permissions and
17 // limitations under the License.
18 //
20 
21 #include "recodebeam.h"
22 #include "networkio.h"
23 #include "pageres.h"
24 #include "unicharcompress.h"
25 
26 namespace tesseract {
27 
28 // Clipping value for certainty inside Tesseract. Reflects the minimum value
29 // of certainty that will be returned by ExtractBestPathAsUnicharIds.
30 // Supposedly on a uniform scale that can be compared across languages and
31 // engines.
32 const float RecodeBeamSearch::kMinCertainty = -20.0f;
33 
34 // The beam width at each code position.
35 const int RecodeBeamSearch::kBeamWidths[RecodedCharID::kMaxCodeLen + 1] = {
36  5, 10, 16, 16, 16, 16, 16, 16, 16, 16,
37 };
38 
39 const char* kNodeContNames[] = {"Anything", "OnlyDup", "NoDup"};
40 
41 // Prints debug details of the node.
42 void RecodeNode::Print(int null_char, const UNICHARSET& unicharset,
43  int depth) const {
44  if (code == null_char) {
45  tprintf("null_char");
46  } else {
47  tprintf("label=%d, uid=%d=%s", code, unichar_id,
48  unicharset.debug_str(unichar_id).string());
49  }
50  tprintf(" score=%g, c=%g,%s%s%s perm=%d, hash=%lx", score, certainty,
51  start_of_dawg ? " DawgStart" : "", start_of_word ? " Start" : "",
52  end_of_word ? " End" : "", permuter, code_hash);
53  if (depth > 0 && prev != nullptr) {
54  tprintf(" prev:");
55  prev->Print(null_char, unicharset, depth - 1);
56  } else {
57  tprintf("\n");
58  }
59 }
60 
61 // Borrows the pointer, which is expected to survive until *this is deleted.
63  int null_char, bool simple_text, Dict* dict)
64  : recoder_(recoder),
65  beam_size_(0),
66  top_code_(-1),
67  second_code_(-1),
68  dict_(dict),
69  space_delimited_(true),
70  is_simple_text_(simple_text),
71  null_char_(null_char) {
72  if (dict_ != NULL && !dict_->IsSpaceDelimitedLang()) space_delimited_ = false;
73 }
74 
75 // Decodes the set of network outputs, storing the lattice internally.
76 void RecodeBeamSearch::Decode(const NetworkIO& output, double dict_ratio,
77  double cert_offset, double worst_dict_cert,
78  const UNICHARSET* charset) {
79  beam_size_ = 0;
80  int width = output.Width();
81  for (int t = 0; t < width; ++t) {
82  ComputeTopN(output.f(t), output.NumFeatures(), kBeamWidths[0]);
83  DecodeStep(output.f(t), t, dict_ratio, cert_offset, worst_dict_cert,
84  charset);
85  }
86 }
88  double dict_ratio, double cert_offset,
89  double worst_dict_cert,
90  const UNICHARSET* charset) {
91  beam_size_ = 0;
92  int width = output.dim1();
93  for (int t = 0; t < width; ++t) {
94  ComputeTopN(output[t], output.dim2(), kBeamWidths[0]);
95  DecodeStep(output[t], t, dict_ratio, cert_offset, worst_dict_cert, charset);
96  }
97 }
98 
99 // Returns the best path as labels/scores/xcoords similar to simple CTC.
101  GenericVector<int>* labels, GenericVector<int>* xcoords) const {
102  labels->truncate(0);
103  xcoords->truncate(0);
105  ExtractBestPaths(&best_nodes, NULL);
106  // Now just run CTC on the best nodes.
107  int t = 0;
108  int width = best_nodes.size();
109  while (t < width) {
110  int label = best_nodes[t]->code;
111  if (label != null_char_) {
112  labels->push_back(label);
113  xcoords->push_back(t);
114  }
115  while (++t < width && !is_simple_text_ && best_nodes[t]->code == label) {
116  }
117  }
118  xcoords->push_back(width);
119 }
120 
121 // Returns the best path as unichar-ids/certs/ratings/xcoords skipping
122 // duplicates, nulls and intermediate parts.
124  bool debug, const UNICHARSET* unicharset, GenericVector<int>* unichar_ids,
126  GenericVector<int>* xcoords) const {
128  ExtractBestPaths(&best_nodes, NULL);
129  ExtractPathAsUnicharIds(best_nodes, unichar_ids, certs, ratings, xcoords);
130  if (debug) {
131  DebugPath(unicharset, best_nodes);
132  DebugUnicharPath(unicharset, best_nodes, *unichar_ids, *certs, *ratings,
133  *xcoords);
134  }
135 }
136 
137 // Returns the best path as a set of WERD_RES.
139  float scale_factor, bool debug,
140  const UNICHARSET* unicharset,
141  PointerVector<WERD_RES>* words) {
142  words->truncate(0);
143  GenericVector<int> unichar_ids;
144  GenericVector<float> certs;
145  GenericVector<float> ratings;
146  GenericVector<int> xcoords;
149  ExtractBestPaths(&best_nodes, &second_nodes);
150  if (debug) {
151  DebugPath(unicharset, best_nodes);
152  ExtractPathAsUnicharIds(second_nodes, &unichar_ids, &certs, &ratings,
153  &xcoords);
154  tprintf("\nSecond choice path:\n");
155  DebugUnicharPath(unicharset, second_nodes, unichar_ids, certs, ratings,
156  xcoords);
157  }
158  ExtractPathAsUnicharIds(best_nodes, &unichar_ids, &certs, &ratings, &xcoords);
159  int num_ids = unichar_ids.size();
160  if (debug) {
161  DebugUnicharPath(unicharset, best_nodes, unichar_ids, certs, ratings,
162  xcoords);
163  }
164  // Convert labels to unichar-ids.
165  int word_end = 0;
166  float prev_space_cert = 0.0f;
167  for (int word_start = 0; word_start < num_ids; word_start = word_end) {
168  for (word_end = word_start + 1; word_end < num_ids; ++word_end) {
169  // A word is terminated when a space character or start_of_word flag is
170  // hit. We also want to force a separate word for every non
171  // space-delimited character when not in a dictionary context.
172  if (unichar_ids[word_end] == UNICHAR_SPACE) break;
173  int index = xcoords[word_end];
174  if (best_nodes[index]->start_of_word) break;
175  if (best_nodes[index]->permuter == TOP_CHOICE_PERM &&
176  (!unicharset->IsSpaceDelimited(unichar_ids[word_end]) ||
177  !unicharset->IsSpaceDelimited(unichar_ids[word_end - 1])))
178  break;
179  }
180  float space_cert = 0.0f;
181  if (word_end < num_ids && unichar_ids[word_end] == UNICHAR_SPACE)
182  space_cert = certs[word_end];
183  bool leading_space =
184  word_start > 0 && unichar_ids[word_start - 1] == UNICHAR_SPACE;
185  // Create a WERD_RES for the output word.
186  WERD_RES* word_res = InitializeWord(
187  leading_space, line_box, word_start, word_end,
188  MIN(space_cert, prev_space_cert), unicharset, xcoords, scale_factor);
189  for (int i = word_start; i < word_end; ++i) {
190  BLOB_CHOICE_LIST* choices = new BLOB_CHOICE_LIST;
191  BLOB_CHOICE_IT bc_it(choices);
192  BLOB_CHOICE* choice = new BLOB_CHOICE(
193  unichar_ids[i], ratings[i], certs[i], -1, 1.0f,
194  static_cast<float>(MAX_INT16), 0.0f, BCC_STATIC_CLASSIFIER);
195  int col = i - word_start;
196  choice->set_matrix_cell(col, col);
197  bc_it.add_after_then_move(choice);
198  word_res->ratings->put(col, col, choices);
199  }
200  int index = xcoords[word_end - 1];
201  word_res->FakeWordFromRatings(best_nodes[index]->permuter);
202  words->push_back(word_res);
203  prev_space_cert = space_cert;
204  if (word_end < num_ids && unichar_ids[word_end] == UNICHAR_SPACE)
205  ++word_end;
206  }
207 }
208 
209 // Generates debug output of the content of the beams after a Decode.
210 void RecodeBeamSearch::DebugBeams(const UNICHARSET& unicharset) const {
211  for (int p = 0; p < beam_size_; ++p) {
212  for (int d = 0; d < 2; ++d) {
213  for (int c = 0; c < NC_COUNT; ++c) {
214  NodeContinuation cont = static_cast<NodeContinuation>(c);
215  int index = BeamIndex(d, cont, 0);
216  if (beam_[p]->beams_[index].empty()) continue;
217  // Print all the best scoring nodes for each unichar found.
218  tprintf("Position %d: %s+%s beam\n", p, d ? "Dict" : "Non-Dict",
219  kNodeContNames[c]);
220  DebugBeamPos(unicharset, beam_[p]->beams_[index]);
221  }
222  }
223  }
224 }
225 
226 // Generates debug output of the content of a single beam position.
227 void RecodeBeamSearch::DebugBeamPos(const UNICHARSET& unicharset,
228  const RecodeHeap& heap) const {
229  GenericVector<const RecodeNode*> unichar_bests;
230  unichar_bests.init_to_size(unicharset.size(), NULL);
231  const RecodeNode* null_best = NULL;
232  int heap_size = heap.size();
233  for (int i = 0; i < heap_size; ++i) {
234  const RecodeNode* node = &heap.get(i).data;
235  if (node->unichar_id == INVALID_UNICHAR_ID) {
236  if (null_best == NULL || null_best->score < node->score) null_best = node;
237  } else {
238  if (unichar_bests[node->unichar_id] == NULL ||
239  unichar_bests[node->unichar_id]->score < node->score) {
240  unichar_bests[node->unichar_id] = node;
241  }
242  }
243  }
244  for (int u = 0; u < unichar_bests.size(); ++u) {
245  if (unichar_bests[u] != NULL) {
246  const RecodeNode& node = *unichar_bests[u];
247  node.Print(null_char_, unicharset, 1);
248  }
249  }
250  if (null_best != NULL) {
251  null_best->Print(null_char_, unicharset, 1);
252  }
253 }
254 
255 // Returns the given best_nodes as unichar-ids/certs/ratings/xcoords skipping
256 // duplicates, nulls and intermediate parts.
257 /* static */
258 void RecodeBeamSearch::ExtractPathAsUnicharIds(
259  const GenericVector<const RecodeNode*>& best_nodes,
260  GenericVector<int>* unichar_ids, GenericVector<float>* certs,
261  GenericVector<float>* ratings, GenericVector<int>* xcoords) {
262  unichar_ids->truncate(0);
263  certs->truncate(0);
264  ratings->truncate(0);
265  xcoords->truncate(0);
266  // Backtrack extracting only valid, non-duplicate unichar-ids.
267  int t = 0;
268  int width = best_nodes.size();
269  while (t < width) {
270  double certainty = 0.0;
271  double rating = 0.0;
272  while (t < width && best_nodes[t]->unichar_id == INVALID_UNICHAR_ID) {
273  double cert = best_nodes[t++]->certainty;
274  if (cert < certainty) certainty = cert;
275  rating -= cert;
276  }
277  if (t < width) {
278  int unichar_id = best_nodes[t]->unichar_id;
279  if (unichar_id == UNICHAR_SPACE && !certs->empty() &&
280  best_nodes[t]->permuter != NO_PERM) {
281  // All the rating and certainty go on the previous character except
282  // for the space itself.
283  if (certainty < certs->back()) certs->back() = certainty;
284  ratings->back() += rating;
285  certainty = 0.0;
286  rating = 0.0;
287  }
288  unichar_ids->push_back(unichar_id);
289  xcoords->push_back(t);
290  do {
291  double cert = best_nodes[t++]->certainty;
292  // Special-case NO-PERM space to forget the certainty of the previous
293  // nulls. See long comment in ContinueContext.
294  if (cert < certainty || (unichar_id == UNICHAR_SPACE &&
295  best_nodes[t - 1]->permuter == NO_PERM)) {
296  certainty = cert;
297  }
298  rating -= cert;
299  } while (t < width && best_nodes[t]->duplicate);
300  certs->push_back(certainty);
301  ratings->push_back(rating);
302  } else if (!certs->empty()) {
303  if (certainty < certs->back()) certs->back() = certainty;
304  ratings->back() += rating;
305  }
306  }
307  xcoords->push_back(width);
308 }
309 
310 // Sets up a word with the ratings matrix and fake blobs with boxes in the
311 // right places.
312 WERD_RES* RecodeBeamSearch::InitializeWord(bool leading_space,
313  const TBOX& line_box, int word_start,
314  int word_end, float space_certainty,
315  const UNICHARSET* unicharset,
316  const GenericVector<int>& xcoords,
317  float scale_factor) {
318  // Make a fake blob for each non-zero label.
319  C_BLOB_LIST blobs;
320  C_BLOB_IT b_it(&blobs);
321  for (int i = word_start; i < word_end; ++i) {
322  int min_half_width = xcoords[i + 1] - xcoords[i];
323  if (i > 0 && xcoords[i] - xcoords[i - 1] < min_half_width)
324  min_half_width = xcoords[i] - xcoords[i - 1];
325  if (min_half_width < 1) min_half_width = 1;
326  // Make a fake blob.
327  TBOX box(xcoords[i] - min_half_width, 0, xcoords[i] + min_half_width,
328  line_box.height());
329  box.scale(scale_factor);
330  box.move(ICOORD(line_box.left(), line_box.bottom()));
331  box.set_top(line_box.top());
332  b_it.add_after_then_move(C_BLOB::FakeBlob(box));
333  }
334  // Make a fake word from the blobs.
335  WERD* word = new WERD(&blobs, leading_space, NULL);
336  // Make a WERD_RES from the word.
337  WERD_RES* word_res = new WERD_RES(word);
338  word_res->uch_set = unicharset;
339  word_res->combination = true; // Give it ownership of the word.
340  word_res->space_certainty = space_certainty;
341  word_res->ratings = new MATRIX(word_end - word_start, 1);
342  return word_res;
343 }
344 
345 // Fills top_n_flags_ with bools that are true iff the corresponding output
346 // is one of the top_n.
347 void RecodeBeamSearch::ComputeTopN(const float* outputs, int num_outputs,
348  int top_n) {
349  top_n_flags_.init_to_size(num_outputs, TN_ALSO_RAN);
350  top_code_ = -1;
351  second_code_ = -1;
352  top_heap_.clear();
353  for (int i = 0; i < num_outputs; ++i) {
354  if (top_heap_.size() < top_n || outputs[i] > top_heap_.PeekTop().key) {
355  TopPair entry(outputs[i], i);
356  top_heap_.Push(&entry);
357  if (top_heap_.size() > top_n) top_heap_.Pop(&entry);
358  }
359  }
360  while (!top_heap_.empty()) {
361  TopPair entry;
362  top_heap_.Pop(&entry);
363  if (top_heap_.size() > 1) {
364  top_n_flags_[entry.data] = TN_TOPN;
365  } else {
366  top_n_flags_[entry.data] = TN_TOP2;
367  if (top_heap_.empty())
368  top_code_ = entry.data;
369  else
370  second_code_ = entry.data;
371  }
372  }
373  top_n_flags_[null_char_] = TN_TOP2;
374 }
375 
376 // Adds the computation for the current time-step to the beam. Call at each
377 // time-step in sequence from left to right. outputs is the activation vector
378 // for the current timestep.
379 void RecodeBeamSearch::DecodeStep(const float* outputs, int t,
380  double dict_ratio, double cert_offset,
381  double worst_dict_cert,
382  const UNICHARSET* charset) {
383  if (t == beam_.size()) beam_.push_back(new RecodeBeam);
384  RecodeBeam* step = beam_[t];
385  beam_size_ = t + 1;
386  step->Clear();
387  if (t == 0) {
388  // The first step can only use singles and initials.
389  ContinueContext(nullptr, BeamIndex(false, NC_ANYTHING, 0), outputs, TN_TOP2,
390  dict_ratio, cert_offset, worst_dict_cert, step);
391  if (dict_ != nullptr) {
392  ContinueContext(nullptr, BeamIndex(true, NC_ANYTHING, 0), outputs,
393  TN_TOP2, dict_ratio, cert_offset, worst_dict_cert, step);
394  }
395  } else {
396  RecodeBeam* prev = beam_[t - 1];
397  if (charset != NULL) {
398  int beam_index = BeamIndex(true, NC_ANYTHING, 0);
399  for (int i = prev->beams_[beam_index].size() - 1; i >= 0; --i) {
401  ExtractPath(&prev->beams_[beam_index].get(i).data, &path);
402  tprintf("Step %d: Dawg beam %d:\n", t, i);
403  DebugPath(charset, path);
404  }
405  beam_index = BeamIndex(false, NC_ANYTHING, 0);
406  for (int i = prev->beams_[beam_index].size() - 1; i >= 0; --i) {
408  ExtractPath(&prev->beams_[beam_index].get(i).data, &path);
409  tprintf("Step %d: Non-Dawg beam %d:\n", t, i);
410  DebugPath(charset, path);
411  }
412  }
413  int total_beam = 0;
414  // Work through the scores by group (top-2, top-n, the rest) while the beam
415  // is empty. This enables extending the context using only the top-n results
416  // first, which may have an empty intersection with the valid codes, so we
417  // fall back to the rest if the beam is empty.
418  for (int tn = 0; tn < TN_COUNT && total_beam == 0; ++tn) {
419  TopNState top_n = static_cast<TopNState>(tn);
420  for (int index = 0; index < kNumBeams; ++index) {
421  // Working backwards through the heaps doesn't guarantee that we see the
422  // best first, but it comes before a lot of the worst, so it is slightly
423  // more efficient than going forwards.
424  for (int i = prev->beams_[index].size() - 1; i >= 0; --i) {
425  ContinueContext(&prev->beams_[index].get(i).data, index, outputs,
426  top_n, dict_ratio, cert_offset, worst_dict_cert,
427  step);
428  }
429  }
430  for (int index = 0; index < kNumBeams; ++index) {
432  total_beam += step->beams_[index].size();
433  }
434  }
435  // Special case for the best initial dawg. Push it on the heap if good
436  // enough, but there is only one, so it doesn't blow up the beam.
437  for (int c = 0; c < NC_COUNT; ++c) {
438  if (step->best_initial_dawgs_[c].code >= 0) {
439  int index = BeamIndex(true, static_cast<NodeContinuation>(c), 0);
440  RecodeHeap* dawg_heap = &step->beams_[index];
441  PushHeapIfBetter(kBeamWidths[0], &step->best_initial_dawgs_[c],
442  dawg_heap);
443  }
444  }
445  }
446 }
447 
448 // Adds to the appropriate beams the legal (according to recoder)
449 // continuations of context prev, which is of the given length, using the
450 // given network outputs to provide scores to the choices. Uses only those
451 // choices for which top_n_flags[index] == top_n_flag.
452 void RecodeBeamSearch::ContinueContext(const RecodeNode* prev, int index,
453  const float* outputs,
454  TopNState top_n_flag, double dict_ratio,
455  double cert_offset,
456  double worst_dict_cert,
457  RecodeBeam* step) {
458  RecodedCharID prefix;
459  RecodedCharID full_code;
460  const RecodeNode* previous = prev;
461  int length = LengthFromBeamsIndex(index);
462  bool use_dawgs = IsDawgFromBeamsIndex(index);
463  NodeContinuation prev_cont = ContinuationFromBeamsIndex(index);
464  for (int p = length - 1; p >= 0; --p, previous = previous->prev) {
465  while (previous != NULL &&
466  (previous->duplicate || previous->code == null_char_)) {
467  previous = previous->prev;
468  }
469  prefix.Set(p, previous->code);
470  full_code.Set(p, previous->code);
471  }
472  if (prev != nullptr && !is_simple_text_) {
473  if (top_n_flags_[prev->code] == top_n_flag) {
474  if (prev_cont != NC_NO_DUP) {
475  float cert =
476  NetworkIO::ProbToCertainty(outputs[prev->code]) + cert_offset;
477  PushDupOrNoDawgIfBetter(length, true, prev->code, prev->unichar_id,
478  cert, worst_dict_cert, dict_ratio, use_dawgs,
479  NC_ANYTHING, prev, step);
480  }
481  if (prev_cont == NC_ANYTHING && top_n_flag == TN_TOP2 &&
482  prev->code != null_char_) {
483  float cert = NetworkIO::ProbToCertainty(outputs[prev->code] +
484  outputs[null_char_]) +
485  cert_offset;
486  PushDupOrNoDawgIfBetter(length, true, prev->code, prev->unichar_id,
487  cert, worst_dict_cert, dict_ratio, use_dawgs,
488  NC_NO_DUP, prev, step);
489  }
490  }
491  if (prev_cont == NC_ONLY_DUP) return;
492  if (prev->code != null_char_ && length > 0 &&
493  top_n_flags_[null_char_] == top_n_flag) {
494  // Allow nulls within multi code sequences, as the nulls within are not
495  // explicitly included in the code sequence.
496  float cert =
497  NetworkIO::ProbToCertainty(outputs[null_char_]) + cert_offset;
498  PushDupOrNoDawgIfBetter(length, false, null_char_, INVALID_UNICHAR_ID,
499  cert, worst_dict_cert, dict_ratio, use_dawgs,
500  NC_ANYTHING, prev, step);
501  }
502  }
503  const GenericVector<int>* final_codes = recoder_.GetFinalCodes(prefix);
504  if (final_codes != NULL) {
505  for (int i = 0; i < final_codes->size(); ++i) {
506  int code = (*final_codes)[i];
507  if (top_n_flags_[code] != top_n_flag) continue;
508  if (prev != nullptr && prev->code == code && !is_simple_text_) continue;
509  float cert = NetworkIO::ProbToCertainty(outputs[code]) + cert_offset;
510  if (cert < kMinCertainty && code != null_char_) continue;
511  full_code.Set(length, code);
512  int unichar_id = recoder_.DecodeUnichar(full_code);
513  // Map the null char to INVALID.
514  if (length == 0 && code == null_char_) unichar_id = INVALID_UNICHAR_ID;
515  ContinueUnichar(code, unichar_id, cert, worst_dict_cert, dict_ratio,
516  use_dawgs, NC_ANYTHING, prev, step);
517  if (top_n_flag == TN_TOP2 && code != null_char_) {
518  float prob = outputs[code] + outputs[null_char_];
519  if (prev != nullptr && prev_cont == NC_ANYTHING &&
520  prev->code != null_char_ &&
521  ((prev->code == top_code_ && code == second_code_) ||
522  (code == top_code_ && prev->code == second_code_))) {
523  prob += outputs[prev->code];
524  }
525  float cert = NetworkIO::ProbToCertainty(prob) + cert_offset;
526  ContinueUnichar(code, unichar_id, cert, worst_dict_cert, dict_ratio,
527  use_dawgs, NC_ONLY_DUP, prev, step);
528  }
529  }
530  }
531  const GenericVector<int>* next_codes = recoder_.GetNextCodes(prefix);
532  if (next_codes != NULL) {
533  for (int i = 0; i < next_codes->size(); ++i) {
534  int code = (*next_codes)[i];
535  if (top_n_flags_[code] != top_n_flag) continue;
536  if (prev != nullptr && prev->code == code && !is_simple_text_) continue;
537  float cert = NetworkIO::ProbToCertainty(outputs[code]) + cert_offset;
538  PushDupOrNoDawgIfBetter(length + 1, false, code, INVALID_UNICHAR_ID, cert,
539  worst_dict_cert, dict_ratio, use_dawgs,
540  NC_ANYTHING, prev, step);
541  if (top_n_flag == TN_TOP2 && code != null_char_) {
542  float prob = outputs[code] + outputs[null_char_];
543  if (prev != nullptr && prev_cont == NC_ANYTHING &&
544  prev->code != null_char_ &&
545  ((prev->code == top_code_ && code == second_code_) ||
546  (code == top_code_ && prev->code == second_code_))) {
547  prob += outputs[prev->code];
548  }
549  float cert = NetworkIO::ProbToCertainty(prob) + cert_offset;
550  PushDupOrNoDawgIfBetter(length + 1, false, code, INVALID_UNICHAR_ID,
551  cert, worst_dict_cert, dict_ratio, use_dawgs,
552  NC_ONLY_DUP, prev, step);
553  }
554  }
555  }
556 }
557 
558 // Continues for a new unichar, using dawg or non-dawg as per flag.
559 void RecodeBeamSearch::ContinueUnichar(int code, int unichar_id, float cert,
560  float worst_dict_cert, float dict_ratio,
561  bool use_dawgs, NodeContinuation cont,
562  const RecodeNode* prev,
563  RecodeBeam* step) {
564  if (use_dawgs) {
565  if (cert > worst_dict_cert) {
566  ContinueDawg(code, unichar_id, cert, cont, prev, step);
567  }
568  } else {
569  RecodeHeap* nodawg_heap = &step->beams_[BeamIndex(false, cont, 0)];
570  PushHeapIfBetter(kBeamWidths[0], code, unichar_id, TOP_CHOICE_PERM, false,
571  false, false, false, cert * dict_ratio, prev, nullptr,
572  nodawg_heap);
573  if (dict_ != nullptr &&
574  ((unichar_id == UNICHAR_SPACE && cert > worst_dict_cert) ||
575  !dict_->getUnicharset().IsSpaceDelimited(unichar_id))) {
576  // Any top choice position that can start a new word, ie a space or
577  // any non-space-delimited character, should also be considered
578  // by the dawg search, so push initial dawg to the dawg heap.
579  float dawg_cert = cert;
580  PermuterType permuter = TOP_CHOICE_PERM;
581  // Since we use the space either side of a dictionary word in the
582  // certainty of the word, (to properly handle weak spaces) and the
583  // space is coming from a non-dict word, we need special conditions
584  // to avoid degrading the certainty of the dict word that follows.
585  // With a space we don't multiply the certainty by dict_ratio, and we
586  // flag the space with NO_PERM to indicate that we should not use the
587  // predecessor nulls to generate the confidence for the space, as they
588  // have already been multiplied by dict_ratio, and we can't go back to
589  // insert more entries in any previous heaps.
590  if (unichar_id == UNICHAR_SPACE)
591  permuter = NO_PERM;
592  else
593  dawg_cert *= dict_ratio;
594  PushInitialDawgIfBetter(code, unichar_id, permuter, false, false,
595  dawg_cert, cont, prev, step);
596  }
597  }
598 }
599 
600 // Adds a RecodeNode composed of the tuple (code, unichar_id, cert, prev,
601 // appropriate-dawg-args, cert) to the given heap (dawg_beam_) if unichar_id
602 // is a valid continuation of whatever is in prev.
603 void RecodeBeamSearch::ContinueDawg(int code, int unichar_id, float cert,
604  NodeContinuation cont,
605  const RecodeNode* prev, RecodeBeam* step) {
606  RecodeHeap* dawg_heap = &step->beams_[BeamIndex(true, cont, 0)];
607  RecodeHeap* nodawg_heap = &step->beams_[BeamIndex(false, cont, 0)];
608  if (unichar_id == INVALID_UNICHAR_ID) {
609  PushHeapIfBetter(kBeamWidths[0], code, unichar_id, NO_PERM, false, false,
610  false, false, cert, prev, nullptr, dawg_heap);
611  return;
612  }
613  // Avoid dictionary probe if score a total loss.
614  float score = cert;
615  if (prev != NULL) score += prev->score;
616  if (dawg_heap->size() >= kBeamWidths[0] &&
617  score <= dawg_heap->PeekTop().data.score &&
618  nodawg_heap->size() >= kBeamWidths[0] &&
619  score <= nodawg_heap->PeekTop().data.score) {
620  return;
621  }
622  const RecodeNode* uni_prev = prev;
623  // Prev may be a partial code, null_char, or duplicate, so scan back to the
624  // last valid unichar_id.
625  while (uni_prev != NULL &&
626  (uni_prev->unichar_id == INVALID_UNICHAR_ID || uni_prev->duplicate))
627  uni_prev = uni_prev->prev;
628  if (unichar_id == UNICHAR_SPACE) {
629  if (uni_prev != NULL && uni_prev->end_of_word) {
630  // Space is good. Push initial state, to the dawg beam and a regular
631  // space to the top choice beam.
632  PushInitialDawgIfBetter(code, unichar_id, uni_prev->permuter, false,
633  false, cert, cont, prev, step);
634  PushHeapIfBetter(kBeamWidths[0], code, unichar_id, uni_prev->permuter,
635  false, false, false, false, cert, prev, nullptr,
636  nodawg_heap);
637  }
638  return;
639  } else if (uni_prev != NULL && uni_prev->start_of_dawg &&
640  uni_prev->unichar_id != UNICHAR_SPACE &&
641  dict_->getUnicharset().IsSpaceDelimited(uni_prev->unichar_id) &&
642  dict_->getUnicharset().IsSpaceDelimited(unichar_id)) {
643  return; // Can't break words between space delimited chars.
644  }
645  DawgPositionVector initial_dawgs;
646  DawgPositionVector* updated_dawgs = new DawgPositionVector;
647  DawgArgs dawg_args(&initial_dawgs, updated_dawgs, NO_PERM);
648  bool word_start = false;
649  if (uni_prev == NULL) {
650  // Starting from beginning of line.
651  dict_->default_dawgs(&initial_dawgs, false);
652  word_start = true;
653  } else if (uni_prev->dawgs != NULL) {
654  // Continuing a previous dict word.
655  dawg_args.active_dawgs = uni_prev->dawgs;
656  word_start = uni_prev->start_of_dawg;
657  } else {
658  return; // Can't continue if not a dict word.
659  }
660  PermuterType permuter = static_cast<PermuterType>(
661  dict_->def_letter_is_okay(&dawg_args, unichar_id, false));
662  if (permuter != NO_PERM) {
663  PushHeapIfBetter(kBeamWidths[0], code, unichar_id, permuter, false,
664  word_start, dawg_args.valid_end, false, cert, prev,
665  dawg_args.updated_dawgs, dawg_heap);
666  if (dawg_args.valid_end && !space_delimited_) {
667  // We can start another word right away, so push initial state as well,
668  // to the dawg beam, and the regular character to the top choice beam,
669  // since non-dict words can start here too.
670  PushInitialDawgIfBetter(code, unichar_id, permuter, word_start, true,
671  cert, cont, prev, step);
672  PushHeapIfBetter(kBeamWidths[0], code, unichar_id, permuter, false,
673  word_start, true, false, cert, prev, NULL, nodawg_heap);
674  }
675  } else {
676  delete updated_dawgs;
677  }
678 }
679 
680 // Adds a RecodeNode composed of the tuple (code, unichar_id,
681 // initial-dawg-state, prev, cert) to the given heap if/ there is room or if
682 // better than the current worst element if already full.
683 void RecodeBeamSearch::PushInitialDawgIfBetter(int code, int unichar_id,
684  PermuterType permuter,
685  bool start, bool end, float cert,
686  NodeContinuation cont,
687  const RecodeNode* prev,
688  RecodeBeam* step) {
689  RecodeNode* best_initial_dawg = &step->best_initial_dawgs_[cont];
690  float score = cert;
691  if (prev != NULL) score += prev->score;
692  if (best_initial_dawg->code < 0 || score > best_initial_dawg->score) {
693  DawgPositionVector* initial_dawgs = new DawgPositionVector;
694  dict_->default_dawgs(initial_dawgs, false);
695  RecodeNode node(code, unichar_id, permuter, true, start, end, false, cert,
696  score, prev, initial_dawgs,
697  ComputeCodeHash(code, false, prev));
698  *best_initial_dawg = node;
699  }
700 }
701 
702 // Adds a RecodeNode composed of the tuple (code, unichar_id, permuter,
703 // false, false, false, false, cert, prev, NULL) to heap if there is room
704 // or if better than the current worst element if already full.
705 /* static */
706 void RecodeBeamSearch::PushDupOrNoDawgIfBetter(
707  int length, bool dup, int code, int unichar_id, float cert,
708  float worst_dict_cert, float dict_ratio, bool use_dawgs,
709  NodeContinuation cont, const RecodeNode* prev, RecodeBeam* step) {
710  int index = BeamIndex(use_dawgs, cont, length);
711  if (use_dawgs) {
712  if (cert > worst_dict_cert) {
713  PushHeapIfBetter(kBeamWidths[length], code, unichar_id,
714  prev ? prev->permuter : NO_PERM, false, false, false,
715  dup, cert, prev, nullptr, &step->beams_[index]);
716  }
717  } else {
718  cert *= dict_ratio;
719  if (cert >= kMinCertainty || code == null_char_) {
720  PushHeapIfBetter(kBeamWidths[length], code, unichar_id,
721  prev ? prev->permuter : TOP_CHOICE_PERM, false, false,
722  false, dup, cert, prev, nullptr, &step->beams_[index]);
723  }
724  }
725 }
726 
727 // Adds a RecodeNode composed of the tuple (code, unichar_id, permuter,
728 // dawg_start, word_start, end, dup, cert, prev, d) to heap if there is room
729 // or if better than the current worst element if already full.
730 void RecodeBeamSearch::PushHeapIfBetter(int max_size, int code, int unichar_id,
731  PermuterType permuter, bool dawg_start,
732  bool word_start, bool end, bool dup,
733  float cert, const RecodeNode* prev,
735  RecodeHeap* heap) {
736  float score = cert;
737  if (prev != NULL) score += prev->score;
738  if (heap->size() < max_size || score > heap->PeekTop().data.score) {
739  uinT64 hash = ComputeCodeHash(code, dup, prev);
740  RecodeNode node(code, unichar_id, permuter, dawg_start, word_start, end,
741  dup, cert, score, prev, d, hash);
742  if (UpdateHeapIfMatched(&node, heap)) return;
743  RecodePair entry(score, node);
744  heap->Push(&entry);
745  ASSERT_HOST(entry.data.dawgs == NULL);
746  if (heap->size() > max_size) heap->Pop(&entry);
747  } else {
748  delete d;
749  }
750 }
751 
752 // Adds a RecodeNode to heap if there is room
753 // or if better than the current worst element if already full.
754 void RecodeBeamSearch::PushHeapIfBetter(int max_size, RecodeNode* node,
755  RecodeHeap* heap) {
756  if (heap->size() < max_size || node->score > heap->PeekTop().data.score) {
757  if (UpdateHeapIfMatched(node, heap)) {
758  return;
759  }
760  RecodePair entry(node->score, *node);
761  heap->Push(&entry);
762  ASSERT_HOST(entry.data.dawgs == NULL);
763  if (heap->size() > max_size) heap->Pop(&entry);
764  }
765 }
766 
767 // Searches the heap for a matching entry, and updates the score with
768 // reshuffle if needed. Returns true if there was a match.
769 bool RecodeBeamSearch::UpdateHeapIfMatched(RecodeNode* new_node,
770  RecodeHeap* heap) {
771  // TODO(rays) consider hash map instead of linear search.
772  // It might not be faster because the hash map would have to be updated
773  // every time a heap reshuffle happens, and that would be a lot of overhead.
774  GenericVector<RecodePair>* nodes = heap->heap();
775  for (int i = 0; i < nodes->size(); ++i) {
776  RecodeNode& node = (*nodes)[i].data;
777  if (node.code == new_node->code && node.code_hash == new_node->code_hash &&
778  node.permuter == new_node->permuter &&
779  node.start_of_dawg == new_node->start_of_dawg) {
780  if (new_node->score > node.score) {
781  // The new one is better. Update the entire node in the heap and
782  // reshuffle.
783  node = *new_node;
784  (*nodes)[i].key = node.score;
785  heap->Reshuffle(&(*nodes)[i]);
786  }
787  return true;
788  }
789  }
790  return false;
791 }
792 
793 // Computes and returns the code-hash for the given code and prev.
794 uinT64 RecodeBeamSearch::ComputeCodeHash(int code, bool dup,
795  const RecodeNode* prev) const {
796  uinT64 hash = prev == nullptr ? 0 : prev->code_hash;
797  if (!dup && code != null_char_) {
798  int num_classes = recoder_.code_range();
799  uinT64 carry = (((hash >> 32) * num_classes) >> 32);
800  hash *= num_classes;
801  hash += carry;
802  hash += code;
803  }
804  return hash;
805 }
806 
807 // Backtracks to extract the best path through the lattice that was built
808 // during Decode. On return the best_nodes vector essentially contains the set
809 // of code, score pairs that make the optimal path with the constraint that
810 // the recoder can decode the code sequence back to a sequence of unichar-ids.
811 void RecodeBeamSearch::ExtractBestPaths(
813  GenericVector<const RecodeNode*>* second_nodes) const {
814  // Scan both beams to extract the best and second best paths.
815  const RecodeNode* best_node = NULL;
816  const RecodeNode* second_best_node = NULL;
817  const RecodeBeam* last_beam = beam_[beam_size_ - 1];
818  for (int c = 0; c < NC_COUNT; ++c) {
819  if (c == NC_ONLY_DUP) continue;
820  NodeContinuation cont = static_cast<NodeContinuation>(c);
821  for (int is_dawg = 0; is_dawg < 2; ++is_dawg) {
822  int beam_index = BeamIndex(is_dawg, cont, 0);
823  int heap_size = last_beam->beams_[beam_index].size();
824  for (int h = 0; h < heap_size; ++h) {
825  const RecodeNode* node = &last_beam->beams_[beam_index].get(h).data;
826  if (is_dawg) {
827  // dawg_node may be a null_char, or duplicate, so scan back to the
828  // last valid unichar_id.
829  const RecodeNode* dawg_node = node;
830  while (dawg_node != NULL &&
831  (dawg_node->unichar_id == INVALID_UNICHAR_ID ||
832  dawg_node->duplicate))
833  dawg_node = dawg_node->prev;
834  if (dawg_node == NULL || (!dawg_node->end_of_word &&
835  dawg_node->unichar_id != UNICHAR_SPACE)) {
836  // Dawg node is not valid.
837  continue;
838  }
839  }
840  if (best_node == NULL || node->score > best_node->score) {
841  second_best_node = best_node;
842  best_node = node;
843  } else if (second_best_node == NULL ||
844  node->score > second_best_node->score) {
845  second_best_node = node;
846  }
847  }
848  }
849  }
850  if (second_nodes != NULL) ExtractPath(second_best_node, second_nodes);
851  ExtractPath(best_node, best_nodes);
852 }
853 
854 // Helper backtracks through the lattice from the given node, storing the
855 // path and reversing it.
856 void RecodeBeamSearch::ExtractPath(
857  const RecodeNode* node, GenericVector<const RecodeNode*>* path) const {
858  path->truncate(0);
859  while (node != NULL) {
860  path->push_back(node);
861  node = node->prev;
862  }
863  path->reverse();
864 }
865 
866 // Helper prints debug information on the given lattice path.
867 void RecodeBeamSearch::DebugPath(
868  const UNICHARSET* unicharset,
869  const GenericVector<const RecodeNode*>& path) const {
870  for (int c = 0; c < path.size(); ++c) {
871  const RecodeNode& node = *path[c];
872  tprintf("%d ", c);
873  node.Print(null_char_, *unicharset, 1);
874  }
875 }
876 
877 // Helper prints debug information on the given unichar path.
878 void RecodeBeamSearch::DebugUnicharPath(
879  const UNICHARSET* unicharset, const GenericVector<const RecodeNode*>& path,
880  const GenericVector<int>& unichar_ids, const GenericVector<float>& certs,
881  const GenericVector<float>& ratings,
882  const GenericVector<int>& xcoords) const {
883  int num_ids = unichar_ids.size();
884  double total_rating = 0.0;
885  for (int c = 0; c < num_ids; ++c) {
886  int coord = xcoords[c];
887  tprintf("%d %d=%s r=%g, c=%g, s=%d, e=%d, perm=%d\n", coord, unichar_ids[c],
888  unicharset->debug_str(unichar_ids[c]).string(), ratings[c],
889  certs[c], path[coord]->start_of_word, path[coord]->end_of_word,
890  path[coord]->permuter);
891  total_rating += ratings[c];
892  }
893  tprintf("Path total rating = %g\n", total_rating);
894 }
895 
896 } // namespace tesseract.
double u[max]
static bool IsDawgFromBeamsIndex(int index)
Definition: recodebeam.h:224
const UNICHARSET & getUnicharset() const
Definition: dict.h:97
const Pair & get(int index) const
Definition: genericheap.h:87
void Push(Pair *entry)
Definition: genericheap.h:95
static C_BLOB * FakeBlob(const TBOX &box)
Definition: stepblob.cpp:238
int Width() const
Definition: networkio.h:107
void ExtractBestPathAsUnicharIds(bool debug, const UNICHARSET *unicharset, GenericVector< int > *unichar_ids, GenericVector< float > *certs, GenericVector< float > *ratings, GenericVector< int > *xcoords) const
Definition: recodebeam.cpp:123
RecodeBeamSearch(const UnicharCompress &recoder, int null_char, bool simple_text, Dict *dict)
Definition: recodebeam.cpp:62
float * f(int t)
Definition: networkio.h:115
void DebugBeams(const UNICHARSET &unicharset) const
Definition: recodebeam.cpp:210
void init_to_size(int size, T t)
static const float kMinCertainty
Definition: recodebeam.h:213
uint64_t uinT64
Definition: host.h:41
bool valid_end
Definition: dict.h:84
const GenericVector< int > * GetNextCodes(const RecodedCharID &code) const
#define MAX_INT16
Definition: host.h:61
static const int kMaxCodeLen
const GenericVector< int > * GetFinalCodes(const RecodedCharID &code) const
int push_back(T object)
const Pair & PeekTop() const
Definition: genericheap.h:108
int def_letter_is_okay(void *void_dawg_args, UNICHAR_ID unichar_id, bool word_end) const
Definition: dict.cpp:370
void Print(int null_char, const UNICHARSET &unicharset, int depth) const
Definition: recodebeam.cpp:42
void Reshuffle(Pair *pair)
Definition: genericheap.h:182
#define tprintf(...)
Definition: tprintf.h:31
static int LengthFromBeamsIndex(int index)
Definition: recodebeam.h:220
const char * string() const
Definition: strngs.cpp:198
void set_matrix_cell(int col, int row)
Definition: ratngs.h:156
bool empty() const
Definition: genericvector.h:90
void truncate(int size)
void ExtractBestPathAsLabels(GenericVector< int > *labels, GenericVector< int > *xcoords) const
Definition: recodebeam.cpp:100
int size() const
Definition: genericvector.h:72
static const int kNumBeams
Definition: recodebeam.h:218
const char * kNodeContNames[]
Definition: recodebeam.cpp:39
#define ASSERT_HOST(x)
Definition: errcode.h:84
bool IsSpaceDelimitedLang() const
Returns true if the language is space-delimited (not CJ, or T).
Definition: dict.cpp:853
inT16 left() const
Definition: rect.h:68
MATRIX * ratings
Definition: pageres.h:215
int dim1() const
Definition: matrix.h:201
GenericVector< Pair > * heap()
Definition: genericheap.h:83
void ExtractBestPathAsWords(const TBOX &line_box, float scale_factor, bool debug, const UNICHARSET *unicharset, PointerVector< WERD_RES > *words)
Definition: recodebeam.cpp:138
void scale(const float f)
Definition: rect.h:171
BOOL8 combination
Definition: pageres.h:318
int dim2() const
Definition: matrix.h:202
void FakeWordFromRatings(PermuterType permuter)
Definition: pageres.cpp:893
bool IsSpaceDelimited(UNICHAR_ID unichar_id) const
Definition: unicharset.h:612
int DecodeUnichar(const RecodedCharID &code) const
const RecodeNode * prev
Definition: recodebeam.h:164
DawgPositionVector * updated_dawgs
Definition: dict.h:81
static NodeContinuation ContinuationFromBeamsIndex(int index)
Definition: recodebeam.h:221
inT16 top() const
Definition: rect.h:54
static int BeamIndex(bool is_dawg, NodeContinuation cont, int length)
Definition: recodebeam.h:228
Definition: rect.h:30
void Decode(const NetworkIO &output, double dict_ratio, double cert_offset, double worst_dict_cert, const UNICHARSET *charset)
Definition: recodebeam.cpp:76
T & back() const
void put(ICOORD pos, const T &thing)
Definition: matrix.h:215
#define MIN(x, y)
Definition: ndminx.h:28
Definition: matrix.h:563
void Set(int index, int value)
static float ProbToCertainty(float prob)
Definition: networkio.cpp:568
void default_dawgs(DawgPositionVector *anylength_dawgs, bool suppress_patterns) const
Definition: dict.cpp:585
inT16 height() const
Definition: rect.h:104
DawgPositionVector * active_dawgs
Definition: dict.h:80
int NumFeatures() const
Definition: networkio.h:111
NodeContinuation
Definition: recodebeam.h:69
int size() const
Definition: unicharset.h:299
inT16 bottom() const
Definition: rect.h:61
const UNICHARSET * uch_set
Definition: pageres.h:192
bool Pop(Pair *entry)
Definition: genericheap.h:118
float space_certainty
Definition: pageres.h:300
PermuterType permuter
Definition: recodebeam.h:144
Definition: werd.h:60
DawgPositionVector * dawgs
Definition: recodebeam.h:166
PermuterType
Definition: ratngs.h:240
STRING debug_str(UNICHAR_ID id) const
Definition: unicharset.cpp:318
integer coordinate
Definition: points.h:30