tesseract  4.00.00dev
intmatcher.cpp
Go to the documentation of this file.
1 /******************************************************************************
2  ** Filename: intmatcher.c
3  ** Purpose: Generic high level classification routines.
4  ** Author: Robert Moss
5  ** History: Wed Feb 13 17:35:28 MST 1991, RWM, Created.
6  ** Mon Mar 11 16:33:02 MST 1991, RWM, Modified to add
7  ** support for adaptive matching.
8  ** (c) Copyright Hewlett-Packard Company, 1988.
9  ** Licensed under the Apache License, Version 2.0 (the "License");
10  ** you may not use this file except in compliance with the License.
11  ** You may obtain a copy of the License at
12  ** http://www.apache.org/licenses/LICENSE-2.0
13  ** Unless required by applicable law or agreed to in writing, software
14  ** distributed under the License is distributed on an "AS IS" BASIS,
15  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16  ** See the License for the specific language governing permissions and
17  ** limitations under the License.
18  ******************************************************************************/
19 
20 // Include automatically generated configuration file if running autoconf.
21 #ifdef HAVE_CONFIG_H
22 #include "config_auto.h"
23 #endif
24 
25 /*----------------------------------------------------------------------------
26  Include Files and Type Defines
27 ----------------------------------------------------------------------------*/
28 #include "intmatcher.h"
29 
30 #include "fontinfo.h"
31 #include "intproto.h"
32 #include "callcpp.h"
33 #include "scrollview.h"
34 #include "float2int.h"
35 #include "globals.h"
36 #include "helpers.h"
37 #include "classify.h"
38 #include "shapetable.h"
39 #include <math.h>
40 
43 
44 /*----------------------------------------------------------------------------
45  Global Data Definitions and Declarations
46 ----------------------------------------------------------------------------*/
47 // Parameters of the sigmoid used to convert similarity to evidence in the
48 // similarity_evidence_table_ that is used to convert distance metric to an
49 // 8 bit evidence value in the secondary matcher. (See IntMatcher::Init).
51 const float IntegerMatcher::kSimilarityCenter = 0.0075;
52 
53 #define offset_table_entries \
54  255, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, \
55  0, 1, 0, 2, 0, 1, 0, 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, \
56  0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 6, 0, 1, 0, 2, 0, 1, 0, 3, \
57  0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 5, \
58  0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, \
59  0, 1, 0, 2, 0, 1, 0, 7, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, \
60  0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 5, 0, 1, 0, 2, 0, 1, 0, 3, \
61  0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 6, \
62  0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, \
63  0, 1, 0, 2, 0, 1, 0, 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, \
64  0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0
65 
66 #define INTMATCHER_OFFSET_TABLE_SIZE 256
67 
68 #define next_table_entries \
69  0, 0, 0, 0x2, 0, 0x4, 0x4, 0x6, 0, 0x8, 0x8, 0x0a, 0x08, 0x0c, 0x0c, 0x0e, \
70  0, 0x10, 0x10, 0x12, 0x10, 0x14, 0x14, 0x16, 0x10, 0x18, 0x18, 0x1a, \
71  0x18, 0x1c, 0x1c, 0x1e, 0, 0x20, 0x20, 0x22, 0x20, 0x24, 0x24, 0x26, \
72  0x20, 0x28, 0x28, 0x2a, 0x28, 0x2c, 0x2c, 0x2e, 0x20, 0x30, 0x30, 0x32, \
73  0x30, 0x34, 0x34, 0x36, 0x30, 0x38, 0x38, 0x3a, 0x38, 0x3c, 0x3c, 0x3e, \
74  0, 0x40, 0x40, 0x42, 0x40, 0x44, 0x44, 0x46, 0x40, 0x48, 0x48, 0x4a, \
75  0x48, 0x4c, 0x4c, 0x4e, 0x40, 0x50, 0x50, 0x52, 0x50, 0x54, 0x54, 0x56, \
76  0x50, 0x58, 0x58, 0x5a, 0x58, 0x5c, 0x5c, 0x5e, 0x40, 0x60, 0x60, 0x62, \
77  0x60, 0x64, 0x64, 0x66, 0x60, 0x68, 0x68, 0x6a, 0x68, 0x6c, 0x6c, 0x6e, \
78  0x60, 0x70, 0x70, 0x72, 0x70, 0x74, 0x74, 0x76, 0x70, 0x78, 0x78, 0x7a, \
79  0x78, 0x7c, 0x7c, 0x7e, 0, 0x80, 0x80, 0x82, 0x80, 0x84, 0x84, 0x86, \
80  0x80, 0x88, 0x88, 0x8a, 0x88, 0x8c, 0x8c, 0x8e, 0x80, 0x90, 0x90, 0x92, \
81  0x90, 0x94, 0x94, 0x96, 0x90, 0x98, 0x98, 0x9a, 0x98, 0x9c, 0x9c, 0x9e, \
82  0x80, 0xa0, 0xa0, 0xa2, 0xa0, 0xa4, 0xa4, 0xa6, 0xa0, 0xa8, 0xa8, 0xaa, \
83  0xa8, 0xac, 0xac, 0xae, 0xa0, 0xb0, 0xb0, 0xb2, 0xb0, 0xb4, 0xb4, 0xb6, \
84  0xb0, 0xb8, 0xb8, 0xba, 0xb8, 0xbc, 0xbc, 0xbe, 0x80, 0xc0, 0xc0, 0xc2, \
85  0xc0, 0xc4, 0xc4, 0xc6, 0xc0, 0xc8, 0xc8, 0xca, 0xc8, 0xcc, 0xcc, 0xce, \
86  0xc0, 0xd0, 0xd0, 0xd2, 0xd0, 0xd4, 0xd4, 0xd6, 0xd0, 0xd8, 0xd8, 0xda, \
87  0xd8, 0xdc, 0xdc, 0xde, 0xc0, 0xe0, 0xe0, 0xe2, 0xe0, 0xe4, 0xe4, 0xe6, \
88  0xe0, 0xe8, 0xe8, 0xea, 0xe8, 0xec, 0xec, 0xee, 0xe0, 0xf0, 0xf0, 0xf2, \
89  0xf0, 0xf4, 0xf4, 0xf6, 0xf0, 0xf8, 0xf8, 0xfa, 0xf8, 0xfc, 0xfc, 0xfe
90 
91 // See http://b/19318793 (#6) for a complete discussion. Merging arrays
92 // offset_table and next_table helps improve performance of PIE code.
93 static const uinT8 data_table[512] = {offset_table_entries, next_table_entries};
94 
95 static const uinT8* const offset_table = &data_table[0];
96 static const uinT8* const next_table =
97  &data_table[INTMATCHER_OFFSET_TABLE_SIZE];
98 
99 namespace tesseract {
100 
101 // Encapsulation of the intermediate data and computations made by the class
102 // pruner. The class pruner implements a simple linear classifier on binary
103 // features by heavily quantizing the feature space, and applying
104 // NUM_BITS_PER_CLASS (2)-bit weights to the features. Lack of resolution in
105 // weights is compensated by a non-constant bias that is dependent on the
106 // number of features present.
107 class ClassPruner {
108  public:
109  ClassPruner(int max_classes) {
110  // The unrolled loop in ComputeScores means that the array sizes need to
111  // be rounded up so that the array is big enough to accommodate the extra
112  // entries accessed by the unrolling. Each pruner word is of sized
113  // BITS_PER_WERD and each entry is NUM_BITS_PER_CLASS, so there are
114  // BITS_PER_WERD / NUM_BITS_PER_CLASS entries.
115  // See ComputeScores.
116  max_classes_ = max_classes;
117  rounded_classes_ = RoundUp(
119  class_count_ = new int[rounded_classes_];
120  norm_count_ = new int[rounded_classes_];
121  sort_key_ = new int[rounded_classes_ + 1];
122  sort_index_ = new int[rounded_classes_ + 1];
123  for (int i = 0; i < rounded_classes_; i++) {
124  class_count_[i] = 0;
125  }
126  pruning_threshold_ = 0;
127  num_features_ = 0;
128  num_classes_ = 0;
129  }
130 
132  delete []class_count_;
133  delete []norm_count_;
134  delete []sort_key_;
135  delete []sort_index_;
136  }
137 
140  void ComputeScores(const INT_TEMPLATES_STRUCT* int_templates,
141  int num_features, const INT_FEATURE_STRUCT* features) {
142  num_features_ = num_features;
143  int num_pruners = int_templates->NumClassPruners;
144  for (int f = 0; f < num_features; ++f) {
145  const INT_FEATURE_STRUCT* feature = &features[f];
146  // Quantize the feature to NUM_CP_BUCKETS*NUM_CP_BUCKETS*NUM_CP_BUCKETS.
147  int x = feature->X * NUM_CP_BUCKETS >> 8;
148  int y = feature->Y * NUM_CP_BUCKETS >> 8;
149  int theta = feature->Theta * NUM_CP_BUCKETS >> 8;
150  int class_id = 0;
151  // Each CLASS_PRUNER_STRUCT only covers CLASSES_PER_CP(32) classes, so
152  // we need a collection of them, indexed by pruner_set.
153  for (int pruner_set = 0; pruner_set < num_pruners; ++pruner_set) {
154  // Look up quantized feature in a 3-D array, an array of weights for
155  // each class.
156  const uinT32* pruner_word_ptr =
157  int_templates->ClassPruners[pruner_set]->p[x][y][theta];
158  for (int word = 0; word < WERDS_PER_CP_VECTOR; ++word) {
159  uinT32 pruner_word = *pruner_word_ptr++;
160  // This inner loop is unrolled to speed up the ClassPruner.
161  // Currently gcc would not unroll it unless it is set to O3
162  // level of optimization or -funroll-loops is specified.
163  /*
164  uinT32 class_mask = (1 << NUM_BITS_PER_CLASS) - 1;
165  for (int bit = 0; bit < BITS_PER_WERD/NUM_BITS_PER_CLASS; bit++) {
166  class_count_[class_id++] += pruner_word & class_mask;
167  pruner_word >>= NUM_BITS_PER_CLASS;
168  }
169  */
170  class_count_[class_id++] += pruner_word & CLASS_PRUNER_CLASS_MASK;
171  pruner_word >>= NUM_BITS_PER_CLASS;
172  class_count_[class_id++] += pruner_word & CLASS_PRUNER_CLASS_MASK;
173  pruner_word >>= NUM_BITS_PER_CLASS;
174  class_count_[class_id++] += pruner_word & CLASS_PRUNER_CLASS_MASK;
175  pruner_word >>= NUM_BITS_PER_CLASS;
176  class_count_[class_id++] += pruner_word & CLASS_PRUNER_CLASS_MASK;
177  pruner_word >>= NUM_BITS_PER_CLASS;
178  class_count_[class_id++] += pruner_word & CLASS_PRUNER_CLASS_MASK;
179  pruner_word >>= NUM_BITS_PER_CLASS;
180  class_count_[class_id++] += pruner_word & CLASS_PRUNER_CLASS_MASK;
181  pruner_word >>= NUM_BITS_PER_CLASS;
182  class_count_[class_id++] += pruner_word & CLASS_PRUNER_CLASS_MASK;
183  pruner_word >>= NUM_BITS_PER_CLASS;
184  class_count_[class_id++] += pruner_word & CLASS_PRUNER_CLASS_MASK;
185  pruner_word >>= NUM_BITS_PER_CLASS;
186  class_count_[class_id++] += pruner_word & CLASS_PRUNER_CLASS_MASK;
187  pruner_word >>= NUM_BITS_PER_CLASS;
188  class_count_[class_id++] += pruner_word & CLASS_PRUNER_CLASS_MASK;
189  pruner_word >>= NUM_BITS_PER_CLASS;
190  class_count_[class_id++] += pruner_word & CLASS_PRUNER_CLASS_MASK;
191  pruner_word >>= NUM_BITS_PER_CLASS;
192  class_count_[class_id++] += pruner_word & CLASS_PRUNER_CLASS_MASK;
193  pruner_word >>= NUM_BITS_PER_CLASS;
194  class_count_[class_id++] += pruner_word & CLASS_PRUNER_CLASS_MASK;
195  pruner_word >>= NUM_BITS_PER_CLASS;
196  class_count_[class_id++] += pruner_word & CLASS_PRUNER_CLASS_MASK;
197  pruner_word >>= NUM_BITS_PER_CLASS;
198  class_count_[class_id++] += pruner_word & CLASS_PRUNER_CLASS_MASK;
199  pruner_word >>= NUM_BITS_PER_CLASS;
200  class_count_[class_id++] += pruner_word & CLASS_PRUNER_CLASS_MASK;
201  }
202  }
203  }
204  }
205 
211  void AdjustForExpectedNumFeatures(const uinT16* expected_num_features,
212  int cutoff_strength) {
213  for (int class_id = 0; class_id < max_classes_; ++class_id) {
214  if (num_features_ < expected_num_features[class_id]) {
215  int deficit = expected_num_features[class_id] - num_features_;
216  class_count_[class_id] -= class_count_[class_id] * deficit /
217  (num_features_ * cutoff_strength + deficit);
218  }
219  }
220  }
221 
224  void DisableDisabledClasses(const UNICHARSET& unicharset) {
225  for (int class_id = 0; class_id < max_classes_; ++class_id) {
226  if (!unicharset.get_enabled(class_id))
227  class_count_[class_id] = 0; // This char is disabled!
228  }
229  }
230 
232  void DisableFragments(const UNICHARSET& unicharset) {
233  for (int class_id = 0; class_id < max_classes_; ++class_id) {
234  // Do not include character fragments in the class pruner
235  // results if disable_character_fragments is true.
236  if (unicharset.get_fragment(class_id)) {
237  class_count_[class_id] = 0;
238  }
239  }
240  }
241 
246  void NormalizeForXheight(int norm_multiplier,
247  const uinT8* normalization_factors) {
248  for (int class_id = 0; class_id < max_classes_; class_id++) {
249  norm_count_[class_id] = class_count_[class_id] -
250  ((norm_multiplier * normalization_factors[class_id]) >> 8);
251  }
252  }
253 
256  for (int class_id = 0; class_id < max_classes_; class_id++) {
257  norm_count_[class_id] = class_count_[class_id];
258  }
259  }
260 
264  void PruneAndSort(int pruning_factor, int keep_this,
265  bool max_of_non_fragments, const UNICHARSET& unicharset) {
266  int max_count = 0;
267  for (int c = 0; c < max_classes_; ++c) {
268  if (norm_count_[c] > max_count &&
269  // This additional check is added in order to ensure that
270  // the classifier will return at least one non-fragmented
271  // character match.
272  // TODO(daria): verify that this helps accuracy and does not
273  // hurt performance.
274  (!max_of_non_fragments || !unicharset.get_fragment(c))) {
275  max_count = norm_count_[c];
276  }
277  }
278  // Prune Classes.
279  pruning_threshold_ = (max_count * pruning_factor) >> 8;
280  // Select Classes.
281  if (pruning_threshold_ < 1)
282  pruning_threshold_ = 1;
283  num_classes_ = 0;
284  for (int class_id = 0; class_id < max_classes_; class_id++) {
285  if (norm_count_[class_id] >= pruning_threshold_ ||
286  class_id == keep_this) {
287  ++num_classes_;
288  sort_index_[num_classes_] = class_id;
289  sort_key_[num_classes_] = norm_count_[class_id];
290  }
291  }
292 
293  // Sort Classes using Heapsort Algorithm.
294  if (num_classes_ > 1)
295  HeapSort(num_classes_, sort_key_, sort_index_);
296  }
297 
300  void DebugMatch(const Classify& classify,
301  const INT_TEMPLATES_STRUCT* int_templates,
302  const INT_FEATURE_STRUCT* features) const {
303  int num_pruners = int_templates->NumClassPruners;
304  int max_num_classes = int_templates->NumClasses;
305  for (int f = 0; f < num_features_; ++f) {
306  const INT_FEATURE_STRUCT* feature = &features[f];
307  tprintf("F=%3d(%d,%d,%d),", f, feature->X, feature->Y, feature->Theta);
308  // Quantize the feature to NUM_CP_BUCKETS*NUM_CP_BUCKETS*NUM_CP_BUCKETS.
309  int x = feature->X * NUM_CP_BUCKETS >> 8;
310  int y = feature->Y * NUM_CP_BUCKETS >> 8;
311  int theta = feature->Theta * NUM_CP_BUCKETS >> 8;
312  int class_id = 0;
313  for (int pruner_set = 0; pruner_set < num_pruners; ++pruner_set) {
314  // Look up quantized feature in a 3-D array, an array of weights for
315  // each class.
316  const uinT32* pruner_word_ptr =
317  int_templates->ClassPruners[pruner_set]->p[x][y][theta];
318  for (int word = 0; word < WERDS_PER_CP_VECTOR; ++word) {
319  uinT32 pruner_word = *pruner_word_ptr++;
320  for (int word_class = 0; word_class < 16 &&
321  class_id < max_num_classes; ++word_class, ++class_id) {
322  if (norm_count_[class_id] >= pruning_threshold_) {
323  tprintf(" %s=%d,",
324  classify.ClassIDToDebugStr(int_templates,
325  class_id, 0).string(),
326  pruner_word & CLASS_PRUNER_CLASS_MASK);
327  }
328  pruner_word >>= NUM_BITS_PER_CLASS;
329  }
330  }
331  tprintf("\n");
332  }
333  }
334  }
335 
337  void SummarizeResult(const Classify& classify,
338  const INT_TEMPLATES_STRUCT* int_templates,
339  const uinT16* expected_num_features,
340  int norm_multiplier,
341  const uinT8* normalization_factors) const {
342  tprintf("CP:%d classes, %d features:\n", num_classes_, num_features_);
343  for (int i = 0; i < num_classes_; ++i) {
344  int class_id = sort_index_[num_classes_ - i];
345  STRING class_string = classify.ClassIDToDebugStr(int_templates,
346  class_id, 0);
347  tprintf("%s:Initial=%d, E=%d, Xht-adj=%d, N=%d, Rat=%.2f\n",
348  class_string.string(),
349  class_count_[class_id],
350  expected_num_features[class_id],
351  (norm_multiplier * normalization_factors[class_id]) >> 8,
352  sort_key_[num_classes_ - i],
353  100.0 - 100.0 * sort_key_[num_classes_ - i] /
354  (CLASS_PRUNER_CLASS_MASK * num_features_));
355  }
356  }
357 
361  CP_RESULT_STRUCT empty;
362  results->init_to_size(num_classes_, empty);
363  for (int c = 0; c < num_classes_; ++c) {
364  (*results)[c].Class = sort_index_[num_classes_ - c];
365  (*results)[c].Rating = 1.0 - sort_key_[num_classes_ - c] /
366  (static_cast<float>(CLASS_PRUNER_CLASS_MASK) * num_features_);
367  }
368  return num_classes_;
369  }
370 
371  private:
373  int *class_count_;
377  int *norm_count_;
379  int *sort_key_;
381  int *sort_index_;
383  int max_classes_;
385  int rounded_classes_;
387  int pruning_threshold_;
389  int num_features_;
391  int num_classes_;
392 };
393 
394 /*----------------------------------------------------------------------------
395  Public Code
396 ----------------------------------------------------------------------------*/
413  int num_features, int keep_this,
415  const uinT8* normalization_factors,
416  const uinT16* expected_num_features,
418  ClassPruner pruner(int_templates->NumClasses);
419  // Compute initial match scores for all classes.
420  pruner.ComputeScores(int_templates, num_features, features);
421  // Adjust match scores for number of expected features.
422  pruner.AdjustForExpectedNumFeatures(expected_num_features,
423  classify_cp_cutoff_strength);
424  // Apply disabled classes in unicharset - only works without a shape_table.
425  if (shape_table_ == NULL)
426  pruner.DisableDisabledClasses(unicharset);
427  // If fragments are disabled, remove them, also only without a shape table.
428  if (disable_character_fragments && shape_table_ == NULL)
429  pruner.DisableFragments(unicharset);
430 
431  // If we have good x-heights, apply the given normalization factors.
432  if (normalization_factors != NULL) {
433  pruner.NormalizeForXheight(classify_class_pruner_multiplier,
434  normalization_factors);
435  } else {
436  pruner.NoNormalization();
437  }
438  // Do the actual pruning and sort the short-list.
439  pruner.PruneAndSort(classify_class_pruner_threshold, keep_this,
440  shape_table_ == NULL, unicharset);
441 
442  if (classify_debug_level > 2) {
443  pruner.DebugMatch(*this, int_templates, features);
444  }
445  if (classify_debug_level > 1) {
446  pruner.SummarizeResult(*this, int_templates, expected_num_features,
447  classify_class_pruner_multiplier,
448  normalization_factors);
449  }
450  // Convert to the expected output format.
451  return pruner.SetupResults(results);
452 }
453 
454 } // namespace tesseract
455 
475 void IntegerMatcher::Match(INT_CLASS ClassTemplate,
476  BIT_VECTOR ProtoMask,
477  BIT_VECTOR ConfigMask,
478  inT16 NumFeatures,
479  const INT_FEATURE_STRUCT* Features,
480  UnicharRating* Result,
481  int AdaptFeatureThreshold,
482  int Debug,
483  bool SeparateDebugWindows) {
484  ScratchEvidence *tables = new ScratchEvidence();
485  int Feature;
486  int BestMatch;
487 
488  if (MatchDebuggingOn (Debug))
489  cprintf ("Integer Matcher -------------------------------------------\n");
490 
491  tables->Clear(ClassTemplate);
492  Result->feature_misses = 0;
493 
494  for (Feature = 0; Feature < NumFeatures; Feature++) {
495  int csum = UpdateTablesForFeature(ClassTemplate, ProtoMask, ConfigMask,
496  Feature, &Features[Feature],
497  tables, Debug);
498  // Count features that were missed over all configs.
499  if (csum == 0)
500  ++Result->feature_misses;
501  }
502 
503 #ifndef GRAPHICS_DISABLED
504  if (PrintProtoMatchesOn(Debug) || PrintMatchSummaryOn(Debug)) {
505  DebugFeatureProtoError(ClassTemplate, ProtoMask, ConfigMask, *tables,
506  NumFeatures, Debug);
507  }
508 
509  if (DisplayProtoMatchesOn(Debug)) {
510  DisplayProtoDebugInfo(ClassTemplate, ProtoMask, ConfigMask,
511  *tables, SeparateDebugWindows);
512  }
513 
514  if (DisplayFeatureMatchesOn(Debug)) {
515  DisplayFeatureDebugInfo(ClassTemplate, ProtoMask, ConfigMask, NumFeatures,
516  Features, AdaptFeatureThreshold, Debug,
517  SeparateDebugWindows);
518  }
519 #endif
520 
521  tables->UpdateSumOfProtoEvidences(ClassTemplate, ConfigMask, NumFeatures);
522  tables->NormalizeSums(ClassTemplate, NumFeatures, NumFeatures);
523 
524  BestMatch = FindBestMatch(ClassTemplate, *tables, Result);
525 
526 #ifndef GRAPHICS_DISABLED
527  if (PrintMatchSummaryOn(Debug))
528  Result->Print();
529 
530  if (MatchDebuggingOn(Debug))
531  cprintf("Match Complete --------------------------------------------\n");
532 #endif
533 
534  delete tables;
535 }
536 
558  INT_CLASS ClassTemplate,
559  BIT_VECTOR ProtoMask,
560  BIT_VECTOR ConfigMask,
561  uinT16 BlobLength,
562  inT16 NumFeatures,
563  INT_FEATURE_ARRAY Features,
564  PROTO_ID *ProtoArray,
565  int AdaptProtoThreshold,
566  int Debug) {
567  ScratchEvidence *tables = new ScratchEvidence();
568  int NumGoodProtos = 0;
569 
570  /* DEBUG opening heading */
571  if (MatchDebuggingOn (Debug))
572  cprintf
573  ("Find Good Protos -------------------------------------------\n");
574 
575  tables->Clear(ClassTemplate);
576 
577  for (int Feature = 0; Feature < NumFeatures; Feature++)
578  UpdateTablesForFeature(
579  ClassTemplate, ProtoMask, ConfigMask, Feature, &(Features[Feature]),
580  tables, Debug);
581 
582 #ifndef GRAPHICS_DISABLED
583  if (PrintProtoMatchesOn (Debug) || PrintMatchSummaryOn (Debug))
584  DebugFeatureProtoError(ClassTemplate, ProtoMask, ConfigMask, *tables,
585  NumFeatures, Debug);
586 #endif
587 
588  /* Average Proto Evidences & Find Good Protos */
589  for (int proto = 0; proto < ClassTemplate->NumProtos; proto++) {
590  /* Compute Average for Actual Proto */
591  int Temp = 0;
592  for (int i = 0; i < ClassTemplate->ProtoLengths[proto]; i++)
593  Temp += tables->proto_evidence_[proto][i];
594 
595  Temp /= ClassTemplate->ProtoLengths[proto];
596 
597  /* Find Good Protos */
598  if (Temp >= AdaptProtoThreshold) {
599  *ProtoArray = proto;
600  ProtoArray++;
601  NumGoodProtos++;
602  }
603  }
604 
605  if (MatchDebuggingOn (Debug))
606  cprintf ("Match Complete --------------------------------------------\n");
607  delete tables;
608 
609  return NumGoodProtos;
610 }
611 
628  INT_CLASS ClassTemplate,
629  BIT_VECTOR ProtoMask,
630  BIT_VECTOR ConfigMask,
631  uinT16 BlobLength,
632  inT16 NumFeatures,
633  INT_FEATURE_ARRAY Features,
634  FEATURE_ID *FeatureArray,
635  int AdaptFeatureThreshold,
636  int Debug) {
637  ScratchEvidence *tables = new ScratchEvidence();
638  int NumBadFeatures = 0;
639 
640  /* DEBUG opening heading */
641  if (MatchDebuggingOn(Debug))
642  cprintf("Find Bad Features -------------------------------------------\n");
643 
644  tables->Clear(ClassTemplate);
645 
646  for (int Feature = 0; Feature < NumFeatures; Feature++) {
647  UpdateTablesForFeature(
648  ClassTemplate, ProtoMask, ConfigMask, Feature, &Features[Feature],
649  tables, Debug);
650 
651  /* Find Best Evidence for Current Feature */
652  int best = 0;
653  for (int i = 0; i < ClassTemplate->NumConfigs; i++)
654  if (tables->feature_evidence_[i] > best)
655  best = tables->feature_evidence_[i];
656 
657  /* Find Bad Features */
658  if (best < AdaptFeatureThreshold) {
659  *FeatureArray = Feature;
660  FeatureArray++;
661  NumBadFeatures++;
662  }
663  }
664 
665 #ifndef GRAPHICS_DISABLED
666  if (PrintProtoMatchesOn(Debug) || PrintMatchSummaryOn(Debug))
667  DebugFeatureProtoError(ClassTemplate, ProtoMask, ConfigMask, *tables,
668  NumFeatures, Debug);
669 #endif
670 
671  if (MatchDebuggingOn(Debug))
672  cprintf("Match Complete --------------------------------------------\n");
673 
674  delete tables;
675  return NumBadFeatures;
676 }
677 
678 
679 void IntegerMatcher::Init(tesseract::IntParam *classify_debug_level) {
680  classify_debug_level_ = classify_debug_level;
681 
682  /* Initialize table for evidence to similarity lookup */
683  for (int i = 0; i < SE_TABLE_SIZE; i++) {
684  uinT32 IntSimilarity = i << (27 - SE_TABLE_BITS);
685  double Similarity = ((double) IntSimilarity) / 65536.0 / 65536.0;
686  double evidence = Similarity / kSimilarityCenter;
687  evidence = 255.0 / (evidence * evidence + 1.0);
688 
689  if (kSEExponentialMultiplier > 0.0) {
690  double scale = 1.0 - exp(-kSEExponentialMultiplier) *
691  exp(kSEExponentialMultiplier * ((double) i / SE_TABLE_SIZE));
692  evidence *= ClipToRange(scale, 0.0, 1.0);
693  }
694 
695  similarity_evidence_table_[i] = (uinT8) (evidence + 0.5);
696  }
697 
698  /* Initialize evidence computation variables */
699  evidence_table_mask_ =
700  ((1 << kEvidenceTableBits) - 1) << (9 - kEvidenceTableBits);
701  mult_trunc_shift_bits_ = (14 - kIntEvidenceTruncBits);
702  table_trunc_shift_bits_ = (27 - SE_TABLE_BITS - (mult_trunc_shift_bits_ << 1));
703  evidence_mult_mask_ = ((1 << kIntEvidenceTruncBits) - 1);
704 }
705 
706 /*----------------------------------------------------------------------------
707  Private Code
708 ----------------------------------------------------------------------------*/
709 void ScratchEvidence::Clear(const INT_CLASS class_template) {
710  memset(sum_feature_evidence_, 0,
711  class_template->NumConfigs * sizeof(sum_feature_evidence_[0]));
712  memset(proto_evidence_, 0,
713  class_template->NumProtos * sizeof(proto_evidence_[0]));
714 }
715 
717  memset(feature_evidence_, 0,
718  class_template->NumConfigs * sizeof(feature_evidence_[0]));
719 }
720 
727 void IMDebugConfiguration(int FeatureNum,
728  uinT16 ActualProtoNum,
729  uinT8 Evidence,
730  BIT_VECTOR ConfigMask,
731  uinT32 ConfigWord) {
732  cprintf ("F = %3d, P = %3d, E = %3d, Configs = ",
733  FeatureNum, (int) ActualProtoNum, (int) Evidence);
734  while (ConfigWord) {
735  if (ConfigWord & 1)
736  cprintf ("1");
737  else
738  cprintf ("0");
739  ConfigWord >>= 1;
740  }
741  cprintf ("\n");
742 }
743 
750 void IMDebugConfigurationSum(int FeatureNum,
751  uinT8 *FeatureEvidence,
752  inT32 ConfigCount) {
753  cprintf("F=%3d, C=", FeatureNum);
754  for (int ConfigNum = 0; ConfigNum < ConfigCount; ConfigNum++) {
755  cprintf("%4d", FeatureEvidence[ConfigNum]);
756  }
757  cprintf("\n");
758 }
759 
771 int IntegerMatcher::UpdateTablesForFeature(
772  INT_CLASS ClassTemplate,
773  BIT_VECTOR ProtoMask,
774  BIT_VECTOR ConfigMask,
775  int FeatureNum,
776  const INT_FEATURE_STRUCT* Feature,
777  ScratchEvidence *tables,
778  int Debug) {
779  uinT32 ConfigWord;
780  uinT32 ProtoWord;
781  uinT32 ProtoNum;
782  uinT32 ActualProtoNum;
783  uinT8 proto_byte;
784  inT32 proto_word_offset;
785  inT32 proto_offset;
786  uinT8 config_byte;
787  inT32 config_offset;
788  PROTO_SET ProtoSet;
789  uinT32 *ProtoPrunerPtr;
790  INT_PROTO Proto;
791  int ProtoSetIndex;
792  uinT8 Evidence;
793  uinT32 XFeatureAddress;
794  uinT32 YFeatureAddress;
795  uinT32 ThetaFeatureAddress;
796  uinT8* UINT8Pointer;
797  int ProtoIndex;
798  uinT8 Temp;
799  int* IntPointer;
800  int ConfigNum;
801  inT32 M3;
802  inT32 A3;
803  uinT32 A4;
804 
805  tables->ClearFeatureEvidence(ClassTemplate);
806 
807  /* Precompute Feature Address offset for Proto Pruning */
808  XFeatureAddress = ((Feature->X >> 2) << 1);
809  YFeatureAddress = (NUM_PP_BUCKETS << 1) + ((Feature->Y >> 2) << 1);
810  ThetaFeatureAddress = (NUM_PP_BUCKETS << 2) + ((Feature->Theta >> 2) << 1);
811 
812  for (ProtoSetIndex = 0, ActualProtoNum = 0;
813  ProtoSetIndex < ClassTemplate->NumProtoSets; ProtoSetIndex++) {
814  ProtoSet = ClassTemplate->ProtoSets[ProtoSetIndex];
815  ProtoPrunerPtr = (uinT32 *) ((*ProtoSet).ProtoPruner);
816  for (ProtoNum = 0; ProtoNum < PROTOS_PER_PROTO_SET;
817  ProtoNum += (PROTOS_PER_PROTO_SET >> 1), ActualProtoNum +=
818  (PROTOS_PER_PROTO_SET >> 1), ProtoMask++, ProtoPrunerPtr++) {
819  /* Prune Protos of current Proto Set */
820  ProtoWord = *(ProtoPrunerPtr + XFeatureAddress);
821  ProtoWord &= *(ProtoPrunerPtr + YFeatureAddress);
822  ProtoWord &= *(ProtoPrunerPtr + ThetaFeatureAddress);
823  ProtoWord &= *ProtoMask;
824 
825  if (ProtoWord != 0) {
826  proto_byte = ProtoWord & 0xff;
827  ProtoWord >>= 8;
828  proto_word_offset = 0;
829  while (ProtoWord != 0 || proto_byte != 0) {
830  while (proto_byte == 0) {
831  proto_byte = ProtoWord & 0xff;
832  ProtoWord >>= 8;
833  proto_word_offset += 8;
834  }
835  proto_offset = offset_table[proto_byte] + proto_word_offset;
836  proto_byte = next_table[proto_byte];
837  Proto = &(ProtoSet->Protos[ProtoNum + proto_offset]);
838  ConfigWord = Proto->Configs[0];
839  A3 = (((Proto->A * (Feature->X - 128)) << 1)
840  - (Proto->B * (Feature->Y - 128)) + (Proto->C << 9));
841  M3 =
842  (((inT8) (Feature->Theta - Proto->Angle)) * kIntThetaFudge) << 1;
843 
844  if (A3 < 0)
845  A3 = ~A3;
846  if (M3 < 0)
847  M3 = ~M3;
848  A3 >>= mult_trunc_shift_bits_;
849  M3 >>= mult_trunc_shift_bits_;
850  if (A3 > evidence_mult_mask_)
851  A3 = evidence_mult_mask_;
852  if (M3 > evidence_mult_mask_)
853  M3 = evidence_mult_mask_;
854 
855  A4 = (A3 * A3) + (M3 * M3);
856  A4 >>= table_trunc_shift_bits_;
857  if (A4 > evidence_table_mask_)
858  Evidence = 0;
859  else
860  Evidence = similarity_evidence_table_[A4];
861 
862  if (PrintFeatureMatchesOn (Debug))
863  IMDebugConfiguration (FeatureNum,
864  ActualProtoNum + proto_offset,
865  Evidence, ConfigMask, ConfigWord);
866 
867  ConfigWord &= *ConfigMask;
868 
869  UINT8Pointer = tables->feature_evidence_ - 8;
870  config_byte = 0;
871  while (ConfigWord != 0 || config_byte != 0) {
872  while (config_byte == 0) {
873  config_byte = ConfigWord & 0xff;
874  ConfigWord >>= 8;
875  UINT8Pointer += 8;
876  }
877  config_offset = offset_table[config_byte];
878  config_byte = next_table[config_byte];
879  if (Evidence > UINT8Pointer[config_offset])
880  UINT8Pointer[config_offset] = Evidence;
881  }
882 
883  UINT8Pointer =
884  &(tables->proto_evidence_[ActualProtoNum + proto_offset][0]);
885  for (ProtoIndex =
886  ClassTemplate->ProtoLengths[ActualProtoNum + proto_offset];
887  ProtoIndex > 0; ProtoIndex--, UINT8Pointer++) {
888  if (Evidence > *UINT8Pointer) {
889  Temp = *UINT8Pointer;
890  *UINT8Pointer = Evidence;
891  Evidence = Temp;
892  }
893  else if (Evidence == 0)
894  break;
895  }
896  }
897  }
898  }
899  }
900 
901  if (PrintFeatureMatchesOn(Debug)) {
902  IMDebugConfigurationSum(FeatureNum, tables->feature_evidence_,
903  ClassTemplate->NumConfigs);
904  }
905 
906  IntPointer = tables->sum_feature_evidence_;
907  UINT8Pointer = tables->feature_evidence_;
908  int SumOverConfigs = 0;
909  for (ConfigNum = ClassTemplate->NumConfigs; ConfigNum > 0; ConfigNum--) {
910  int evidence = *UINT8Pointer++;
911  SumOverConfigs += evidence;
912  *IntPointer++ += evidence;
913  }
914  return SumOverConfigs;
915 }
916 
923 #ifndef GRAPHICS_DISABLED
924 void IntegerMatcher::DebugFeatureProtoError(
925  INT_CLASS ClassTemplate,
926  BIT_VECTOR ProtoMask,
927  BIT_VECTOR ConfigMask,
928  const ScratchEvidence& tables,
929  inT16 NumFeatures,
930  int Debug) {
931  FLOAT32 ProtoConfigs[MAX_NUM_CONFIGS];
932  int ConfigNum;
933  uinT32 ConfigWord;
934  int ProtoSetIndex;
935  uinT16 ProtoNum;
936  uinT8 ProtoWordNum;
937  PROTO_SET ProtoSet;
938  uinT16 ActualProtoNum;
939 
940  if (PrintMatchSummaryOn(Debug)) {
941  cprintf("Configuration Mask:\n");
942  for (ConfigNum = 0; ConfigNum < ClassTemplate->NumConfigs; ConfigNum++)
943  cprintf("%1d", (((*ConfigMask) >> ConfigNum) & 1));
944  cprintf("\n");
945 
946  cprintf("Feature Error for Configurations:\n");
947  for (ConfigNum = 0; ConfigNum < ClassTemplate->NumConfigs; ConfigNum++) {
948  cprintf(
949  " %5.1f",
950  100.0 * (1.0 -
951  (FLOAT32) tables.sum_feature_evidence_[ConfigNum]
952  / NumFeatures / 256.0));
953  }
954  cprintf("\n\n\n");
955  }
956 
957  if (PrintMatchSummaryOn (Debug)) {
958  cprintf ("Proto Mask:\n");
959  for (ProtoSetIndex = 0; ProtoSetIndex < ClassTemplate->NumProtoSets;
960  ProtoSetIndex++) {
961  ActualProtoNum = (ProtoSetIndex * PROTOS_PER_PROTO_SET);
962  for (ProtoWordNum = 0; ProtoWordNum < 2;
963  ProtoWordNum++, ProtoMask++) {
964  ActualProtoNum = (ProtoSetIndex * PROTOS_PER_PROTO_SET);
965  for (ProtoNum = 0;
966  ((ProtoNum < (PROTOS_PER_PROTO_SET >> 1))
967  && (ActualProtoNum < ClassTemplate->NumProtos));
968  ProtoNum++, ActualProtoNum++)
969  cprintf ("%1d", (((*ProtoMask) >> ProtoNum) & 1));
970  cprintf ("\n");
971  }
972  }
973  cprintf ("\n");
974  }
975 
976  for (int i = 0; i < ClassTemplate->NumConfigs; i++)
977  ProtoConfigs[i] = 0;
978 
979  if (PrintProtoMatchesOn (Debug)) {
980  cprintf ("Proto Evidence:\n");
981  for (ProtoSetIndex = 0; ProtoSetIndex < ClassTemplate->NumProtoSets;
982  ProtoSetIndex++) {
983  ProtoSet = ClassTemplate->ProtoSets[ProtoSetIndex];
984  ActualProtoNum = (ProtoSetIndex * PROTOS_PER_PROTO_SET);
985  for (ProtoNum = 0;
986  ((ProtoNum < PROTOS_PER_PROTO_SET) &&
987  (ActualProtoNum < ClassTemplate->NumProtos));
988  ProtoNum++, ActualProtoNum++) {
989  cprintf ("P %3d =", ActualProtoNum);
990  int temp = 0;
991  for (int j = 0; j < ClassTemplate->ProtoLengths[ActualProtoNum]; j++) {
992  uinT8 data = tables.proto_evidence_[ActualProtoNum][j];
993  cprintf(" %d", data);
994  temp += data;
995  }
996 
997  cprintf(" = %6.4f%%\n",
998  temp / 256.0 / ClassTemplate->ProtoLengths[ActualProtoNum]);
999 
1000  ConfigWord = ProtoSet->Protos[ProtoNum].Configs[0];
1001  ConfigNum = 0;
1002  while (ConfigWord) {
1003  cprintf ("%5d", ConfigWord & 1 ? temp : 0);
1004  if (ConfigWord & 1)
1005  ProtoConfigs[ConfigNum] += temp;
1006  ConfigNum++;
1007  ConfigWord >>= 1;
1008  }
1009  cprintf("\n");
1010  }
1011  }
1012  }
1013 
1014  if (PrintMatchSummaryOn (Debug)) {
1015  cprintf ("Proto Error for Configurations:\n");
1016  for (ConfigNum = 0; ConfigNum < ClassTemplate->NumConfigs; ConfigNum++)
1017  cprintf (" %5.1f",
1018  100.0 * (1.0 -
1019  ProtoConfigs[ConfigNum] /
1020  ClassTemplate->ConfigLengths[ConfigNum] / 256.0));
1021  cprintf ("\n\n");
1022  }
1023 
1024  if (PrintProtoMatchesOn (Debug)) {
1025  cprintf ("Proto Sum for Configurations:\n");
1026  for (ConfigNum = 0; ConfigNum < ClassTemplate->NumConfigs; ConfigNum++)
1027  cprintf (" %4.1f", ProtoConfigs[ConfigNum] / 256.0);
1028  cprintf ("\n\n");
1029 
1030  cprintf ("Proto Length for Configurations:\n");
1031  for (ConfigNum = 0; ConfigNum < ClassTemplate->NumConfigs; ConfigNum++)
1032  cprintf (" %4.1f",
1033  (float) ClassTemplate->ConfigLengths[ConfigNum]);
1034  cprintf ("\n\n");
1035  }
1036 
1037 }
1038 
1039 void IntegerMatcher::DisplayProtoDebugInfo(
1040  INT_CLASS ClassTemplate,
1041  BIT_VECTOR ProtoMask,
1042  BIT_VECTOR ConfigMask,
1043  const ScratchEvidence& tables,
1044  bool SeparateDebugWindows) {
1045  uinT16 ProtoNum;
1046  uinT16 ActualProtoNum;
1047  PROTO_SET ProtoSet;
1048  int ProtoSetIndex;
1049 
1051  if (SeparateDebugWindows) {
1054  }
1055 
1056 
1057  for (ProtoSetIndex = 0; ProtoSetIndex < ClassTemplate->NumProtoSets;
1058  ProtoSetIndex++) {
1059  ProtoSet = ClassTemplate->ProtoSets[ProtoSetIndex];
1060  ActualProtoNum = ProtoSetIndex * PROTOS_PER_PROTO_SET;
1061  for (ProtoNum = 0;
1062  ((ProtoNum < PROTOS_PER_PROTO_SET) &&
1063  (ActualProtoNum < ClassTemplate->NumProtos));
1064  ProtoNum++, ActualProtoNum++) {
1065  /* Compute Average for Actual Proto */
1066  int temp = 0;
1067  for (int i = 0; i < ClassTemplate->ProtoLengths[ActualProtoNum]; i++)
1068  temp += tables.proto_evidence_[ActualProtoNum][i];
1069 
1070  temp /= ClassTemplate->ProtoLengths[ActualProtoNum];
1071 
1072  if ((ProtoSet->Protos[ProtoNum]).Configs[0] & (*ConfigMask)) {
1073  DisplayIntProto(ClassTemplate, ActualProtoNum, temp / 255.0);
1074  }
1075  }
1076  }
1077 }
1078 
1079 
1080 void IntegerMatcher::DisplayFeatureDebugInfo(
1081  INT_CLASS ClassTemplate,
1082  BIT_VECTOR ProtoMask,
1083  BIT_VECTOR ConfigMask,
1084  inT16 NumFeatures,
1085  const INT_FEATURE_STRUCT* Features,
1086  int AdaptFeatureThreshold,
1087  int Debug,
1088  bool SeparateDebugWindows) {
1089  ScratchEvidence *tables = new ScratchEvidence();
1090 
1091  tables->Clear(ClassTemplate);
1092 
1094  if (SeparateDebugWindows) {
1097  }
1098 
1099  for (int Feature = 0; Feature < NumFeatures; Feature++) {
1100  UpdateTablesForFeature(
1101  ClassTemplate, ProtoMask, ConfigMask, Feature, &Features[Feature],
1102  tables, 0);
1103 
1104  /* Find Best Evidence for Current Feature */
1105  int best = 0;
1106  for (int i = 0; i < ClassTemplate->NumConfigs; i++)
1107  if (tables->feature_evidence_[i] > best)
1108  best = tables->feature_evidence_[i];
1109 
1110  /* Update display for current feature */
1111  if (ClipMatchEvidenceOn(Debug)) {
1112  if (best < AdaptFeatureThreshold)
1113  DisplayIntFeature(&Features[Feature], 0.0);
1114  else
1115  DisplayIntFeature(&Features[Feature], 1.0);
1116  } else {
1117  DisplayIntFeature(&Features[Feature], best / 255.0);
1118  }
1119  }
1120 
1121  delete tables;
1122 }
1123 #endif
1124 
1129  INT_CLASS ClassTemplate, BIT_VECTOR ConfigMask, inT16 NumFeatures) {
1130 
1131  int *IntPointer;
1132  uinT32 ConfigWord;
1133  int ProtoSetIndex;
1134  uinT16 ProtoNum;
1135  PROTO_SET ProtoSet;
1136  int NumProtos;
1137  uinT16 ActualProtoNum;
1138 
1139  NumProtos = ClassTemplate->NumProtos;
1140 
1141  for (ProtoSetIndex = 0; ProtoSetIndex < ClassTemplate->NumProtoSets;
1142  ProtoSetIndex++) {
1143  ProtoSet = ClassTemplate->ProtoSets[ProtoSetIndex];
1144  ActualProtoNum = (ProtoSetIndex * PROTOS_PER_PROTO_SET);
1145  for (ProtoNum = 0;
1146  ((ProtoNum < PROTOS_PER_PROTO_SET) && (ActualProtoNum < NumProtos));
1147  ProtoNum++, ActualProtoNum++) {
1148  int temp = 0;
1149  for (int i = 0; i < ClassTemplate->ProtoLengths[ActualProtoNum]; i++)
1150  temp += proto_evidence_[ActualProtoNum] [i];
1151 
1152  ConfigWord = ProtoSet->Protos[ProtoNum].Configs[0];
1153  ConfigWord &= *ConfigMask;
1154  IntPointer = sum_feature_evidence_;
1155  while (ConfigWord) {
1156  if (ConfigWord & 1)
1157  *IntPointer += temp;
1158  IntPointer++;
1159  ConfigWord >>= 1;
1160  }
1161  }
1162  }
1163 }
1164 
1170  INT_CLASS ClassTemplate, inT16 NumFeatures, inT32 used_features) {
1171 
1172  for (int i = 0; i < ClassTemplate->NumConfigs; i++) {
1173  sum_feature_evidence_[i] = (sum_feature_evidence_[i] << 8) /
1174  (NumFeatures + ClassTemplate->ConfigLengths[i]);
1175  }
1176 }
1177 
1185 int IntegerMatcher::FindBestMatch(
1186  INT_CLASS class_template,
1187  const ScratchEvidence &tables,
1188  UnicharRating* result) {
1189  int best_match = 0;
1190  result->config = 0;
1191  result->fonts.truncate(0);
1192  result->fonts.reserve(class_template->NumConfigs);
1193 
1194  /* Find best match */
1195  for (int c = 0; c < class_template->NumConfigs; ++c) {
1196  int rating = tables.sum_feature_evidence_[c];
1197  if (*classify_debug_level_ > 2)
1198  tprintf("Config %d, rating=%d\n", c, rating);
1199  if (rating > best_match) {
1200  result->config = c;
1201  best_match = rating;
1202  }
1203  result->fonts.push_back(ScoredFont(c, rating));
1204  }
1205 
1206  // Compute confidence on a Probability scale.
1207  result->rating = best_match / 65536.0f;
1208 
1209  return best_match;
1210 }
1211 
1216 float IntegerMatcher::ApplyCNCorrection(float rating, int blob_length,
1217  int normalization_factor,
1218  int matcher_multiplier) {
1219  return (rating * blob_length +
1220  matcher_multiplier * normalization_factor / 256.0) /
1221  (blob_length + matcher_multiplier);
1222 }
1223 
1235 void
1236 HeapSort (int n, register int ra[], register int rb[]) {
1237  int i, rra, rrb;
1238  int l, j, ir;
1239 
1240  l = (n >> 1) + 1;
1241  ir = n;
1242  for (;;) {
1243  if (l > 1) {
1244  rra = ra[--l];
1245  rrb = rb[l];
1246  }
1247  else {
1248  rra = ra[ir];
1249  rrb = rb[ir];
1250  ra[ir] = ra[1];
1251  rb[ir] = rb[1];
1252  if (--ir == 1) {
1253  ra[1] = rra;
1254  rb[1] = rrb;
1255  return;
1256  }
1257  }
1258  i = l;
1259  j = l << 1;
1260  while (j <= ir) {
1261  if (j < ir && ra[j] < ra[j + 1])
1262  ++j;
1263  if (rra < ra[j]) {
1264  ra[i] = ra[j];
1265  rb[i] = rb[j];
1266  j += (i = j);
1267  }
1268  else
1269  j = ir + 1;
1270  }
1271  ra[i] = rra;
1272  rb[i] = rrb;
1273  }
1274 }
void InitFeatureDisplayWindowIfReqd()
Definition: intproto.cpp:1849
void DisableDisabledClasses(const UNICHARSET &unicharset)
Definition: intmatcher.cpp:224
#define NUM_BITS_PER_CLASS
Definition: intproto.h:54
static const float kSEExponentialMultiplier
Definition: intmatcher.h:92
PROTO_SET ProtoSets[MAX_NUM_PROTO_SETS]
Definition: intproto.h:111
void HeapSort(int n, register int ra[], register int rb[])
#define PrintMatchSummaryOn(D)
Definition: intproto.h:198
void DisableFragments(const UNICHARSET &unicharset)
Definition: intmatcher.cpp:232
#define DisplayProtoMatchesOn(D)
Definition: intproto.h:200
INT_PROTO_STRUCT Protos[PROTOS_PER_PROTO_SET]
Definition: intproto.h:97
CLASS_PRUNER_STRUCT * ClassPruners[MAX_NUM_CLASS_PRUNERS]
Definition: intproto.h:125
void ComputeScores(const INT_TEMPLATES_STRUCT *int_templates, int num_features, const INT_FEATURE_STRUCT *features)
Definition: intmatcher.cpp:140
int32_t inT32
Definition: host.h:38
void DebugMatch(const Classify &classify, const INT_TEMPLATES_STRUCT *int_templates, const INT_FEATURE_STRUCT *features) const
Definition: intmatcher.cpp:300
int SetupResults(GenericVector< CP_RESULT_STRUCT > *results) const
Definition: intmatcher.cpp:360
#define MatchDebuggingOn(D)
Definition: intproto.h:197
void init_to_size(int size, T t)
#define ClipMatchEvidenceOn(D)
Definition: intproto.h:203
uinT8 NumProtoSets
Definition: intproto.h:109
#define WERDS_PER_CP_VECTOR
Definition: intproto.h:61
const CHAR_FRAGMENT * get_fragment(UNICHAR_ID unichar_id) const
Definition: unicharset.h:694
uinT8 NumConfigs
Definition: intproto.h:110
void NormalizeForXheight(int norm_multiplier, const uinT8 *normalization_factors)
Definition: intmatcher.cpp:246
void DisplayIntFeature(const INT_FEATURE_STRUCT *Feature, FLOAT32 Evidence)
Definition: intproto.cpp:623
GenericVector< ScoredFont > fonts
Definition: shapetable.h:88
static const float kSimilarityCenter
Definition: intmatcher.h:94
#define tprintf(...)
Definition: tprintf.h:31
#define SE_TABLE_BITS
Definition: intmatcher.h:66
const char * string() const
Definition: strngs.cpp:198
#define INTMATCHER_OFFSET_TABLE_SIZE
Definition: intmatcher.cpp:66
void Clear(const INT_CLASS class_template)
Definition: intmatcher.cpp:709
uinT32 * BIT_VECTOR
Definition: bitvec.h:28
uinT16 NumProtos
Definition: intproto.h:108
#define BITS_PER_WERD
Definition: intproto.h:44
uinT8 * ProtoLengths
Definition: intproto.h:112
uinT32 p[NUM_CP_BUCKETS][NUM_CP_BUCKETS][NUM_CP_BUCKETS][WERDS_PER_CP_VECTOR]
Definition: intproto.h:77
bool disable_character_fragments
int16_t inT16
Definition: host.h:36
#define NUM_CP_BUCKETS
Definition: intproto.h:52
bool get_enabled(UNICHAR_ID unichar_id) const
Definition: unicharset.h:838
void ClearFeatureEvidence(const INT_CLASS class_template)
Definition: intmatcher.cpp:716
#define next_table_entries
Definition: intmatcher.cpp:68
uint32_t uinT32
Definition: host.h:39
void Init(tesseract::IntParam *classify_debug_level)
Definition: intmatcher.cpp:679
void IMDebugConfiguration(int FeatureNum, uinT16 ActualProtoNum, uinT8 Evidence, BIT_VECTOR ConfigMask, uinT32 ConfigWord)
Definition: intmatcher.cpp:727
int sum_feature_evidence_[MAX_NUM_CONFIGS]
Definition: intmatcher.h:71
Definition: strngs.h:45
int FindGoodProtos(INT_CLASS ClassTemplate, BIT_VECTOR ProtoMask, BIT_VECTOR ConfigMask, uinT16 BlobLength, inT16 NumFeatures, INT_FEATURE_ARRAY Features, PROTO_ID *ProtoArray, int AdaptProtoThreshold, int Debug)
Definition: intmatcher.cpp:557
uinT16 ConfigLengths[MAX_NUM_CONFIGS]
Definition: intproto.h:113
T ClipToRange(const T &x, const T &lower_bound, const T &upper_bound)
Definition: helpers.h:122
void SummarizeResult(const Classify &classify, const INT_TEMPLATES_STRUCT *int_templates, const uinT16 *expected_num_features, int norm_multiplier, const uinT8 *normalization_factors) const
Definition: intmatcher.cpp:337
#define PrintFeatureMatchesOn(D)
Definition: intproto.h:201
float ApplyCNCorrection(float rating, int blob_length, int normalization_factor, int matcher_multiplier)
uinT32 Configs[WERDS_PER_CONFIG_VEC]
Definition: intproto.h:86
#define NUM_PP_BUCKETS
Definition: intproto.h:51
#define SE_TABLE_SIZE
Definition: intmatcher.h:67
int FindBadFeatures(INT_CLASS ClassTemplate, BIT_VECTOR ProtoMask, BIT_VECTOR ConfigMask, uinT16 BlobLength, inT16 NumFeatures, INT_FEATURE_ARRAY Features, FEATURE_ID *FeatureArray, int AdaptFeatureThreshold, int Debug)
Definition: intmatcher.cpp:627
ClassPruner(int max_classes)
Definition: intmatcher.cpp:109
void InitIntMatchWindowIfReqd()
Definition: intproto.cpp:1817
void NormalizeSums(INT_CLASS ClassTemplate, inT16 NumFeatures, inT32 used_features)
void IMDebugConfigurationSum(int FeatureNum, uinT8 *FeatureEvidence, inT32 ConfigCount)
Definition: intmatcher.cpp:750
float FLOAT32
Definition: host.h:42
int8_t inT8
Definition: host.h:34
#define PROTOS_PER_PROTO_SET
Definition: intproto.h:48
void InitProtoDisplayWindowIfReqd()
Definition: intproto.cpp:1838
uint8_t uinT8
Definition: host.h:35
int PruneClasses(const INT_TEMPLATES_STRUCT *int_templates, int num_features, int keep_this, const INT_FEATURE_STRUCT *features, const uinT8 *normalization_factors, const uinT16 *expected_num_features, GenericVector< CP_RESULT_STRUCT > *results)
Definition: intmatcher.cpp:412
#define offset_table_entries
Definition: intmatcher.cpp:53
void cprintf(const char *format,...)
Definition: callcpp.cpp:40
#define MAX_NUM_CONFIGS
Definition: intproto.h:46
void PruneAndSort(int pruning_factor, int keep_this, bool max_of_non_fragments, const UNICHARSET &unicharset)
Definition: intmatcher.cpp:264
void DisplayIntProto(INT_CLASS Class, PROTO_ID ProtoId, FLOAT32 Evidence)
Definition: intproto.cpp:644
#define CLASS_PRUNER_CLASS_MASK
Definition: intproto.h:55
uinT8 FEATURE_ID
Definition: matchdefs.h:47
INT_FEATURE_STRUCT INT_FEATURE_ARRAY[MAX_NUM_INT_FEATURES]
Definition: intproto.h:155
#define DisplayFeatureMatchesOn(D)
Definition: intproto.h:199
uint16_t uinT16
Definition: host.h:37
void AdjustForExpectedNumFeatures(const uinT16 *expected_num_features, int cutoff_strength)
Definition: intmatcher.cpp:211
STRING ClassIDToDebugStr(const INT_TEMPLATES_STRUCT *templates, int class_id, int config_id) const
uinT8 feature_evidence_[MAX_NUM_CONFIGS]
Definition: intmatcher.h:70
uinT8 proto_evidence_[MAX_NUM_PROTOS][MAX_PROTO_INDEX]
Definition: intmatcher.h:72
const char features[]
Definition: feature_tests.c:2
inT16 PROTO_ID
Definition: matchdefs.h:41
int RoundUp(int n, int block_size)
Definition: helpers.h:116
void Match(INT_CLASS ClassTemplate, BIT_VECTOR ProtoMask, BIT_VECTOR ConfigMask, inT16 NumFeatures, const INT_FEATURE_STRUCT *Features, tesseract::UnicharRating *Result, int AdaptFeatureThreshold, int Debug, bool SeparateDebugWindows)
Definition: intmatcher.cpp:475
void UpdateSumOfProtoEvidences(INT_CLASS ClassTemplate, BIT_VECTOR ConfigMask, inT16 NumFeatures)
#define PrintProtoMatchesOn(D)
Definition: intproto.h:202