tesseract  4.00.00dev
tesseract::Classify Class Reference

#include <classify.h>

Inheritance diagram for tesseract::Classify:
tesseract::CCStruct tesseract::CUtil tesseract::CCUtil tesseract::Wordrec tesseract::Tesseract

Public Member Functions

 Classify ()
 
virtual ~Classify ()
 
Dict & getDict ()
 
const ShapeTable * shape_table () const
 
void SetStaticClassifier (ShapeClassifier *static_classifier)
 
void AddLargeSpeckleTo (int blob_length, BLOB_CHOICE_LIST *choices)
 
bool LargeSpeckle (const TBLOB &blob)
 
ADAPT_TEMPLATES NewAdaptedTemplates (bool InitFromUnicharset)
 
int GetFontinfoId (ADAPT_CLASS Class, uinT8 ConfigId)
 
int PruneClasses (const INT_TEMPLATES_STRUCT *int_templates, int num_features, int keep_this, const INT_FEATURE_STRUCT *features, const uinT8 *normalization_factors, const uinT16 *expected_num_features, GenericVector< CP_RESULT_STRUCT > *results)
 
void ReadNewCutoffs (TFile *fp, CLASS_CUTOFF_ARRAY Cutoffs)
 
void PrintAdaptedTemplates (FILE *File, ADAPT_TEMPLATES Templates)
 
void WriteAdaptedTemplates (FILE *File, ADAPT_TEMPLATES Templates)
 
ADAPT_TEMPLATES ReadAdaptedTemplates (TFile *File)
 
FLOAT32 ComputeNormMatch (CLASS_ID ClassId, const FEATURE_STRUCT &feature, BOOL8 DebugMatch)
 
void FreeNormProtos ()
 
NORM_PROTOS * ReadNormProtos (TFile *fp)
 
void ConvertProto (PROTO Proto, int ProtoId, INT_CLASS Class)
 
INT_TEMPLATES CreateIntTemplates (CLASSES FloatProtos, const UNICHARSET &target_unicharset)
 
void LearnWord (const char *fontname, WERD_RES *word)
 
void LearnPieces (const char *fontname, int start, int length, float threshold, CharSegmentationType segmentation, const char *correct_text, WERD_RES *word)
 
void InitAdaptiveClassifier (TessdataManager *mgr)
 
void InitAdaptedClass (TBLOB *Blob, CLASS_ID ClassId, int FontinfoId, ADAPT_CLASS Class, ADAPT_TEMPLATES Templates)
 
void AmbigClassifier (const GenericVector< INT_FEATURE_STRUCT > &int_features, const INT_FX_RESULT_STRUCT &fx_info, const TBLOB *blob, INT_TEMPLATES templates, ADAPT_CLASS *classes, UNICHAR_ID *ambiguities, ADAPT_RESULTS *results)
 
void MasterMatcher (INT_TEMPLATES templates, inT16 num_features, const INT_FEATURE_STRUCT *features, const uinT8 *norm_factors, ADAPT_CLASS *classes, int debug, int matcher_multiplier, const TBOX &blob_box, const GenericVector< CP_RESULT_STRUCT > &results, ADAPT_RESULTS *final_results)
 
void ExpandShapesAndApplyCorrections (ADAPT_CLASS *classes, bool debug, int class_id, int bottom, int top, float cp_rating, int blob_length, int matcher_multiplier, const uinT8 *cn_factors, UnicharRating *int_result, ADAPT_RESULTS *final_results)
 
double ComputeCorrectedRating (bool debug, int unichar_id, double cp_rating, double im_rating, int feature_misses, int bottom, int top, int blob_length, int matcher_multiplier, const uinT8 *cn_factors)
 
void ConvertMatchesToChoices (const DENORM &denorm, const TBOX &box, ADAPT_RESULTS *Results, BLOB_CHOICE_LIST *Choices)
 
void AddNewResult (const UnicharRating &new_result, ADAPT_RESULTS *results)
 
int GetAdaptiveFeatures (TBLOB *Blob, INT_FEATURE_ARRAY IntFeatures, FEATURE_SET *FloatFeatures)
 
void DebugAdaptiveClassifier (TBLOB *Blob, ADAPT_RESULTS *Results)
 
PROTO_ID MakeNewTempProtos (FEATURE_SET Features, int NumBadFeat, FEATURE_ID BadFeat[], INT_CLASS IClass, ADAPT_CLASS Class, BIT_VECTOR TempProtoMask)
 
int MakeNewTemporaryConfig (ADAPT_TEMPLATES Templates, CLASS_ID ClassId, int FontinfoId, int NumFeatures, INT_FEATURE_ARRAY Features, FEATURE_SET FloatFeatures)
 
void MakePermanent (ADAPT_TEMPLATES Templates, CLASS_ID ClassId, int ConfigId, TBLOB *Blob)
 
void PrintAdaptiveMatchResults (const ADAPT_RESULTS &results)
 
void RemoveExtraPuncs (ADAPT_RESULTS *Results)
 
void RemoveBadMatches (ADAPT_RESULTS *Results)
 
void SetAdaptiveThreshold (FLOAT32 Threshold)
 
void ShowBestMatchFor (int shape_id, const INT_FEATURE_STRUCT *features, int num_features)
 
STRING ClassIDToDebugStr (const INT_TEMPLATES_STRUCT *templates, int class_id, int config_id) const
 
int ClassAndConfigIDToFontOrShapeID (int class_id, int int_result_config) const
 
int ShapeIDToClassID (int shape_id) const
 
UNICHAR_ID * BaselineClassifier (TBLOB *Blob, const GenericVector< INT_FEATURE_STRUCT > &int_features, const INT_FX_RESULT_STRUCT &fx_info, ADAPT_TEMPLATES Templates, ADAPT_RESULTS *Results)
 
int CharNormClassifier (TBLOB *blob, const TrainingSample &sample, ADAPT_RESULTS *adapt_results)
 
int CharNormTrainingSample (bool pruner_only, int keep_this, const TrainingSample &sample, GenericVector< UnicharRating > *results)
 
UNICHAR_ID * GetAmbiguities (TBLOB *Blob, CLASS_ID CorrectClass)
 
void DoAdaptiveMatch (TBLOB *Blob, ADAPT_RESULTS *Results)
 
void AdaptToChar (TBLOB *Blob, CLASS_ID ClassId, int FontinfoId, FLOAT32 Threshold, ADAPT_TEMPLATES adaptive_templates)
 
void DisplayAdaptedChar (TBLOB *blob, INT_CLASS_STRUCT *int_class)
 
bool AdaptableWord (WERD_RES *word)
 
void EndAdaptiveClassifier ()
 
void SettupPass1 ()
 
void SettupPass2 ()
 
void AdaptiveClassifier (TBLOB *Blob, BLOB_CHOICE_LIST *Choices)
 
void ClassifyAsNoise (ADAPT_RESULTS *Results)
 
void ResetAdaptiveClassifierInternal ()
 
void SwitchAdaptiveClassifier ()
 
void StartBackupAdaptiveClassifier ()
 
int GetCharNormFeature (const INT_FX_RESULT_STRUCT &fx_info, INT_TEMPLATES templates, uinT8 *pruner_norm_array, uinT8 *char_norm_array)
 
void ComputeCharNormArrays (FEATURE_STRUCT *norm_feature, INT_TEMPLATES_STRUCT *templates, uinT8 *char_norm_array, uinT8 *pruner_array)
 
bool TempConfigReliable (CLASS_ID class_id, const TEMP_CONFIG &config)
 
void UpdateAmbigsGroup (CLASS_ID class_id, TBLOB *Blob)
 
bool AdaptiveClassifierIsFull () const
 
bool AdaptiveClassifierIsEmpty () const
 
bool LooksLikeGarbage (TBLOB *blob)
 
void RefreshDebugWindow (ScrollView **win, const char *msg, int y_offset, const TBOX &wbox)
 
void ClearCharNormArray (uinT8 *char_norm_array)
 
void ComputeIntCharNormArray (const FEATURE_STRUCT &norm_feature, uinT8 *char_norm_array)
 
void ComputeIntFeatures (FEATURE_SET Features, INT_FEATURE_ARRAY IntFeatures)
 
INT_TEMPLATES ReadIntTemplates (TFile *fp)
 
void WriteIntTemplates (FILE *File, INT_TEMPLATES Templates, const UNICHARSET &target_unicharset)
 
CLASS_ID GetClassToDebug (const char *Prompt, bool *adaptive_on, bool *pretrained_on, int *shape_id)
 
void ShowMatchDisplay ()
 
UnicityTable< FontInfo > & get_fontinfo_table ()
 
const UnicityTable< FontInfo > & get_fontinfo_table () const
 
UnicityTable< FontSet > & get_fontset_table ()
 
void NormalizeOutlines (LIST Outlines, FLOAT32 *XScale, FLOAT32 *YScale)
 
FEATURE_SET ExtractOutlineFeatures (TBLOB *Blob)
 
FEATURE_SET ExtractPicoFeatures (TBLOB *Blob)
 
FEATURE_SET ExtractIntCNFeatures (const TBLOB &blob, const INT_FX_RESULT_STRUCT &fx_info)
 
FEATURE_SET ExtractIntGeoFeatures (const TBLOB &blob, const INT_FX_RESULT_STRUCT &fx_info)
 
void LearnBlob (const STRING &fontname, TBLOB *Blob, const DENORM &cn_denorm, const INT_FX_RESULT_STRUCT &fx_info, const char *blob_text)
 
bool WriteTRFile (const STRING &filename)
 
- Public Member Functions inherited from tesseract::CCStruct
 CCStruct ()
 
 ~CCStruct ()
 
- Public Member Functions inherited from tesseract::CUtil
 CUtil ()
 
 ~CUtil ()
 
void read_variables (const char *filename, bool global_only)
 
- Public Member Functions inherited from tesseract::CCUtil
 CCUtil ()
 
virtual ~CCUtil ()
 
void main_setup (const char *argv0, const char *basename)
 CCUtil::main_setup - set location of tessdata and name of image. More...
 
ParamsVectors * params ()
 

Static Public Member Functions

static void SetupBLCNDenorms (const TBLOB &blob, bool nonlinear_norm, DENORM *bl_denorm, DENORM *cn_denorm, INT_FX_RESULT_STRUCT *fx_info)
 
static void ExtractFeatures (const TBLOB &blob, bool nonlinear_norm, GenericVector< INT_FEATURE_STRUCT > *bl_features, GenericVector< INT_FEATURE_STRUCT > *cn_features, INT_FX_RESULT_STRUCT *results, GenericVector< int > *outline_cn_counts)
 

Public Attributes

bool allow_blob_division = true
 
bool prioritize_division = FALSE
 
int tessedit_single_match = FALSE
 
bool classify_enable_learning = true
 
int classify_debug_level = 0
 
int classify_norm_method = character
 
double classify_char_norm_range = 0.2
 
double classify_min_norm_scale_x = 0.0
 
double classify_max_norm_scale_x = 0.325
 
double classify_min_norm_scale_y = 0.0
 
double classify_max_norm_scale_y = 0.325
 
double classify_max_rating_ratio = 1.5
 
double classify_max_certainty_margin = 5.5
 
bool tess_cn_matching = 0
 
bool tess_bn_matching = 0
 
bool classify_enable_adaptive_matcher = 1
 
bool classify_use_pre_adapted_templates = 0
 
bool classify_save_adapted_templates = 0
 
bool classify_enable_adaptive_debugger = 0
 
bool classify_nonlinear_norm = 0
 
int matcher_debug_level = 0
 
int matcher_debug_flags = 0
 
int classify_learning_debug_level = 0
 
double matcher_good_threshold = 0.125
 
double matcher_reliable_adaptive_result = 0.0
 
double matcher_perfect_threshold = 0.02
 
double matcher_bad_match_pad = 0.15
 
double matcher_rating_margin = 0.1
 
double matcher_avg_noise_size = 12.0
 
int matcher_permanent_classes_min = 1
 
int matcher_min_examples_for_prototyping = 3
 
int matcher_sufficient_examples_for_prototyping = 5
 
double matcher_clustering_max_angle_delta = 0.015
 
double classify_misfit_junk_penalty = 0.0
 
double rating_scale = 1.5
 
double certainty_scale = 20.0
 
double tessedit_class_miss_scale = 0.00390625
 
double classify_adapted_pruning_factor = 2.5
 
double classify_adapted_pruning_threshold = -1.0
 
int classify_adapt_proto_threshold = 230
 
int classify_adapt_feature_threshold = 230
 
bool disable_character_fragments = TRUE
 
double classify_character_fragments_garbage_certainty_threshold = -3.0
 
bool classify_debug_character_fragments = FALSE
 
bool matcher_debug_separate_windows = FALSE
 
char * classify_learn_debug_str = ""
 
int classify_class_pruner_threshold = 229
 
int classify_class_pruner_multiplier = 15
 
int classify_cp_cutoff_strength = 7
 
int classify_integer_matcher_multiplier = 10
 
INT_TEMPLATES PreTrainedTemplates
 
ADAPT_TEMPLATES AdaptedTemplates
 
ADAPT_TEMPLATES BackupAdaptedTemplates
 
BIT_VECTOR AllProtosOn
 
BIT_VECTOR AllConfigsOn
 
BIT_VECTOR AllConfigsOff
 
BIT_VECTOR TempProtoMask
 
bool EnableLearning
 
NORM_PROTOS * NormProtos
 
UnicityTable< FontInfo > fontinfo_table_
 
UnicityTable< FontSet > fontset_table_
 
int il1_adaption_test = 0
 
bool classify_bln_numeric_mode = 0
 
double speckle_large_max_size = 0.30
 
double speckle_rating_penalty = 10.0
 
- Public Attributes inherited from tesseract::CCUtil
STRING datadir
 
STRING imagebasename
 
STRING lang
 
STRING language_data_path_prefix
 
UNICHARSET unicharset
 
UnicharAmbigs unichar_ambigs
 
STRING imagefile
 
STRING directory
 
char * m_data_sub_dir = "tessdata/"
 
int ambigs_debug_level = 0
 
bool use_definite_ambigs_for_classifier = 0
 
bool use_ambigs_for_adaption = 0
 

Protected Attributes

IntegerMatcher im_
 
FEATURE_DEFS_STRUCT feature_defs_
 
ShapeTable * shape_table_
 

Additional Inherited Members

- Static Public Attributes inherited from tesseract::CCStruct
static const double kDescenderFraction = 0.25
 
static const double kXHeightFraction = 0.5
 
static const double kAscenderFraction = 0.25
 
static const double kXHeightCapRatio
 

Detailed Description

Definition at line 61 of file classify.h.

Constructor & Destructor Documentation

◆ Classify()

tesseract::Classify::Classify ( )

Definition at line 35 of file classify.cpp.

36  : BOOL_MEMBER(allow_blob_division, true, "Use divisible blobs chopping",
37  this->params()),
39  "Prioritize blob division over chopping", this->params()),
40  INT_MEMBER(tessedit_single_match, FALSE, "Top choice only from CP",
41  this->params()),
42  BOOL_MEMBER(classify_enable_learning, true, "Enable adaptive classifier",
43  this->params()),
44  INT_MEMBER(classify_debug_level, 0, "Classify debug level",
45  this->params()),
46  INT_MEMBER(classify_norm_method, character, "Normalization Method ...",
47  this->params()),
49  "Character Normalization Range ...", this->params()),
50  double_MEMBER(classify_min_norm_scale_x, 0.0, "Min char x-norm scale ...",
51  this->params()), /* PREV DEFAULT 0.1 */
53  "Max char x-norm scale ...",
54  this->params()), /* PREV DEFAULT 0.3 */
55  double_MEMBER(classify_min_norm_scale_y, 0.0, "Min char y-norm scale ...",
56  this->params()), /* PREV DEFAULT 0.1 */
58  "Max char y-norm scale ...",
59  this->params()), /* PREV DEFAULT 0.3 */
61  "Veto ratio between classifier ratings", this->params()),
63  "Veto difference between classifier certainties",
64  this->params()),
65  BOOL_MEMBER(tess_cn_matching, 0, "Character Normalized Matching",
66  this->params()),
67  BOOL_MEMBER(tess_bn_matching, 0, "Baseline Normalized Matching",
68  this->params()),
70  "Enable adaptive classifier", this->params()),
72  "Use pre-adapted classifier templates", this->params()),
74  "Save adapted templates to a file", this->params()),
75  BOOL_MEMBER(classify_enable_adaptive_debugger, 0, "Enable match debugger",
76  this->params()),
78  "Non-linear stroke-density normalization", this->params()),
79  INT_MEMBER(matcher_debug_level, 0, "Matcher Debug Level", this->params()),
80  INT_MEMBER(matcher_debug_flags, 0, "Matcher Debug Flags", this->params()),
81  INT_MEMBER(classify_learning_debug_level, 0, "Learning Debug Level: ",
82  this->params()),
83  double_MEMBER(matcher_good_threshold, 0.125, "Good Match (0-1)",
84  this->params()),
85  double_MEMBER(matcher_reliable_adaptive_result, 0.0, "Great Match (0-1)",
86  this->params()),
87  double_MEMBER(matcher_perfect_threshold, 0.02, "Perfect Match (0-1)",
88  this->params()),
89  double_MEMBER(matcher_bad_match_pad, 0.15, "Bad Match Pad (0-1)",
90  this->params()),
91  double_MEMBER(matcher_rating_margin, 0.1, "New template margin (0-1)",
92  this->params()),
93  double_MEMBER(matcher_avg_noise_size, 12.0, "Avg. noise blob length",
94  this->params()),
95  INT_MEMBER(matcher_permanent_classes_min, 1, "Min # of permanent classes",
96  this->params()),
98  "Reliable Config Threshold", this->params()),
100  "Enable adaption even if the ambiguities have not been seen",
101  this->params()),
103  "Maximum angle delta for prototype clustering",
104  this->params()),
106  "Penalty to apply when a non-alnum is vertically out of "
107  "its expected textline position",
108  this->params()),
109  double_MEMBER(rating_scale, 1.5, "Rating scaling factor", this->params()),
110  double_MEMBER(certainty_scale, 20.0, "Certainty scaling factor",
111  this->params()),
113  "Scale factor for features not used", this->params()),
116  "Prune poor adapted results this much worse than best result",
117  this->params()),
119  "Threshold at which classify_adapted_pruning_factor starts",
120  this->params()),
122  "Threshold for good protos during adaptive 0-255",
123  this->params()),
125  "Threshold for good features during adaptive 0-255",
126  this->params()),
128  "Do not include character fragments in the"
129  " results of the classifier",
130  this->params()),
132  -3.0,
133  "Exclude fragments that do not look like whole"
134  " characters from training and adaption",
135  this->params()),
137  "Bring up graphical debugging windows for fragments training",
138  this->params()),
140  "Use two different windows for debugging the matching: "
141  "One for the protos and one for the features.",
142  this->params()),
143  STRING_MEMBER(classify_learn_debug_str, "", "Class str to debug learning",
144  this->params()),
146  "Class Pruner Threshold 0-255", this->params()),
148  "Class Pruner Multiplier 0-255: ", this->params()),
150  "Class Pruner CutoffStrength: ", this->params()),
152  "Integer Matcher Multiplier 0-255: ", this->params()),
153  EnableLearning(true),
155  "Don't adapt to i/I at beginning of word", this->params()),
157  "Assume the input is numbers [0-9].", this->params()),
158  double_MEMBER(speckle_large_max_size, 0.30, "Max large speckle size",
159  this->params()),
161  "Penalty to add to worst rating for noise", this->params()),
162  shape_table_(NULL),
163  dict_(this),
164  static_classifier_(NULL) {
165  fontinfo_table_.set_compare_callback(
167  fontinfo_table_.set_clear_callback(
169  fontset_table_.set_compare_callback(
171  fontset_table_.set_clear_callback(
173  AdaptedTemplates = NULL;
174  BackupAdaptedTemplates = NULL;
175  PreTrainedTemplates = NULL;
176  AllProtosOn = NULL;
177  AllConfigsOn = NULL;
178  AllConfigsOff = NULL;
179  TempProtoMask = NULL;
180  NormProtos = NULL;
181 
182  NumAdaptationsFailed = 0;
183 
184  learn_debug_win_ = NULL;
185  learn_fragmented_word_debug_win_ = NULL;
186  learn_fragments_debug_win_ = NULL;
187 }
BIT_VECTOR TempProtoMask
Definition: classify.h:482
bool classify_enable_adaptive_matcher
Definition: classify.h:408
double matcher_good_threshold
Definition: classify.h:419
double matcher_avg_noise_size
Definition: classify.h:424
bool classify_nonlinear_norm
Definition: classify.h:415
bool matcher_debug_separate_windows
Definition: classify.h:457
#define TRUE
Definition: capi.h:45
_ConstTessMemberResultCallback_0_0< false, R, T1 >::base * NewPermanentTessCallback(const T1 *obj, R(T2::*member)() const)
Definition: tesscallback.h:116
bool classify_bln_numeric_mode
Definition: classify.h:499
ParamsVectors * params()
Definition: ccutil.h:62
UnicityTable< FontSet > fontset_table_
Definition: classify.h:495
#define double_MEMBER(name, val, comment, vec)
Definition: params.h:309
bool classify_save_adapted_templates
Definition: classify.h:412
bool allow_blob_division
Definition: classify.h:381
int classify_cp_cutoff_strength
Definition: classify.h:466
double tessedit_class_miss_scale
Definition: classify.h:438
int classify_adapt_feature_threshold
Definition: classify.h:446
int classify_integer_matcher_multiplier
Definition: classify.h:468
BIT_VECTOR AllProtosOn
Definition: classify.h:479
bool CompareFontInfo(const FontInfo &fi1, const FontInfo &fi2)
Definition: fontinfo.cpp:120
char * classify_learn_debug_str
Definition: classify.h:458
double classify_max_norm_scale_y
Definition: classify.h:399
bool classify_debug_character_fragments
Definition: classify.h:454
bool disable_character_fragments
Definition: classify.h:449
double matcher_clustering_max_angle_delta
Definition: classify.h:431
bool classify_enable_adaptive_debugger
Definition: classify.h:413
double classify_char_norm_range
Definition: classify.h:395
double matcher_reliable_adaptive_result
Definition: classify.h:420
ShapeTable * shape_table_
Definition: classify.h:511
int classify_learning_debug_level
Definition: classify.h:418
int classify_class_pruner_multiplier
Definition: classify.h:464
double classify_min_norm_scale_x
Definition: classify.h:396
bool classify_use_pre_adapted_templates
Definition: classify.h:410
ADAPT_TEMPLATES BackupAdaptedTemplates
Definition: classify.h:476
double speckle_rating_penalty
Definition: classify.h:502
double certainty_scale
Definition: classify.h:436
BIT_VECTOR AllConfigsOff
Definition: classify.h:481
double matcher_perfect_threshold
Definition: classify.h:421
#define FALSE
Definition: capi.h:46
double matcher_bad_match_pad
Definition: classify.h:422
int classify_adapt_proto_threshold
Definition: classify.h:444
double classify_min_norm_scale_y
Definition: classify.h:398
ADAPT_TEMPLATES AdaptedTemplates
Definition: classify.h:472
double classify_adapted_pruning_threshold
Definition: classify.h:442
double classify_max_norm_scale_x
Definition: classify.h:397
#define STRING_MEMBER(name, val, comment, vec)
Definition: params.h:306
int classify_class_pruner_threshold
Definition: classify.h:462
double classify_character_fragments_garbage_certainty_threshold
Definition: classify.h:452
int matcher_permanent_classes_min
Definition: classify.h:425
double classify_misfit_junk_penalty
Definition: classify.h:434
double classify_adapted_pruning_factor
Definition: classify.h:440
int matcher_sufficient_examples_for_prototyping
Definition: classify.h:429
BIT_VECTOR AllConfigsOn
Definition: classify.h:480
double classify_max_rating_ratio
Definition: classify.h:401
bool CompareFontSet(const FontSet &fs1, const FontSet &fs2)
Definition: fontinfo.cpp:128
int matcher_min_examples_for_prototyping
Definition: classify.h:427
void FontInfoDeleteCallback(FontInfo f)
Definition: fontinfo.cpp:139
NORM_PROTOS * NormProtos
Definition: classify.h:485
double matcher_rating_margin
Definition: classify.h:423
double classify_max_certainty_margin
Definition: classify.h:403
bool classify_enable_learning
Definition: classify.h:388
void FontSetDeleteCallback(FontSet fs)
Definition: fontinfo.cpp:146
UnicityTable< FontInfo > fontinfo_table_
Definition: classify.h:487
#define BOOL_MEMBER(name, val, comment, vec)
Definition: params.h:303
double speckle_large_max_size
Definition: classify.h:500
#define INT_MEMBER(name, val, comment, vec)
Definition: params.h:300
INT_TEMPLATES PreTrainedTemplates
Definition: classify.h:468
bool prioritize_division
Definition: classify.h:386

◆ ~Classify()

tesseract::Classify::~Classify ( )
virtual

Definition at line 189 of file classify.cpp.

189  {
191  delete learn_debug_win_;
192  delete learn_fragmented_word_debug_win_;
193  delete learn_fragments_debug_win_;
194 }
void EndAdaptiveClassifier()
Definition: adaptmatch.cpp:456

Member Function Documentation

◆ AdaptableWord()

bool tesseract::Classify::AdaptableWord ( WERD_RES * word)

Return TRUE if the specified word is acceptable for adaptation.

Globals: none

Parameters
word: current word
Returns
TRUE or FALSE
Note
Exceptions: none
History: Thu May 30 14:25:06 1991, DSJ, Created.

Definition at line 836 of file adaptmatch.cpp.

836  {
837  if (word->best_choice == NULL) return false;
838  int BestChoiceLength = word->best_choice->length();
839  float adaptable_score =
841  return // rules that apply in general - simplest to compute first
842  BestChoiceLength > 0 &&
843  BestChoiceLength == word->rebuild_word->NumBlobs() &&
844  BestChoiceLength <= MAX_ADAPTABLE_WERD_SIZE &&
845  // This basically ensures that the word is at least a dictionary match
846  // (freq word, user word, system dawg word, etc).
847  // Since all the other adjustments will make adjust factor higher
848  // than higher than adaptable_score=1.1+0.05=1.15
849  // Since these are other flags that ensure that the word is dict word,
850  // this check could be at times redundant.
851  word->best_choice->adjust_factor() <= adaptable_score &&
852  // Make sure that alternative choices are not dictionary words.
853  word->AlternativeChoiceAdjustmentsWorseThan(adaptable_score);
854 }
#define ADAPTABLE_WERD_ADJUSTMENT
Definition: adaptmatch.cpp:73
WERD_CHOICE * best_choice
Definition: pageres.h:219
Dict & getDict()
Definition: classify.h:65
int length() const
Definition: ratngs.h:301
double segment_penalty_dict_case_ok
Definition: dict.h:584
TWERD * rebuild_word
Definition: pageres.h:244
bool AlternativeChoiceAdjustmentsWorseThan(float threshold) const
Definition: pageres.cpp:431
int NumBlobs() const
Definition: blobs.h:425
float adjust_factor() const
Definition: ratngs.h:304
#define MAX_ADAPTABLE_WERD_SIZE
Definition: adaptmatch.cpp:71

◆ AdaptiveClassifier()

void tesseract::Classify::AdaptiveClassifier ( TBLOB * Blob,
BLOB_CHOICE_LIST *  Choices 
)

This routine calls the adaptive matcher which returns (in an array) the class id of each class matched.

It also returns the number of classes matched. For each class matched it places the best rating found for that class into the Ratings array.

Bad matches are then removed so that they don't need to be sorted. The remaining good matches are then sorted and converted to choices.

This routine also performs some simple speckle filtering.

Note
Exceptions: none
History: Mon Mar 11 10:00:58 1991, DSJ, Created.
Parameters
Blob: blob to be classified
[out] Choices: List of choices found by adaptive matcher. Filled on return with the choices found by the class pruner and the ratings therefrom. Also contains the detailed results of the integer matcher.

Definition at line 185 of file adaptmatch.cpp.

185  {
186  assert(Choices != NULL);
187  ADAPT_RESULTS *Results = new ADAPT_RESULTS;
188  Results->Initialize();
189 
190  ASSERT_HOST(AdaptedTemplates != NULL);
191 
192  DoAdaptiveMatch(Blob, Results);
193 
194  RemoveBadMatches(Results);
196  RemoveExtraPuncs(Results);
197  Results->ComputeBest();
198  ConvertMatchesToChoices(Blob->denorm(), Blob->bounding_box(), Results,
199  Choices);
200 
201  // TODO(rays) Move to before ConvertMatchesToChoices!
202  if (LargeSpeckle(*Blob) || Choices->length() == 0)
203  AddLargeSpeckleTo(Results->BlobLength, Choices);
204 
205  if (matcher_debug_level >= 1) {
206  tprintf("AD Matches = ");
207  PrintAdaptiveMatchResults(*Results);
208  }
209 
210 #ifndef GRAPHICS_DISABLED
212  DebugAdaptiveClassifier(Blob, Results);
213 #endif
214 
215  delete Results;
216 } /* AdaptiveClassifier */
bool LargeSpeckle(const TBLOB &blob)
Definition: classify.cpp:230
void RemoveBadMatches(ADAPT_RESULTS *Results)
inT32 BlobLength
Definition: adaptmatch.cpp:83
static int SortDescendingRating(const void *t1, const void *t2)
Definition: shapetable.h:56
#define tprintf(...)
Definition: tprintf.h:31
void ComputeBest()
Definition: adaptmatch.cpp:99
void DoAdaptiveMatch(TBLOB *Blob, ADAPT_RESULTS *Results)
void Initialize()
Definition: adaptmatch.cpp:93
bool classify_enable_adaptive_debugger
Definition: classify.h:413
#define ASSERT_HOST(x)
Definition: errcode.h:84
void PrintAdaptiveMatchResults(const ADAPT_RESULTS &results)
const DENORM & denorm() const
Definition: blobs.h:340
ADAPT_TEMPLATES AdaptedTemplates
Definition: classify.h:472
void AddLargeSpeckleTo(int blob_length, BLOB_CHOICE_LIST *choices)
Definition: classify.cpp:207
void RemoveExtraPuncs(ADAPT_RESULTS *Results)
void ConvertMatchesToChoices(const DENORM &denorm, const TBOX &box, ADAPT_RESULTS *Results, BLOB_CHOICE_LIST *Choices)
GenericVector< UnicharRating > match
Definition: adaptmatch.cpp:88
void DebugAdaptiveClassifier(TBLOB *Blob, ADAPT_RESULTS *Results)
TBOX bounding_box() const
Definition: blobs.cpp:482

◆ AdaptiveClassifierIsEmpty()

bool tesseract::Classify::AdaptiveClassifierIsEmpty ( ) const
inline

Definition at line 284 of file classify.h.

284  {
285  return AdaptedTemplates->NumPermClasses == 0;
286  }
ADAPT_TEMPLATES AdaptedTemplates
Definition: classify.h:472

◆ AdaptiveClassifierIsFull()

bool tesseract::Classify::AdaptiveClassifierIsFull ( ) const
inline

Definition at line 283 of file classify.h.

283 { return NumAdaptationsFailed > 0; }

◆ AdaptToChar()

void tesseract::Classify::AdaptToChar ( TBLOB * Blob,
CLASS_ID  ClassId,
int  FontinfoId,
FLOAT32  Threshold,
ADAPT_TEMPLATES  adaptive_templates 
)
Parameters
Blob: blob to add to templates for ClassId
ClassId: class to add blob to
FontinfoId: font information from pre-trained templates
Threshold: minimum match rating to existing template
adaptive_templates: current set of adapted templates

Globals:

  • AllProtosOn dummy mask to match against all protos
  • AllConfigsOn dummy mask to match against all configs
Returns
none
Note
Exceptions: none
History: Thu Mar 14 09:36:03 1991, DSJ, Created.

Definition at line 872 of file adaptmatch.cpp.

874  {
875  int NumFeatures;
876  INT_FEATURE_ARRAY IntFeatures;
877  UnicharRating int_result;
878  INT_CLASS IClass;
879  ADAPT_CLASS Class;
880  TEMP_CONFIG TempConfig;
881  FEATURE_SET FloatFeatures;
882  int NewTempConfigId;
883 
884  if (!LegalClassId (ClassId))
885  return;
886 
887  int_result.unichar_id = ClassId;
888  Class = adaptive_templates->Class[ClassId];
889  assert(Class != NULL);
890  if (IsEmptyAdaptedClass(Class)) {
891  InitAdaptedClass(Blob, ClassId, FontinfoId, Class, adaptive_templates);
892  } else {
893  IClass = ClassForClassId(adaptive_templates->Templates, ClassId);
894 
895  NumFeatures = GetAdaptiveFeatures(Blob, IntFeatures, &FloatFeatures);
896  if (NumFeatures <= 0) {
897  return; // Features already freed by GetAdaptiveFeatures.
898  }
899 
900  // Only match configs with the matching font.
901  BIT_VECTOR MatchingFontConfigs = NewBitVector(MAX_NUM_PROTOS);
902  for (int cfg = 0; cfg < IClass->NumConfigs; ++cfg) {
903  if (GetFontinfoId(Class, cfg) == FontinfoId) {
904  SET_BIT(MatchingFontConfigs, cfg);
905  } else {
906  reset_bit(MatchingFontConfigs, cfg);
907  }
908  }
909  im_.Match(IClass, AllProtosOn, MatchingFontConfigs,
910  NumFeatures, IntFeatures,
913  FreeBitVector(MatchingFontConfigs);
914 
915  SetAdaptiveThreshold(Threshold);
916 
917  if (1.0f - int_result.rating <= Threshold) {
918  if (ConfigIsPermanent(Class, int_result.config)) {
920  tprintf("Found good match to perm config %d = %4.1f%%.\n",
921  int_result.config, int_result.rating * 100.0);
922  FreeFeatureSet(FloatFeatures);
923  return;
924  }
925 
926  TempConfig = TempConfigFor(Class, int_result.config);
927  IncreaseConfidence(TempConfig);
928  if (TempConfig->NumTimesSeen > Class->MaxNumTimesSeen) {
929  Class->MaxNumTimesSeen = TempConfig->NumTimesSeen;
930  }
932  tprintf("Increasing reliability of temp config %d to %d.\n",
933  int_result.config, TempConfig->NumTimesSeen);
934 
935  if (TempConfigReliable(ClassId, TempConfig)) {
936  MakePermanent(adaptive_templates, ClassId, int_result.config, Blob);
937  UpdateAmbigsGroup(ClassId, Blob);
938  }
939  } else {
941  tprintf("Found poor match to temp config %d = %4.1f%%.\n",
942  int_result.config, int_result.rating * 100.0);
944  DisplayAdaptedChar(Blob, IClass);
945  }
946  NewTempConfigId =
947  MakeNewTemporaryConfig(adaptive_templates, ClassId, FontinfoId,
948  NumFeatures, IntFeatures, FloatFeatures);
949  if (NewTempConfigId >= 0 &&
950  TempConfigReliable(ClassId, TempConfigFor(Class, NewTempConfigId))) {
951  MakePermanent(adaptive_templates, ClassId, NewTempConfigId, Blob);
952  UpdateAmbigsGroup(ClassId, Blob);
953  }
954 
955 #ifndef GRAPHICS_DISABLED
957  DisplayAdaptedChar(Blob, IClass);
958  }
959 #endif
960  }
961  FreeFeatureSet(FloatFeatures);
962  }
963 } /* AdaptToChar */
void InitAdaptedClass(TBLOB *Blob, CLASS_ID ClassId, int FontinfoId, ADAPT_CLASS Class, ADAPT_TEMPLATES Templates)
Definition: adaptmatch.cpp:703
bool matcher_debug_separate_windows
Definition: classify.h:457
void FreeBitVector(BIT_VECTOR BitVector)
Definition: bitvec.cpp:54
void FreeFeatureSet(FEATURE_SET FeatureSet)
Definition: ocrfeatures.cpp:71
#define MAX_NUM_PROTOS
Definition: intproto.h:47
uinT8 NumConfigs
Definition: intproto.h:110
void DisplayAdaptedChar(TBLOB *blob, INT_CLASS_STRUCT *int_class)
Definition: adaptmatch.cpp:965
int classify_adapt_feature_threshold
Definition: classify.h:446
BIT_VECTOR AllProtosOn
Definition: classify.h:479
#define tprintf(...)
Definition: tprintf.h:31
uinT32 * BIT_VECTOR
Definition: bitvec.h:28
#define LegalClassId(c)
Definition: intproto.h:179
#define IncreaseConfidence(TempConfig)
Definition: adaptive.h:107
BIT_VECTOR NewBitVector(int NumBits)
Definition: bitvec.cpp:89
uinT8 MaxNumTimesSeen
Definition: adaptive.h:65
int classify_learning_debug_level
Definition: classify.h:418
INT_TEMPLATES Templates
Definition: adaptive.h:76
int GetAdaptiveFeatures(TBLOB *Blob, INT_FEATURE_ARRAY IntFeatures, FEATURE_SET *FloatFeatures)
Definition: adaptmatch.cpp:798
void UpdateAmbigsGroup(CLASS_ID class_id, TBLOB *Blob)
#define ClassForClassId(T, c)
Definition: intproto.h:181
int MakeNewTemporaryConfig(ADAPT_TEMPLATES Templates, CLASS_ID ClassId, int FontinfoId, int NumFeatures, INT_FEATURE_ARRAY Features, FEATURE_SET FloatFeatures)
#define TempConfigFor(Class, ConfigId)
Definition: adaptive.h:101
IntegerMatcher im_
Definition: classify.h:502
int GetFontinfoId(ADAPT_CLASS Class, uinT8 ConfigId)
Definition: adaptive.cpp:188
uinT8 NumTimesSeen
Definition: adaptive.h:41
void MakePermanent(ADAPT_TEMPLATES Templates, CLASS_ID ClassId, int ConfigId, TBLOB *Blob)
ADAPT_CLASS Class[MAX_NUM_CLASSES]
Definition: adaptive.h:80
#define NO_DEBUG
Definition: adaptmatch.cpp:70
void SetAdaptiveThreshold(FLOAT32 Threshold)
#define IsEmptyAdaptedClass(Class)
Definition: adaptive.h:89
#define SET_BIT(array, bit)
Definition: bitvec.h:57
bool TempConfigReliable(CLASS_ID class_id, const TEMP_CONFIG &config)
#define ConfigIsPermanent(Class, ConfigId)
Definition: adaptive.h:92
INT_FEATURE_STRUCT INT_FEATURE_ARRAY[MAX_NUM_INT_FEATURES]
Definition: intproto.h:155
#define reset_bit(array, bit)
Definition: bitvec.h:59
void Match(INT_CLASS ClassTemplate, BIT_VECTOR ProtoMask, BIT_VECTOR ConfigMask, inT16 NumFeatures, const INT_FEATURE_STRUCT *Features, tesseract::UnicharRating *Result, int AdaptFeatureThreshold, int Debug, bool SeparateDebugWindows)
Definition: intmatcher.cpp:475

◆ AddLargeSpeckleTo()

void tesseract::Classify::AddLargeSpeckleTo ( int  blob_length,
BLOB_CHOICE_LIST *  choices 
)

Definition at line 207 of file classify.cpp.

207  {
208  BLOB_CHOICE_IT bc_it(choices);
209  // If there is no classifier result, we will use the worst possible certainty
210  // and corresponding rating.
211  float certainty = -getDict().certainty_scale;
212  float rating = rating_scale * blob_length;
213  if (!choices->empty() && blob_length > 0) {
214  bc_it.move_to_last();
215  BLOB_CHOICE* worst_choice = bc_it.data();
216  // Add speckle_rating_penalty to worst rating, matching old value.
217  rating = worst_choice->rating() + speckle_rating_penalty;
218  // Compute the rating to correspond to the certainty. (Used to be kept
219  // the same, but that messes up the language model search.)
220  certainty = -rating * getDict().certainty_scale /
221  (rating_scale * blob_length);
222  }
223  BLOB_CHOICE* blob_choice = new BLOB_CHOICE(UNICHAR_SPACE, rating, certainty,
224  -1, 0.0f, MAX_FLOAT32, 0,
226  bc_it.add_to_end(blob_choice);
227 }
double certainty_scale
Definition: dict.h:611
Dict & getDict()
Definition: classify.h:65
float rating() const
Definition: ratngs.h:79
double speckle_rating_penalty
Definition: classify.h:502
#define MAX_FLOAT32
Definition: host.h:66

◆ AddNewResult()

void tesseract::Classify::AddNewResult ( const UnicharRating new_result,
ADAPT_RESULTS results 
)

This routine adds the result of a classification into Results. If the new rating is much worse than the current best rating, it is not entered into results because it would end up being stripped later anyway. If the new rating is better than the old rating for the class, it replaces the old rating. If this is the first rating for the class, the class is added to the list of matched classes in Results. If the new rating is better than the best so far, it becomes the best so far.

Globals:

Parameters
new_result: new result to add
[out] results: results to add new result to
Note
Exceptions: none
History: Tue Mar 12 18:19:29 1991, DSJ, Created.

Definition at line 1016 of file adaptmatch.cpp.

1017  {
1018  int old_match = FindScoredUnichar(new_result.unichar_id, *results);
1019 
1020  if (new_result.rating + matcher_bad_match_pad < results->best_rating ||
1021  (old_match < results->match.size() &&
1022  new_result.rating <= results->match[old_match].rating))
1023  return; // New one not good enough.
1024 
1025  if (!unicharset.get_fragment(new_result.unichar_id))
1026  results->HasNonfragment = true;
1027 
1028  if (old_match < results->match.size()) {
1029  results->match[old_match].rating = new_result.rating;
1030  } else {
1031  results->match.push_back(new_result);
1032  }
1033 
1034  if (new_result.rating > results->best_rating &&
1035  // Ensure that fragments do not affect best rating, class and config.
1036  // This is needed so that at least one non-fragmented character is
1037  // always present in the results.
1038  // TODO(daria): verify that this helps accuracy and does not
1039  // hurt performance.
1040  !unicharset.get_fragment(new_result.unichar_id)) {
1041  results->best_match_index = old_match;
1042  results->best_rating = new_result.rating;
1043  results->best_unichar_id = new_result.unichar_id;
1044  }
1045 } /* AddNewResult */
const CHAR_FRAGMENT * get_fragment(UNICHAR_ID unichar_id) const
Definition: unicharset.h:694
int best_match_index
Definition: adaptmatch.cpp:86
bool HasNonfragment
Definition: adaptmatch.cpp:84
int push_back(T object)
UNICHARSET unicharset
Definition: ccutil.h:68
GenericVector< UnicharRating > match
Definition: adaptmatch.cpp:88
UNICHAR_ID best_unichar_id
Definition: adaptmatch.cpp:85
FLOAT32 best_rating
Definition: adaptmatch.cpp:87

◆ AmbigClassifier()

void tesseract::Classify::AmbigClassifier ( const GenericVector< INT_FEATURE_STRUCT > &  int_features,
const INT_FX_RESULT_STRUCT fx_info,
const TBLOB blob,
INT_TEMPLATES  templates,
ADAPT_CLASS classes,
UNICHAR_ID ambiguities,
ADAPT_RESULTS results 
)

This routine is identical to CharNormClassifier() except that it does no class pruning. It simply matches the unknown blob against the classes listed in Ambiguities.

Globals:

Parameters
blob: blob to be classified
templates: built-in templates to classify against
classes: adapted class templates
ambiguities: array of unichar ids to match against
[out] results: place to put match results
int_features
fx_info
Note
Exceptions: none
History: Tue Mar 12 19:40:36 1991, DSJ, Created.

Definition at line 1070 of file adaptmatch.cpp.

1077  {
1078  if (int_features.empty()) return;
1079  uinT8* CharNormArray = new uinT8[unicharset.size()];
1080  UnicharRating int_result;
1081 
1082  results->BlobLength = GetCharNormFeature(fx_info, templates, NULL,
1083  CharNormArray);
1084  bool debug = matcher_debug_level >= 2 || classify_debug_level > 1;
1085  if (debug)
1086  tprintf("AM Matches = ");
1087 
1088  int top = blob->bounding_box().top();
1089  int bottom = blob->bounding_box().bottom();
1090  while (*ambiguities >= 0) {
1091  CLASS_ID class_id = *ambiguities;
1092 
1093  int_result.unichar_id = class_id;
1094  im_.Match(ClassForClassId(templates, class_id),
1096  int_features.size(), &int_features[0],
1097  &int_result,
1100 
1101  ExpandShapesAndApplyCorrections(NULL, debug, class_id, bottom, top, 0,
1102  results->BlobLength,
1104  CharNormArray, &int_result, results);
1105  ambiguities++;
1106  }
1107  delete [] CharNormArray;
1108 } /* AmbigClassifier */
bool matcher_debug_separate_windows
Definition: classify.h:457
int GetCharNormFeature(const INT_FX_RESULT_STRUCT &fx_info, INT_TEMPLATES templates, uinT8 *pruner_norm_array, uinT8 *char_norm_array)
inT32 BlobLength
Definition: adaptmatch.cpp:83
UNICHAR_ID CLASS_ID
Definition: matchdefs.h:35
int classify_adapt_feature_threshold
Definition: classify.h:446
int classify_integer_matcher_multiplier
Definition: classify.h:468
BIT_VECTOR AllProtosOn
Definition: classify.h:479
#define tprintf(...)
Definition: tprintf.h:31
bool empty() const
Definition: genericvector.h:90
int size() const
Definition: genericvector.h:72
void ExpandShapesAndApplyCorrections(ADAPT_CLASS *classes, bool debug, int class_id, int bottom, int top, float cp_rating, int blob_length, int matcher_multiplier, const uinT8 *cn_factors, UnicharRating *int_result, ADAPT_RESULTS *final_results)
#define ClassForClassId(T, c)
Definition: intproto.h:181
IntegerMatcher im_
Definition: classify.h:502
UNICHARSET unicharset
Definition: ccutil.h:68
inT16 top() const
Definition: rect.h:54
#define NO_DEBUG
Definition: adaptmatch.cpp:70
BIT_VECTOR AllConfigsOn
Definition: classify.h:480
uint8_t uinT8
Definition: host.h:35
int size() const
Definition: unicharset.h:299
inT16 bottom() const
Definition: rect.h:61
TBOX bounding_box() const
Definition: blobs.cpp:482
void Match(INT_CLASS ClassTemplate, BIT_VECTOR ProtoMask, BIT_VECTOR ConfigMask, inT16 NumFeatures, const INT_FEATURE_STRUCT *Features, tesseract::UnicharRating *Result, int AdaptFeatureThreshold, int Debug, bool SeparateDebugWindows)
Definition: intmatcher.cpp:475

◆ BaselineClassifier()

UNICHAR_ID * tesseract::Classify::BaselineClassifier ( TBLOB Blob,
const GenericVector< INT_FEATURE_STRUCT > &  int_features,
const INT_FX_RESULT_STRUCT fx_info,
ADAPT_TEMPLATES  Templates,
ADAPT_RESULTS Results 
)

This routine extracts baseline normalized features from the unknown character and matches them against the specified set of templates. The classes which match are added to Results.

Globals:

  • BaselineCutoffs expected num features for each class
Parameters
Blob: blob to be classified
Templates: current set of adapted templates
Results: place to put match results
int_features
fx_info
Returns
Array of possible ambiguous chars that should be checked.
Note
Exceptions: none
History: Tue Mar 12 19:38:03 1991, DSJ, Created.

Definition at line 1292 of file adaptmatch.cpp.

1295  {
1296  if (int_features.empty()) return NULL;
1297  uinT8* CharNormArray = new uinT8[unicharset.size()];
1298  ClearCharNormArray(CharNormArray);
1299 
1301  PruneClasses(Templates->Templates, int_features.size(), -1, &int_features[0],
1302  CharNormArray, BaselineCutoffs, &Results->CPResults);
1303 
1304  if (matcher_debug_level >= 2 || classify_debug_level > 1)
1305  tprintf("BL Matches = ");
1306 
1307  MasterMatcher(Templates->Templates, int_features.size(), &int_features[0],
1308  CharNormArray,
1309  Templates->Class, matcher_debug_flags, 0,
1310  Blob->bounding_box(), Results->CPResults, Results);
1311 
1312  delete [] CharNormArray;
1313  CLASS_ID ClassId = Results->best_unichar_id;
1314  if (ClassId == INVALID_UNICHAR_ID || Results->best_match_index < 0)
1315  return NULL;
1316 
1317  return Templates->Class[ClassId]->
1318  Config[Results->match[Results->best_match_index].config].Perm->Ambigs;
1319 } /* BaselineClassifier */
GenericVector< CP_RESULT_STRUCT > CPResults
Definition: adaptmatch.cpp:89
int best_match_index
Definition: adaptmatch.cpp:86
inT32 BlobLength
Definition: adaptmatch.cpp:83
UNICHAR_ID CLASS_ID
Definition: matchdefs.h:35
const double kStandardFeatureLength
Definition: intfx.h:46
#define tprintf(...)
Definition: tprintf.h:31
bool empty() const
Definition: genericvector.h:90
int IntCastRounded(double x)
Definition: helpers.h:179
void ClearCharNormArray(uinT8 *char_norm_array)
Definition: float2int.cpp:48
CLUSTERCONFIG Config
int size() const
Definition: genericvector.h:72
INT_TEMPLATES Templates
Definition: adaptive.h:76
void MasterMatcher(INT_TEMPLATES templates, inT16 num_features, const INT_FEATURE_STRUCT *features, const uinT8 *norm_factors, ADAPT_CLASS *classes, int debug, int matcher_multiplier, const TBOX &blob_box, const GenericVector< CP_RESULT_STRUCT > &results, ADAPT_RESULTS *final_results)
UNICHARSET unicharset
Definition: ccutil.h:68
ADAPT_CLASS Class[MAX_NUM_CLASSES]
Definition: adaptive.h:80
uint8_t uinT8
Definition: host.h:35
GenericVector< UnicharRating > match
Definition: adaptmatch.cpp:88
int PruneClasses(const INT_TEMPLATES_STRUCT *int_templates, int num_features, int keep_this, const INT_FEATURE_STRUCT *features, const uinT8 *normalization_factors, const uinT16 *expected_num_features, GenericVector< CP_RESULT_STRUCT > *results)
Definition: intmatcher.cpp:412
int size() const
Definition: unicharset.h:299
UNICHAR_ID best_unichar_id
Definition: adaptmatch.cpp:85
TBOX bounding_box() const
Definition: blobs.cpp:482

◆ CharNormClassifier()

int tesseract::Classify::CharNormClassifier ( TBLOB blob,
const TrainingSample sample,
ADAPT_RESULTS adapt_results 
)

This routine extracts character normalized features from the unknown character and matches them against the specified set of templates. The classes which match are added to Results.

Parameters
blob: blob to be classified
sample: training sample holding the features of the unknown character; it is passed to the static classifier
adapt_results: place to put match results

Globals:

  • CharNormCutoffs expected num features for each class
  • AllProtosOn mask that enables all protos
  • AllConfigsOn mask that enables all configs
Note
Exceptions: none
History: Tue Mar 12 16:02:52 1991, DSJ, Created.

Definition at line 1341 of file adaptmatch.cpp.

1343  {
1344  // This is the length that is used for scaling ratings vs certainty.
1345  adapt_results->BlobLength =
1346  IntCastRounded(sample.outline_length() / kStandardFeatureLength);
1347  GenericVector<UnicharRating> unichar_results;
1348  static_classifier_->UnicharClassifySample(sample, blob->denorm().pix(), 0,
1349  -1, &unichar_results);
1350  // Convert results to the format used internally by AdaptiveClassifier.
1351  for (int r = 0; r < unichar_results.size(); ++r) {
1352  AddNewResult(unichar_results[r], adapt_results);
1353  }
1354  return sample.num_features();
1355 } /* CharNormClassifier */
Pix * pix() const
Definition: normalis.h:248
virtual int UnicharClassifySample(const TrainingSample &sample, Pix *page_pix, int debug, UNICHAR_ID keep_this, GenericVector< UnicharRating > *results)
inT32 BlobLength
Definition: adaptmatch.cpp:83
const double kStandardFeatureLength
Definition: intfx.h:46
int IntCastRounded(double x)
Definition: helpers.h:179
void AddNewResult(const UnicharRating &new_result, ADAPT_RESULTS *results)
const DENORM & denorm() const
Definition: blobs.h:340
Definition: cluster.h:32

◆ CharNormTrainingSample()

int tesseract::Classify::CharNormTrainingSample ( bool  pruner_only,
int  keep_this,
const TrainingSample sample,
GenericVector< UnicharRating > *  results 
)

Definition at line 1359 of file adaptmatch.cpp.

1362  {
1363  results->clear();
1364  ADAPT_RESULTS* adapt_results = new ADAPT_RESULTS();
1365  adapt_results->Initialize();
1366  // Compute the bounding box of the features.
1367  int num_features = sample.num_features();
1368  // Only the top and bottom of the blob_box are used by MasterMatcher, so
1369  // fabricate right and left using top and bottom.
1370  TBOX blob_box(sample.geo_feature(GeoBottom), sample.geo_feature(GeoBottom),
1371  sample.geo_feature(GeoTop), sample.geo_feature(GeoTop));
1372  // Compute the char_norm_array from the saved cn_feature.
1373  FEATURE norm_feature = sample.GetCNFeature();
1374  uinT8* char_norm_array = new uinT8[unicharset.size()];
1375  int num_pruner_classes = MAX(unicharset.size(),
1377  uinT8* pruner_norm_array = new uinT8[num_pruner_classes];
1378  adapt_results->BlobLength =
1379  static_cast<int>(ActualOutlineLength(norm_feature) * 20 + 0.5);
1380  ComputeCharNormArrays(norm_feature, PreTrainedTemplates, char_norm_array,
1381  pruner_norm_array);
1382 
1383  PruneClasses(PreTrainedTemplates, num_features, keep_this, sample.features(),
1384  pruner_norm_array,
1385  shape_table_ != NULL ? &shapetable_cutoffs_[0] : CharNormCutoffs,
1386  &adapt_results->CPResults);
1387  delete [] pruner_norm_array;
1388  if (keep_this >= 0) {
1389  adapt_results->CPResults[0].Class = keep_this;
1390  adapt_results->CPResults.truncate(1);
1391  }
1392  if (pruner_only) {
1393  // Convert pruner results to output format.
1394  for (int i = 0; i < adapt_results->CPResults.size(); ++i) {
1395  int class_id = adapt_results->CPResults[i].Class;
1396  results->push_back(
1397  UnicharRating(class_id, 1.0f - adapt_results->CPResults[i].Rating));
1398  }
1399  } else {
1400  MasterMatcher(PreTrainedTemplates, num_features, sample.features(),
1401  char_norm_array,
1402  NULL, matcher_debug_flags,
1404  blob_box, adapt_results->CPResults, adapt_results);
1405  // Convert master matcher results to output format.
1406  for (int i = 0; i < adapt_results->match.size(); i++) {
1407  results->push_back(adapt_results->match[i]);
1408  }
1410  }
1411  delete [] char_norm_array;
1412  delete adapt_results;
1413  return num_features;
1414 } /* CharNormTrainingSample */
GenericVector< CP_RESULT_STRUCT > CPResults
Definition: adaptmatch.cpp:89
inT32 BlobLength
Definition: adaptmatch.cpp:83
int push_back(T object)
static int SortDescendingRating(const void *t1, const void *t2)
Definition: shapetable.h:56
int classify_integer_matcher_multiplier
Definition: classify.h:468
FLOAT32 ActualOutlineLength(FEATURE Feature)
Definition: normfeat.cpp:32
void truncate(int size)
void Initialize()
Definition: adaptmatch.cpp:93
int size() const
Definition: genericvector.h:72
ShapeTable * shape_table_
Definition: classify.h:511
void MasterMatcher(INT_TEMPLATES templates, inT16 num_features, const INT_FEATURE_STRUCT *features, const uinT8 *norm_factors, ADAPT_CLASS *classes, int debug, int matcher_multiplier, const TBOX &blob_box, const GenericVector< CP_RESULT_STRUCT > &results, ADAPT_RESULTS *final_results)
void ComputeCharNormArrays(FEATURE_STRUCT *norm_feature, INT_TEMPLATES_STRUCT *templates, uinT8 *char_norm_array, uinT8 *pruner_array)
UNICHARSET unicharset
Definition: ccutil.h:68
#define MAX(x, y)
Definition: ndminx.h:24
Definition: rect.h:30
uint8_t uinT8
Definition: host.h:35
GenericVector< UnicharRating > match
Definition: adaptmatch.cpp:88
int PruneClasses(const INT_TEMPLATES_STRUCT *int_templates, int num_features, int keep_this, const INT_FEATURE_STRUCT *features, const uinT8 *normalization_factors, const uinT16 *expected_num_features, GenericVector< CP_RESULT_STRUCT > *results)
Definition: intmatcher.cpp:412
int size() const
Definition: unicharset.h:299
Definition: cluster.h:32
INT_TEMPLATES PreTrainedTemplates
Definition: classify.h:468

◆ ClassAndConfigIDToFontOrShapeID()

int tesseract::Classify::ClassAndConfigIDToFontOrShapeID ( int  class_id,
int  int_result_config 
) const

Definition at line 2269 of file adaptmatch.cpp.

2270  {
2271  int font_set_id = PreTrainedTemplates->Class[class_id]->font_set_id;
2272  // Older inttemps have no font_ids.
2273  if (font_set_id < 0)
2274  return kBlankFontinfoId;
2275  const FontSet &fs = fontset_table_.get(font_set_id);
2276  ASSERT_HOST(int_result_config >= 0 && int_result_config < fs.size);
2277  return fs.configs[int_result_config];
2278 }
UnicityTable< FontSet > fontset_table_
Definition: classify.h:495
#define ASSERT_HOST(x)
Definition: errcode.h:84
INT_CLASS Class[MAX_NUM_CLASSES]
Definition: intproto.h:124
INT_TEMPLATES PreTrainedTemplates
Definition: classify.h:468

◆ ClassIDToDebugStr()

STRING tesseract::Classify::ClassIDToDebugStr ( const INT_TEMPLATES_STRUCT templates,
int  class_id,
int  config_id 
) const

Definition at line 2256 of file adaptmatch.cpp.

2257  {
2258  STRING class_string;
2259  if (templates == PreTrainedTemplates && shape_table_ != NULL) {
2260  int shape_id = ClassAndConfigIDToFontOrShapeID(class_id, config_id);
2261  class_string = shape_table_->DebugStr(shape_id);
2262  } else {
2263  class_string = unicharset.debug_str(class_id);
2264  }
2265  return class_string;
2266 }
STRING DebugStr(int shape_id) const
Definition: shapetable.cpp:287
ShapeTable * shape_table_
Definition: classify.h:511
Definition: strngs.h:45
UNICHARSET unicharset
Definition: ccutil.h:68
int ClassAndConfigIDToFontOrShapeID(int class_id, int int_result_config) const
STRING debug_str(UNICHAR_ID id) const
Definition: unicharset.cpp:318
INT_TEMPLATES PreTrainedTemplates
Definition: classify.h:468

◆ ClassifyAsNoise()

void tesseract::Classify::ClassifyAsNoise ( ADAPT_RESULTS results)

This routine computes a rating which reflects the likelihood that the blob being classified is a noise blob. NOTE: assumes that the blob length has already been computed and placed into Results.

Parameters
results: results to add noise classification to

Globals:

  • matcher_avg_noise_size avg. length of a noise blob
Note
Exceptions: none
History: Tue Mar 12 18:36:52 1991, DSJ, Created.

Definition at line 1432 of file adaptmatch.cpp.

1432  {
1433  float rating = results->BlobLength / matcher_avg_noise_size;
1434  rating *= rating;
1435  rating /= 1.0 + rating;
1436 
1437  AddNewResult(UnicharRating(UNICHAR_SPACE, 1.0f - rating), results);
1438 } /* ClassifyAsNoise */
double matcher_avg_noise_size
Definition: classify.h:424
inT32 BlobLength
Definition: adaptmatch.cpp:83
void AddNewResult(const UnicharRating &new_result, ADAPT_RESULTS *results)

◆ ClearCharNormArray()

void tesseract::Classify::ClearCharNormArray ( uinT8 char_norm_array)

For each class in the unicharset, clears the corresponding entry in char_norm_array. char_norm_array is indexed by unichar_id.

Globals:

  • none
Parameters
char_norm_array: array to be cleared
Note
Exceptions: none
History: Wed Feb 20 11:20:54 1991, DSJ, Created.

Definition at line 48 of file float2int.cpp.

48  {
49  memset(char_norm_array, 0, sizeof(*char_norm_array) * unicharset.size());
50 } /* ClearCharNormArray */
UNICHARSET unicharset
Definition: ccutil.h:68
int size() const
Definition: unicharset.h:299

◆ ComputeCharNormArrays()

void tesseract::Classify::ComputeCharNormArrays ( FEATURE_STRUCT norm_feature,
INT_TEMPLATES_STRUCT templates,
uinT8 char_norm_array,
uinT8 pruner_array 
)

Definition at line 1734 of file adaptmatch.cpp.

1737  {
1738  ComputeIntCharNormArray(*norm_feature, char_norm_array);
1739  if (pruner_array != NULL) {
1740  if (shape_table_ == NULL) {
1741  ComputeIntCharNormArray(*norm_feature, pruner_array);
1742  } else {
1743  memset(pruner_array, MAX_UINT8,
1744  templates->NumClasses * sizeof(pruner_array[0]));
1745  // Each entry in the pruner norm array is the MIN of all the entries of
1746  // the corresponding unichars in the CharNormArray.
1747  for (int id = 0; id < templates->NumClasses; ++id) {
1748  int font_set_id = templates->Class[id]->font_set_id;
1749  const FontSet &fs = fontset_table_.get(font_set_id);
1750  for (int config = 0; config < fs.size; ++config) {
1751  const Shape& shape = shape_table_->GetShape(fs.configs[config]);
1752  for (int c = 0; c < shape.size(); ++c) {
1753  if (char_norm_array[shape[c].unichar_id] < pruner_array[id])
1754  pruner_array[id] = char_norm_array[shape[c].unichar_id];
1755  }
1756  }
1757  }
1758  }
1759  }
1760  FreeFeature(norm_feature);
1761 }
UnicityTable< FontSet > fontset_table_
Definition: classify.h:495
#define MAX_UINT8
Definition: host.h:63
ShapeTable * shape_table_
Definition: classify.h:511
void ComputeIntCharNormArray(const FEATURE_STRUCT &norm_feature, uinT8 *char_norm_array)
Definition: float2int.cpp:69
const Shape & GetShape(int shape_id) const
Definition: shapetable.h:320
INT_CLASS Class[MAX_NUM_CLASSES]
Definition: intproto.h:124
void FreeFeature(FEATURE Feature)
Definition: ocrfeatures.cpp:59

◆ ComputeCorrectedRating()

double tesseract::Classify::ComputeCorrectedRating ( bool  debug,
int  unichar_id,
double  cp_rating,
double  im_rating,
int  feature_misses,
int  bottom,
int  top,
int  blob_length,
int  matcher_multiplier,
const uinT8 cn_factors 
)

Definition at line 1227 of file adaptmatch.cpp.

1232  {
1233  // Compute class feature corrections.
1234  double cn_corrected = im_.ApplyCNCorrection(1.0 - im_rating, blob_length,
1235  cn_factors[unichar_id],
1236  matcher_multiplier);
1237  double miss_penalty = tessedit_class_miss_scale * feature_misses;
1238  double vertical_penalty = 0.0;
1239  // Penalize non-alnums for being vertical misfits.
1240  if (!unicharset.get_isalpha(unichar_id) &&
1241  !unicharset.get_isdigit(unichar_id) &&
1242  cn_factors[unichar_id] != 0 && classify_misfit_junk_penalty > 0.0) {
1243  int min_bottom, max_bottom, min_top, max_top;
1244  unicharset.get_top_bottom(unichar_id, &min_bottom, &max_bottom,
1245  &min_top, &max_top);
1246  if (debug) {
1247  tprintf("top=%d, vs [%d, %d], bottom=%d, vs [%d, %d]\n",
1248  top, min_top, max_top, bottom, min_bottom, max_bottom);
1249  }
1250  if (top < min_top || top > max_top ||
1251  bottom < min_bottom || bottom > max_bottom) {
1252  vertical_penalty = classify_misfit_junk_penalty;
1253  }
1254  }
1255  double result = 1.0 - (cn_corrected + miss_penalty + vertical_penalty);
1256  if (result < WORST_POSSIBLE_RATING)
1257  result = WORST_POSSIBLE_RATING;
1258  if (debug) {
1259  tprintf("%s: %2.1f%%(CP%2.1f, IM%2.1f + CN%.2f(%d) + MP%2.1f + VP%2.1f)\n",
1260  unicharset.id_to_unichar(unichar_id),
1261  result * 100.0,
1262  cp_rating * 100.0,
1263  (1.0 - im_rating) * 100.0,
1264  (cn_corrected - (1.0 - im_rating)) * 100.0,
1265  cn_factors[unichar_id],
1266  miss_penalty * 100.0,
1267  vertical_penalty * 100.0);
1268  }
1269  return result;
1270 }
double tessedit_class_miss_scale
Definition: classify.h:438
#define WORST_POSSIBLE_RATING
Definition: adaptmatch.cpp:77
#define tprintf(...)
Definition: tprintf.h:31
const char * id_to_unichar(UNICHAR_ID id) const
Definition: unicharset.cpp:266
bool get_isalpha(UNICHAR_ID unichar_id) const
Definition: unicharset.h:451
bool get_isdigit(UNICHAR_ID unichar_id) const
Definition: unicharset.h:472
void get_top_bottom(UNICHAR_ID unichar_id, int *min_bottom, int *max_bottom, int *min_top, int *max_top) const
Definition: unicharset.h:528
float ApplyCNCorrection(float rating, int blob_length, int normalization_factor, int matcher_multiplier)
IntegerMatcher im_
Definition: classify.h:502
UNICHARSET unicharset
Definition: ccutil.h:68
double classify_misfit_junk_penalty
Definition: classify.h:434

◆ ComputeIntCharNormArray()

void tesseract::Classify::ComputeIntCharNormArray ( const FEATURE_STRUCT norm_feature,
uinT8 char_norm_array 
)

For each class in unicharset, computes the match between norm_feature and the normalization protos for that class. Converts this number to the range from 0 - 255 and stores it into char_norm_array. CharNormArray is indexed by unichar_id.

Globals:

  • PreTrainedTemplates current set of built-in templates
Parameters
norm_feature: character normalization feature
[out] char_norm_array: place to put results of size unicharset.size()
Note
Exceptions: none
History: Wed Feb 20 11:20:54 1991, DSJ, Created.

Definition at line 69 of file float2int.cpp.

70  {
71  for (int i = 0; i < unicharset.size(); i++) {
72  if (i < PreTrainedTemplates->NumClasses) {
73  int norm_adjust = static_cast<int>(INT_CHAR_NORM_RANGE *
74  ComputeNormMatch(i, norm_feature, FALSE));
75  char_norm_array[i] = ClipToRange(norm_adjust, 0, MAX_INT_CHAR_NORM);
76  } else {
77  // Classes with no templates (eg. ambigs & ligatures) default
78  // to worst match.
79  char_norm_array[i] = MAX_INT_CHAR_NORM;
80  }
81  }
82 } /* ComputeIntCharNormArray */
#define MAX_INT_CHAR_NORM
Definition: float2int.cpp:28
#define FALSE
Definition: capi.h:46
T ClipToRange(const T &x, const T &lower_bound, const T &upper_bound)
Definition: helpers.h:122
#define INT_CHAR_NORM_RANGE
Definition: intproto.h:133
FLOAT32 ComputeNormMatch(CLASS_ID ClassId, const FEATURE_STRUCT &feature, BOOL8 DebugMatch)
Definition: normmatch.cpp:88
UNICHARSET unicharset
Definition: ccutil.h:68
int size() const
Definition: unicharset.h:299

◆ ComputeIntFeatures()

void tesseract::Classify::ComputeIntFeatures ( FEATURE_SET  Features,
INT_FEATURE_ARRAY  IntFeatures 
)

This routine converts each floating point pico-feature in Features into integer format and saves it into IntFeatures.

Globals:

  • none
Parameters
Features: floating point pico-features to be converted
[out] IntFeatures: array to put converted features into
Note
Exceptions: none
History: Wed Feb 20 10:58:45 1991, DSJ, Created.

Definition at line 100 of file float2int.cpp.

101  {
102  int Fid;
103  FEATURE Feature;
104  FLOAT32 YShift;
105 
107  YShift = BASELINE_Y_SHIFT;
108  else
109  YShift = Y_SHIFT;
110 
111  for (Fid = 0; Fid < Features->NumFeatures; Fid++) {
112  Feature = Features->Features[Fid];
113 
114  IntFeatures[Fid].X =
116  IntFeatures[Fid].Y =
117  Bucket8For(Feature->Params[PicoFeatY], YShift, INT_FEAT_RANGE);
118  IntFeatures[Fid].Theta = CircBucketFor(Feature->Params[PicoFeatDir],
120  IntFeatures[Fid].CP_misses = 0;
121  }
122 } /* ComputeIntFeatures */
uinT8 Bucket8For(FLOAT32 param, FLOAT32 offset, int num_buckets)
Definition: intproto.cpp:435
FEATURE Features[1]
Definition: ocrfeatures.h:72
#define Y_SHIFT
Definition: intproto.h:41
#define BASELINE_Y_SHIFT
Definition: float2int.h:28
#define X_SHIFT
Definition: intproto.h:40
#define ANGLE_SHIFT
Definition: intproto.h:39
uinT8 CircBucketFor(FLOAT32 param, FLOAT32 offset, int num_buckets)
Definition: intproto.cpp:449
float FLOAT32
Definition: host.h:42
#define INT_FEAT_RANGE
Definition: float2int.h:27
FLOAT32 Params[1]
Definition: ocrfeatures.h:65

◆ ComputeNormMatch()

FLOAT32 tesseract::Classify::ComputeNormMatch ( CLASS_ID  ClassId,
const FEATURE_STRUCT feature,
BOOL8  DebugMatch 
)

This routine compares feature against each character normalization proto for ClassId and returns the match rating of the best match.

Parameters
ClassId: id of class to match against
feature: character normalization feature
DebugMatch: controls dump of debug info

Globals: NormProtos character normalization prototypes

Returns
Best match rating for Feature against protos of ClassId.
Note
Exceptions: none
History: Wed Dec 19 16:56:12 1990, DSJ, Created.

Definition at line 88 of file normmatch.cpp.

90  {
91  LIST Protos;
92  FLOAT32 BestMatch;
93  FLOAT32 Match;
94  FLOAT32 Delta;
95  PROTOTYPE *Proto;
96  int ProtoId;
97 
98  if (ClassId >= NormProtos->NumProtos) {
99  ClassId = NO_CLASS;
100  }
101 
102  /* handle requests for classification as noise */
103  if (ClassId == NO_CLASS) {
104  /* kludge - clean up constants and make into control knobs later */
105  Match = (feature.Params[CharNormLength] *
106  feature.Params[CharNormLength] * 500.0 +
107  feature.Params[CharNormRx] *
108  feature.Params[CharNormRx] * 8000.0 +
109  feature.Params[CharNormRy] *
110  feature.Params[CharNormRy] * 8000.0);
111  return (1.0 - NormEvidenceOf (Match));
112  }
113 
114  BestMatch = MAX_FLOAT32;
115  Protos = NormProtos->Protos[ClassId];
116 
117  if (DebugMatch) {
118  tprintf("\nChar norm for class %s\n", unicharset.id_to_unichar(ClassId));
119  }
120 
121  ProtoId = 0;
122  iterate(Protos) {
123  Proto = (PROTOTYPE *) first_node (Protos);
124  Delta = feature.Params[CharNormY] - Proto->Mean[CharNormY];
125  Match = Delta * Delta * Proto->Weight.Elliptical[CharNormY];
126  if (DebugMatch) {
127  tprintf("YMiddle: Proto=%g, Delta=%g, Var=%g, Dist=%g\n",
128  Proto->Mean[CharNormY], Delta,
129  Proto->Weight.Elliptical[CharNormY], Match);
130  }
131  Delta = feature.Params[CharNormRx] - Proto->Mean[CharNormRx];
132  Match += Delta * Delta * Proto->Weight.Elliptical[CharNormRx];
133  if (DebugMatch) {
134  tprintf("Height: Proto=%g, Delta=%g, Var=%g, Dist=%g\n",
135  Proto->Mean[CharNormRx], Delta,
136  Proto->Weight.Elliptical[CharNormRx], Match);
137  }
138  // Ry is width! See intfx.cpp.
139  Delta = feature.Params[CharNormRy] - Proto->Mean[CharNormRy];
140  if (DebugMatch) {
141  tprintf("Width: Proto=%g, Delta=%g, Var=%g\n",
142  Proto->Mean[CharNormRy], Delta,
143  Proto->Weight.Elliptical[CharNormRy]);
144  }
145  Delta = Delta * Delta * Proto->Weight.Elliptical[CharNormRy];
146  Delta *= kWidthErrorWeighting;
147  Match += Delta;
148  if (DebugMatch) {
149  tprintf("Total Dist=%g, scaled=%g, sigmoid=%g, penalty=%g\n",
150  Match, Match / classify_norm_adj_midpoint,
151  NormEvidenceOf(Match), 256 * (1 - NormEvidenceOf(Match)));
152  }
153 
154  if (Match < BestMatch)
155  BestMatch = Match;
156 
157  ProtoId++;
158  }
159  return 1.0 - NormEvidenceOf(BestMatch);
160 } /* ComputeNormMatch */
FLOAT32 * Elliptical
Definition: cluster.h:64
#define tprintf(...)
Definition: tprintf.h:31
FLOATUNION Weight
Definition: cluster.h:83
double NormEvidenceOf(register double NormAdj)
Definition: normmatch.cpp:184
const char * id_to_unichar(UNICHAR_ID id) const
Definition: unicharset.cpp:266
#define NO_CLASS
Definition: matchdefs.h:36
const double kWidthErrorWeighting
Definition: normmatch.cpp:66
LIST * Protos
Definition: normmatch.cpp:42
#define MAX_FLOAT32
Definition: host.h:66
FLOAT32 * Mean
Definition: cluster.h:78
UNICHARSET unicharset
Definition: ccutil.h:68
#define first_node(l)
Definition: oldlist.h:139
double classify_norm_adj_midpoint
Definition: normmatch.cpp:63
float FLOAT32
Definition: host.h:42
NORM_PROTOS * NormProtos
Definition: classify.h:485
FLOAT32 Params[1]
Definition: ocrfeatures.h:65
#define iterate(l)
Definition: oldlist.h:159

◆ ConvertMatchesToChoices()

void tesseract::Classify::ConvertMatchesToChoices ( const DENORM denorm,
const TBOX box,
ADAPT_RESULTS Results,
BLOB_CHOICE_LIST *  Choices 
)

This function converts the given match ratings to the list of blob choices with ratings and certainties (used by the context checkers). If character fragments are present in the results, this function also makes sure that at least one non-fragmented classification is included. For each classification result, check the unicharset for "definite" ambiguities and modify the resulting Choices accordingly.

Definition at line 1446 of file adaptmatch.cpp.

1448  {
1449  assert(Choices != NULL);
1450  FLOAT32 Rating;
1451  FLOAT32 Certainty;
1452  BLOB_CHOICE_IT temp_it;
1453  bool contains_nonfrag = false;
1454  temp_it.set_to_list(Choices);
1455  int choices_length = 0;
1456  // With no shape_table_ maintain the previous MAX_MATCHES as the maximum
1457  // number of returned results, but with a shape_table_ we want to have room
1458  // for at least the biggest shape (which might contain hundreds of Indic
1459  // grapheme fragments) and more, so use double the size of the biggest shape
1460  // if that is more than the default.
1461  int max_matches = MAX_MATCHES;
1462  if (shape_table_ != NULL) {
1463  max_matches = shape_table_->MaxNumUnichars() * 2;
1464  if (max_matches < MAX_MATCHES)
1465  max_matches = MAX_MATCHES;
1466  }
1467 
1468  float best_certainty = -MAX_FLOAT32;
1469  for (int i = 0; i < Results->match.size(); i++) {
1470  const UnicharRating& result = Results->match[i];
1471  bool adapted = result.adapted;
1472  bool current_is_frag = (unicharset.get_fragment(result.unichar_id) != NULL);
1473  if (temp_it.length()+1 == max_matches &&
1474  !contains_nonfrag && current_is_frag) {
1475  continue; // look for a non-fragmented character to fill the
1476  // last spot in Choices if only fragments are present
1477  }
1478  // BlobLength can never be legally 0, this means recognition failed.
1479  // But we must return a classification result because some invoking
1480  // functions (chopper/permuter) do not anticipate a null blob choice.
1481  // So we need to assign a poor, but not infinitely bad score.
1482  if (Results->BlobLength == 0) {
1483  Certainty = -20;
1484  Rating = 100; // should be -certainty * real_blob_length
1485  } else {
1486  Rating = Certainty = (1.0f - result.rating);
1487  Rating *= rating_scale * Results->BlobLength;
1488  Certainty *= -(getDict().certainty_scale);
1489  }
1490  // Adapted results, by their very nature, should have good certainty.
1491  // Those that don't are at best misleading, and often lead to errors,
1492  // so don't accept adapted results that are too far behind the best result,
1493  // whether adapted or static.
1494  // TODO(rays) find some way of automatically tuning these constants.
1495  if (Certainty > best_certainty) {
1496  best_certainty = MIN(Certainty, classify_adapted_pruning_threshold);
1497  } else if (adapted &&
1498  Certainty / classify_adapted_pruning_factor < best_certainty) {
1499  continue; // Don't accept bad adapted results.
1500  }
1501 
1502  float min_xheight, max_xheight, yshift;
1503  denorm.XHeightRange(result.unichar_id, unicharset, box,
1504  &min_xheight, &max_xheight, &yshift);
1505  BLOB_CHOICE* choice =
1506  new BLOB_CHOICE(result.unichar_id, Rating, Certainty,
1508  min_xheight, max_xheight, yshift,
1509  adapted ? BCC_ADAPTED_CLASSIFIER
1511  choice->set_fonts(result.fonts);
1512  temp_it.add_to_end(choice);
1513  contains_nonfrag |= !current_is_frag; // update contains_nonfrag
1514  choices_length++;
1515  if (choices_length >= max_matches) break;
1516  }
1517  Results->match.truncate(choices_length);
1518 } // ConvertMatchesToChoices
double certainty_scale
Definition: dict.h:611
const CHAR_FRAGMENT * get_fragment(UNICHAR_ID unichar_id) const
Definition: unicharset.h:694
Dict & getDict()
Definition: classify.h:65
inT32 BlobLength
Definition: adaptmatch.cpp:83
int MaxNumUnichars() const
Definition: shapetable.cpp:461
void XHeightRange(int unichar_id, const UNICHARSET &unicharset, const TBOX &bbox, float *min_xht, float *max_xht, float *yshift) const
Definition: normalis.cpp:428
GenericVector< ScoredFont > fonts
Definition: shapetable.h:88
void truncate(int size)
void set_fonts(const GenericVector< tesseract::ScoredFont > &fonts)
Definition: ratngs.h:94
int size() const
Definition: genericvector.h:72
int get_script(UNICHAR_ID unichar_id) const
Definition: unicharset.h:623
ShapeTable * shape_table_
Definition: classify.h:511
#define MAX_FLOAT32
Definition: host.h:66
UNICHARSET unicharset
Definition: ccutil.h:68
double classify_adapted_pruning_threshold
Definition: classify.h:442
float FLOAT32
Definition: host.h:42
#define MIN(x, y)
Definition: ndminx.h:28
double classify_adapted_pruning_factor
Definition: classify.h:440
GenericVector< UnicharRating > match
Definition: adaptmatch.cpp:88
#define MAX_MATCHES
Definition: adaptmatch.cpp:68

◆ ConvertProto()

void tesseract::Classify::ConvertProto ( PROTO  Proto,
int  ProtoId,
INT_CLASS  Class 
)

This routine converts Proto to integer format and installs it as ProtoId in Class.

Parameters
Proto: floating-point proto to be converted to integer format
ProtoId: id of proto
Class: integer class to add converted proto to
Returns
none
Note
Globals: none
Exceptions: none
History: Fri Feb 8 11:22:43 1991, DSJ, Created.

Definition at line 516 of file intproto.cpp.

516  {
517  INT_PROTO P;
518  FLOAT32 Param;
519 
520  assert(ProtoId < Class->NumProtos);
521 
522  P = ProtoForProtoId(Class, ProtoId);
523 
524  Param = Proto->A * 128;
525  P->A = TruncateParam(Param, -128, 127, NULL);
526 
527  Param = -Proto->B * 256;
528  P->B = TruncateParam(Param, 0, 255, NULL);
529 
530  Param = Proto->C * 128;
531  P->C = TruncateParam(Param, -128, 127, NULL);
532 
533  Param = Proto->Angle * 256;
534  if (Param < 0 || Param >= 256)
535  P->Angle = 0;
536  else
537  P->Angle = (uinT8) Param;
538 
539  /* round proto length to nearest integer number of pico-features */
540  Param = (Proto->Length / GetPicoFeatureLength()) + 0.5;
541  Class->ProtoLengths[ProtoId] = TruncateParam(Param, 1, 255, NULL);
543  cprintf("Converted ffeat to (A=%d,B=%d,C=%d,L=%d)",
544  P->A, P->B, P->C, Class->ProtoLengths[ProtoId]);
545 } /* ConvertProto */
FLOAT32 A
Definition: protos.h:44
uinT8 * ProtoLengths
Definition: intproto.h:112
int classify_learning_debug_level
Definition: classify.h:418
#define ProtoForProtoId(C, P)
Definition: intproto.h:171
FLOAT32 Length
Definition: protos.h:50
int TruncateParam(FLOAT32 Param, int Min, int Max, char *Id)
Definition: intproto.cpp:1796
float FLOAT32
Definition: host.h:42
FLOAT32 Angle
Definition: protos.h:49
uint8_t uinT8
Definition: host.h:35
void cprintf(const char *format,...)
Definition: callcpp.cpp:40
FLOAT32 C
Definition: protos.h:46
FLOAT32 B
Definition: protos.h:45
#define GetPicoFeatureLength()
Definition: picofeat.h:59

◆ CreateIntTemplates()

INT_TEMPLATES tesseract::Classify::CreateIntTemplates ( CLASSES  FloatProtos,
const UNICHARSET &  target_unicharset 
)

This routine converts from the old floating point format to the new integer format.

Parameters
FloatProtos: prototypes in old floating-point format
target_unicharset: the UNICHARSET to use
Returns
New set of training templates in integer format.
Note
Globals: none
Exceptions: none
History: Thu Feb 7 14:40:42 1991, DSJ, Created.

Definition at line 557 of file intproto.cpp.

559  {
560  INT_TEMPLATES IntTemplates;
561  CLASS_TYPE FClass;
562  INT_CLASS IClass;
563  int ClassId;
564  int ProtoId;
565  int ConfigId;
566 
567  IntTemplates = NewIntTemplates();
568 
569  for (ClassId = 0; ClassId < target_unicharset.size(); ClassId++) {
570  FClass = &(FloatProtos[ClassId]);
571  if (FClass->NumProtos == 0 && FClass->NumConfigs == 0 &&
572  strcmp(target_unicharset.id_to_unichar(ClassId), " ") != 0) {
573  cprintf("Warning: no protos/configs for %s in CreateIntTemplates()\n",
574  target_unicharset.id_to_unichar(ClassId));
575  }
576  assert(UnusedClassIdIn(IntTemplates, ClassId));
577  IClass = NewIntClass(FClass->NumProtos, FClass->NumConfigs);
578  FontSet fs;
579  fs.size = FClass->font_set.size();
580  fs.configs = new int[fs.size];
581  for (int i = 0; i < fs.size; ++i) {
582  fs.configs[i] = FClass->font_set.get(i);
583  }
584  if (this->fontset_table_.contains(fs)) {
585  IClass->font_set_id = this->fontset_table_.get_id(fs);
586  delete[] fs.configs;
587  } else {
588  IClass->font_set_id = this->fontset_table_.push_back(fs);
589  }
590  AddIntClass(IntTemplates, ClassId, IClass);
591 
592  for (ProtoId = 0; ProtoId < FClass->NumProtos; ProtoId++) {
593  AddIntProto(IClass);
594  ConvertProto(ProtoIn(FClass, ProtoId), ProtoId, IClass);
595  AddProtoToProtoPruner(ProtoIn(FClass, ProtoId), ProtoId, IClass,
597  AddProtoToClassPruner(ProtoIn(FClass, ProtoId), ClassId, IntTemplates);
598  }
599 
600  for (ConfigId = 0; ConfigId < FClass->NumConfigs; ConfigId++) {
601  AddIntConfig(IClass);
602  ConvertConfig(FClass->Configurations[ConfigId], ConfigId, IClass);
603  }
604  }
605  return (IntTemplates);
606 } /* CreateIntTemplates */
void AddProtoToClassPruner(PROTO Proto, CLASS_ID ClassId, INT_TEMPLATES Templates)
Definition: intproto.cpp:342
UnicityTable< FontSet > fontset_table_
Definition: classify.h:495
#define UnusedClassIdIn(T, c)
Definition: intproto.h:180
void ConvertProto(PROTO Proto, int ProtoId, INT_CLASS Class)
Definition: intproto.cpp:516
int size() const
Return the size used.
inT16 NumConfigs
Definition: protos.h:62
inT16 NumProtos
Definition: protos.h:59
CONFIGS Configurations
Definition: protos.h:64
const char * id_to_unichar(UNICHAR_ID id) const
Definition: unicharset.cpp:266
int classify_learning_debug_level
Definition: classify.h:418
int AddIntConfig(INT_CLASS Class)
Definition: intproto.cpp:270
INT_TEMPLATES NewIntTemplates()
Definition: intproto.cpp:723
int AddIntProto(INT_CLASS Class)
Definition: intproto.cpp:293
void AddIntClass(INT_TEMPLATES Templates, CLASS_ID ClassId, INT_CLASS Class)
Definition: intproto.cpp:238
void AddProtoToProtoPruner(PROTO Proto, int ProtoId, INT_CLASS Class, bool debug)
Definition: intproto.cpp:384
void ConvertConfig(BIT_VECTOR Config, int ConfigId, INT_CLASS Class)
Definition: intproto.cpp:487
UnicityTableEqEq< int > font_set
Definition: protos.h:65
int size() const
Definition: unicharset.h:299
void cprintf(const char *format,...)
Definition: callcpp.cpp:40
const T & get(int id) const
Return the object from an id.
#define ProtoIn(Class, Pid)
Definition: protos.h:123
INT_CLASS NewIntClass(int MaxNumProtos, int MaxNumConfigs)
Definition: intproto.cpp:664

◆ DebugAdaptiveClassifier()

void tesseract::Classify::DebugAdaptiveClassifier ( TBLOB *  blob,
ADAPT_RESULTS *  Results 
)
Parameters
blob: blob whose classification is being debugged
Results: results of match being debugged

Globals: none

Note
Exceptions: none
History: Wed Mar 13 16:44:41 1991, DSJ, Created.

Definition at line 1533 of file adaptmatch.cpp.

1534  {
1535  if (static_classifier_ == NULL) return;
1536  INT_FX_RESULT_STRUCT fx_info;
1538  TrainingSample* sample =
1539  BlobToTrainingSample(*blob, false, &fx_info, &bl_features);
1540  if (sample == NULL) return;
1541  static_classifier_->DebugDisplay(*sample, blob->denorm().pix(),
1542  Results->best_unichar_id);
1543 } /* DebugAdaptiveClassifier */
Pix * pix() const
Definition: normalis.h:248
const DENORM & denorm() const
Definition: blobs.h:340
TrainingSample * BlobToTrainingSample(const TBLOB &blob, bool nonlinear_norm, INT_FX_RESULT_STRUCT *fx_info, GenericVector< INT_FEATURE_STRUCT > *bl_features)
Definition: intfx.cpp:81
Definition: cluster.h:32
UNICHAR_ID best_unichar_id
Definition: adaptmatch.cpp:85
virtual void DebugDisplay(const TrainingSample &sample, Pix *page_pix, UNICHAR_ID unichar_id)

◆ DisplayAdaptedChar()

void tesseract::Classify::DisplayAdaptedChar ( TBLOB blob,
INT_CLASS_STRUCT int_class 
)

Definition at line 965 of file adaptmatch.cpp.

965  {
966 #ifndef GRAPHICS_DISABLED
967  INT_FX_RESULT_STRUCT fx_info;
969  TrainingSample* sample =
971  &bl_features);
972  if (sample == NULL) return;
973 
974  UnicharRating int_result;
975  im_.Match(int_class, AllProtosOn, AllConfigsOn,
976  bl_features.size(), &bl_features[0],
979  tprintf("Best match to temp config %d = %4.1f%%.\n",
980  int_result.config, int_result.rating * 100.0);
982  uinT32 ConfigMask;
983  ConfigMask = 1 << int_result.config;
985  im_.Match(int_class, AllProtosOn, (BIT_VECTOR)&ConfigMask,
986  bl_features.size(), &bl_features[0],
990  }
991 
992  delete sample;
993 #endif
994 }
bool classify_nonlinear_norm
Definition: classify.h:415
bool matcher_debug_separate_windows
Definition: classify.h:457
int classify_adapt_feature_threshold
Definition: classify.h:446
BIT_VECTOR AllProtosOn
Definition: classify.h:479
#define tprintf(...)
Definition: tprintf.h:31
uinT32 * BIT_VECTOR
Definition: bitvec.h:28
int size() const
Definition: genericvector.h:72
int classify_learning_debug_level
Definition: classify.h:418
uint32_t uinT32
Definition: host.h:39
TrainingSample * BlobToTrainingSample(const TBLOB &blob, bool nonlinear_norm, INT_FX_RESULT_STRUCT *fx_info, GenericVector< INT_FEATURE_STRUCT > *bl_features)
Definition: intfx.cpp:81
IntegerMatcher im_
Definition: classify.h:502
#define NO_DEBUG
Definition: adaptmatch.cpp:70
BIT_VECTOR AllConfigsOn
Definition: classify.h:480
Definition: cluster.h:32
void UpdateMatchDisplay()
Definition: intproto.cpp:467
void Match(INT_CLASS ClassTemplate, BIT_VECTOR ProtoMask, BIT_VECTOR ConfigMask, inT16 NumFeatures, const INT_FEATURE_STRUCT *Features, tesseract::UnicharRating *Result, int AdaptFeatureThreshold, int Debug, bool SeparateDebugWindows)
Definition: intmatcher.cpp:475

◆ DoAdaptiveMatch()

void tesseract::Classify::DoAdaptiveMatch ( TBLOB *  Blob,
ADAPT_RESULTS *  Results 
)

This routine performs an adaptive classification. If we have not yet adapted to enough classes, a simple classification to the pre-trained templates is performed. Otherwise, we match the blob against the adapted templates. If the adapted templates do not match well, we try a match against the pre-trained templates. If an adapted template match is found, we do a match to any pre-trained templates which could be ambiguous. The results from all of these classifications are merged together into Results.

Parameters
Blob: blob to be classified
Results: place to put match results

Globals:

  • PreTrainedTemplates built-in training templates
  • AdaptedTemplates templates adapted for this page
  • matcher_reliable_adaptive_result rating limit for a great match
Note
Exceptions: none
History: Tue Mar 12 08:50:11 1991, DSJ, Created.

Definition at line 1569 of file adaptmatch.cpp.

1569  {
1570  UNICHAR_ID *Ambiguities;
1571 
1572  INT_FX_RESULT_STRUCT fx_info;
1574  TrainingSample* sample =
1576  &bl_features);
1577  if (sample == NULL) return;
1578 
1580  tess_cn_matching) {
1581  CharNormClassifier(Blob, *sample, Results);
1582  } else {
1583  Ambiguities = BaselineClassifier(Blob, bl_features, fx_info,
1584  AdaptedTemplates, Results);
1585  if ((!Results->match.empty() &&
1586  MarginalMatch(Results->best_rating,
1588  !tess_bn_matching) ||
1589  Results->match.empty()) {
1590  CharNormClassifier(Blob, *sample, Results);
1591  } else if (Ambiguities && *Ambiguities >= 0 && !tess_bn_matching) {
1592  AmbigClassifier(bl_features, fx_info, Blob,
1595  Ambiguities,
1596  Results);
1597  }
1598  }
1599 
1600  // Force the blob to be classified as noise
1601  // if the results contain only fragments.
1602  // TODO(daria): verify that this is better than
1603  // just adding a NULL classification.
1604  if (!Results->HasNonfragment || Results->match.empty())
1605  ClassifyAsNoise(Results);
1606  delete sample;
1607 } /* DoAdaptiveMatch */
bool classify_nonlinear_norm
Definition: classify.h:415
int UNICHAR_ID
Definition: unichar.h:33
bool HasNonfragment
Definition: adaptmatch.cpp:84
bool empty() const
Definition: genericvector.h:90
double matcher_reliable_adaptive_result
Definition: classify.h:420
bool MarginalMatch(float confidence, float matcher_great_threshold)
Definition: adaptmatch.cpp:122
void AmbigClassifier(const GenericVector< INT_FEATURE_STRUCT > &int_features, const INT_FX_RESULT_STRUCT &fx_info, const TBLOB *blob, INT_TEMPLATES templates, ADAPT_CLASS *classes, UNICHAR_ID *ambiguities, ADAPT_RESULTS *results)
TrainingSample * BlobToTrainingSample(const TBLOB &blob, bool nonlinear_norm, INT_FX_RESULT_STRUCT *fx_info, GenericVector< INT_FEATURE_STRUCT > *bl_features)
Definition: intfx.cpp:81
ADAPT_TEMPLATES AdaptedTemplates
Definition: classify.h:472
int CharNormClassifier(TBLOB *blob, const TrainingSample &sample, ADAPT_RESULTS *adapt_results)
int matcher_permanent_classes_min
Definition: classify.h:425
ADAPT_CLASS Class[MAX_NUM_CLASSES]
Definition: adaptive.h:80
GenericVector< UnicharRating > match
Definition: adaptmatch.cpp:88
Definition: cluster.h:32
void ClassifyAsNoise(ADAPT_RESULTS *Results)
UNICHAR_ID * BaselineClassifier(TBLOB *Blob, const GenericVector< INT_FEATURE_STRUCT > &int_features, const INT_FX_RESULT_STRUCT &fx_info, ADAPT_TEMPLATES Templates, ADAPT_RESULTS *Results)
INT_TEMPLATES PreTrainedTemplates
Definition: classify.h:468
FLOAT32 best_rating
Definition: adaptmatch.cpp:87

◆ EndAdaptiveClassifier()

void tesseract::Classify::EndAdaptiveClassifier ( )

This routine performs cleanup operations on the adaptive classifier. It should be called before the program is terminated. Its main function is to save the adapted templates to a file.

Globals:

Note
Exceptions: none
History: Tue Mar 19 14:37:06 1991, DSJ, Created.

Definition at line 456 of file adaptmatch.cpp.

456  {
457  STRING Filename;
458  FILE *File;
459 
460  if (AdaptedTemplates != NULL &&
462  Filename = imagefile + ADAPT_TEMPLATE_SUFFIX;
463  File = fopen (Filename.string(), "wb");
464  if (File == NULL)
465  cprintf ("Unable to save adapted templates to %s!\n", Filename.string());
466  else {
467  cprintf ("\nSaving adapted templates to %s ...", Filename.string());
468  fflush(stdout);
470  cprintf ("\n");
471  fclose(File);
472  }
473  }
474 
475  if (AdaptedTemplates != NULL) {
477  AdaptedTemplates = NULL;
478  }
479  if (BackupAdaptedTemplates != NULL) {
481  BackupAdaptedTemplates = NULL;
482  }
483 
484  if (PreTrainedTemplates != NULL) {
486  PreTrainedTemplates = NULL;
487  }
489  FreeNormProtos();
490  if (AllProtosOn != NULL) {
495  AllProtosOn = NULL;
496  AllConfigsOn = NULL;
497  AllConfigsOff = NULL;
498  TempProtoMask = NULL;
499  }
500  delete shape_table_;
501  shape_table_ = NULL;
502  if (static_classifier_ != NULL) {
503  delete static_classifier_;
504  static_classifier_ = NULL;
505  }
506 } /* EndAdaptiveClassifier */
BIT_VECTOR TempProtoMask
Definition: classify.h:482
#define ADAPT_TEMPLATE_SUFFIX
Definition: adaptmatch.cpp:66
bool classify_enable_adaptive_matcher
Definition: classify.h:408
void FreeBitVector(BIT_VECTOR BitVector)
Definition: bitvec.cpp:54
Dict & getDict()
Definition: classify.h:65
bool classify_save_adapted_templates
Definition: classify.h:412
BIT_VECTOR AllProtosOn
Definition: classify.h:479
const char * string() const
Definition: strngs.cpp:198
ShapeTable * shape_table_
Definition: classify.h:511
ADAPT_TEMPLATES BackupAdaptedTemplates
Definition: classify.h:476
BIT_VECTOR AllConfigsOff
Definition: classify.h:481
void free_adapted_templates(ADAPT_TEMPLATES templates)
Definition: adaptive.cpp:197
Definition: strngs.h:45
ADAPT_TEMPLATES AdaptedTemplates
Definition: classify.h:472
void WriteAdaptedTemplates(FILE *File, ADAPT_TEMPLATES Templates)
Definition: adaptive.cpp:489
STRING imagefile
Definition: ccutil.h:70
BIT_VECTOR AllConfigsOn
Definition: classify.h:480
void EndDangerousAmbigs()
Definition: stopper.cpp:367
void cprintf(const char *format,...)
Definition: callcpp.cpp:40
void free_int_templates(INT_TEMPLATES templates)
Definition: intproto.cpp:739
INT_TEMPLATES PreTrainedTemplates
Definition: classify.h:468

◆ ExpandShapesAndApplyCorrections()

void tesseract::Classify::ExpandShapesAndApplyCorrections ( ADAPT_CLASS *  classes,
bool  debug,
int  class_id,
int  bottom,
int  top,
float  cp_rating,
int  blob_length,
int  matcher_multiplier,
const uinT8 *  cn_factors,
UnicharRating *  int_result,
ADAPT_RESULTS *  final_results 
)

Definition at line 1153 of file adaptmatch.cpp.

1157  {
1158  if (classes != NULL) {
1159  // Adapted result. Convert configs to fontinfo_ids.
1160  int_result->adapted = true;
1161  for (int f = 0; f < int_result->fonts.size(); ++f) {
1162  int_result->fonts[f].fontinfo_id =
1163  GetFontinfoId(classes[class_id], int_result->fonts[f].fontinfo_id);
1164  }
1165  } else {
1166  // Pre-trained result. Map fonts using font_sets_.
1167  int_result->adapted = false;
1168  for (int f = 0; f < int_result->fonts.size(); ++f) {
1169  int_result->fonts[f].fontinfo_id =
1171  int_result->fonts[f].fontinfo_id);
1172  }
1173  if (shape_table_ != NULL) {
1174  // Two possible cases:
1175  // 1. Flat shapetable. All unichar-ids of the shapes referenced by
1176  // int_result->fonts are the same. In this case build a new vector of
1177  // mapped fonts and replace the fonts in int_result.
1178  // 2. Multi-unichar shapetable. Variable unichars in the shapes referenced
1179  // by int_result. In this case, build a vector of UnicharRating to
1180  // gather together different font-ids for each unichar. Also covers case1.
1181  GenericVector<UnicharRating> mapped_results;
1182  for (int f = 0; f < int_result->fonts.size(); ++f) {
1183  int shape_id = int_result->fonts[f].fontinfo_id;
1184  const Shape& shape = shape_table_->GetShape(shape_id);
1185  for (int c = 0; c < shape.size(); ++c) {
1186  int unichar_id = shape[c].unichar_id;
1187  if (!unicharset.get_enabled(unichar_id)) continue;
1188  // Find the mapped_result for unichar_id.
1189  int r = 0;
1190  for (r = 0; r < mapped_results.size() &&
1191  mapped_results[r].unichar_id != unichar_id; ++r) {}
1192  if (r == mapped_results.size()) {
1193  mapped_results.push_back(*int_result);
1194  mapped_results[r].unichar_id = unichar_id;
1195  mapped_results[r].fonts.truncate(0);
1196  }
1197  for (int i = 0; i < shape[c].font_ids.size(); ++i) {
1198  mapped_results[r].fonts.push_back(
1199  ScoredFont(shape[c].font_ids[i], int_result->fonts[f].score));
1200  }
1201  }
1202  }
1203  for (int m = 0; m < mapped_results.size(); ++m) {
1204  mapped_results[m].rating =
1205  ComputeCorrectedRating(debug, mapped_results[m].unichar_id,
1206  cp_rating, int_result->rating,
1207  int_result->feature_misses, bottom, top,
1208  blob_length, matcher_multiplier, cn_factors);
1209  AddNewResult(mapped_results[m], final_results);
1210  }
1211  return;
1212  }
1213  }
1214  if (unicharset.get_enabled(class_id)) {
1215  int_result->rating = ComputeCorrectedRating(debug, class_id, cp_rating,
1216  int_result->rating,
1217  int_result->feature_misses,
1218  bottom, top, blob_length,
1219  matcher_multiplier, cn_factors);
1220  AddNewResult(*int_result, final_results);
1221  }
1222 }
int push_back(T object)
GenericVector< ScoredFont > fonts
Definition: shapetable.h:88
void truncate(int size)
void AddNewResult(const UnicharRating &new_result, ADAPT_RESULTS *results)
int size() const
Definition: genericvector.h:72
ShapeTable * shape_table_
Definition: classify.h:511
bool get_enabled(UNICHAR_ID unichar_id) const
Definition: unicharset.h:838
const Shape & GetShape(int shape_id) const
Definition: shapetable.h:320
int GetFontinfoId(ADAPT_CLASS Class, uinT8 ConfigId)
Definition: adaptive.cpp:188
UNICHARSET unicharset
Definition: ccutil.h:68
double ComputeCorrectedRating(bool debug, int unichar_id, double cp_rating, double im_rating, int feature_misses, int bottom, int top, int blob_length, int matcher_multiplier, const uinT8 *cn_factors)
int ClassAndConfigIDToFontOrShapeID(int class_id, int int_result_config) const

◆ ExtractFeatures()

void tesseract::Classify::ExtractFeatures ( const TBLOB blob,
bool  nonlinear_norm,
GenericVector< INT_FEATURE_STRUCT > *  bl_features,
GenericVector< INT_FEATURE_STRUCT > *  cn_features,
INT_FX_RESULT_STRUCT results,
GenericVector< int > *  outline_cn_counts 
)
static

Definition at line 445 of file intfx.cpp.

450  {
451  DENORM bl_denorm, cn_denorm;
452  tesseract::Classify::SetupBLCNDenorms(blob, nonlinear_norm,
453  &bl_denorm, &cn_denorm, results);
454  if (outline_cn_counts != NULL)
455  outline_cn_counts->truncate(0);
456  // Iterate the outlines.
457  for (TESSLINE* ol = blob.outlines; ol != NULL; ol = ol->next) {
458  // Iterate the polygon.
459  EDGEPT* loop_pt = ol->FindBestStartPt();
460  EDGEPT* pt = loop_pt;
461  if (pt == NULL) continue;
462  do {
463  if (pt->IsHidden()) continue;
464  // Find a run of equal src_outline.
465  EDGEPT* last_pt = pt;
466  do {
467  last_pt = last_pt->next;
468  } while (last_pt != loop_pt && !last_pt->IsHidden() &&
469  last_pt->src_outline == pt->src_outline);
470  last_pt = last_pt->prev;
471  // Until the adaptive classifier can be weaned off polygon segments,
472  // we have to force extraction from the polygon for the bl_features.
473  ExtractFeaturesFromRun(pt, last_pt, bl_denorm, kStandardFeatureLength,
474  true, bl_features);
475  ExtractFeaturesFromRun(pt, last_pt, cn_denorm, kStandardFeatureLength,
476  false, cn_features);
477  pt = last_pt;
478  } while ((pt = pt->next) != loop_pt);
479  if (outline_cn_counts != NULL)
480  outline_cn_counts->push_back(cn_features->size());
481  }
482  results->NumBL = bl_features->size();
483  results->NumCN = cn_features->size();
484  results->YBottom = blob.bounding_box().bottom();
485  results->YTop = blob.bounding_box().top();
486  results->Width = blob.bounding_box().width();
487 }
TESSLINE * next
Definition: blobs.h:258
C_OUTLINE * src_outline
Definition: blobs.h:171
EDGEPT * prev
Definition: blobs.h:170
TESSLINE * outlines
Definition: blobs.h:377
const double kStandardFeatureLength
Definition: intfx.h:46
int push_back(T object)
void truncate(int size)
int size() const
Definition: genericvector.h:72
static void SetupBLCNDenorms(const TBLOB &blob, bool nonlinear_norm, DENORM *bl_denorm, DENORM *cn_denorm, INT_FX_RESULT_STRUCT *fx_info)
Definition: intfx.cpp:133
bool IsHidden() const
Definition: blobs.h:153
EDGEPT * next
Definition: blobs.h:169
Definition: blobs.h:76
inT16 top() const
Definition: rect.h:54
inT16 width() const
Definition: rect.h:111
inT16 bottom() const
Definition: rect.h:61
TBOX bounding_box() const
Definition: blobs.cpp:482

◆ ExtractIntCNFeatures()

FEATURE_SET tesseract::Classify::ExtractIntCNFeatures ( const TBLOB blob,
const INT_FX_RESULT_STRUCT fx_info 
)
Parameters
blob: blob to extract features from
fx_info: feature-extraction info for blob (a local copy is made and passed to BlobToTrainingSample)
Returns
Integer character-normalized features for blob.
Note
Exceptions: none
History: 8/8/2011, rays, Created.

Definition at line 230 of file picofeat.cpp.

231  {
232  INT_FX_RESULT_STRUCT local_fx_info(fx_info);
235  blob, false, &local_fx_info, &bl_features);
236  if (sample == NULL) return NULL;
237 
238  int num_features = sample->num_features();
239  const INT_FEATURE_STRUCT* features = sample->features();
240  FEATURE_SET feature_set = NewFeatureSet(num_features);
241  for (int f = 0; f < num_features; ++f) {
242  FEATURE feature = NewFeature(&IntFeatDesc);
243 
244  feature->Params[IntX] = features[f].X;
245  feature->Params[IntY] = features[f].Y;
246  feature->Params[IntDir] = features[f].Theta;
247  AddFeature(feature_set, feature);
248  }
249  delete sample;
250 
251  return feature_set;
252 } /* ExtractIntCNFeatures */
FEATURE_SET NewFeatureSet(int NumFeatures)
Definition: picofeat.h:30
const FEATURE_DESC_STRUCT IntFeatDesc
TrainingSample * BlobToTrainingSample(const TBLOB &blob, bool nonlinear_norm, INT_FX_RESULT_STRUCT *fx_info, GenericVector< INT_FEATURE_STRUCT > *bl_features)
Definition: intfx.cpp:81
FEATURE NewFeature(const FEATURE_DESC_STRUCT *FeatureDesc)
Definition: ocrfeatures.cpp:88
Definition: picofeat.h:29
const INT_FEATURE_STRUCT * features() const
FLOAT32 Params[1]
Definition: ocrfeatures.h:65
Definition: cluster.h:32
const char features[]
Definition: feature_tests.c:2
BOOL8 AddFeature(FEATURE_SET FeatureSet, FEATURE Feature)
Definition: ocrfeatures.cpp:43

◆ ExtractIntGeoFeatures()

FEATURE_SET tesseract::Classify::ExtractIntGeoFeatures ( const TBLOB blob,
const INT_FX_RESULT_STRUCT fx_info 
)
Parameters
blob: blob to extract features from
fx_info: feature-extraction info for blob (a local copy is made and passed to BlobToTrainingSample)
Returns
Geometric (top/bottom/width) features for blob.
Note
Exceptions: none
History: 8/8/2011, rays, Created.

Definition at line 262 of file picofeat.cpp.

263  {
264  INT_FX_RESULT_STRUCT local_fx_info(fx_info);
267  blob, false, &local_fx_info, &bl_features);
268  if (sample == NULL) return NULL;
269 
270  FEATURE_SET feature_set = NewFeatureSet(1);
271  FEATURE feature = NewFeature(&IntFeatDesc);
272 
273  feature->Params[GeoBottom] = sample->geo_feature(GeoBottom);
274  feature->Params[GeoTop] = sample->geo_feature(GeoTop);
275  feature->Params[GeoWidth] = sample->geo_feature(GeoWidth);
276  AddFeature(feature_set, feature);
277  delete sample;
278 
279  return feature_set;
280 } /* ExtractIntGeoFeatures */
FEATURE_SET NewFeatureSet(int NumFeatures)
const FEATURE_DESC_STRUCT IntFeatDesc
TrainingSample * BlobToTrainingSample(const TBLOB &blob, bool nonlinear_norm, INT_FX_RESULT_STRUCT *fx_info, GenericVector< INT_FEATURE_STRUCT > *bl_features)
Definition: intfx.cpp:81
FEATURE NewFeature(const FEATURE_DESC_STRUCT *FeatureDesc)
Definition: ocrfeatures.cpp:88
int geo_feature(int index) const
FLOAT32 Params[1]
Definition: ocrfeatures.h:65
Definition: cluster.h:32
BOOL8 AddFeature(FEATURE_SET FeatureSet, FEATURE Feature)
Definition: ocrfeatures.cpp:43

◆ ExtractOutlineFeatures()

FEATURE_SET tesseract::Classify::ExtractOutlineFeatures ( TBLOB *  Blob)

Convert each segment in the outline to a feature and return the features.

Parameters
Blob: blob to extract outline-features from
Returns
Outline-features for Blob.
Note
Globals: none
Exceptions: none
History:
  • 11/13/90, DSJ, Created.
  • 05/24/91, DSJ, Updated for either char or baseline normalize.

Definition at line 47 of file outfeat.cpp.

47  {
48  LIST Outlines;
49  LIST RemainingOutlines;
50  MFOUTLINE Outline;
51  FEATURE_SET FeatureSet;
52  FLOAT32 XScale, YScale;
53 
54  FeatureSet = NewFeatureSet (MAX_OUTLINE_FEATURES);
55  if (Blob == NULL)
56  return (FeatureSet);
57 
58  Outlines = ConvertBlob (Blob);
59 
60  NormalizeOutlines(Outlines, &XScale, &YScale);
61  RemainingOutlines = Outlines;
62  iterate(RemainingOutlines) {
63  Outline = (MFOUTLINE) first_node (RemainingOutlines);
64  ConvertToOutlineFeatures(Outline, FeatureSet);
65  }
67  NormalizeOutlineX(FeatureSet);
68  FreeOutlines(Outlines);
69  return (FeatureSet);
70 } /* ExtractOutlineFeatures */
FEATURE_SET NewFeatureSet(int NumFeatures)
#define MAX_OUTLINE_FEATURES
Definition: outfeat.h:35
LIST ConvertBlob(TBLOB *blob)
Definition: mfoutline.cpp:40
void NormalizeOutlines(LIST Outlines, FLOAT32 *XScale, FLOAT32 *YScale)
Definition: mfoutline.cpp:301
LIST MFOUTLINE
Definition: mfoutline.h:33
#define first_node(l)
Definition: oldlist.h:139
float FLOAT32
Definition: host.h:42
void ConvertToOutlineFeatures(MFOUTLINE Outline, FEATURE_SET FeatureSet)
Definition: outfeat.cpp:122
#define iterate(l)
Definition: oldlist.h:159
void FreeOutlines(LIST Outlines)
Definition: mfoutline.cpp:179
void NormalizeOutlineX(FEATURE_SET FeatureSet)
Definition: outfeat.cpp:163

◆ ExtractPicoFeatures()

FEATURE_SET tesseract::Classify::ExtractPicoFeatures ( TBLOB *  Blob)

Operation: Dummy for now.

Globals:

  • classify_norm_method normalization method currently specified
Parameters
Blob: blob to extract pico-features from
Returns
Pico-features for Blob.
Note
Exceptions: none
History: 9/4/90, DSJ, Created.

Definition at line 67 of file picofeat.cpp.

67  {
68  LIST Outlines;
69  LIST RemainingOutlines;
70  MFOUTLINE Outline;
71  FEATURE_SET FeatureSet;
72  FLOAT32 XScale, YScale;
73 
74  FeatureSet = NewFeatureSet(MAX_PICO_FEATURES);
75  Outlines = ConvertBlob(Blob);
76  NormalizeOutlines(Outlines, &XScale, &YScale);
77  RemainingOutlines = Outlines;
78  iterate(RemainingOutlines) {
79  Outline = (MFOUTLINE) first_node (RemainingOutlines);
80  ConvertToPicoFeatures2(Outline, FeatureSet);
81  }
83  NormalizePicoX(FeatureSet);
84  FreeOutlines(Outlines);
85  return (FeatureSet);
86 
87 } /* ExtractPicoFeatures */
FEATURE_SET NewFeatureSet(int NumFeatures)
LIST ConvertBlob(TBLOB *blob)
Definition: mfoutline.cpp:40
void NormalizeOutlines(LIST Outlines, FLOAT32 *XScale, FLOAT32 *YScale)
Definition: mfoutline.cpp:301
void ConvertToPicoFeatures2(MFOUTLINE Outline, FEATURE_SET FeatureSet)
Definition: picofeat.cpp:163
#define MAX_PICO_FEATURES
Definition: picofeat.h:47
void NormalizePicoX(FEATURE_SET FeatureSet)
Definition: picofeat.cpp:204
LIST MFOUTLINE
Definition: mfoutline.h:33
#define first_node(l)
Definition: oldlist.h:139
float FLOAT32
Definition: host.h:42
#define iterate(l)
Definition: oldlist.h:159
void FreeOutlines(LIST Outlines)
Definition: mfoutline.cpp:179

◆ FreeNormProtos()

void tesseract::Classify::FreeNormProtos ( )

Definition at line 162 of file normmatch.cpp.

162  {
163  if (NormProtos != NULL) {
164  for (int i = 0; i < NormProtos->NumProtos; i++)
168  Efree(NormProtos);
169  NormProtos = NULL;
170  }
171 }
void FreeProtoList(LIST *ProtoList)
Definition: cluster.cpp:573
LIST * Protos
Definition: normmatch.cpp:42
PARAM_DESC * ParamDesc
Definition: normmatch.cpp:41
NORM_PROTOS * NormProtos
Definition: classify.h:485
void Efree(void *ptr)
Definition: emalloc.cpp:79

◆ get_fontinfo_table() [1/2]

UnicityTable<FontInfo>& tesseract::Classify::get_fontinfo_table ( )
inline

Definition at line 344 of file classify.h.

344  {
345  return fontinfo_table_;
346  }
UnicityTable< FontInfo > fontinfo_table_
Definition: classify.h:487

◆ get_fontinfo_table() [2/2]

const UnicityTable<FontInfo>& tesseract::Classify::get_fontinfo_table ( ) const
inline

Definition at line 347 of file classify.h.

347  {
348  return fontinfo_table_;
349  }
UnicityTable< FontInfo > fontinfo_table_
Definition: classify.h:487

◆ get_fontset_table()

UnicityTable<FontSet>& tesseract::Classify::get_fontset_table ( )
inline

Definition at line 350 of file classify.h.

350  {
351  return fontset_table_;
352  }
UnicityTable< FontSet > fontset_table_
Definition: classify.h:495

◆ GetAdaptiveFeatures()

int tesseract::Classify::GetAdaptiveFeatures ( TBLOB * Blob,
INT_FEATURE_ARRAY  IntFeatures,
FEATURE_SET * FloatFeatures 
)

This routine sets up the feature extractor to extract baseline normalized pico-features.

The extracted pico-features are converted to integer form and placed in IntFeatures. The original floating-pt. features are returned in FloatFeatures.

Globals: none

Parameters
Blob: blob to extract features from
[out] IntFeatures: array to fill with integer features
[out] FloatFeatures: place to return actual floating-pt features
Returns
Number of pico-features returned (0 if an error occurred)
Note
Exceptions: none
History: Tue Mar 12 17:55:18 1991, DSJ, Created.

Definition at line 798 of file adaptmatch.cpp.

800  {
801  FEATURE_SET Features;
802  int NumFeatures;
803 
804  classify_norm_method.set_value(baseline);
805  Features = ExtractPicoFeatures(Blob);
806 
807  NumFeatures = Features->NumFeatures;
808  if (NumFeatures == 0 || NumFeatures > UNLIKELY_NUM_FEAT) {
809  FreeFeatureSet(Features);
810  return 0;
811  }
812 
813  ComputeIntFeatures(Features, IntFeatures);
814  *FloatFeatures = Features;
815 
816  return NumFeatures;
817 } /* GetAdaptiveFeatures */
void FreeFeatureSet(FEATURE_SET FeatureSet)
Definition: ocrfeatures.cpp:71
#define UNLIKELY_NUM_FEAT
Definition: adaptmatch.cpp:69
void ComputeIntFeatures(FEATURE_SET Features, INT_FEATURE_ARRAY IntFeatures)
Definition: float2int.cpp:100
FEATURE_SET ExtractPicoFeatures(TBLOB *Blob)
Definition: picofeat.cpp:67

◆ GetAmbiguities()

UNICHAR_ID * tesseract::Classify::GetAmbiguities ( TBLOB * Blob,
CLASS_ID  CorrectClass 
)

This routine matches blob to the built-in templates to find out if there are any classes other than the correct class which are potential ambiguities.

Parameters
Blob: blob to get classification ambiguities for
CorrectClass: correct class for Blob

Globals:

  • CurrentRatings used by qsort compare routine
  • PreTrainedTemplates built-in templates
Returns
String containing all possible ambiguous classes.
Note
Exceptions: none
History: Fri Mar 15 08:08:22 1991, DSJ, Created.

Definition at line 1626 of file adaptmatch.cpp.

1627  {
1628  ADAPT_RESULTS *Results = new ADAPT_RESULTS();
1629  UNICHAR_ID *Ambiguities;
1630  int i;
1631 
1632  Results->Initialize();
1633  INT_FX_RESULT_STRUCT fx_info;
1635  TrainingSample* sample =
1637  &bl_features);
1638  if (sample == NULL) {
1639  delete Results;
1640  return NULL;
1641  }
1642 
1643  CharNormClassifier(Blob, *sample, Results);
1644  delete sample;
1645  RemoveBadMatches(Results);
1647 
1648  /* copy the class id's into an string of ambiguities - don't copy if
1649  the correct class is the only class id matched */
1650  Ambiguities = new UNICHAR_ID[Results->match.size() + 1];
1651  if (Results->match.size() > 1 ||
1652  (Results->match.size() == 1 &&
1653  Results->match[0].unichar_id != CorrectClass)) {
1654  for (i = 0; i < Results->match.size(); i++)
1655  Ambiguities[i] = Results->match[i].unichar_id;
1656  Ambiguities[i] = -1;
1657  } else {
1658  Ambiguities[0] = -1;
1659  }
1660 
1661  delete Results;
1662  return Ambiguities;
1663 } /* GetAmbiguities */
void RemoveBadMatches(ADAPT_RESULTS *Results)
bool classify_nonlinear_norm
Definition: classify.h:415
int UNICHAR_ID
Definition: unichar.h:33
static int SortDescendingRating(const void *t1, const void *t2)
Definition: shapetable.h:56
void Initialize()
Definition: adaptmatch.cpp:93
int size() const
Definition: genericvector.h:72
TrainingSample * BlobToTrainingSample(const TBLOB &blob, bool nonlinear_norm, INT_FX_RESULT_STRUCT *fx_info, GenericVector< INT_FEATURE_STRUCT > *bl_features)
Definition: intfx.cpp:81
int CharNormClassifier(TBLOB *blob, const TrainingSample &sample, ADAPT_RESULTS *adapt_results)
GenericVector< UnicharRating > match
Definition: adaptmatch.cpp:88
Definition: cluster.h:32

◆ GetCharNormFeature()

int tesseract::Classify::GetCharNormFeature ( const INT_FX_RESULT_STRUCT & fx_info,
INT_TEMPLATES  templates,
uinT8 * pruner_norm_array,
uinT8 * char_norm_array 
)

This routine calls the integer (Hardware) feature extractor if it has not been called before for this blob.

The results from the feature extractor are placed into globals so that they can be used in other routines without re-extracting the features.

It then copies the char norm features into the IntFeatures array provided by the caller.

Parameters
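templates: used to compute char norm adjustments
pruner_norm_array: array of factors from blob normalization process
char_norm_array: array to fill with dummy char norm adjustments
fx_info: feature-extraction results for the blob (its Ymean, Length, Rx and Ry fields are read)

Globals: (none listed)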
Returns
Number of features extracted or 0 if an error occurred.
Note
Exceptions: none
History: Tue May 28 10:40:52 1991, DSJ, Created.

Definition at line 1714 of file adaptmatch.cpp.

1717  {
1718  FEATURE norm_feature = NewFeature(&CharNormDesc);
1719  float baseline = kBlnBaselineOffset;
1720  float scale = MF_SCALE_FACTOR;
1721  norm_feature->Params[CharNormY] = (fx_info.Ymean - baseline) * scale;
1722  norm_feature->Params[CharNormLength] =
1723  fx_info.Length * scale / LENGTH_COMPRESSION;
1724  norm_feature->Params[CharNormRx] = fx_info.Rx * scale;
1725  norm_feature->Params[CharNormRy] = fx_info.Ry * scale;
1726  // Deletes norm_feature.
1727  ComputeCharNormArrays(norm_feature, templates, char_norm_array,
1728  pruner_norm_array);
1729  return IntCastRounded(fx_info.Length / kStandardFeatureLength);
1730 } /* GetCharNormFeature */
const FEATURE_DESC_STRUCT CharNormDesc
#define LENGTH_COMPRESSION
Definition: normfeat.h:26
const double kStandardFeatureLength
Definition: intfx.h:46
const int kBlnBaselineOffset
Definition: normalis.h:29
#define MF_SCALE_FACTOR
Definition: mfoutline.h:63
int IntCastRounded(double x)
Definition: helpers.h:179
void ComputeCharNormArrays(FEATURE_STRUCT *norm_feature, INT_TEMPLATES_STRUCT *templates, uinT8 *char_norm_array, uinT8 *pruner_array)
FEATURE NewFeature(const FEATURE_DESC_STRUCT *FeatureDesc)
Definition: ocrfeatures.cpp:88
FLOAT32 Params[1]
Definition: ocrfeatures.h:65

◆ GetClassToDebug()

CLASS_ID tesseract::Classify::GetClassToDebug ( const char *  Prompt,
bool *  adaptive_on,
bool *  pretrained_on,
int * shape_id 
)

This routine prompts the user with Prompt and waits for the user to enter something in the debug window.

Parameters
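Prompt: prompt to print while waiting for input from window
[out] adaptive_on: written when a popup command is handled: true for IDA_ADAPTIVE and for any other character command, false for IDA_STATIC and for IDA_SHAPE_INDEX (with a shape table loaded)
[out] pretrained_on: written when a popup command is handled: false for IDA_ADAPTIVE, true otherwise
[out] shape_id: receives the shape index parsed from the event parameter for IDA_SHAPE_INDEX; set to -1 for IDA_ADAPTIVE or when no shape table is loaded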
Returns
Character entered in the debug window.
Note
Globals: none
Exceptions: none
History: Thu Mar 21 16:55:13 1991, DSJ, Created.

Definition at line 1329 of file intproto.cpp.

1330  {
1331  tprintf("%s\n", Prompt);
1332  SVEvent* ev;
1333  SVEventType ev_type;
1334  int unichar_id = INVALID_UNICHAR_ID;
1335  // Wait until a click or popup event.
1336  do {
1338  ev_type = ev->type;
1339  if (ev_type == SVET_POPUP) {
1340  if (ev->command_id == IDA_SHAPE_INDEX) {
1341  if (shape_table_ != NULL) {
1342  *shape_id = atoi(ev->parameter);
1343  *adaptive_on = false;
1344  *pretrained_on = true;
1345  if (*shape_id >= 0 && *shape_id < shape_table_->NumShapes()) {
1346  int font_id;
1347  shape_table_->GetFirstUnicharAndFont(*shape_id, &unichar_id,
1348  &font_id);
1349  tprintf("Shape %d, first unichar=%d, font=%d\n",
1350  *shape_id, unichar_id, font_id);
1351  return unichar_id;
1352  }
1353  tprintf("Shape index '%s' not found in shape table\n", ev->parameter);
1354  } else {
1355  tprintf("No shape table loaded!\n");
1356  }
1357  } else {
1359  unichar_id = unicharset.unichar_to_id(ev->parameter);
1360  if (ev->command_id == IDA_ADAPTIVE) {
1361  *adaptive_on = true;
1362  *pretrained_on = false;
1363  *shape_id = -1;
1364  } else if (ev->command_id == IDA_STATIC) {
1365  *adaptive_on = false;
1366  *pretrained_on = true;
1367  } else {
1368  *adaptive_on = true;
1369  *pretrained_on = true;
1370  }
1371  if (ev->command_id == IDA_ADAPTIVE || shape_table_ == NULL) {
1372  *shape_id = -1;
1373  return unichar_id;
1374  }
1375  for (int s = 0; s < shape_table_->NumShapes(); ++s) {
1376  if (shape_table_->GetShape(s).ContainsUnichar(unichar_id)) {
1377  tprintf("%s\n", shape_table_->DebugStr(s).string());
1378  }
1379  }
1380  } else {
1381  tprintf("Char class '%s' not found in unicharset",
1382  ev->parameter);
1383  }
1384  }
1385  }
1386  delete ev;
1387  } while (ev_type != SVET_CLICK);
1388  return 0;
1389 } /* GetClassToDebug */
char * parameter
Definition: scrollview.h:71
SVEventType type
Definition: scrollview.h:64
bool contains_unichar(const char *const unichar_repr) const
Definition: unicharset.cpp:644
STRING DebugStr(int shape_id) const
Definition: shapetable.cpp:287
ScrollView * IntMatchWindow
Definition: intproto.cpp:179
#define tprintf(...)
Definition: tprintf.h:31
const char * string() const
Definition: strngs.cpp:198
int NumShapes() const
Definition: shapetable.h:275
void GetFirstUnicharAndFont(int shape_id, int *unichar_id, int *font_id) const
Definition: shapetable.cpp:410
ShapeTable * shape_table_
Definition: classify.h:511
int command_id
Definition: scrollview.h:70
const Shape & GetShape(int shape_id) const
Definition: shapetable.h:320
UNICHARSET unicharset
Definition: ccutil.h:68
SVEventType
Definition: scrollview.h:45
bool ContainsUnichar(int unichar_id) const
Definition: shapetable.cpp:152
UNICHAR_ID unichar_to_id(const char *const unichar_repr) const
Definition: unicharset.cpp:194
SVEvent * AwaitEvent(SVEventType type)
Definition: scrollview.cpp:449

◆ getDict()

Dict& tesseract::Classify::getDict ( )
inline

Definition at line 65 of file classify.h.

65  {
66  return dict_;
67  }

◆ GetFontinfoId()

int tesseract::Classify::GetFontinfoId ( ADAPT_CLASS  Class,
uinT8  ConfigId 
)

Definition at line 188 of file adaptive.cpp.

188  {
189  return (ConfigIsPermanent(Class, ConfigId) ?
190  PermConfigFor(Class, ConfigId)->FontinfoId :
191  TempConfigFor(Class, ConfigId)->FontinfoId);
192 }
#define PermConfigFor(Class, ConfigId)
Definition: adaptive.h:104
#define TempConfigFor(Class, ConfigId)
Definition: adaptive.h:101
#define ConfigIsPermanent(Class, ConfigId)
Definition: adaptive.h:92

◆ InitAdaptedClass()

void tesseract::Classify::InitAdaptedClass ( TBLOB * Blob,
CLASS_ID  ClassId,
int  FontinfoId,
ADAPT_CLASS  Class,
ADAPT_TEMPLATES  Templates 
)

This routine creates a new adapted class and uses Blob as the model for the first config in that class.

Parameters
Blob: blob to model new class after
ClassId: id of the class to be initialized
FontinfoId: font information inferred from pre-trained templates
Class: adapted class to be initialized
Templates: adapted templates to add new class to

Globals:

Note
Exceptions: none
History: Thu Mar 14 12:49:39 1991, DSJ, Created.

Definition at line 703 of file adaptmatch.cpp.

707  {
708  FEATURE_SET Features;
709  int Fid, Pid;
710  FEATURE Feature;
711  int NumFeatures;
712  TEMP_PROTO TempProto;
713  PROTO Proto;
714  INT_CLASS IClass;
716 
717  classify_norm_method.set_value(baseline);
718  Features = ExtractOutlineFeatures(Blob);
719  NumFeatures = Features->NumFeatures;
720  if (NumFeatures > UNLIKELY_NUM_FEAT || NumFeatures <= 0) {
721  FreeFeatureSet(Features);
722  return;
723  }
724 
725  Config = NewTempConfig(NumFeatures - 1, FontinfoId);
726  TempConfigFor(Class, 0) = Config;
727 
728  /* this is a kludge to construct cutoffs for adapted templates */
729  if (Templates == AdaptedTemplates)
730  BaselineCutoffs[ClassId] = CharNormCutoffs[ClassId];
731 
732  IClass = ClassForClassId (Templates->Templates, ClassId);
733 
734  for (Fid = 0; Fid < Features->NumFeatures; Fid++) {
735  Pid = AddIntProto (IClass);
736  assert (Pid != NO_PROTO);
737 
738  Feature = Features->Features[Fid];
739  TempProto = NewTempProto ();
740  Proto = &(TempProto->Proto);
741 
742  /* compute proto params - NOTE that Y_DIM_OFFSET must be used because
743  ConvertProto assumes that the Y dimension varies from -0.5 to 0.5
744  instead of the -0.25 to 0.75 used in baseline normalization */
745  Proto->Angle = Feature->Params[OutlineFeatDir];
746  Proto->X = Feature->Params[OutlineFeatX];
747  Proto->Y = Feature->Params[OutlineFeatY] - Y_DIM_OFFSET;
748  Proto->Length = Feature->Params[OutlineFeatLength];
749  FillABC(Proto);
750 
751  TempProto->ProtoId = Pid;
752  SET_BIT (Config->Protos, Pid);
753 
754  ConvertProto(Proto, Pid, IClass);
755  AddProtoToProtoPruner(Proto, Pid, IClass,
757 
758  Class->TempProtos = push (Class->TempProtos, TempProto);
759  }
760  FreeFeatureSet(Features);
761 
762  AddIntConfig(IClass);
763  ConvertConfig (AllProtosOn, 0, IClass);
764 
766  tprintf("Added new class '%s' with class id %d and %d protos.\n",
767  unicharset.id_to_unichar(ClassId), ClassId, NumFeatures);
769  DisplayAdaptedChar(Blob, IClass);
770  }
771 
772  if (IsEmptyAdaptedClass(Class))
773  (Templates->NumNonEmptyClasses)++;
774 } /* InitAdaptedClass */
FEATURE Features[1]
Definition: ocrfeatures.h:72
void FreeFeatureSet(FEATURE_SET FeatureSet)
Definition: ocrfeatures.cpp:71
TEMP_CONFIG NewTempConfig(int MaxProtoId, int FontinfoId)
Definition: adaptive.cpp:221
void ConvertProto(PROTO Proto, int ProtoId, INT_CLASS Class)
Definition: intproto.cpp:516
#define UNLIKELY_NUM_FEAT
Definition: adaptmatch.cpp:69
void DisplayAdaptedChar(TBLOB *blob, INT_CLASS_STRUCT *int_class)
Definition: adaptmatch.cpp:965
#define Y_DIM_OFFSET
Definition: adaptmatch.cpp:75
BIT_VECTOR AllProtosOn
Definition: classify.h:479
#define tprintf(...)
Definition: tprintf.h:31
void FillABC(PROTO Proto)
Definition: protos.cpp:197
BIT_VECTOR Protos
Definition: adaptive.h:44
CLUSTERCONFIG Config
const char * id_to_unichar(UNICHAR_ID id) const
Definition: unicharset.cpp:266
int classify_learning_debug_level
Definition: classify.h:418
INT_TEMPLATES Templates
Definition: adaptive.h:76
FLOAT32 Length
Definition: protos.h:50
#define ClassForClassId(T, c)
Definition: intproto.h:181
int AddIntConfig(INT_CLASS Class)
Definition: intproto.cpp:270
#define TempConfigFor(Class, ConfigId)
Definition: adaptive.h:101
uinT16 ProtoId
Definition: adaptive.h:30
ADAPT_TEMPLATES AdaptedTemplates
Definition: classify.h:472
int AddIntProto(INT_CLASS Class)
Definition: intproto.cpp:293
UNICHARSET unicharset
Definition: ccutil.h:68
FLOAT32 Y
Definition: protos.h:48
FLOAT32 X
Definition: protos.h:47
TEMP_PROTO NewTempProto()
Definition: adaptive.cpp:248
void AddProtoToProtoPruner(PROTO Proto, int ProtoId, INT_CLASS Class, bool debug)
Definition: intproto.cpp:384
void ConvertConfig(BIT_VECTOR Config, int ConfigId, INT_CLASS Class)
Definition: intproto.cpp:487
FLOAT32 Angle
Definition: protos.h:49
#define NO_PROTO
Definition: matchdefs.h:42
PROTO_STRUCT Proto
Definition: adaptive.h:32
#define IsEmptyAdaptedClass(Class)
Definition: adaptive.h:89
#define SET_BIT(array, bit)
Definition: bitvec.h:57
FLOAT32 Params[1]
Definition: ocrfeatures.h:65
LIST push(LIST list, void *element)
Definition: oldlist.cpp:317
FEATURE_SET ExtractOutlineFeatures(TBLOB *Blob)
Definition: outfeat.cpp:47

◆ InitAdaptiveClassifier()

void tesseract::Classify::InitAdaptiveClassifier ( TessdataManager * mgr )

This routine reads in the training information needed by the adaptive classifier and saves it into global variables.

Parameters:
  • load_pre_trained_templates: Indicates whether the pre-trained templates (inttemp, normproto and pffmtable components) should be loaded. Should only be set to true if the necessary classifier components are present in the [lang].traineddata file.
    (Note: the signature shown above takes a TessdataManager *mgr instead of this flag; this parameter description appears to predate that change. In the listing below, the pre-trained components are loaded when language_data_path_prefix is non-empty and mgr is not null.)

Globals:
  • BuiltInTemplatesFile: file to get built-in temps from
  • BuiltInCutoffsFile: file to get avg. feat per class from
  • classify_use_pre_adapted_templates: enables use of pre-adapted templates

Note
History: Mon Mar 11 12:49:34 1991, DSJ, Created.

Definition at line 527 of file adaptmatch.cpp.

527  {
529  return;
530  if (AllProtosOn != NULL)
531  EndAdaptiveClassifier(); // Don't leak with multiple inits.
532 
533  // If there is no language_data_path_prefix, the classifier will be
534  // adaptive only.
535  if (language_data_path_prefix.length() > 0 && mgr != nullptr) {
536  TFile fp;
537  ASSERT_HOST(mgr->GetComponent(TESSDATA_INTTEMP, &fp));
539 
540  if (mgr->GetComponent(TESSDATA_SHAPE_TABLE, &fp)) {
541  shape_table_ = new ShapeTable(unicharset);
542  if (!shape_table_->DeSerialize(&fp)) {
543  tprintf("Error loading shape table!\n");
544  delete shape_table_;
545  shape_table_ = NULL;
546  }
547  }
548 
549  ASSERT_HOST(mgr->GetComponent(TESSDATA_PFFMTABLE, &fp));
550  ReadNewCutoffs(&fp, CharNormCutoffs);
551 
552  ASSERT_HOST(mgr->GetComponent(TESSDATA_NORMPROTO, &fp));
553  NormProtos = ReadNormProtos(&fp);
554  static_classifier_ = new TessClassifier(false, this);
555  }
556 
558  InitIntegerFX();
559 
567 
568  for (int i = 0; i < MAX_NUM_CLASSES; i++) {
569  BaselineCutoffs[i] = 0;
570  }
571 
573  TFile fp;
574  STRING Filename;
575 
576  Filename = imagefile;
577  Filename += ADAPT_TEMPLATE_SUFFIX;
578  if (!fp.Open(Filename.string(), nullptr)) {
580  } else {
581  cprintf("\nReading pre-adapted templates from %s ...\n",
582  Filename.string());
583  fflush(stdout);
585  cprintf("\n");
587 
588  for (int i = 0; i < AdaptedTemplates->Templates->NumClasses; i++) {
589  BaselineCutoffs[i] = CharNormCutoffs[i];
590  }
591  }
592  } else {
593  if (AdaptedTemplates != NULL)
596  }
597 } /* InitAdaptiveClassifier */
BIT_VECTOR TempProtoMask
Definition: classify.h:482
#define ADAPT_TEMPLATE_SUFFIX
Definition: adaptmatch.cpp:66
bool classify_enable_adaptive_matcher
Definition: classify.h:408
#define MAX_NUM_PROTOS
Definition: intproto.h:47
#define WordsInVectorOfSize(NumBits)
Definition: bitvec.h:63
ADAPT_TEMPLATES NewAdaptedTemplates(bool InitFromUnicharset)
Definition: adaptive.cpp:165
NORM_PROTOS * ReadNormProtos(TFile *fp)
Definition: normmatch.cpp:245
BIT_VECTOR AllProtosOn
Definition: classify.h:479
#define tprintf(...)
Definition: tprintf.h:31
const char * string() const
Definition: strngs.cpp:198
bool DeSerialize(TFile *fp)
Definition: shapetable.cpp:252
STRING language_data_path_prefix
Definition: ccutil.h:67
void ReadNewCutoffs(TFile *fp, CLASS_CUTOFF_ARRAY Cutoffs)
Definition: cutoffs.cpp:52
ADAPT_TEMPLATES ReadAdaptedTemplates(TFile *File)
Definition: adaptive.cpp:359
inT32 length() const
Definition: strngs.cpp:193
void PrintAdaptedTemplates(FILE *File, ADAPT_TEMPLATES Templates)
Definition: adaptive.cpp:266
BIT_VECTOR NewBitVector(int NumBits)
Definition: bitvec.cpp:89
#define ASSERT_HOST(x)
Definition: errcode.h:84
ShapeTable * shape_table_
Definition: classify.h:511
bool classify_use_pre_adapted_templates
Definition: classify.h:410
INT_TEMPLATES Templates
Definition: adaptive.h:76
BIT_VECTOR AllConfigsOff
Definition: classify.h:481
void Init(tesseract::IntParam *classify_debug_level)
Definition: intmatcher.cpp:679
void free_adapted_templates(ADAPT_TEMPLATES templates)
Definition: adaptive.cpp:197
INT_TEMPLATES ReadIntTemplates(TFile *fp)
Definition: intproto.cpp:761
Definition: strngs.h:45
#define zero_all_bits(array, length)
Definition: bitvec.h:33
#define set_all_bits(array, length)
Definition: bitvec.h:41
IntegerMatcher im_
Definition: classify.h:502
ADAPT_TEMPLATES AdaptedTemplates
Definition: classify.h:472
UNICHARSET unicharset
Definition: ccutil.h:68
void EndAdaptiveClassifier()
Definition: adaptmatch.cpp:456
STRING imagefile
Definition: ccutil.h:70
BIT_VECTOR AllConfigsOn
Definition: classify.h:480
void cprintf(const char *format,...)
Definition: callcpp.cpp:40
#define MAX_NUM_CONFIGS
Definition: intproto.h:46
NORM_PROTOS * NormProtos
Definition: classify.h:485
#define MAX_NUM_CLASSES
Definition: matchdefs.h:31
void InitIntegerFX()
Definition: intfx.cpp:55
INT_TEMPLATES PreTrainedTemplates
Definition: classify.h:468

◆ LargeSpeckle()

bool tesseract::Classify::LargeSpeckle ( const TBLOB & blob )

Definition at line 230 of file classify.cpp.

230  {
231  double speckle_size = kBlnXHeight * speckle_large_max_size;
232  TBOX bbox = blob.bounding_box();
233  return bbox.width() < speckle_size && bbox.height() < speckle_size;
234 }
const int kBlnXHeight
Definition: normalis.h:28
Definition: rect.h:30
inT16 height() const
Definition: rect.h:104
inT16 width() const
Definition: rect.h:111
TBOX bounding_box() const
Definition: blobs.cpp:482
double speckle_large_max_size
Definition: classify.h:500

◆ LearnBlob()

void tesseract::Classify::LearnBlob ( const STRING & fontname,
TBLOB * Blob,
const DENORM & cn_denorm,
const INT_FX_RESULT_STRUCT & fx_info,
const char *  blob_text 
)

Definition at line 69 of file blobclass.cpp.

72  {
74  CharDesc->FeatureSets[0] = ExtractMicros(blob, cn_denorm);
75  CharDesc->FeatureSets[1] = ExtractCharNormFeatures(fx_info);
76  CharDesc->FeatureSets[2] = ExtractIntCNFeatures(*blob, fx_info);
77  CharDesc->FeatureSets[3] = ExtractIntGeoFeatures(*blob, fx_info);
78 
79  if (ValidCharDescription(feature_defs_, CharDesc)) {
80  // Label the features with a class name and font name.
81  tr_file_data_ += "\n";
82  tr_file_data_ += fontname;
83  tr_file_data_ += " ";
84  tr_file_data_ += blob_text;
85  tr_file_data_ += "\n";
86 
87  // write micro-features to file and clean up
88  WriteCharDescription(feature_defs_, CharDesc, &tr_file_data_);
89  } else {
90  tprintf("Blob learned was invalid!\n");
91  }
92  FreeCharDescription(CharDesc);
93 } // LearnBlob
void WriteCharDescription(const FEATURE_DEFS_STRUCT &FeatureDefs, CHAR_DESC CharDesc, STRING *str)
Definition: featdefs.cpp:193
bool ValidCharDescription(const FEATURE_DEFS_STRUCT &FeatureDefs, CHAR_DESC CharDesc)
Definition: featdefs.cpp:214
FEATURE_SET ExtractIntCNFeatures(const TBLOB &blob, const INT_FX_RESULT_STRUCT &fx_info)
Definition: picofeat.cpp:230
FEATURE_DEFS_STRUCT feature_defs_
Definition: classify.h:506
#define tprintf(...)
Definition: tprintf.h:31
FEATURE_SET FeatureSets[NUM_FEATURE_TYPES]
Definition: featdefs.h:44
CHAR_DESC NewCharDescription(const FEATURE_DEFS_STRUCT &FeatureDefs)
Definition: featdefs.cpp:162
FEATURE_SET ExtractCharNormFeatures(const INT_FX_RESULT_STRUCT &fx_info)
Definition: normfeat.cpp:61
FEATURE_SET ExtractIntGeoFeatures(const TBLOB &blob, const INT_FX_RESULT_STRUCT &fx_info)
Definition: picofeat.cpp:262
FEATURE_SET ExtractMicros(TBLOB *Blob, const DENORM &cn_denorm)
Definition: mf.cpp:45
void FreeCharDescription(CHAR_DESC CharDesc)
Definition: featdefs.cpp:141

◆ LearnPieces()

void tesseract::Classify::LearnPieces ( const char *  fontname,
int  start,
int  length,
float  threshold,
CharSegmentationType  segmentation,
const char *  correct_text,
WERD_RES * word 
)

Definition at line 368 of file adaptmatch.cpp.

370  {
371  // TODO(daria) Remove/modify this if/when we want
372  // to train and/or adapt to n-grams.
373  if (segmentation != CST_WHOLE &&
374  (segmentation != CST_FRAGMENT || disable_character_fragments))
375  return;
376 
377  if (length > 1) {
378  SEAM::JoinPieces(word->seam_array, word->chopped_word->blobs, start,
379  start + length - 1);
380  }
381  TBLOB* blob = word->chopped_word->blobs[start];
382  // Rotate the blob if needed for classification.
383  TBLOB* rotated_blob = blob->ClassifyNormalizeIfNeeded();
384  if (rotated_blob == NULL)
385  rotated_blob = blob;
386 
387  #ifndef GRAPHICS_DISABLED
388  // Draw debug windows showing the blob that is being learned if needed.
389  if (strcmp(classify_learn_debug_str.string(), correct_text) == 0) {
390  RefreshDebugWindow(&learn_debug_win_, "LearnPieces", 600,
391  word->chopped_word->bounding_box());
392  rotated_blob->plot(learn_debug_win_, ScrollView::GREEN, ScrollView::BROWN);
393  learn_debug_win_->Update();
394  window_wait(learn_debug_win_);
395  }
396  if (classify_debug_character_fragments && segmentation == CST_FRAGMENT) {
397  ASSERT_HOST(learn_fragments_debug_win_ != NULL); // set up in LearnWord
398  blob->plot(learn_fragments_debug_win_,
400  learn_fragments_debug_win_->Update();
401  }
402  #endif // GRAPHICS_DISABLED
403 
404  if (fontname != NULL) {
405  classify_norm_method.set_value(character); // force char norm spc 30/11/93
406  tess_bn_matching.set_value(false); // turn it off
407  tess_cn_matching.set_value(false);
408  DENORM bl_denorm, cn_denorm;
409  INT_FX_RESULT_STRUCT fx_info;
411  &bl_denorm, &cn_denorm, &fx_info);
412  LearnBlob(fontname, rotated_blob, cn_denorm, fx_info, correct_text);
413  } else if (unicharset.contains_unichar(correct_text)) {
414  UNICHAR_ID class_id = unicharset.unichar_to_id(correct_text);
415  int font_id = word->fontinfo != NULL
416  ? fontinfo_table_.get_id(*word->fontinfo)
417  : 0;
419  tprintf("Adapting to char = %s, thr= %g font_id= %d\n",
420  unicharset.id_to_unichar(class_id), threshold, font_id);
421  // If filename is not NULL we are doing recognition
422  // (as opposed to training), so we must have already set word fonts.
423  AdaptToChar(rotated_blob, class_id, font_id, threshold, AdaptedTemplates);
424  if (BackupAdaptedTemplates != NULL) {
425  // Adapt the backup templates too. They will be used if the primary gets
426  // too full.
427  AdaptToChar(rotated_blob, class_id, font_id, threshold,
429  }
430  } else if (classify_debug_level >= 1) {
431  tprintf("Can't adapt to %s not in unicharset\n", correct_text);
432  }
433  if (rotated_blob != blob) {
434  delete rotated_blob;
435  }
436 
437  SEAM::BreakPieces(word->seam_array, word->chopped_word->blobs, start,
438  start + length - 1);
439 } // LearnPieces.
bool classify_nonlinear_norm
Definition: classify.h:415
bool contains_unichar(const char *const unichar_repr) const
Definition: unicharset.cpp:644
static void BreakPieces(const GenericVector< SEAM *> &seams, const GenericVector< TBLOB *> &blobs, int first, int last)
Definition: seam.cpp:194
int UNICHAR_ID
Definition: unichar.h:33
void RefreshDebugWindow(ScrollView **win, const char *msg, int y_offset, const TBOX &wbox)
Definition: adaptmatch.cpp:220
#define tprintf(...)
Definition: tprintf.h:31
char * classify_learn_debug_str
Definition: classify.h:458
bool classify_debug_character_fragments
Definition: classify.h:454
bool disable_character_fragments
Definition: classify.h:449
#define ASSERT_HOST(x)
Definition: errcode.h:84
const char * id_to_unichar(UNICHAR_ID id) const
Definition: unicharset.cpp:266
int classify_learning_debug_level
Definition: classify.h:418
ADAPT_TEMPLATES BackupAdaptedTemplates
Definition: classify.h:476
static void SetupBLCNDenorms(const TBLOB &blob, bool nonlinear_norm, DENORM *bl_denorm, DENORM *cn_denorm, INT_FX_RESULT_STRUCT *fx_info)
Definition: intfx.cpp:133
static void JoinPieces(const GenericVector< SEAM *> &seams, const GenericVector< TBLOB *> &blobs, int first, int last)
Definition: seam.cpp:216
TBLOB * ClassifyNormalizeIfNeeded() const
Definition: blobs.cpp:363
const FontInfo * fontinfo
Definition: pageres.h:288
static void Update()
Definition: scrollview.cpp:715
char window_wait(ScrollView *win)
Definition: callcpp.cpp:111
void AdaptToChar(TBLOB *Blob, CLASS_ID ClassId, int FontinfoId, FLOAT32 Threshold, ADAPT_TEMPLATES adaptive_templates)
Definition: adaptmatch.cpp:872
ADAPT_TEMPLATES AdaptedTemplates
Definition: classify.h:472
UNICHARSET unicharset
Definition: ccutil.h:68
GenericVector< TBLOB * > blobs
Definition: blobs.h:436
void plot(ScrollView *window, ScrollView::Color color, ScrollView::Color child_color)
Definition: blobs.cpp:524
Definition: blobs.h:261
void LearnBlob(const STRING &fontname, TBLOB *Blob, const DENORM &cn_denorm, const INT_FX_RESULT_STRUCT &fx_info, const char *blob_text)
Definition: blobclass.cpp:69
UNICHAR_ID unichar_to_id(const char *const unichar_repr) const
Definition: unicharset.cpp:194
TWERD * chopped_word
Definition: pageres.h:201
UnicityTable< FontInfo > fontinfo_table_
Definition: classify.h:487
TBOX bounding_box() const
Definition: blobs.cpp:879
GenericVector< SEAM * > seam_array
Definition: pageres.h:203

◆ LearnWord()

void tesseract::Classify::LearnWord ( const char *  fontname,
WERD_RES * word 
)

Definition at line 244 of file adaptmatch.cpp.

244  {
245  int word_len = word->correct_text.size();
246  if (word_len == 0) return;
247 
248  float* thresholds = NULL;
249  if (fontname == NULL) {
250  // Adaption mode.
251  if (!EnableLearning || word->best_choice == NULL)
252  return; // Can't or won't adapt.
253 
255  tprintf("\n\nAdapting to word = %s\n",
256  word->best_choice->debug_string().string());
257  thresholds = new float[word_len];
261  matcher_rating_margin, thresholds);
262  }
263  int start_blob = 0;
264 
265  #ifndef GRAPHICS_DISABLED
267  if (learn_fragmented_word_debug_win_ != NULL) {
268  window_wait(learn_fragmented_word_debug_win_);
269  }
270  RefreshDebugWindow(&learn_fragments_debug_win_, "LearnPieces", 400,
271  word->chopped_word->bounding_box());
272  RefreshDebugWindow(&learn_fragmented_word_debug_win_, "LearnWord", 200,
273  word->chopped_word->bounding_box());
274  word->chopped_word->plot(learn_fragmented_word_debug_win_);
276  }
277  #endif // GRAPHICS_DISABLED
278 
279  for (int ch = 0; ch < word_len; ++ch) {
281  tprintf("\nLearning %s\n", word->correct_text[ch].string());
282  }
283  if (word->correct_text[ch].length() > 0) {
284  float threshold = thresholds != NULL ? thresholds[ch] : 0.0f;
285 
286  LearnPieces(fontname, start_blob, word->best_state[ch], threshold,
287  CST_WHOLE, word->correct_text[ch].string(), word);
288 
289  if (word->best_state[ch] > 1 && !disable_character_fragments) {
290  // Check that the character breaks into meaningful fragments
291  // that each match a whole character with at least
292  // classify_character_fragments_garbage_certainty_threshold
293  bool garbage = false;
294  int frag;
295  for (frag = 0; frag < word->best_state[ch]; ++frag) {
296  TBLOB* frag_blob = word->chopped_word->blobs[start_blob + frag];
298  garbage |= LooksLikeGarbage(frag_blob);
299  }
300  }
301  // Learn the fragments.
302  if (!garbage) {
303  bool pieces_all_natural = word->PiecesAllNatural(start_blob,
304  word->best_state[ch]);
305  if (pieces_all_natural || !prioritize_division) {
306  for (frag = 0; frag < word->best_state[ch]; ++frag) {
307  GenericVector<STRING> tokens;
308  word->correct_text[ch].split(' ', &tokens);
309 
310  tokens[0] = CHAR_FRAGMENT::to_string(
311  tokens[0].string(), frag, word->best_state[ch],
312  pieces_all_natural);
313 
314  STRING full_string;
315  for (int i = 0; i < tokens.size(); i++) {
316  full_string += tokens[i];
317  if (i != tokens.size() - 1)
318  full_string += ' ';
319  }
320  LearnPieces(fontname, start_blob + frag, 1, threshold,
321  CST_FRAGMENT, full_string.string(), word);
322  }
323  }
324  }
325  }
326 
327  // TODO(rays): re-enable this part of the code when we switch to the
328  // new classifier that needs to see examples of garbage.
329  /*
330  if (word->best_state[ch] > 1) {
331  // If the next blob is good, make junk with the rightmost fragment.
332  if (ch + 1 < word_len && word->correct_text[ch + 1].length() > 0) {
333  LearnPieces(fontname, start_blob + word->best_state[ch] - 1,
334  word->best_state[ch + 1] + 1,
335  threshold, CST_IMPROPER, INVALID_UNICHAR, word);
336  }
337  // If the previous blob is good, make junk with the leftmost fragment.
338  if (ch > 0 && word->correct_text[ch - 1].length() > 0) {
339  LearnPieces(fontname, start_blob - word->best_state[ch - 1],
340  word->best_state[ch - 1] + 1,
341  threshold, CST_IMPROPER, INVALID_UNICHAR, word);
342  }
343  }
344  // If the next blob is good, make a join with it.
345  if (ch + 1 < word_len && word->correct_text[ch + 1].length() > 0) {
346  STRING joined_text = word->correct_text[ch];
347  joined_text += word->correct_text[ch + 1];
348  LearnPieces(fontname, start_blob,
349  word->best_state[ch] + word->best_state[ch + 1],
350  threshold, CST_NGRAM, joined_text.string(), word);
351  }
352  */
353  }
354  start_blob += word->best_state[ch];
355  }
356  delete [] thresholds;
357 } // LearnWord.
STRING to_string() const
Definition: unicharset.h:73
double matcher_good_threshold
Definition: classify.h:419
void plot(ScrollView *window)
Definition: blobs.cpp:916
GenericVector< int > best_state
Definition: pageres.h:255
WERD_CHOICE * best_choice
Definition: pageres.h:219
void RefreshDebugWindow(ScrollView **win, const char *msg, int y_offset, const TBOX &wbox)
Definition: adaptmatch.cpp:220
GenericVector< STRING > correct_text
Definition: pageres.h:259
const STRING debug_string() const
Definition: ratngs.h:503
#define tprintf(...)
Definition: tprintf.h:31
bool LooksLikeGarbage(TBLOB *blob)
const char * string() const
Definition: strngs.cpp:198
bool classify_debug_character_fragments
Definition: classify.h:454
int size() const
Definition: genericvector.h:72
bool disable_character_fragments
Definition: classify.h:449
int classify_learning_debug_level
Definition: classify.h:418
double certainty_scale
Definition: classify.h:436
void LearnPieces(const char *fontname, int start, int length, float threshold, CharSegmentationType segmentation, const char *correct_text, WERD_RES *word)
Definition: adaptmatch.cpp:368
double matcher_perfect_threshold
Definition: classify.h:421
Definition: strngs.h:45
static void Update()
Definition: scrollview.cpp:715
char window_wait(ScrollView *win)
Definition: callcpp.cpp:111
int length() const
Definition: genericvector.h:85
GenericVector< TBLOB * > blobs
Definition: blobs.h:436
double classify_character_fragments_garbage_certainty_threshold
Definition: classify.h:452
Definition: blobs.h:261
void ComputeAdaptionThresholds(float certainty_scale, float min_rating, float max_rating, float rating_margin, float *thresholds)
Definition: pageres.cpp:553
double matcher_rating_margin
Definition: classify.h:423
TWERD * chopped_word
Definition: pageres.h:201
TBOX bounding_box() const
Definition: blobs.cpp:879
bool prioritize_division
Definition: classify.h:386
bool PiecesAllNatural(int start, int count) const
Definition: pageres.cpp:1073

◆ LooksLikeGarbage()

bool tesseract::Classify::LooksLikeGarbage ( TBLOB * blob)

Definition at line 1667 of file adaptmatch.cpp.

1667  {
1668  BLOB_CHOICE_LIST *ratings = new BLOB_CHOICE_LIST();
1669  AdaptiveClassifier(blob, ratings);
1670  BLOB_CHOICE_IT ratings_it(ratings);
1673  print_ratings_list("======================\nLooksLikeGarbage() got ",
1674  ratings, unicharset);
1675  }
1676  for (ratings_it.mark_cycle_pt(); !ratings_it.cycled_list();
1677  ratings_it.forward()) {
1678  if (unicharset.get_fragment(ratings_it.data()->unichar_id()) != NULL) {
1679  continue;
1680  }
1681  float certainty = ratings_it.data()->certainty();
1682  delete ratings;
1683  return certainty <
1685  }
1686  delete ratings;
1687  return true; // no whole characters in ratings
1688 }
const UNICHARSET & getUnicharset() const
Definition: dict.h:97
const CHAR_FRAGMENT * get_fragment(UNICHAR_ID unichar_id) const
Definition: unicharset.h:694
Dict & getDict()
Definition: classify.h:65
bool classify_debug_character_fragments
Definition: classify.h:454
UNICHARSET unicharset
Definition: ccutil.h:68
double classify_character_fragments_garbage_certainty_threshold
Definition: classify.h:452
void AdaptiveClassifier(TBLOB *Blob, BLOB_CHOICE_LIST *Choices)
Definition: adaptmatch.cpp:185
void print_ratings_list(const char *msg, BLOB_CHOICE_LIST *ratings, const UNICHARSET &current_unicharset)
Definition: ratngs.cpp:819

◆ MakeNewTemporaryConfig()

int tesseract::Classify::MakeNewTemporaryConfig ( ADAPT_TEMPLATES  Templates,
CLASS_ID  ClassId,
int  FontinfoId,
int  NumFeatures,
INT_FEATURE_ARRAY  Features,
FEATURE_SET  FloatFeatures 
)
Parameters
Templates: adapted templates to add new config to
ClassId: class id to associate with new config
FontinfoId: font information inferred from pre-trained templates
NumFeatures: number of features in Features
Features: features describing model for new config
FloatFeatures: floating-pt representation of features
Returns
The id of the new config created, a negative integer in case of error.
Note
Exceptions: none
History: Fri Mar 15 08:49:46 1991, DSJ, Created.

Definition at line 1778 of file adaptmatch.cpp.

1783  {
1784  INT_CLASS IClass;
1785  ADAPT_CLASS Class;
1786  PROTO_ID OldProtos[MAX_NUM_PROTOS];
1787  FEATURE_ID BadFeatures[MAX_NUM_INT_FEATURES];
1788  int NumOldProtos;
1789  int NumBadFeatures;
1790  int MaxProtoId, OldMaxProtoId;
1791  int BlobLength = 0;
1792  int MaskSize;
1793  int ConfigId;
1795  int i;
1796  int debug_level = NO_DEBUG;
1797 
1799  debug_level =
1801 
1802  IClass = ClassForClassId(Templates->Templates, ClassId);
1803  Class = Templates->Class[ClassId];
1804 
1805  if (IClass->NumConfigs >= MAX_NUM_CONFIGS) {
1806  ++NumAdaptationsFailed;
1808  cprintf("Cannot make new temporary config: maximum number exceeded.\n");
1809  return -1;
1810  }
1811 
1812  OldMaxProtoId = IClass->NumProtos - 1;
1813 
1814  NumOldProtos = im_.FindGoodProtos(IClass, AllProtosOn, AllConfigsOff,
1815  BlobLength, NumFeatures, Features,
1816  OldProtos, classify_adapt_proto_threshold,
1817  debug_level);
1818 
1819  MaskSize = WordsInVectorOfSize(MAX_NUM_PROTOS);
1820  zero_all_bits(TempProtoMask, MaskSize);
1821  for (i = 0; i < NumOldProtos; i++)
1822  SET_BIT(TempProtoMask, OldProtos[i]);
1823 
1824  NumBadFeatures = im_.FindBadFeatures(IClass, TempProtoMask, AllConfigsOn,
1825  BlobLength, NumFeatures, Features,
1826  BadFeatures,
1828  debug_level);
1829 
1830  MaxProtoId = MakeNewTempProtos(FloatFeatures, NumBadFeatures, BadFeatures,
1831  IClass, Class, TempProtoMask);
1832  if (MaxProtoId == NO_PROTO) {
1833  ++NumAdaptationsFailed;
1835  cprintf("Cannot make new temp protos: maximum number exceeded.\n");
1836  return -1;
1837  }
1838 
1839  ConfigId = AddIntConfig(IClass);
1840  ConvertConfig(TempProtoMask, ConfigId, IClass);
1841  Config = NewTempConfig(MaxProtoId, FontinfoId);
1842  TempConfigFor(Class, ConfigId) = Config;
1844 
1846  cprintf("Making new temp config %d fontinfo id %d"
1847  " using %d old and %d new protos.\n",
1848  ConfigId, Config->FontinfoId,
1849  NumOldProtos, MaxProtoId - OldMaxProtoId);
1850 
1851  return ConfigId;
1852 } /* MakeNewTemporaryConfig */
BIT_VECTOR TempProtoMask
Definition: classify.h:482
PROTO_ID MakeNewTempProtos(FEATURE_SET Features, int NumBadFeat, FEATURE_ID BadFeat[], INT_CLASS IClass, ADAPT_CLASS Class, BIT_VECTOR TempProtoMask)
#define MAX_NUM_PROTOS
Definition: intproto.h:47
uinT8 NumConfigs
Definition: intproto.h:110
TEMP_CONFIG NewTempConfig(int MaxProtoId, int FontinfoId)
Definition: adaptive.cpp:221
#define WordsInVectorOfSize(NumBits)
Definition: bitvec.h:63
uinT8 ProtoVectorSize
Definition: adaptive.h:42
#define copy_all_bits(source, dest, length)
Definition: bitvec.h:49
#define PRINT_MATCH_SUMMARY
Definition: intproto.h:190
int classify_adapt_feature_threshold
Definition: classify.h:446
BIT_VECTOR AllProtosOn
Definition: classify.h:479
BIT_VECTOR Protos
Definition: adaptive.h:44
uinT16 NumProtos
Definition: intproto.h:108
#define PRINT_FEATURE_MATCHES
Definition: intproto.h:193
CLUSTERCONFIG Config
int classify_learning_debug_level
Definition: classify.h:418
INT_TEMPLATES Templates
Definition: adaptive.h:76
#define ClassForClassId(T, c)
Definition: intproto.h:181
BIT_VECTOR AllConfigsOff
Definition: classify.h:481
int AddIntConfig(INT_CLASS Class)
Definition: intproto.cpp:270
#define zero_all_bits(array, length)
Definition: bitvec.h:33
int FindGoodProtos(INT_CLASS ClassTemplate, BIT_VECTOR ProtoMask, BIT_VECTOR ConfigMask, uinT16 BlobLength, inT16 NumFeatures, INT_FEATURE_ARRAY Features, PROTO_ID *ProtoArray, int AdaptProtoThreshold, int Debug)
Definition: intmatcher.cpp:557
int classify_adapt_proto_threshold
Definition: classify.h:444
#define TempConfigFor(Class, ConfigId)
Definition: adaptive.h:101
IntegerMatcher im_
Definition: classify.h:502
int FindBadFeatures(INT_CLASS ClassTemplate, BIT_VECTOR ProtoMask, BIT_VECTOR ConfigMask, uinT16 BlobLength, inT16 NumFeatures, INT_FEATURE_ARRAY Features, FEATURE_ID *FeatureArray, int AdaptFeatureThreshold, int Debug)
Definition: intmatcher.cpp:627
void ConvertConfig(BIT_VECTOR Config, int ConfigId, INT_CLASS Class)
Definition: intproto.cpp:487
#define NO_PROTO
Definition: matchdefs.h:42
#define MAX_NUM_INT_FEATURES
Definition: intproto.h:132
ADAPT_CLASS Class[MAX_NUM_CLASSES]
Definition: adaptive.h:80
#define NO_DEBUG
Definition: adaptmatch.cpp:70
BIT_VECTOR AllConfigsOn
Definition: classify.h:480
void cprintf(const char *format,...)
Definition: callcpp.cpp:40
#define MAX_NUM_CONFIGS
Definition: intproto.h:46
#define SET_BIT(array, bit)
Definition: bitvec.h:57
uinT8 FEATURE_ID
Definition: matchdefs.h:47
#define PRINT_PROTO_MATCHES
Definition: intproto.h:194
inT16 PROTO_ID
Definition: matchdefs.h:41

◆ MakeNewTempProtos()

PROTO_ID tesseract::Classify::MakeNewTempProtos ( FEATURE_SET  Features,
int  NumBadFeat,
FEATURE_ID  BadFeat[],
INT_CLASS  IClass,
ADAPT_CLASS  Class,
BIT_VECTOR  TempProtoMask 
)

This routine finds sets of sequential bad features that all have the same angle and converts each set into a new temporary proto. The temp proto is added to the proto pruner for IClass, pushed onto the list of temp protos in Class, and added to TempProtoMask.

Parameters
Features: floating-pt features describing new character
NumBadFeat: number of bad features to turn into protos
BadFeat: feature id's of bad features
IClass: integer class templates to add new protos to
Class: adapted class templates to add new protos to
TempProtoMask: proto mask to add new protos to

Globals: none

Returns
Max proto id in class after all protos have been added, or NO_PROTO if AddIntProto returns NO_PROTO for any new proto.
Note
Exceptions: none
History: Fri Mar 15 11:39:38 1991, DSJ, Created.

Definition at line 1875 of file adaptmatch.cpp.

1880  {
1881  FEATURE_ID *ProtoStart;
1882  FEATURE_ID *ProtoEnd;
1883  FEATURE_ID *LastBad;
1884  TEMP_PROTO TempProto;
1885  PROTO Proto;
1886  FEATURE F1, F2;
1887  FLOAT32 X1, X2, Y1, Y2;
1888  FLOAT32 A1, A2, AngleDelta;
1889  FLOAT32 SegmentLength;
1890  PROTO_ID Pid;
1891 
1892  for (ProtoStart = BadFeat, LastBad = ProtoStart + NumBadFeat;
1893  ProtoStart < LastBad; ProtoStart = ProtoEnd) {
1894  F1 = Features->Features[*ProtoStart];
1895  X1 = F1->Params[PicoFeatX];
1896  Y1 = F1->Params[PicoFeatY];
1897  A1 = F1->Params[PicoFeatDir];
1898 
1899  for (ProtoEnd = ProtoStart + 1,
1900  SegmentLength = GetPicoFeatureLength();
1901  ProtoEnd < LastBad;
1902  ProtoEnd++, SegmentLength += GetPicoFeatureLength()) {
1903  F2 = Features->Features[*ProtoEnd];
1904  X2 = F2->Params[PicoFeatX];
1905  Y2 = F2->Params[PicoFeatY];
1906  A2 = F2->Params[PicoFeatDir];
1907 
1908  AngleDelta = fabs(A1 - A2);
1909  if (AngleDelta > 0.5)
1910  AngleDelta = 1.0 - AngleDelta;
1911 
1912  if (AngleDelta > matcher_clustering_max_angle_delta ||
1913  fabs(X1 - X2) > SegmentLength ||
1914  fabs(Y1 - Y2) > SegmentLength)
1915  break;
1916  }
1917 
1918  F2 = Features->Features[*(ProtoEnd - 1)];
1919  X2 = F2->Params[PicoFeatX];
1920  Y2 = F2->Params[PicoFeatY];
1921  A2 = F2->Params[PicoFeatDir];
1922 
1923  Pid = AddIntProto(IClass);
1924  if (Pid == NO_PROTO)
1925  return (NO_PROTO);
1926 
1927  TempProto = NewTempProto();
1928  Proto = &(TempProto->Proto);
1929 
1930  /* compute proto params - NOTE that Y_DIM_OFFSET must be used because
1931  ConvertProto assumes that the Y dimension varies from -0.5 to 0.5
1932  instead of the -0.25 to 0.75 used in baseline normalization */
1933  Proto->Length = SegmentLength;
1934  Proto->Angle = A1;
1935  Proto->X = (X1 + X2) / 2.0;
1936  Proto->Y = (Y1 + Y2) / 2.0 - Y_DIM_OFFSET;
1937  FillABC(Proto);
1938 
1939  TempProto->ProtoId = Pid;
1940  SET_BIT(TempProtoMask, Pid);
1941 
1942  ConvertProto(Proto, Pid, IClass);
1943  AddProtoToProtoPruner(Proto, Pid, IClass,
1945 
1946  Class->TempProtos = push(Class->TempProtos, TempProto);
1947  }
1948  return IClass->NumProtos - 1;
1949 } /* MakeNewTempProtos */
BIT_VECTOR TempProtoMask
Definition: classify.h:482
FEATURE Features[1]
Definition: ocrfeatures.h:72
void ConvertProto(PROTO Proto, int ProtoId, INT_CLASS Class)
Definition: intproto.cpp:516
#define Y_DIM_OFFSET
Definition: adaptmatch.cpp:75
void FillABC(PROTO Proto)
Definition: protos.cpp:197
uinT16 NumProtos
Definition: intproto.h:108
double matcher_clustering_max_angle_delta
Definition: classify.h:431
int classify_learning_debug_level
Definition: classify.h:418
FLOAT32 Length
Definition: protos.h:50
uinT16 ProtoId
Definition: adaptive.h:30
int AddIntProto(INT_CLASS Class)
Definition: intproto.cpp:293
FLOAT32 Y
Definition: protos.h:48
FLOAT32 X
Definition: protos.h:47
TEMP_PROTO NewTempProto()
Definition: adaptive.cpp:248
void AddProtoToProtoPruner(PROTO Proto, int ProtoId, INT_CLASS Class, bool debug)
Definition: intproto.cpp:384
float FLOAT32
Definition: host.h:42
FLOAT32 Angle
Definition: protos.h:49
#define NO_PROTO
Definition: matchdefs.h:42
PROTO_STRUCT Proto
Definition: adaptive.h:32
#define SET_BIT(array, bit)
Definition: bitvec.h:57
FLOAT32 Params[1]
Definition: ocrfeatures.h:65
#define GetPicoFeatureLength()
Definition: picofeat.h:59
uinT8 FEATURE_ID
Definition: matchdefs.h:47
LIST push(LIST list, void *element)
Definition: oldlist.cpp:317
inT16 PROTO_ID
Definition: matchdefs.h:41

◆ MakePermanent()

void tesseract::Classify::MakePermanent ( ADAPT_TEMPLATES  Templates,
CLASS_ID  ClassId,
int  ConfigId,
TBLOB * Blob 
)
Parameters
Templates: current set of adaptive templates
ClassId: class containing config to be made permanent
ConfigId: config to be made permanent
Blob: current blob being adapted to

Globals: none

Note
Exceptions: none
History: Thu Mar 14 15:54:08 1991, DSJ, Created.

Definition at line 1964 of file adaptmatch.cpp.

1967  {
1968  UNICHAR_ID *Ambigs;
1970  ADAPT_CLASS Class;
1971  PROTO_KEY ProtoKey;
1972 
1973  Class = Templates->Class[ClassId];
1974  Config = TempConfigFor(Class, ConfigId);
1975 
1976  MakeConfigPermanent(Class, ConfigId);
1977  if (Class->NumPermConfigs == 0)
1978  Templates->NumPermClasses++;
1979  Class->NumPermConfigs++;
1980 
1981  // Initialize permanent config.
1982  Ambigs = GetAmbiguities(Blob, ClassId);
1983  PERM_CONFIG Perm = (PERM_CONFIG) malloc(sizeof(PERM_CONFIG_STRUCT));
1984  Perm->Ambigs = Ambigs;
1985  Perm->FontinfoId = Config->FontinfoId;
1986 
1987  // Free memory associated with temporary config (since ADAPTED_CONFIG
1988  // is a union we need to clean up before we record permanent config).
1989  ProtoKey.Templates = Templates;
1990  ProtoKey.ClassId = ClassId;
1991  ProtoKey.ConfigId = ConfigId;
1992  Class->TempProtos = delete_d(Class->TempProtos, &ProtoKey, MakeTempProtoPerm);
1993  FreeTempConfig(Config);
1994 
1995  // Record permanent config.
1996  PermConfigFor(Class, ConfigId) = Perm;
1997 
1998  if (classify_learning_debug_level >= 1) {
1999  tprintf("Making config %d for %s (ClassId %d) permanent:"
2000  " fontinfo id %d, ambiguities '",
2001  ConfigId, getDict().getUnicharset().debug_str(ClassId).string(),
2002  ClassId, PermConfigFor(Class, ConfigId)->FontinfoId);
2003  for (UNICHAR_ID *AmbigsPointer = Ambigs;
2004  *AmbigsPointer >= 0; ++AmbigsPointer)
2005  tprintf("%s", unicharset.id_to_unichar(*AmbigsPointer));
2006  tprintf("'.\n");
2007  }
2008 } /* MakePermanent */
int UNICHAR_ID
Definition: unichar.h:33
int MakeTempProtoPerm(void *item1, void *item2)
Dict & getDict()
Definition: classify.h:65
void FreeTempConfig(TEMP_CONFIG Config)
Definition: adaptive.cpp:81
PERM_CONFIG_STRUCT * PERM_CONFIG
Definition: adaptive.h:54
#define tprintf(...)
Definition: tprintf.h:31
#define PermConfigFor(Class, ConfigId)
Definition: adaptive.h:104
#define MakeConfigPermanent(Class, ConfigId)
Definition: adaptive.h:95
CLUSTERCONFIG Config
const char * id_to_unichar(UNICHAR_ID id) const
Definition: unicharset.cpp:266
int classify_learning_debug_level
Definition: classify.h:418
LIST delete_d(LIST list, void *key, int_compare is_equal)
Definition: oldlist.cpp:120
#define TempConfigFor(Class, ConfigId)
Definition: adaptive.h:101
UNICHARSET unicharset
Definition: ccutil.h:68
CLASS_ID ClassId
Definition: adaptmatch.cpp:115
ADAPT_CLASS Class[MAX_NUM_CLASSES]
Definition: adaptive.h:80
uinT8 NumPermConfigs
Definition: adaptive.h:64
ADAPT_TEMPLATES Templates
Definition: adaptmatch.cpp:114
UNICHAR_ID * GetAmbiguities(TBLOB *Blob, CLASS_ID CorrectClass)
UNICHAR_ID * Ambigs
Definition: adaptive.h:51

◆ MasterMatcher()

void tesseract::Classify::MasterMatcher ( INT_TEMPLATES  templates,
inT16  num_features,
const INT_FEATURE_STRUCT * features,
const uinT8 * norm_factors,
ADAPT_CLASS * classes,
int  debug,
int  matcher_multiplier,
const TBOX blob_box,
const GenericVector< CP_RESULT_STRUCT > &  results,
ADAPT_RESULTS * final_results 
)

Factored-out calls to IntegerMatcher based on class pruner results. Returns integer matcher results inside CLASS_PRUNER_RESULTS structure.

Definition at line 1113 of file adaptmatch.cpp.

1122  {
1123  int top = blob_box.top();
1124  int bottom = blob_box.bottom();
1125  UnicharRating int_result;
1126  for (int c = 0; c < results.size(); c++) {
1127  CLASS_ID class_id = results[c].Class;
1128  BIT_VECTOR protos = classes != NULL ? classes[class_id]->PermProtos
1129  : AllProtosOn;
1130  BIT_VECTOR configs = classes != NULL ? classes[class_id]->PermConfigs
1131  : AllConfigsOn;
1132 
1133  int_result.unichar_id = class_id;
1134  im_.Match(ClassForClassId(templates, class_id),
1135  protos, configs,
1136  num_features, features,
1137  &int_result, classify_adapt_feature_threshold, debug,
1139  bool debug = matcher_debug_level >= 2 || classify_debug_level > 1;
1140  ExpandShapesAndApplyCorrections(classes, debug, class_id, bottom, top,
1141  results[c].Rating,
1142  final_results->BlobLength,
1143  matcher_multiplier, norm_factors,
1144  &int_result, final_results);
1145  }
1146 }
bool matcher_debug_separate_windows
Definition: classify.h:457
BIT_VECTOR PermConfigs
Definition: adaptive.h:68
inT32 BlobLength
Definition: adaptmatch.cpp:83
UNICHAR_ID CLASS_ID
Definition: matchdefs.h:35
int classify_adapt_feature_threshold
Definition: classify.h:446
BIT_VECTOR AllProtosOn
Definition: classify.h:479
uinT32 * BIT_VECTOR
Definition: bitvec.h:28
int size() const
Definition: genericvector.h:72
void ExpandShapesAndApplyCorrections(ADAPT_CLASS *classes, bool debug, int class_id, int bottom, int top, float cp_rating, int blob_length, int matcher_multiplier, const uinT8 *cn_factors, UnicharRating *int_result, ADAPT_RESULTS *final_results)
#define ClassForClassId(T, c)
Definition: intproto.h:181
IntegerMatcher im_
Definition: classify.h:502
inT16 top() const
Definition: rect.h:54
BIT_VECTOR PermProtos
Definition: adaptive.h:67
BIT_VECTOR AllConfigsOn
Definition: classify.h:480
inT16 bottom() const
Definition: rect.h:61
void Match(INT_CLASS ClassTemplate, BIT_VECTOR ProtoMask, BIT_VECTOR ConfigMask, inT16 NumFeatures, const INT_FEATURE_STRUCT *Features, tesseract::UnicharRating *Result, int AdaptFeatureThreshold, int Debug, bool SeparateDebugWindows)
Definition: intmatcher.cpp:475

◆ NewAdaptedTemplates()

ADAPT_TEMPLATES tesseract::Classify::NewAdaptedTemplates ( bool  InitFromUnicharset)

Allocates memory for adapted templates.

Parameters
InitFromUnicharset: if true, add an empty class for each char in unicharset to the newly created templates
Returns
Ptr to new adapted templates.
Note
Globals: none
Exceptions: none
History: Fri Mar 8 10:15:28 1991, DSJ, Created.

Definition at line 165 of file adaptive.cpp.

165  {
166  ADAPT_TEMPLATES Templates;
167  int i;
168 
169  Templates = (ADAPT_TEMPLATES) Emalloc (sizeof (ADAPT_TEMPLATES_STRUCT));
170 
171  Templates->Templates = NewIntTemplates ();
172  Templates->NumPermClasses = 0;
173  Templates->NumNonEmptyClasses = 0;
174 
175  /* Insert an empty class for each unichar id in unicharset */
176  for (i = 0; i < MAX_NUM_CLASSES; i++) {
177  Templates->Class[i] = NULL;
178  if (InitFromUnicharset && i < unicharset.size()) {
179  AddAdaptedClass(Templates, NewAdaptedClass(), i);
180  }
181  }
182 
183  return (Templates);
184 
185 } /* NewAdaptedTemplates */
ADAPT_CLASS NewAdaptedClass()
Definition: adaptive.cpp:111
void * Emalloc(int Size)
Definition: emalloc.cpp:47
void AddAdaptedClass(ADAPT_TEMPLATES Templates, ADAPT_CLASS Class, CLASS_ID ClassId)
Definition: adaptive.cpp:50
INT_TEMPLATES Templates
Definition: adaptive.h:76
ADAPT_TEMPLATES_STRUCT * ADAPT_TEMPLATES
Definition: adaptive.h:82
INT_TEMPLATES NewIntTemplates()
Definition: intproto.cpp:723
UNICHARSET unicharset
Definition: ccutil.h:68
ADAPT_CLASS Class[MAX_NUM_CLASSES]
Definition: adaptive.h:80
int size() const
Definition: unicharset.h:299
#define MAX_NUM_CLASSES
Definition: matchdefs.h:31

◆ NormalizeOutlines()

void tesseract::Classify::NormalizeOutlines ( LIST  Outlines,
FLOAT32 * XScale,
FLOAT32 * YScale 
)

This routine normalizes every outline in Outlines according to the currently selected normalization method. It also returns the scale factors that it used to do this scaling. The scale factors returned represent the x and y sizes in the normalized coordinate system that correspond to 1 pixel in the original coordinate system.

Globals:

  • classify_norm_method method being used for normalization
  • classify_char_norm_range map radius of gyration to this value

Parameters
Outlines: list of outlines to be normalized
XScale: x-direction scale factor used by routine
YScale: y-direction scale factor used by routine
Returns
none (Outlines are changed and XScale and YScale are updated)
Note
Exceptions: none
History: Fri Dec 14 08:14:55 1990, DSJ, Created.

Definition at line 301 of file mfoutline.cpp.

303  {
304  MFOUTLINE Outline;
305 
306  switch (classify_norm_method) {
307  case character:
308  ASSERT_HOST(!"How did NormalizeOutlines get called in character mode?");
309  break;
310 
311  case baseline:
312  iterate(Outlines) {
313  Outline = (MFOUTLINE) first_node(Outlines);
314  NormalizeOutline(Outline, 0.0);
315  }
316  *XScale = *YScale = MF_SCALE_FACTOR;
317  break;
318  }
319 } /* NormalizeOutlines */
void NormalizeOutline(MFOUTLINE Outline, FLOAT32 XOrigin)
Definition: mfoutline.cpp:265
#define MF_SCALE_FACTOR
Definition: mfoutline.h:63
#define ASSERT_HOST(x)
Definition: errcode.h:84
LIST MFOUTLINE
Definition: mfoutline.h:33
#define first_node(l)
Definition: oldlist.h:139
#define iterate(l)
Definition: oldlist.h:159

◆ PrintAdaptedTemplates()

void tesseract::Classify::PrintAdaptedTemplates ( FILE *  File,
ADAPT_TEMPLATES  Templates 
)

This routine prints a summary of the adapted templates in Templates to File.

Parameters
File: open text file to print Templates to
Templates: adapted templates to print to File
Note
Globals: none
Exceptions: none
History: Wed Mar 20 13:35:29 1991, DSJ, Created.

Definition at line 266 of file adaptive.cpp.

266  {
267  int i;
268  INT_CLASS IClass;
269  ADAPT_CLASS AClass;
270 
271  fprintf (File, "\n\nSUMMARY OF ADAPTED TEMPLATES:\n\n");
272  fprintf (File, "Num classes = %d; Num permanent classes = %d\n\n",
273  Templates->NumNonEmptyClasses, Templates->NumPermClasses);
274  fprintf (File, " Id NC NPC NP NPP\n");
275  fprintf (File, "------------------------\n");
276 
277  for (i = 0; i < (Templates->Templates)->NumClasses; i++) {
278  IClass = Templates->Templates->Class[i];
279  AClass = Templates->Class[i];
280  if (!IsEmptyAdaptedClass (AClass)) {
281  fprintf (File, "%5d %s %3d %3d %3d %3d\n",
283  IClass->NumConfigs, AClass->NumPermConfigs,
284  IClass->NumProtos,
285  IClass->NumProtos - count (AClass->TempProtos));
286  }
287  }
288  fprintf (File, "\n");
289 
290 } /* PrintAdaptedTemplates */
uinT8 NumConfigs
Definition: intproto.h:110
uinT16 NumProtos
Definition: intproto.h:108
const char * id_to_unichar(UNICHAR_ID id) const
Definition: unicharset.cpp:266
INT_TEMPLATES Templates
Definition: adaptive.h:76
INT_CLASS Class[MAX_NUM_CLASSES]
Definition: intproto.h:124
UNICHARSET unicharset
Definition: ccutil.h:68
ADAPT_CLASS Class[MAX_NUM_CLASSES]
Definition: adaptive.h:80
uinT8 NumPermConfigs
Definition: adaptive.h:64
#define IsEmptyAdaptedClass(Class)
Definition: adaptive.h:89
int count(LIST var_list)
Definition: oldlist.cpp:103

◆ PrintAdaptiveMatchResults()

void tesseract::Classify::PrintAdaptiveMatchResults ( const ADAPT_RESULTS results)

This routine prints the matches in results using tprintf (the function takes no File argument).

Parameters
results: match results to print

Globals: none

Note
Exceptions: none
History: Mon Mar 18 09:24:53 1991, DSJ, Created.

Definition at line 2062 of file adaptmatch.cpp.

2062  {
2063  for (int i = 0; i < results.match.size(); ++i) {
2064  tprintf("%s ", unicharset.debug_str(results.match[i].unichar_id).string());
2065  results.match[i].Print();
2066  }
2067 } /* PrintAdaptiveMatchResults */
#define tprintf(...)
Definition: tprintf.h:31
const char * string() const
Definition: strngs.cpp:198
int size() const
Definition: genericvector.h:72
UNICHARSET unicharset
Definition: ccutil.h:68
GenericVector< UnicharRating > match
Definition: adaptmatch.cpp:88
STRING debug_str(UNICHAR_ID id) const
Definition: unicharset.cpp:318

◆ PruneClasses()

int tesseract::Classify::PruneClasses ( const INT_TEMPLATES_STRUCT * int_templates,
int  num_features,
int  keep_this,
const INT_FEATURE_STRUCT * features,
const uinT8 * normalization_factors,
const uinT16 * expected_num_features,
GenericVector< CP_RESULT_STRUCT > *  results 
)

Runs the class pruner from int_templates on the given features, returning the number of classes output in results.

Parameters
int_templates: Class pruner tables
num_features: Number of features in blob
features: Array of features
normalization_factors: Array of fudge factors from blob normalization process (by CLASS_INDEX)
expected_num_features: Array of expected number of features for each class (by CLASS_INDEX)
results: Sorted Array of pruned classes. Must be an array of size at least int_templates->NumClasses.
keep_this: (not documented in the original; passed through to the class pruner's PruneAndSort step)

Definition at line 412 of file intmatcher.cpp.

417  {
418  ClassPruner pruner(int_templates->NumClasses);
419  // Compute initial match scores for all classes.
420  pruner.ComputeScores(int_templates, num_features, features);
421  // Adjust match scores for number of expected features.
422  pruner.AdjustForExpectedNumFeatures(expected_num_features,
424  // Apply disabled classes in unicharset - only works without a shape_table.
425  if (shape_table_ == NULL)
426  pruner.DisableDisabledClasses(unicharset);
427  // If fragments are disabled, remove them, also only without a shape table.
429  pruner.DisableFragments(unicharset);
430 
431  // If we have good x-heights, apply the given normalization factors.
432  if (normalization_factors != NULL) {
433  pruner.NormalizeForXheight(classify_class_pruner_multiplier,
434  normalization_factors);
435  } else {
436  pruner.NoNormalization();
437  }
438  // Do the actual pruning and sort the short-list.
439  pruner.PruneAndSort(classify_class_pruner_threshold, keep_this,
440  shape_table_ == NULL, unicharset);
441 
442  if (classify_debug_level > 2) {
443  pruner.DebugMatch(*this, int_templates, features);
444  }
445  if (classify_debug_level > 1) {
446  pruner.SummarizeResult(*this, int_templates, expected_num_features,
448  normalization_factors);
449  }
450  // Convert to the expected output format.
451  return pruner.SetupResults(results);
452 }
int classify_cp_cutoff_strength
Definition: classify.h:466
bool disable_character_fragments
Definition: classify.h:449
ShapeTable * shape_table_
Definition: classify.h:511
int classify_class_pruner_multiplier
Definition: classify.h:464
UNICHARSET unicharset
Definition: ccutil.h:68
int classify_class_pruner_threshold
Definition: classify.h:462

◆ ReadAdaptedTemplates()

ADAPT_TEMPLATES tesseract::Classify::ReadAdaptedTemplates ( TFile * fp)

Read a set of adapted templates from fp and return a ptr to the templates.

Parameters
fp: open file to read adapted templates from
Returns
Ptr to adapted templates read from File.
Note
Globals: none
Exceptions: none
History: Mon Mar 18 15:18:10 1991, DSJ, Created.

Definition at line 359 of file adaptive.cpp.

359  {
360  int i;
361  ADAPT_TEMPLATES Templates;
362 
363  /* first read the high level adaptive template struct */
364  Templates = (ADAPT_TEMPLATES) Emalloc (sizeof (ADAPT_TEMPLATES_STRUCT));
365  fp->FRead(Templates, sizeof(ADAPT_TEMPLATES_STRUCT), 1);
366 
367  /* then read in the basic integer templates */
368  Templates->Templates = ReadIntTemplates(fp);
369 
370  /* then read in the adaptive info for each class */
371  for (i = 0; i < (Templates->Templates)->NumClasses; i++) {
372  Templates->Class[i] = ReadAdaptedClass(fp);
373  }
374  return (Templates);
375 
376 } /* ReadAdaptedTemplates */
void * Emalloc(int Size)
Definition: emalloc.cpp:47
ADAPT_CLASS ReadAdaptedClass(TFile *fp)
Definition: adaptive.cpp:306
INT_TEMPLATES Templates
Definition: adaptive.h:76
INT_TEMPLATES ReadIntTemplates(TFile *fp)
Definition: intproto.cpp:761
ADAPT_TEMPLATES_STRUCT * ADAPT_TEMPLATES
Definition: adaptive.h:82
ADAPT_CLASS Class[MAX_NUM_CLASSES]
Definition: adaptive.h:80
int FRead(void *buffer, int size, int count)
Definition: serialis.cpp:108

◆ ReadIntTemplates()

INT_TEMPLATES tesseract::Classify::ReadIntTemplates ( TFile * fp)

This routine reads a set of integer templates from File. File must already be open and must be in the correct binary format.

Parameters
File - open file to read templates from
Returns
Pointer to integer templates read from File.
Note
Globals: none
Exceptions: none
History: Wed Feb 27 11:48:46 1991, DSJ, Created.

Definition at line 761 of file intproto.cpp.

761  {
762  int i, j, w, x, y, z;
763  int unicharset_size;
764  int version_id = 0;
765  INT_TEMPLATES Templates;
766  CLASS_PRUNER_STRUCT* Pruner;
767  INT_CLASS Class;
768  uinT8 *Lengths;
769  PROTO_SET ProtoSet;
770 
771  /* variables for conversion from older inttemp formats */
772  int b, bit_number, last_cp_bit_number, new_b, new_i, new_w;
773  CLASS_ID class_id, max_class_id;
774  inT16 *IndexFor = new inT16[MAX_NUM_CLASSES];
775  CLASS_ID *ClassIdFor = new CLASS_ID[MAX_NUM_CLASSES];
776  CLASS_PRUNER_STRUCT **TempClassPruner =
778  uinT32 SetBitsForMask = // word with NUM_BITS_PER_CLASS
779  (1 << NUM_BITS_PER_CLASS) - 1; // set starting at bit 0
780  uinT32 Mask, NewMask, ClassBits;
781  int MaxNumConfigs = MAX_NUM_CONFIGS;
782  int WerdsPerConfigVec = WERDS_PER_CONFIG_VEC;
783 
784  /* first read the high level template struct */
785  Templates = NewIntTemplates();
786  // Read Templates in parts for 64 bit compatibility.
787  if (fp->FReadEndian(&unicharset_size, sizeof(unicharset_size), 1) != 1)
788  tprintf("Bad read of inttemp!\n");
789  if (fp->FReadEndian(&Templates->NumClasses, sizeof(Templates->NumClasses),
790  1) != 1 ||
791  fp->FReadEndian(&Templates->NumClassPruners,
792  sizeof(Templates->NumClassPruners), 1) != 1)
793  tprintf("Bad read of inttemp!\n");
794  if (Templates->NumClasses < 0) {
795  // This file has a version id!
796  version_id = -Templates->NumClasses;
797  if (fp->FReadEndian(&Templates->NumClasses, sizeof(Templates->NumClasses),
798  1) != 1)
799  tprintf("Bad read of inttemp!\n");
800  }
801 
802  if (version_id < 3) {
803  MaxNumConfigs = OLD_MAX_NUM_CONFIGS;
804  WerdsPerConfigVec = OLD_WERDS_PER_CONFIG_VEC;
805  }
806 
807  if (version_id < 2) {
808  if (fp->FReadEndian(IndexFor, sizeof(IndexFor[0]), unicharset_size) !=
809  unicharset_size) {
810  tprintf("Bad read of inttemp!\n");
811  }
812  if (fp->FReadEndian(ClassIdFor, sizeof(ClassIdFor[0]),
813  Templates->NumClasses) != Templates->NumClasses) {
814  tprintf("Bad read of inttemp!\n");
815  }
816  }
817 
818  /* then read in the class pruners */
819  const int kNumBuckets =
821  for (i = 0; i < Templates->NumClassPruners; i++) {
822  Pruner = new CLASS_PRUNER_STRUCT;
823  if (fp->FReadEndian(Pruner, sizeof(Pruner->p[0][0][0][0]), kNumBuckets) !=
824  kNumBuckets) {
825  tprintf("Bad read of inttemp!\n");
826  }
827  if (version_id < 2) {
828  TempClassPruner[i] = Pruner;
829  } else {
830  Templates->ClassPruners[i] = Pruner;
831  }
832  }
833 
834  /* fix class pruners if they came from an old version of inttemp */
835  if (version_id < 2) {
836  // Allocate enough class pruners to cover all the class ids.
837  max_class_id = 0;
838  for (i = 0; i < Templates->NumClasses; i++)
839  if (ClassIdFor[i] > max_class_id)
840  max_class_id = ClassIdFor[i];
841  for (i = 0; i <= CPrunerIdFor(max_class_id); i++) {
842  Templates->ClassPruners[i] = new CLASS_PRUNER_STRUCT;
843  memset(Templates->ClassPruners[i], 0, sizeof(CLASS_PRUNER_STRUCT));
844  }
845  // Convert class pruners from the old format (indexed by class index)
846  // to the new format (indexed by class id).
847  last_cp_bit_number = NUM_BITS_PER_CLASS * Templates->NumClasses - 1;
848  for (i = 0; i < Templates->NumClassPruners; i++) {
849  for (x = 0; x < NUM_CP_BUCKETS; x++)
850  for (y = 0; y < NUM_CP_BUCKETS; y++)
851  for (z = 0; z < NUM_CP_BUCKETS; z++)
852  for (w = 0; w < WERDS_PER_CP_VECTOR; w++) {
853  if (TempClassPruner[i]->p[x][y][z][w] == 0)
854  continue;
855  for (b = 0; b < BITS_PER_WERD; b += NUM_BITS_PER_CLASS) {
856  bit_number = i * BITS_PER_CP_VECTOR + w * BITS_PER_WERD + b;
857  if (bit_number > last_cp_bit_number)
858  break; // the rest of the bits in this word are not used
859  class_id = ClassIdFor[bit_number / NUM_BITS_PER_CLASS];
860  // Single out NUM_BITS_PER_CLASS bits relating to class_id.
861  Mask = SetBitsForMask << b;
862  ClassBits = TempClassPruner[i]->p[x][y][z][w] & Mask;
863  // Move these bits to the new position in which they should
864  // appear (indexed corresponding to the class_id).
865  new_i = CPrunerIdFor(class_id);
866  new_w = CPrunerWordIndexFor(class_id);
867  new_b = CPrunerBitIndexFor(class_id) * NUM_BITS_PER_CLASS;
868  if (new_b > b) {
869  ClassBits <<= (new_b - b);
870  } else {
871  ClassBits >>= (b - new_b);
872  }
873  // Copy bits relating to class_id to the correct position
874  // in Templates->ClassPruner.
875  NewMask = SetBitsForMask << new_b;
876  Templates->ClassPruners[new_i]->p[x][y][z][new_w] &= ~NewMask;
877  Templates->ClassPruners[new_i]->p[x][y][z][new_w] |= ClassBits;
878  }
879  }
880  }
881  for (i = 0; i < Templates->NumClassPruners; i++) {
882  delete TempClassPruner[i];
883  }
884  }
885 
886  /* then read in each class */
887  for (i = 0; i < Templates->NumClasses; i++) {
888  /* first read in the high level struct for the class */
889  Class = (INT_CLASS) Emalloc (sizeof (INT_CLASS_STRUCT));
890  if (fp->FReadEndian(&Class->NumProtos, sizeof(Class->NumProtos), 1) != 1 ||
891  fp->FRead(&Class->NumProtoSets, sizeof(Class->NumProtoSets), 1) != 1 ||
892  fp->FRead(&Class->NumConfigs, sizeof(Class->NumConfigs), 1) != 1)
893  tprintf("Bad read of inttemp!\n");
894  if (version_id == 0) {
895  // Only version 0 writes 5 pointless pointers to the file.
896  for (j = 0; j < 5; ++j) {
897  inT32 junk;
898  if (fp->FRead(&junk, sizeof(junk), 1) != 1)
899  tprintf("Bad read of inttemp!\n");
900  }
901  }
902  int num_configs = version_id < 4 ? MaxNumConfigs : Class->NumConfigs;
903  ASSERT_HOST(num_configs <= MaxNumConfigs);
904  if (fp->FReadEndian(Class->ConfigLengths, sizeof(uinT16), num_configs) !=
905  num_configs) {
906  tprintf("Bad read of inttemp!\n");
907  }
908  if (version_id < 2) {
909  ClassForClassId (Templates, ClassIdFor[i]) = Class;
910  } else {
911  ClassForClassId (Templates, i) = Class;
912  }
913 
914  /* then read in the proto lengths */
915  Lengths = NULL;
916  if (MaxNumIntProtosIn (Class) > 0) {
917  Lengths = (uinT8 *)Emalloc(sizeof(uinT8) * MaxNumIntProtosIn(Class));
918  if (fp->FRead(Lengths, sizeof(uinT8), MaxNumIntProtosIn(Class)) !=
919  MaxNumIntProtosIn(Class))
920  tprintf("Bad read of inttemp!\n");
921  }
922  Class->ProtoLengths = Lengths;
923 
924  /* then read in the proto sets */
925  for (j = 0; j < Class->NumProtoSets; j++) {
926  ProtoSet = (PROTO_SET)Emalloc(sizeof(PROTO_SET_STRUCT));
927  int num_buckets = NUM_PP_PARAMS * NUM_PP_BUCKETS * WERDS_PER_PP_VECTOR;
928  if (fp->FReadEndian(&ProtoSet->ProtoPruner,
929  sizeof(ProtoSet->ProtoPruner[0][0][0]),
930  num_buckets) != num_buckets)
931  tprintf("Bad read of inttemp!\n");
932  for (x = 0; x < PROTOS_PER_PROTO_SET; x++) {
933  if (fp->FRead(&ProtoSet->Protos[x].A, sizeof(ProtoSet->Protos[x].A),
934  1) != 1 ||
935  fp->FRead(&ProtoSet->Protos[x].B, sizeof(ProtoSet->Protos[x].B),
936  1) != 1 ||
937  fp->FRead(&ProtoSet->Protos[x].C, sizeof(ProtoSet->Protos[x].C),
938  1) != 1 ||
939  fp->FRead(&ProtoSet->Protos[x].Angle,
940  sizeof(ProtoSet->Protos[x].Angle), 1) != 1)
941  tprintf("Bad read of inttemp!\n");
942  if (fp->FReadEndian(&ProtoSet->Protos[x].Configs,
943  sizeof(ProtoSet->Protos[x].Configs[0]),
944  WerdsPerConfigVec) != WerdsPerConfigVec)
945  cprintf("Bad read of inttemp!\n");
946  }
947  Class->ProtoSets[j] = ProtoSet;
948  }
949  if (version_id < 4) {
950  Class->font_set_id = -1;
951  } else {
952  fp->FReadEndian(&Class->font_set_id, sizeof(Class->font_set_id), 1);
953  }
954  }
955 
956  if (version_id < 2) {
957  /* add an empty NULL class with class id 0 */
958  assert(UnusedClassIdIn (Templates, 0));
959  ClassForClassId (Templates, 0) = NewIntClass (1, 1);
960  ClassForClassId (Templates, 0)->font_set_id = -1;
961  Templates->NumClasses++;
962  /* make sure the classes are contiguous */
963  for (i = 0; i < MAX_NUM_CLASSES; i++) {
964  if (i < Templates->NumClasses) {
965  if (ClassForClassId (Templates, i) == NULL) {
966  fprintf(stderr, "Non-contiguous class ids in inttemp\n");
967  exit(1);
968  }
969  } else {
970  if (ClassForClassId (Templates, i) != NULL) {
971  fprintf(stderr, "Class id %d exceeds NumClassesIn (Templates) %d\n",
972  i, Templates->NumClasses);
973  exit(1);
974  }
975  }
976  }
977  }
978  if (version_id >= 4) {
980  if (version_id >= 5) {
981  this->fontinfo_table_.read(fp,
983  }
985  }
986 
987  // Clean up.
988  delete[] IndexFor;
989  delete[] ClassIdFor;
990  delete[] TempClassPruner;
991 
992  return (Templates);
993 } /* ReadIntTemplates */
#define NUM_BITS_PER_CLASS
Definition: intproto.h:54
PROTO_SET ProtoSets[MAX_NUM_PROTO_SETS]
Definition: intproto.h:111
#define BITS_PER_CP_VECTOR
Definition: intproto.h:58
bool read_spacing_info(TFile *f, FontInfo *fi)
Definition: fontinfo.cpp:173
#define CPrunerBitIndexFor(c)
Definition: intproto.h:186
INT_PROTO_STRUCT Protos[PROTOS_PER_PROTO_SET]
Definition: intproto.h:97
CLASS_PRUNER_STRUCT * ClassPruners[MAX_NUM_CLASS_PRUNERS]
Definition: intproto.h:125
bool read_set(TFile *f, FontSet *fs)
Definition: fontinfo.cpp:230
int32_t inT32
Definition: host.h:38
_ConstTessMemberResultCallback_0_0< false, R, T1 >::base * NewPermanentTessCallback(const T1 *obj, R(T2::*member)() const)
Definition: tesscallback.h:116
uinT8 NumProtoSets
Definition: intproto.h:109
UnicityTable< FontSet > fontset_table_
Definition: classify.h:495
#define WERDS_PER_CP_VECTOR
Definition: intproto.h:61
uinT8 NumConfigs
Definition: intproto.h:110
#define MAX_NUM_CLASS_PRUNERS
Definition: intproto.h:59
UNICHAR_ID CLASS_ID
Definition: matchdefs.h:35
#define UnusedClassIdIn(T, c)
Definition: intproto.h:180
struct PROTO_SET_STRUCT * PROTO_SET
void * Emalloc(int Size)
Definition: emalloc.cpp:47
#define tprintf(...)
Definition: tprintf.h:31
uinT16 NumProtos
Definition: intproto.h:108
#define BITS_PER_WERD
Definition: intproto.h:44
#define CPrunerWordIndexFor(c)
Definition: intproto.h:185
uinT8 * ProtoLengths
Definition: intproto.h:112
uinT32 p[NUM_CP_BUCKETS][NUM_CP_BUCKETS][NUM_CP_BUCKETS][WERDS_PER_CP_VECTOR]
Definition: intproto.h:77
#define NUM_PP_PARAMS
Definition: intproto.h:50
int16_t inT16
Definition: host.h:36
PROTO_PRUNER ProtoPruner
Definition: intproto.h:96
#define NUM_CP_BUCKETS
Definition: intproto.h:52
#define ASSERT_HOST(x)
Definition: errcode.h:84
#define ClassForClassId(T, c)
Definition: intproto.h:181
uint32_t uinT32
Definition: host.h:39
#define OLD_MAX_NUM_CONFIGS
Definition: intproto.cpp:112
bool read_info(TFile *f, FontInfo *fi)
Definition: fontinfo.cpp:152
uinT16 ConfigLengths[MAX_NUM_CONFIGS]
Definition: intproto.h:113
#define CPrunerIdFor(c)
Definition: intproto.h:183
INT_TEMPLATES NewIntTemplates()
Definition: intproto.cpp:723
uinT32 Configs[WERDS_PER_CONFIG_VEC]
Definition: intproto.h:86
#define NUM_PP_BUCKETS
Definition: intproto.h:51
struct INT_CLASS_STRUCT * INT_CLASS
#define PROTOS_PER_PROTO_SET
Definition: intproto.h:48
uint8_t uinT8
Definition: host.h:35
#define WERDS_PER_PP_VECTOR
Definition: intproto.h:62
void cprintf(const char *format,...)
Definition: callcpp.cpp:40
#define MAX_NUM_CONFIGS
Definition: intproto.h:46
#define MaxNumIntProtosIn(C)
Definition: intproto.h:168
#define MAX_NUM_CLASSES
Definition: matchdefs.h:31
UnicityTable< FontInfo > fontinfo_table_
Definition: classify.h:487
uint16_t uinT16
Definition: host.h:37
#define WERDS_PER_CONFIG_VEC
Definition: intproto.h:68
INT_CLASS NewIntClass(int MaxNumProtos, int MaxNumConfigs)
Definition: intproto.cpp:664
#define OLD_WERDS_PER_CONFIG_VEC
Definition: intproto.cpp:113

◆ ReadNewCutoffs()

void tesseract::Classify::ReadNewCutoffs ( TFile * fp,
CLASS_CUTOFF_ARRAY  Cutoffs 
)

Open Filename, read in all of the class-id/cutoff pairs and insert them into the Cutoffs array. Cutoffs are indexed in the array by class id. Unused entries in the array are set to an arbitrarily high cutoff value.

Parameters
CutoffFile - name of file containing cutoff definitions
Cutoffs - array to put cutoffs into
swap
end_offset
Returns
none
Note
Globals: none
Exceptions: none
History: Wed Feb 20 09:38:26 1991, DSJ, Created.

Definition at line 52 of file cutoffs.cpp.

52  {
53  char Class[UNICHAR_LEN + 1];
54  CLASS_ID ClassId;
55  int Cutoff;
56  int i;
57 
58  if (shape_table_ != NULL) {
59  if (!shapetable_cutoffs_.DeSerialize(fp)) {
60  tprintf("Error during read of shapetable pffmtable!\n");
61  }
62  }
63  for (i = 0; i < MAX_NUM_CLASSES; i++)
64  Cutoffs[i] = MAX_CUTOFF;
65 
66  const int kMaxLineSize = 100;
67  char line[kMaxLineSize];
68  while (fp->FGets(line, kMaxLineSize) != nullptr &&
69  sscanf(line, "%" REALLY_QUOTE_IT(UNICHAR_LEN) "s %d", Class,
70  &Cutoff) == 2) {
71  if (strcmp(Class, "NULL") == 0) {
72  ClassId = unicharset.unichar_to_id(" ");
73  } else {
74  ClassId = unicharset.unichar_to_id(Class);
75  }
76  Cutoffs[ClassId] = Cutoff;
77  }
78 }
bool DeSerialize(bool swap, FILE *fp)
UNICHAR_ID CLASS_ID
Definition: matchdefs.h:35
#define tprintf(...)
Definition: tprintf.h:31
#define UNICHAR_LEN
Definition: unichar.h:30
ShapeTable * shape_table_
Definition: classify.h:511
UNICHARSET unicharset
Definition: ccutil.h:68
#define MAX_NUM_CLASSES
Definition: matchdefs.h:31
UNICHAR_ID unichar_to_id(const char *const unichar_repr) const
Definition: unicharset.cpp:194
#define REALLY_QUOTE_IT(x)
Definition: cutoffs.cpp:33
#define MAX_CUTOFF
Definition: cutoffs.cpp:35

◆ ReadNormProtos()

NORM_PROTOS * tesseract::Classify::ReadNormProtos ( TFile * fp)

This routine allocates a new data structure to hold a set of character normalization protos. It then fills in the data structure by reading from the specified File.

Parameters
File - open text file to read normalization protos from
end_offset
Globals: none
Returns
Character normalization protos.
Note
Exceptions: none
History: Wed Dec 19 16:38:49 1990, DSJ, Created.

Definition at line 245 of file normmatch.cpp.

245  {
247  int i;
248  char unichar[2 * UNICHAR_LEN + 1];
249  UNICHAR_ID unichar_id;
250  LIST Protos;
251  int NumProtos;
252 
253  /* allocate and initialization data structure */
254  NormProtos = (NORM_PROTOS *) Emalloc (sizeof (NORM_PROTOS));
255  NormProtos->NumProtos = unicharset.size();
256  NormProtos->Protos = (LIST *) Emalloc (NormProtos->NumProtos * sizeof(LIST));
257  for (i = 0; i < NormProtos->NumProtos; i++)
258  NormProtos->Protos[i] = NIL_LIST;
259 
260  /* read file header and save in data structure */
261  NormProtos->NumParams = ReadSampleSize(fp);
262  NormProtos->ParamDesc = ReadParamDesc(fp, NormProtos->NumParams);
263 
264  /* read protos for each class into a separate list */
265  const int kMaxLineSize = 100;
266  char line[kMaxLineSize];
267  while (fp->FGets(line, kMaxLineSize) != nullptr) {
268  if (sscanf(line, "%s %d", unichar, &NumProtos) != 2) continue;
269  if (unicharset.contains_unichar(unichar)) {
270  unichar_id = unicharset.unichar_to_id(unichar);
271  Protos = NormProtos->Protos[unichar_id];
272  for (i = 0; i < NumProtos; i++)
273  Protos = push_last(Protos, ReadPrototype(fp, NormProtos->NumParams));
274  NormProtos->Protos[unichar_id] = Protos;
275  } else {
276  tprintf("Error: unichar %s in normproto file is not in unichar set.\n",
277  unichar);
278  for (i = 0; i < NumProtos; i++)
279  FreePrototype(ReadPrototype(fp, NormProtos->NumParams));
280  }
281  }
282  return (NormProtos);
283 } /* ReadNormProtos */
PROTOTYPE * ReadPrototype(TFile *fp, uinT16 N)
Definition: clusttool.cpp:118
bool contains_unichar(const char *const unichar_repr) const
Definition: unicharset.cpp:644
int UNICHAR_ID
Definition: unichar.h:33
void * Emalloc(int Size)
Definition: emalloc.cpp:47
#define tprintf(...)
Definition: tprintf.h:31
LIST push_last(LIST list, void *item)
Definition: oldlist.cpp:332
#define NIL_LIST
Definition: oldlist.h:126
#define UNICHAR_LEN
Definition: unichar.h:30
uinT16 ReadSampleSize(TFile *fp)
Definition: clusttool.cpp:47
LIST * Protos
Definition: normmatch.cpp:42
UNICHARSET unicharset
Definition: ccutil.h:68
PARAM_DESC * ParamDesc
Definition: normmatch.cpp:41
void FreePrototype(void *arg)
Definition: cluster.cpp:587
int size() const
Definition: unicharset.h:299
PARAM_DESC * ReadParamDesc(TFile *fp, uinT16 N)
Definition: clusttool.cpp:73
NORM_PROTOS * NormProtos
Definition: classify.h:485
UNICHAR_ID unichar_to_id(const char *const unichar_repr) const
Definition: unicharset.cpp:194

◆ RefreshDebugWindow()

void tesseract::Classify::RefreshDebugWindow ( ScrollView **  win,
const char *  msg,
int  y_offset,
const TBOX & wbox 
)

Definition at line 220 of file adaptmatch.cpp.

221  {
222  #ifndef GRAPHICS_DISABLED
223  const int kSampleSpaceWidth = 500;
224  if (*win == NULL) {
225  *win = new ScrollView(msg, 100, y_offset, kSampleSpaceWidth * 2, 200,
226  kSampleSpaceWidth * 2, 200, true);
227  }
228  (*win)->Clear();
229  (*win)->Pen(64, 64, 64);
230  (*win)->Line(-kSampleSpaceWidth, kBlnBaselineOffset,
231  kSampleSpaceWidth, kBlnBaselineOffset);
232  (*win)->Line(-kSampleSpaceWidth, kBlnXHeight + kBlnBaselineOffset,
233  kSampleSpaceWidth, kBlnXHeight + kBlnBaselineOffset);
234  (*win)->ZoomToRectangle(wbox.left(), wbox.top(),
235  wbox.right(), wbox.bottom());
236  #endif // GRAPHICS_DISABLED
237 }
const int kBlnXHeight
Definition: normalis.h:28
const int kBlnBaselineOffset
Definition: normalis.h:29
inT16 left() const
Definition: rect.h:68
inT16 top() const
Definition: rect.h:54
inT16 right() const
Definition: rect.h:75
inT16 bottom() const
Definition: rect.h:61

◆ RemoveBadMatches()

void tesseract::Classify::RemoveBadMatches ( ADAPT_RESULTS * Results)

This routine steps through each matching class in Results and removes it from the match list if its rating is worse than the best rating by more than a pad (that is, if it falls below best_rating minus matcher_bad_match_pad). In other words, all good matches get moved to the front of the match array, which is then truncated.

Parameters
Results - contains matches to be filtered

Globals:

  • matcher_bad_match_pad defines a "bad match"
Note
Exceptions: none
History: Tue Mar 12 13:51:03 1991, DSJ, Created.

Definition at line 2085 of file adaptmatch.cpp.

2085  {
2086  int Next, NextGood;
2087  FLOAT32 BadMatchThreshold;
2088  static const char* romans = "i v x I V X";
2089  BadMatchThreshold = Results->best_rating - matcher_bad_match_pad;
2090 
2092  UNICHAR_ID unichar_id_one = unicharset.contains_unichar("1") ?
2093  unicharset.unichar_to_id("1") : -1;
2094  UNICHAR_ID unichar_id_zero = unicharset.contains_unichar("0") ?
2095  unicharset.unichar_to_id("0") : -1;
2096  float scored_one = ScoredUnichar(unichar_id_one, *Results);
2097  float scored_zero = ScoredUnichar(unichar_id_zero, *Results);
2098 
2099  for (Next = NextGood = 0; Next < Results->match.size(); Next++) {
2100  const UnicharRating& match = Results->match[Next];
2101  if (match.rating >= BadMatchThreshold) {
2102  if (!unicharset.get_isalpha(match.unichar_id) ||
2103  strstr(romans,
2104  unicharset.id_to_unichar(match.unichar_id)) != NULL) {
2105  } else if (unicharset.eq(match.unichar_id, "l") &&
2106  scored_one < BadMatchThreshold) {
2107  Results->match[Next].unichar_id = unichar_id_one;
2108  } else if (unicharset.eq(match.unichar_id, "O") &&
2109  scored_zero < BadMatchThreshold) {
2110  Results->match[Next].unichar_id = unichar_id_zero;
2111  } else {
2112  Results->match[Next].unichar_id = INVALID_UNICHAR_ID; // Don't copy.
2113  }
2114  if (Results->match[Next].unichar_id != INVALID_UNICHAR_ID) {
2115  if (NextGood == Next) {
2116  ++NextGood;
2117  } else {
2118  Results->match[NextGood++] = Results->match[Next];
2119  }
2120  }
2121  }
2122  }
2123  } else {
2124  for (Next = NextGood = 0; Next < Results->match.size(); Next++) {
2125  if (Results->match[Next].rating >= BadMatchThreshold) {
2126  if (NextGood == Next) {
2127  ++NextGood;
2128  } else {
2129  Results->match[NextGood++] = Results->match[Next];
2130  }
2131  }
2132  }
2133  }
2134  Results->match.truncate(NextGood);
2135 } /* RemoveBadMatches */
bool classify_bln_numeric_mode
Definition: classify.h:499
bool contains_unichar(const char *const unichar_repr) const
Definition: unicharset.cpp:644
int UNICHAR_ID
Definition: unichar.h:33
void truncate(int size)
int size() const
Definition: genericvector.h:72
const char * id_to_unichar(UNICHAR_ID id) const
Definition: unicharset.cpp:266
bool get_isalpha(UNICHAR_ID unichar_id) const
Definition: unicharset.h:451
double matcher_bad_match_pad
Definition: classify.h:422
UNICHARSET unicharset
Definition: ccutil.h:68
float FLOAT32
Definition: host.h:42
bool eq(UNICHAR_ID unichar_id, const char *const unichar_repr) const
Definition: unicharset.cpp:656
GenericVector< UnicharRating > match
Definition: adaptmatch.cpp:88
UNICHAR_ID unichar_to_id(const char *const unichar_repr) const
Definition: unicharset.cpp:194
FLOAT32 best_rating
Definition: adaptmatch.cpp:87

◆ RemoveExtraPuncs()

void tesseract::Classify::RemoveExtraPuncs ( ADAPT_RESULTS * Results)

This routine discards extra digits or punctuation from the results. We keep only the top 2 punctuation answers and the top 1 digit answer if present.

Parameters
Results - contains matches to be filtered
Note
History: Tue Mar 12 13:51:03 1991, DSJ, Created.

Definition at line 2147 of file adaptmatch.cpp.

2147  {
2148  int Next, NextGood;
2149  int punc_count; /*no of garbage characters */
2150  int digit_count;
2151  /*garbage characters */
2152  static char punc_chars[] = ". , ; : / ` ~ ' - = \\ | \" ! _ ^";
2153  static char digit_chars[] = "0 1 2 3 4 5 6 7 8 9";
2154 
2155  punc_count = 0;
2156  digit_count = 0;
2157  for (Next = NextGood = 0; Next < Results->match.size(); Next++) {
2158  const UnicharRating& match = Results->match[Next];
2159  bool keep = true;
2160  if (strstr(punc_chars,
2161  unicharset.id_to_unichar(match.unichar_id)) != NULL) {
2162  if (punc_count >= 2)
2163  keep = false;
2164  punc_count++;
2165  } else {
2166  if (strstr(digit_chars,
2167  unicharset.id_to_unichar(match.unichar_id)) != NULL) {
2168  if (digit_count >= 1)
2169  keep = false;
2170  digit_count++;
2171  }
2172  }
2173  if (keep) {
2174  if (NextGood == Next) {
2175  ++NextGood;
2176  } else {
2177  Results->match[NextGood++] = match;
2178  }
2179  }
2180  }
2181  Results->match.truncate(NextGood);
2182 } /* RemoveExtraPuncs */
void truncate(int size)
int size() const
Definition: genericvector.h:72
const char * id_to_unichar(UNICHAR_ID id) const
Definition: unicharset.cpp:266
UNICHARSET unicharset
Definition: ccutil.h:68
GenericVector< UnicharRating > match
Definition: adaptmatch.cpp:88

◆ ResetAdaptiveClassifierInternal()

void tesseract::Classify::ResetAdaptiveClassifierInternal ( )

Definition at line 599 of file adaptmatch.cpp.

599  {
601  tprintf("Resetting adaptive classifier (NumAdaptationsFailed=%d)\n",
602  NumAdaptationsFailed);
603  }
606  if (BackupAdaptedTemplates != NULL)
608  BackupAdaptedTemplates = NULL;
609  NumAdaptationsFailed = 0;
610 }
ADAPT_TEMPLATES NewAdaptedTemplates(bool InitFromUnicharset)
Definition: adaptive.cpp:165
#define tprintf(...)
Definition: tprintf.h:31
int classify_learning_debug_level
Definition: classify.h:418
ADAPT_TEMPLATES BackupAdaptedTemplates
Definition: classify.h:476
void free_adapted_templates(ADAPT_TEMPLATES templates)
Definition: adaptive.cpp:197
ADAPT_TEMPLATES AdaptedTemplates
Definition: classify.h:472

◆ SetAdaptiveThreshold()

void tesseract::Classify::SetAdaptiveThreshold ( FLOAT32  Threshold)

This routine resets the internal thresholds inside the integer matcher to correspond to the specified threshold.

Parameters
Threshold - threshold for creating new templates

Globals:

  • matcher_good_threshold default good match rating
Note
Exceptions: none
History: Tue Apr 9 08:33:13 1991, DSJ, Created.

Definition at line 2198 of file adaptmatch.cpp.

2198  {
2199  Threshold = (Threshold == matcher_good_threshold) ? 0.9: (1.0 - Threshold);
2201  ClipToRange<int>(255 * Threshold, 0, 255));
2203  ClipToRange<int>(255 * Threshold, 0, 255));
2204 } /* SetAdaptiveThreshold */
double matcher_good_threshold
Definition: classify.h:419
int classify_adapt_feature_threshold
Definition: classify.h:446
int classify_adapt_proto_threshold
Definition: classify.h:444

◆ SetStaticClassifier()

void tesseract::Classify::SetStaticClassifier ( ShapeClassifier * static_classifier)

Definition at line 199 of file classify.cpp.

199  {
200  delete static_classifier_;
201  static_classifier_ = static_classifier;
202 }

◆ SettupPass1()

void tesseract::Classify::SettupPass1 ( )

This routine prepares the adaptive matcher for the start of the first pass. Learning is enabled (unless it is disabled for the whole program).

Note
This is somewhat redundant: it simply says that if learning is enabled, it will remain enabled on the first pass, and if it is disabled, it will remain disabled. It is only put here to make it very clear that learning is controlled directly by the global setting of EnableLearning.

Globals:

Note
Exceptions: none
History: Mon Apr 15 16:39:29 1991, DSJ, Created.

Definition at line 656 of file adaptmatch.cpp.

656  {
658 
660 
661 } /* SettupPass1 */
Dict & getDict()
Definition: classify.h:65
void SettupStopperPass1()
Sets up stopper variables in preparation for the first pass.
Definition: stopper.cpp:369
bool classify_enable_learning
Definition: classify.h:388

◆ SettupPass2()

void tesseract::Classify::SettupPass2 ( )

This routine prepares the adaptive matcher for the start of the second pass. Further learning is disabled.

Globals:

Note
Exceptions: none
History: Mon Apr 15 16:39:29 1991, DSJ, Created.

Definition at line 676 of file adaptmatch.cpp.

676  {
679 
680 } /* SettupPass2 */
Dict & getDict()
Definition: classify.h:65
#define FALSE
Definition: capi.h:46
void SettupStopperPass2()
Sets up stopper variables in preparation for the second pass.
Definition: stopper.cpp:373

◆ SetupBLCNDenorms()

void tesseract::Classify::SetupBLCNDenorms ( const TBLOB & blob,
bool  nonlinear_norm,
DENORM * bl_denorm,
DENORM * cn_denorm,
INT_FX_RESULT_STRUCT * fx_info 
)
static

Definition at line 133 of file intfx.cpp.

135  {
136  // Compute 1st and 2nd moments of the original outline.
137  FCOORD center, second_moments;
138  int length = blob.ComputeMoments(&center, &second_moments);
139  if (fx_info != NULL) {
140  fx_info->Length = length;
141  fx_info->Rx = IntCastRounded(second_moments.y());
142  fx_info->Ry = IntCastRounded(second_moments.x());
143 
144  fx_info->Xmean = IntCastRounded(center.x());
145  fx_info->Ymean = IntCastRounded(center.y());
146  }
147  // Setup the denorm for Baseline normalization.
148  bl_denorm->SetupNormalization(NULL, NULL, &blob.denorm(), center.x(), 128.0f,
149  1.0f, 1.0f, 128.0f, 128.0f);
150  // Setup the denorm for character normalization.
151  if (nonlinear_norm) {
154  TBOX box;
155  blob.GetPreciseBoundingBox(&box);
156  box.pad(1, 1);
157  blob.GetEdgeCoords(box, &x_coords, &y_coords);
158  cn_denorm->SetupNonLinear(&blob.denorm(), box, MAX_UINT8, MAX_UINT8,
159  0.0f, 0.0f, x_coords, y_coords);
160  } else {
161  cn_denorm->SetupNormalization(NULL, NULL, &blob.denorm(),
162  center.x(), center.y(),
163  51.2f / second_moments.x(),
164  51.2f / second_moments.y(),
165  128.0f, 128.0f);
166  }
167 }
Definition: points.h:189
#define MAX_UINT8
Definition: host.h:63
void SetupNormalization(const BLOCK *block, const FCOORD *rotation, const DENORM *predecessor, float x_origin, float y_origin, float x_scale, float y_scale, float final_xshift, float final_yshift)
Definition: normalis.cpp:95
int IntCastRounded(double x)
Definition: helpers.h:179
const DENORM & denorm() const
Definition: blobs.h:340
void pad(int xpad, int ypad)
Definition: rect.h:127
Definition: rect.h:30
float y() const
Definition: points.h:212
void SetupNonLinear(const DENORM *predecessor, const TBOX &box, float target_width, float target_height, float final_xshift, float final_yshift, const GenericVector< GenericVector< int > > &x_coords, const GenericVector< GenericVector< int > > &y_coords)
Definition: normalis.cpp:267
void GetEdgeCoords(const TBOX &box, GenericVector< GenericVector< int > > *x_coords, GenericVector< GenericVector< int > > *y_coords) const
Definition: blobs.cpp:570
int ComputeMoments(FCOORD *center, FCOORD *second_moments) const
Definition: blobs.cpp:535
float x() const
Definition: points.h:209
void GetPreciseBoundingBox(TBOX *precise_box) const
Definition: blobs.cpp:554

◆ shape_table()

const ShapeTable* tesseract::Classify::shape_table ( ) const
inline

Definition at line 69 of file classify.h.

69  {
70  return shape_table_;
71  }
ShapeTable * shape_table_
Definition: classify.h:511

◆ ShapeIDToClassID()

int tesseract::Classify::ShapeIDToClassID ( int  shape_id) const

Definition at line 2282 of file adaptmatch.cpp.

2282  {
2283  for (int id = 0; id < PreTrainedTemplates->NumClasses; ++id) {
2284  int font_set_id = PreTrainedTemplates->Class[id]->font_set_id;
2285  ASSERT_HOST(font_set_id >= 0);
2286  const FontSet &fs = fontset_table_.get(font_set_id);
2287  for (int config = 0; config < fs.size; ++config) {
2288  if (fs.configs[config] == shape_id)
2289  return id;
2290  }
2291  }
2292  tprintf("Shape %d not found\n", shape_id);
2293  return -1;
2294 }
UnicityTable< FontSet > fontset_table_
Definition: classify.h:495
#define tprintf(...)
Definition: tprintf.h:31
#define ASSERT_HOST(x)
Definition: errcode.h:84
INT_CLASS Class[MAX_NUM_CLASSES]
Definition: intproto.h:124
INT_TEMPLATES PreTrainedTemplates
Definition: classify.h:468

◆ ShowBestMatchFor()

void tesseract::Classify::ShowBestMatchFor ( int  shape_id,
const INT_FEATURE_STRUCT * features,
int  num_features 
)

This routine displays debug information for the best config of the given shape_id for the given set of features.

Parameters
shape_id - classifier id to work with
features - features of the unknown character
num_features - Number of features in the features array.
Note
Exceptions: none
History: Fri Mar 22 08:43:52 1991, DSJ, Created.

Definition at line 2219 of file adaptmatch.cpp.

2221  {
2222 #ifndef GRAPHICS_DISABLED
2223  uinT32 config_mask;
2224  if (UnusedClassIdIn(PreTrainedTemplates, shape_id)) {
2225  tprintf("No built-in templates for class/shape %d\n", shape_id);
2226  return;
2227  }
2228  if (num_features <= 0) {
2229  tprintf("Illegal blob (char norm features)!\n");
2230  return;
2231  }
2232  UnicharRating cn_result;
2233  classify_norm_method.set_value(character);
2236  num_features, features, &cn_result,
2239  tprintf("\n");
2240  config_mask = 1 << cn_result.config;
2241 
2242  tprintf("Static Shape ID: %d\n", shape_id);
2243  ShowMatchDisplay();
2245  AllProtosOn, &config_mask, // TODO: or reinterpret_cast<BIT_VECTOR>(&config_mask) anyway?
2246  num_features, features, &cn_result,
2251 #endif // GRAPHICS_DISABLED
2252 } /* ShowBestMatchFor */
bool matcher_debug_separate_windows
Definition: classify.h:457
#define UnusedClassIdIn(T, c)
Definition: intproto.h:180
int classify_adapt_feature_threshold
Definition: classify.h:446
BIT_VECTOR AllProtosOn
Definition: classify.h:479
#define tprintf(...)
Definition: tprintf.h:31
#define ClassForClassId(T, c)
Definition: intproto.h:181
uint32_t uinT32
Definition: host.h:39
IntegerMatcher im_
Definition: classify.h:502
#define NO_DEBUG
Definition: adaptmatch.cpp:70
BIT_VECTOR AllConfigsOn
Definition: classify.h:480
void UpdateMatchDisplay()
Definition: intproto.cpp:467
INT_TEMPLATES PreTrainedTemplates
Definition: classify.h:468
void Match(INT_CLASS ClassTemplate, BIT_VECTOR ProtoMask, BIT_VECTOR ConfigMask, inT16 NumFeatures, const INT_FEATURE_STRUCT *Features, tesseract::UnicharRating *Result, int AdaptFeatureThreshold, int Debug, bool SeparateDebugWindows)
Definition: intmatcher.cpp:475

◆ ShowMatchDisplay()

void tesseract::Classify::ShowMatchDisplay ( )

This routine sends the shapes in the global display lists to the match debugger window.

Globals:

  • FeatureShapes: display list containing feature matches
  • ProtoShapes: display list containing proto matches

Returns
none
Note
Exceptions: none
History: Thu Mar 21 15:47:33 1991, DSJ, Created.

Definition at line 1008 of file intproto.cpp.

1008  {
1010  if (ProtoDisplayWindow) {
1012  }
1013  if (FeatureDisplayWindow) {
1015  }
1017  static_cast<NORM_METHOD>(static_cast<int>(classify_norm_method)),
1018  IntMatchWindow);
1020  INT_MAX_X, INT_MAX_Y);
1021  if (ProtoDisplayWindow) {
1023  INT_MAX_X, INT_MAX_Y);
1024  }
1025  if (FeatureDisplayWindow) {
1027  INT_MAX_X, INT_MAX_Y);
1028  }
1029 } /* ShowMatchDisplay */
void ClearFeatureSpaceWindow(NORM_METHOD norm_method, ScrollView *window)
Definition: intproto.cpp:1033
#define INT_MAX_Y
Definition: intproto.cpp:66
#define INT_MIN_X
Definition: intproto.cpp:63
#define INT_MAX_X
Definition: intproto.cpp:65
ScrollView * IntMatchWindow
Definition: intproto.cpp:179
void Clear()
Definition: scrollview.cpp:595
#define INT_MIN_Y
Definition: intproto.cpp:64
void ZoomToRectangle(int x1, int y1, int x2, int y2)
Definition: scrollview.cpp:765
void InitIntMatchWindowIfReqd()
Definition: intproto.cpp:1817
ScrollView * FeatureDisplayWindow
Definition: intproto.cpp:180
ScrollView * ProtoDisplayWindow
Definition: intproto.cpp:181

◆ StartBackupAdaptiveClassifier()

void tesseract::Classify::StartBackupAdaptiveClassifier ( )

Definition at line 630 of file adaptmatch.cpp.

630  {
631  if (BackupAdaptedTemplates != NULL)
634 }
ADAPT_TEMPLATES NewAdaptedTemplates(bool InitFromUnicharset)
Definition: adaptive.cpp:165
ADAPT_TEMPLATES BackupAdaptedTemplates
Definition: classify.h:476
void free_adapted_templates(ADAPT_TEMPLATES templates)
Definition: adaptive.cpp:197

◆ SwitchAdaptiveClassifier()

void tesseract::Classify::SwitchAdaptiveClassifier ( )

Definition at line 614 of file adaptmatch.cpp.

614  {
615  if (BackupAdaptedTemplates == NULL) {
617  return;
618  }
620  tprintf("Switch to backup adaptive classifier (NumAdaptationsFailed=%d)\n",
621  NumAdaptationsFailed);
622  }
625  BackupAdaptedTemplates = NULL;
626  NumAdaptationsFailed = 0;
627 }
void ResetAdaptiveClassifierInternal()
Definition: adaptmatch.cpp:599
#define tprintf(...)
Definition: tprintf.h:31
int classify_learning_debug_level
Definition: classify.h:418
ADAPT_TEMPLATES BackupAdaptedTemplates
Definition: classify.h:476
void free_adapted_templates(ADAPT_TEMPLATES templates)
Definition: adaptive.cpp:197
ADAPT_TEMPLATES AdaptedTemplates
Definition: classify.h:472

◆ TempConfigReliable()

bool tesseract::Classify::TempConfigReliable ( CLASS_ID  class_id,
const TEMP_CONFIG & config
)

Definition at line 2298 of file adaptmatch.cpp.

2299  {
2300  if (classify_learning_debug_level >= 1) {
2301  tprintf("NumTimesSeen for config of %s is %d\n",
2302  getDict().getUnicharset().debug_str(class_id).string(),
2303  config->NumTimesSeen);
2304  }
2306  return true;
2307  } else if (config->NumTimesSeen < matcher_min_examples_for_prototyping) {
2308  return false;
2309  } else if (use_ambigs_for_adaption) {
2310  // Go through the ambigs vector and see whether we have already seen
2311  // enough times all the characters represented by the ambigs vector.
2312  const UnicharIdVector *ambigs =
2314  int ambigs_size = (ambigs == NULL) ? 0 : ambigs->size();
2315  for (int ambig = 0; ambig < ambigs_size; ++ambig) {
2316  ADAPT_CLASS ambig_class = AdaptedTemplates->Class[(*ambigs)[ambig]];
2317  assert(ambig_class != NULL);
2318  if (ambig_class->NumPermConfigs == 0 &&
2319  ambig_class->MaxNumTimesSeen <
2321  if (classify_learning_debug_level >= 1) {
2322  tprintf("Ambig %s has not been seen enough times,"
2323  " not making config for %s permanent\n",
2324  getDict().getUnicharset().debug_str(
2325  (*ambigs)[ambig]).string(),
2326  getDict().getUnicharset().debug_str(class_id).string());
2327  }
2328  return false;
2329  }
2330  }
2331  }
2332  return true;
2333 }
Dict & getDict()
Definition: classify.h:65
#define tprintf(...)
Definition: tprintf.h:31
int size() const
Definition: genericvector.h:72
uinT8 MaxNumTimesSeen
Definition: adaptive.h:65
int classify_learning_debug_level
Definition: classify.h:418
bool use_ambigs_for_adaption
Definition: ccutil.h:89
const UnicharIdVector * AmbigsForAdaption(UNICHAR_ID unichar_id) const
Definition: ambigs.h:192
ADAPT_TEMPLATES AdaptedTemplates
Definition: classify.h:472
const UnicharAmbigs & getUnicharAmbigs() const
Definition: dict.h:103
uinT8 NumTimesSeen
Definition: adaptive.h:41
ADAPT_CLASS Class[MAX_NUM_CLASSES]
Definition: adaptive.h:80
int matcher_sufficient_examples_for_prototyping
Definition: classify.h:429
int matcher_min_examples_for_prototyping
Definition: classify.h:427
uinT8 NumPermConfigs
Definition: adaptive.h:64
GenericVector< UNICHAR_ID > UnicharIdVector
Definition: ambigs.h:34

◆ UpdateAmbigsGroup()

void tesseract::Classify::UpdateAmbigsGroup ( CLASS_ID  class_id,
TBLOB * Blob
)

Definition at line 2335 of file adaptmatch.cpp.

2335  {
2336  const UnicharIdVector *ambigs =
2338  int ambigs_size = (ambigs == NULL) ? 0 : ambigs->size();
2339  if (classify_learning_debug_level >= 1) {
2340  tprintf("Running UpdateAmbigsGroup for %s class_id=%d\n",
2341  getDict().getUnicharset().debug_str(class_id).string(), class_id);
2342  }
2343  for (int ambig = 0; ambig < ambigs_size; ++ambig) {
2344  CLASS_ID ambig_class_id = (*ambigs)[ambig];
2345  const ADAPT_CLASS ambigs_class = AdaptedTemplates->Class[ambig_class_id];
2346  for (int cfg = 0; cfg < MAX_NUM_CONFIGS; ++cfg) {
2347  if (ConfigIsPermanent(ambigs_class, cfg)) continue;
2348  const TEMP_CONFIG config =
2349  TempConfigFor(AdaptedTemplates->Class[ambig_class_id], cfg);
2350  if (config != NULL && TempConfigReliable(ambig_class_id, config)) {
2351  if (classify_learning_debug_level >= 1) {
2352  tprintf("Making config %d of %s permanent\n", cfg,
2353  getDict().getUnicharset().debug_str(
2354  ambig_class_id).string());
2355  }
2356  MakePermanent(AdaptedTemplates, ambig_class_id, cfg, Blob);
2357  }
2358  }
2359  }
2360 }
Dict & getDict()
Definition: classify.h:65
UNICHAR_ID CLASS_ID
Definition: matchdefs.h:35
#define tprintf(...)
Definition: tprintf.h:31
int size() const
Definition: genericvector.h:72
int classify_learning_debug_level
Definition: classify.h:418
const UnicharIdVector * ReverseAmbigsForAdaption(UNICHAR_ID unichar_id) const
Definition: ambigs.h:201
#define TempConfigFor(Class, ConfigId)
Definition: adaptive.h:101
ADAPT_TEMPLATES AdaptedTemplates
Definition: classify.h:472
const UnicharAmbigs & getUnicharAmbigs() const
Definition: dict.h:103
void MakePermanent(ADAPT_TEMPLATES Templates, CLASS_ID ClassId, int ConfigId, TBLOB *Blob)
ADAPT_CLASS Class[MAX_NUM_CLASSES]
Definition: adaptive.h:80
#define MAX_NUM_CONFIGS
Definition: intproto.h:46
GenericVector< UNICHAR_ID > UnicharIdVector
Definition: ambigs.h:34
bool TempConfigReliable(CLASS_ID class_id, const TEMP_CONFIG &config)
#define ConfigIsPermanent(Class, ConfigId)
Definition: adaptive.h:92

◆ WriteAdaptedTemplates()

void tesseract::Classify::WriteAdaptedTemplates ( FILE *  File,
ADAPT_TEMPLATES  Templates 
)

This routine saves Templates to File in a binary format.

Parameters
File: open text file to write Templates to
Templates: set of adapted templates to write to File
Note
Globals: none
Exceptions: none
History: Mon Mar 18 15:07:32 1991, DSJ, Created.

Definition at line 489 of file adaptive.cpp.

489  {
490  int i;
491 
492  /* first write the high level adaptive template struct */
493  fwrite ((char *) Templates, sizeof (ADAPT_TEMPLATES_STRUCT), 1, File);
494 
495  /* then write out the basic integer templates */
496  WriteIntTemplates (File, Templates->Templates, unicharset);
497 
498  /* then write out the adaptive info for each class */
499  for (i = 0; i < (Templates->Templates)->NumClasses; i++) {
500  WriteAdaptedClass (File, Templates->Class[i],
501  Templates->Templates->Class[i]->NumConfigs);
502  }
503 } /* WriteAdaptedTemplates */
void WriteIntTemplates(FILE *File, INT_TEMPLATES Templates, const UNICHARSET &target_unicharset)
Definition: intproto.cpp:1067
void WriteAdaptedClass(FILE *File, ADAPT_CLASS Class, int NumConfigs)
Definition: adaptive.cpp:443
UNICHARSET unicharset
Definition: ccutil.h:68

◆ WriteIntTemplates()

void tesseract::Classify::WriteIntTemplates ( FILE *  File,
INT_TEMPLATES  Templates,
const UNICHARSET & target_unicharset
)

This routine writes Templates to File. The format is an efficient binary format. File must already be open for writing.

Parameters
File: open file to write templates to
Templates: templates to save into File
target_unicharset: the UNICHARSET to use
Returns
none
Note
Globals: none
Exceptions: none
History: Wed Feb 27 11:48:46 1991, DSJ, Created.

Definition at line 1067 of file intproto.cpp.

1068  {
1069  int i, j;
1070  INT_CLASS Class;
1071  int unicharset_size = target_unicharset.size();
1072  int version_id = -5; // When negated by the reader -1 becomes +1 etc.
1073 
1074  if (Templates->NumClasses != unicharset_size) {
1075  cprintf("Warning: executing WriteIntTemplates() with %d classes in"
1076  " Templates, while target_unicharset size is %d\n",
1077  Templates->NumClasses, unicharset_size);
1078  }
1079 
1080  /* first write the high level template struct */
1081  fwrite(&unicharset_size, sizeof(unicharset_size), 1, File);
1082  fwrite(&version_id, sizeof(version_id), 1, File);
1083  fwrite(&Templates->NumClassPruners, sizeof(Templates->NumClassPruners),
1084  1, File);
1085  fwrite(&Templates->NumClasses, sizeof(Templates->NumClasses), 1, File);
1086 
1087  /* then write out the class pruners */
1088  for (i = 0; i < Templates->NumClassPruners; i++)
1089  fwrite(Templates->ClassPruners[i],
1090  sizeof(CLASS_PRUNER_STRUCT), 1, File);
1091 
1092  /* then write out each class */
1093  for (i = 0; i < Templates->NumClasses; i++) {
1094  Class = Templates->Class[i];
1095 
1096  /* first write out the high level struct for the class */
1097  fwrite(&Class->NumProtos, sizeof(Class->NumProtos), 1, File);
1098  fwrite(&Class->NumProtoSets, sizeof(Class->NumProtoSets), 1, File);
1099  ASSERT_HOST(Class->NumConfigs == this->fontset_table_.get(Class->font_set_id).size);
1100  fwrite(&Class->NumConfigs, sizeof(Class->NumConfigs), 1, File);
1101  for (j = 0; j < Class->NumConfigs; ++j) {
1102  fwrite(&Class->ConfigLengths[j], sizeof(uinT16), 1, File);
1103  }
1104 
1105  /* then write out the proto lengths */
1106  if (MaxNumIntProtosIn (Class) > 0) {
1107  fwrite ((char *) (Class->ProtoLengths), sizeof (uinT8),
1108  MaxNumIntProtosIn (Class), File);
1109  }
1110 
1111  /* then write out the proto sets */
1112  for (j = 0; j < Class->NumProtoSets; j++)
1113  fwrite ((char *) Class->ProtoSets[j],
1114  sizeof (PROTO_SET_STRUCT), 1, File);
1115 
1116  /* then write the fonts info */
1117  fwrite(&Class->font_set_id, sizeof(int), 1, File);
1118  }
1119 
1120  /* Write the fonts info tables */
1122  this->fontinfo_table_.write(File,
1125 } /* WriteIntTemplates */
PROTO_SET ProtoSets[MAX_NUM_PROTO_SETS]
Definition: intproto.h:111
CLASS_PRUNER_STRUCT * ClassPruners[MAX_NUM_CLASS_PRUNERS]
Definition: intproto.h:125
_ConstTessMemberResultCallback_0_0< false, R, T1 >::base * NewPermanentTessCallback(const T1 *obj, R(T2::*member)() const)
Definition: tesscallback.h:116
bool write_set(FILE *f, const FontSet &fs)
Definition: fontinfo.cpp:238
uinT8 NumProtoSets
Definition: intproto.h:109
UnicityTable< FontSet > fontset_table_
Definition: classify.h:495
uinT8 NumConfigs
Definition: intproto.h:110
uinT16 NumProtos
Definition: intproto.h:108
uinT8 * ProtoLengths
Definition: intproto.h:112
#define ASSERT_HOST(x)
Definition: errcode.h:84
bool write_info(FILE *f, const FontInfo &fi)
Definition: fontinfo.cpp:164
uinT16 ConfigLengths[MAX_NUM_CONFIGS]
Definition: intproto.h:113
INT_CLASS Class[MAX_NUM_CLASSES]
Definition: intproto.h:124
bool write_spacing_info(FILE *f, const FontInfo &fi)
Definition: fontinfo.cpp:201
uint8_t uinT8
Definition: host.h:35
int size() const
Definition: unicharset.h:299
void cprintf(const char *format,...)
Definition: callcpp.cpp:40
#define MaxNumIntProtosIn(C)
Definition: intproto.h:168
UnicityTable< FontInfo > fontinfo_table_
Definition: classify.h:487
uint16_t uinT16
Definition: host.h:37

◆ WriteTRFile()

bool tesseract::Classify::WriteTRFile ( const STRING filename)

Definition at line 97 of file blobclass.cpp.

97  {
98  STRING tr_filename = filename + ".tr";
99  FILE* fp = Efopen(tr_filename.string(), "wb");
100  size_t len = tr_file_data_.length();
101  bool result =
102  fwrite(&tr_file_data_[0], sizeof(tr_file_data_[0]), len, fp) == len;
103  fclose(fp);
104  tr_file_data_.truncate_at(0);
105  return result;
106 }
void truncate_at(inT32 index)
Definition: strngs.cpp:269
const char * string() const
Definition: strngs.cpp:198
inT32 length() const
Definition: strngs.cpp:193
Definition: strngs.h:45
FILE * Efopen(const char *Name, const char *Mode)
Definition: efio.cpp:43

Member Data Documentation

◆ AdaptedTemplates

ADAPT_TEMPLATES tesseract::Classify::AdaptedTemplates

Definition at line 472 of file classify.h.

◆ AllConfigsOff

BIT_VECTOR tesseract::Classify::AllConfigsOff

Definition at line 481 of file classify.h.

◆ AllConfigsOn

BIT_VECTOR tesseract::Classify::AllConfigsOn

Definition at line 480 of file classify.h.

◆ allow_blob_division

bool tesseract::Classify::allow_blob_division = true

"Use divisible blobs chopping"

Definition at line 381 of file classify.h.

◆ AllProtosOn

BIT_VECTOR tesseract::Classify::AllProtosOn

Definition at line 479 of file classify.h.

◆ BackupAdaptedTemplates

ADAPT_TEMPLATES tesseract::Classify::BackupAdaptedTemplates

Definition at line 476 of file classify.h.

◆ certainty_scale

double tesseract::Classify::certainty_scale = 20.0

"Certainty scaling factor"

Definition at line 436 of file classify.h.

◆ classify_adapt_feature_threshold

int tesseract::Classify::classify_adapt_feature_threshold = 230

"Threshold for good features during adaptive 0-255"

Definition at line 446 of file classify.h.

◆ classify_adapt_proto_threshold

int tesseract::Classify::classify_adapt_proto_threshold = 230

"Threshold for good protos during adaptive 0-255"

Definition at line 444 of file classify.h.

◆ classify_adapted_pruning_factor

double tesseract::Classify::classify_adapted_pruning_factor = 2.5

"Prune poor adapted results this much worse than best result"

Definition at line 440 of file classify.h.

◆ classify_adapted_pruning_threshold

double tesseract::Classify::classify_adapted_pruning_threshold = -1.0

"Threshold at which classify_adapted_pruning_factor starts"

Definition at line 442 of file classify.h.

◆ classify_bln_numeric_mode

bool tesseract::Classify::classify_bln_numeric_mode = 0

"Assume the input is numbers [0-9]."

Definition at line 499 of file classify.h.

◆ classify_char_norm_range

double tesseract::Classify::classify_char_norm_range = 0.2

"Character Normalization Range ..."

Definition at line 395 of file classify.h.

◆ classify_character_fragments_garbage_certainty_threshold

double tesseract::Classify::classify_character_fragments_garbage_certainty_threshold = -3.0

"Exclude fragments that do not match any whole character" " with at least this certainty"

Definition at line 452 of file classify.h.

◆ classify_class_pruner_multiplier

int tesseract::Classify::classify_class_pruner_multiplier = 15

"Class Pruner Multiplier 0-255: "

Definition at line 464 of file classify.h.

◆ classify_class_pruner_threshold

int tesseract::Classify::classify_class_pruner_threshold = 229

"Class Pruner Threshold 0-255"

Definition at line 462 of file classify.h.

◆ classify_cp_cutoff_strength

int tesseract::Classify::classify_cp_cutoff_strength = 7

"Class Pruner CutoffStrength: "

Definition at line 466 of file classify.h.

◆ classify_debug_character_fragments

bool tesseract::Classify::classify_debug_character_fragments = FALSE

"Bring up graphical debugging windows for fragments training"

Definition at line 454 of file classify.h.

◆ classify_debug_level

int tesseract::Classify::classify_debug_level = 0

"Classify debug level"

Definition at line 389 of file classify.h.

◆ classify_enable_adaptive_debugger

bool tesseract::Classify::classify_enable_adaptive_debugger = 0

"Enable match debugger"

Definition at line 413 of file classify.h.

◆ classify_enable_adaptive_matcher

bool tesseract::Classify::classify_enable_adaptive_matcher = 1

"Enable adaptive classifier"

Definition at line 408 of file classify.h.

◆ classify_enable_learning

bool tesseract::Classify::classify_enable_learning = true

"Enable adaptive classifier"

Definition at line 388 of file classify.h.

◆ classify_integer_matcher_multiplier

int tesseract::Classify::classify_integer_matcher_multiplier = 10

"Integer Matcher Multiplier 0-255: "

Definition at line 468 of file classify.h.

◆ classify_learn_debug_str

char* tesseract::Classify::classify_learn_debug_str = ""

"Class str to debug learning"

Definition at line 458 of file classify.h.

◆ classify_learning_debug_level

int tesseract::Classify::classify_learning_debug_level = 0

"Learning Debug Level: "

Definition at line 418 of file classify.h.

◆ classify_max_certainty_margin

double tesseract::Classify::classify_max_certainty_margin = 5.5

"Veto difference between classifier certainties"

Definition at line 403 of file classify.h.

◆ classify_max_norm_scale_x

double tesseract::Classify::classify_max_norm_scale_x = 0.325

"Max char x-norm scale ..."

Definition at line 397 of file classify.h.

◆ classify_max_norm_scale_y

double tesseract::Classify::classify_max_norm_scale_y = 0.325

"Max char y-norm scale ..."

Definition at line 399 of file classify.h.

◆ classify_max_rating_ratio

double tesseract::Classify::classify_max_rating_ratio = 1.5

"Veto ratio between classifier ratings"

Definition at line 401 of file classify.h.

◆ classify_min_norm_scale_x

double tesseract::Classify::classify_min_norm_scale_x = 0.0

"Min char x-norm scale ..."

Definition at line 396 of file classify.h.

◆ classify_min_norm_scale_y

double tesseract::Classify::classify_min_norm_scale_y = 0.0

"Min char y-norm scale ..."

Definition at line 398 of file classify.h.

◆ classify_misfit_junk_penalty

double tesseract::Classify::classify_misfit_junk_penalty = 0.0

"Penalty to apply when a non-alnum is vertically out of " "its expected textline position"

Definition at line 434 of file classify.h.

◆ classify_nonlinear_norm

bool tesseract::Classify::classify_nonlinear_norm = 0

"Non-linear stroke-density normalization"

Definition at line 415 of file classify.h.

◆ classify_norm_method

int tesseract::Classify::classify_norm_method = character

"Normalization Method ..."

Definition at line 393 of file classify.h.

◆ classify_save_adapted_templates

bool tesseract::Classify::classify_save_adapted_templates = 0

"Save adapted templates to a file"

Definition at line 412 of file classify.h.

◆ classify_use_pre_adapted_templates

bool tesseract::Classify::classify_use_pre_adapted_templates = 0

"Use pre-adapted classifier templates"

Definition at line 410 of file classify.h.

◆ disable_character_fragments

bool tesseract::Classify::disable_character_fragments = TRUE

"Do not include character fragments in the" " results of the classifier"

Definition at line 449 of file classify.h.

◆ EnableLearning

bool tesseract::Classify::EnableLearning

Definition at line 483 of file classify.h.

◆ feature_defs_

FEATURE_DEFS_STRUCT tesseract::Classify::feature_defs_
protected

Definition at line 506 of file classify.h.

◆ fontinfo_table_

UnicityTable<FontInfo> tesseract::Classify::fontinfo_table_

Definition at line 487 of file classify.h.

◆ fontset_table_

UnicityTable<FontSet> tesseract::Classify::fontset_table_

Definition at line 495 of file classify.h.

◆ il1_adaption_test

int tesseract::Classify::il1_adaption_test = 0

"Don't adapt to i/I at beginning of word"

Definition at line 497 of file classify.h.

◆ im_

IntegerMatcher tesseract::Classify::im_
protected

Definition at line 502 of file classify.h.

◆ matcher_avg_noise_size

double tesseract::Classify::matcher_avg_noise_size = 12.0

"Avg. noise blob length: "

Definition at line 424 of file classify.h.

◆ matcher_bad_match_pad

double tesseract::Classify::matcher_bad_match_pad = 0.15

"Bad Match Pad (0-1)"

Definition at line 422 of file classify.h.

◆ matcher_clustering_max_angle_delta

double tesseract::Classify::matcher_clustering_max_angle_delta = 0.015

"Maximum angle delta for prototype clustering"

Definition at line 431 of file classify.h.

◆ matcher_debug_flags

int tesseract::Classify::matcher_debug_flags = 0

"Matcher Debug Flags"

Definition at line 417 of file classify.h.

◆ matcher_debug_level

int tesseract::Classify::matcher_debug_level = 0

"Matcher Debug Level"

Definition at line 416 of file classify.h.

◆ matcher_debug_separate_windows

bool tesseract::Classify::matcher_debug_separate_windows = FALSE

"Use two different windows for debugging the matching: " "One for the protos and one for the features."

Definition at line 457 of file classify.h.

◆ matcher_good_threshold

double tesseract::Classify::matcher_good_threshold = 0.125

"Good Match (0-1)"

Definition at line 419 of file classify.h.

◆ matcher_min_examples_for_prototyping

int tesseract::Classify::matcher_min_examples_for_prototyping = 3

"Reliable Config Threshold"

Definition at line 427 of file classify.h.

◆ matcher_perfect_threshold

double tesseract::Classify::matcher_perfect_threshold = 0.02

"Perfect Match (0-1)"

Definition at line 421 of file classify.h.

◆ matcher_permanent_classes_min

int tesseract::Classify::matcher_permanent_classes_min = 1

"Min # of permanent classes"

Definition at line 425 of file classify.h.

◆ matcher_rating_margin

double tesseract::Classify::matcher_rating_margin = 0.1

"New template margin (0-1)"

Definition at line 423 of file classify.h.

◆ matcher_reliable_adaptive_result

double tesseract::Classify::matcher_reliable_adaptive_result = 0.0

"Great Match (0-1)"

Definition at line 420 of file classify.h.

◆ matcher_sufficient_examples_for_prototyping

int tesseract::Classify::matcher_sufficient_examples_for_prototyping = 5

"Enable adaption even if the ambiguities have not been seen"

Definition at line 429 of file classify.h.

◆ NormProtos

NORM_PROTOS* tesseract::Classify::NormProtos

Definition at line 485 of file classify.h.

◆ PreTrainedTemplates

INT_TEMPLATES tesseract::Classify::PreTrainedTemplates

Definition at line 468 of file classify.h.

◆ prioritize_division

bool tesseract::Classify::prioritize_division = FALSE

"Prioritize blob division over chopping"

Definition at line 386 of file classify.h.

◆ rating_scale

double tesseract::Classify::rating_scale = 1.5

"Rating scaling factor"

Definition at line 435 of file classify.h.

◆ shape_table_

ShapeTable* tesseract::Classify::shape_table_
protected

Definition at line 511 of file classify.h.

◆ speckle_large_max_size

double tesseract::Classify::speckle_large_max_size = 0.30

"Max large speckle size"

Definition at line 500 of file classify.h.

◆ speckle_rating_penalty

double tesseract::Classify::speckle_rating_penalty = 10.0

"Penalty to add to worst rating for noise"

Definition at line 502 of file classify.h.

◆ TempProtoMask

BIT_VECTOR tesseract::Classify::TempProtoMask

Definition at line 482 of file classify.h.

◆ tess_bn_matching

bool tesseract::Classify::tess_bn_matching = 0

"Baseline Normalized Matching"

Definition at line 407 of file classify.h.

◆ tess_cn_matching

bool tesseract::Classify::tess_cn_matching = 0

"Character Normalized Matching"

Definition at line 406 of file classify.h.

◆ tessedit_class_miss_scale

double tesseract::Classify::tessedit_class_miss_scale = 0.00390625

"Scale factor for features not used"

Definition at line 438 of file classify.h.

◆ tessedit_single_match

int tesseract::Classify::tessedit_single_match = FALSE

"Top choice only from CP"

Definition at line 387 of file classify.h.


The documentation for this class was generated from the following files: