19 #ifndef TESSERACT_CLASSIFY_CLASSIFY_H_    20 #define TESSERACT_CLASSIFY_CLASSIFY_H_    42 static const int kUnknownFontinfoId = -1;
    43 static const int kBlankFontinfoId = -2;
    47 class ShapeClassifier;
   103                    const uinT8* normalization_factors,
   104                    const uinT16* expected_num_features,
   137   void LearnPieces(
const char* fontname, 
int start, 
int length, 
float threshold,
   156                      const uinT8* norm_factors,
   159                      int matcher_multiplier,
   160                      const TBOX& blob_box,
   174                                        int matcher_multiplier,
   175                                        const uinT8* cn_factors,
   182                                 double im_rating, 
int feature_misses,
   184                                 int blob_length, 
int matcher_multiplier,
   185                                 const uinT8* cn_factors);
   188                                BLOB_CHOICE_LIST *Choices);
   194 #ifndef GRAPHICS_DISABLED   224                            int class_id, 
int config_id) 
const;
   236                                       int int_result_config) 
const;
   270                          uinT8* pruner_norm_array,
   271                          uinT8* char_norm_array);
   277                              uinT8* char_norm_array,
   278                              uinT8* pruner_array);
   289                           int y_offset, 
const TBOX &wbox);
   334                                uinT8* char_norm_array);
   341                            bool* pretrained_on, 
int* shape_id);
   386              "Prioritize blob division over chopping");
   395              "Character Normalization Range ...");
   401                "Veto ratio between classifier ratings");
   403                "Veto difference between classifier certainties");
   410              "Use pre-adapted classifier templates");
   412              "Save adapted templates to a file");
   415              "Non-linear stroke-density normalization");
   427             "Reliable Config Threshold");
   429             "Enable adaption even if the ambiguities have not been seen");
   431                "Maximum angle delta for prototype clustering");
   433                "Penalty to apply when a non-alnum is vertically out of "   434                "its expected textline position");
   438                "Scale factor for features not used");
   440                "Prune poor adapted results this much worse than best result");
   442                "Threshold at which classify_adapted_pruning_factor starts");
   444             "Threshold for good protos during adaptive 0-255");
   446             "Threshold for good features during adaptive 0-255");
   448              "Do not include character fragments in the"   449              " results of the classifier");
   451                "Exclude fragments that do not match any whole character"   452                " with at least this certainty");
   454              "Bring up graphical debugging windows for fragments training");
   456              "Use two different windows for debugging the matching: "   457              "One for the protos and one for the features.");
   462             "Class Pruner Threshold 0-255");
   464             "Class Pruner Multiplier 0-255:       ");
   466             "Class Pruner CutoffStrength:         ");
   468             "Integer Matcher Multiplier  0-255:   ");
   499              "Assume the input is numbers [0-9].");
   502                "Penalty to add to worst rating for noise");
   519   int NumAdaptationsFailed;
   541 #endif  // TESSERACT_CLASSIFY_CLASSIFY_H_ 
bool LargeSpeckle(const TBLOB &blob)
 
UnicityTable< FontSet > & get_fontset_table()
 
void ResetAdaptiveClassifierInternal()
 
bool classify_enable_adaptive_matcher
 
void InitAdaptedClass(TBLOB *Blob, CLASS_ID ClassId, int FontinfoId, ADAPT_CLASS Class, ADAPT_TEMPLATES Templates)
 
double matcher_good_threshold
 
void RemoveBadMatches(ADAPT_RESULTS *Results)
 
double matcher_avg_noise_size
 
bool classify_nonlinear_norm
 
bool matcher_debug_separate_windows
 
int GetCharNormFeature(const INT_FX_RESULT_STRUCT &fx_info, INT_TEMPLATES templates, uinT8 *pruner_norm_array, uinT8 *char_norm_array)
 
void WriteIntTemplates(FILE *File, INT_TEMPLATES Templates, const UNICHARSET &target_unicharset)
 
bool classify_bln_numeric_mode
 
UnicityTable< FontSet > fontset_table_
 
PROTO_ID MakeNewTempProtos(FEATURE_SET Features, int NumBadFeat, FEATURE_ID BadFeat[], INT_CLASS IClass, ADAPT_CLASS Class, BIT_VECTOR TempProtoMask)
 
void RefreshDebugWindow(ScrollView **win, const char *msg, int y_offset, const TBOX &wbox)
 
FEATURE_SET ExtractIntCNFeatures(const TBLOB &blob, const INT_FX_RESULT_STRUCT &fx_info)
 
void ConvertProto(PROTO Proto, int ProtoId, INT_CLASS Class)
 
bool classify_save_adapted_templates
 
int classify_cp_cutoff_strength
 
uinT16 CLASS_CUTOFF_ARRAY[MAX_NUM_CLASSES]
 
void DisplayAdaptedChar(TBLOB *blob, INT_CLASS_STRUCT *int_class)
 
double tessedit_class_miss_scale
 
ADAPT_TEMPLATES NewAdaptedTemplates(bool InitFromUnicharset)
 
int classify_adapt_feature_threshold
 
CLASS_ID GetClassToDebug(const char *Prompt, bool *adaptive_on, bool *pretrained_on, int *shape_id)
 
NORM_PROTOS * ReadNormProtos(TFile *fp)
 
FEATURE_DEFS_STRUCT feature_defs_
 
int classify_integer_matcher_multiplier
 
bool AdaptableWord(WERD_RES *word)
 
bool LooksLikeGarbage(TBLOB *blob)
 
bool AdaptiveClassifierIsFull() const
 
void ReadNewCutoffs(TFile *fp, CLASS_CUTOFF_ARRAY Cutoffs)
 
void DoAdaptiveMatch(TBLOB *Blob, ADAPT_RESULTS *Results)
 
void ShowBestMatchFor(int shape_id, const INT_FEATURE_STRUCT *features, int num_features)
 
ADAPT_TEMPLATES ReadAdaptedTemplates(TFile *File)
 
void NormalizeOutlines(LIST Outlines, FLOAT32 *XScale, FLOAT32 *YScale)
 
UnicityTable< FontInfo > & get_fontinfo_table()
 
char * classify_learn_debug_str
 
void ClearCharNormArray(uinT8 *char_norm_array)
 
double classify_max_norm_scale_y
 
bool classify_debug_character_fragments
 
void AddNewResult(const UnicharRating &new_result, ADAPT_RESULTS *results)
 
bool disable_character_fragments
 
void PrintAdaptedTemplates(FILE *File, ADAPT_TEMPLATES Templates)
 
double matcher_clustering_max_angle_delta
 
void ExpandShapesAndApplyCorrections(ADAPT_CLASS *classes, bool debug, int class_id, int bottom, int top, float cp_rating, int blob_length, int matcher_multiplier, const uinT8 *cn_factors, UnicharRating *int_result, ADAPT_RESULTS *final_results)
 
bool classify_enable_adaptive_debugger
 
double classify_char_norm_range
 
double matcher_reliable_adaptive_result
 
ShapeTable * shape_table_
 
int classify_learning_debug_level
 
int classify_class_pruner_multiplier
 
double classify_min_norm_scale_x
 
bool classify_use_pre_adapted_templates
 
ADAPT_TEMPLATES BackupAdaptedTemplates
 
double speckle_rating_penalty
 
static void SetupBLCNDenorms(const TBLOB &blob, bool nonlinear_norm, DENORM *bl_denorm, DENORM *cn_denorm, INT_FX_RESULT_STRUCT *fx_info)
 
void ComputeIntCharNormArray(const FEATURE_STRUCT &norm_feature, uinT8 *char_norm_array)
 
int GetAdaptiveFeatures(TBLOB *Blob, INT_FEATURE_ARRAY IntFeatures, FEATURE_SET *FloatFeatures)
 
void LearnPieces(const char *fontname, int start, int length, float threshold, CharSegmentationType segmentation, const char *correct_text, WERD_RES *word)
 
int ShapeIDToClassID(int shape_id) const
 
void UpdateAmbigsGroup(CLASS_ID class_id, TBLOB *Blob)
 
void PrintAdaptiveMatchResults(const ADAPT_RESULTS &results)
 
void MasterMatcher(INT_TEMPLATES templates, inT16 num_features, const INT_FEATURE_STRUCT *features, const uinT8 *norm_factors, ADAPT_CLASS *classes, int debug, int matcher_multiplier, const TBOX &blob_box, const GenericVector< CP_RESULT_STRUCT > &results, ADAPT_RESULTS *final_results)
 
bool AdaptiveClassifierIsEmpty() const
 
FEATURE_SET ExtractIntGeoFeatures(const TBLOB &blob, const INT_FX_RESULT_STRUCT &fx_info)
 
void AmbigClassifier(const GenericVector< INT_FEATURE_STRUCT > &int_features, const INT_FX_RESULT_STRUCT &fx_info, const TBLOB *blob, INT_TEMPLATES templates, ADAPT_CLASS *classes, UNICHAR_ID *ambiguities, ADAPT_RESULTS *results)
 
double matcher_perfect_threshold
 
const ShapeTable * shape_table() const
 
INT_TEMPLATES ReadIntTemplates(TFile *fp)
 
#define double_VAR_H(name, val, comment)
 
void ComputeCharNormArrays(FEATURE_STRUCT *norm_feature, INT_TEMPLATES_STRUCT *templates, uinT8 *char_norm_array, uinT8 *pruner_array)
 
double matcher_bad_match_pad
 
int classify_adapt_proto_threshold
 
double classify_min_norm_scale_y
 
void StartBackupAdaptiveClassifier()
 
void AdaptToChar(TBLOB *Blob, CLASS_ID ClassId, int FontinfoId, FLOAT32 Threshold, ADAPT_TEMPLATES adaptive_templates)
 
int MakeNewTemporaryConfig(ADAPT_TEMPLATES Templates, CLASS_ID ClassId, int FontinfoId, int NumFeatures, INT_FEATURE_ARRAY Features, FEATURE_SET FloatFeatures)
 
int GetFontinfoId(ADAPT_CLASS Class, uinT8 ConfigId)
 
ADAPT_TEMPLATES AdaptedTemplates
 
FLOAT32 ComputeNormMatch(CLASS_ID ClassId, const FEATURE_STRUCT &feature, BOOL8 DebugMatch)
 
double ComputeCorrectedRating(bool debug, int unichar_id, double cp_rating, double im_rating, int feature_misses, int bottom, int top, int blob_length, int matcher_multiplier, const uinT8 *cn_factors)
 
double classify_adapted_pruning_threshold
 
void AddLargeSpeckleTo(int blob_length, BLOB_CHOICE_LIST *choices)
 
void MakePermanent(ADAPT_TEMPLATES Templates, CLASS_ID ClassId, int ConfigId, TBLOB *Blob)
 
void WriteAdaptedTemplates(FILE *File, ADAPT_TEMPLATES Templates)
 
static void ExtractFeatures(const TBLOB &blob, bool nonlinear_norm, GenericVector< INT_FEATURE_STRUCT > *bl_features, GenericVector< INT_FEATURE_STRUCT > *cn_features, INT_FX_RESULT_STRUCT *results, GenericVector< int > *outline_cn_counts)
 
double classify_max_norm_scale_x
 
int CharNormClassifier(TBLOB *blob, const TrainingSample &sample, ADAPT_RESULTS *adapt_results)
 
void EndAdaptiveClassifier()
 
void SwitchAdaptiveClassifier()
 
int classify_class_pruner_threshold
 
double classify_character_fragments_garbage_certainty_threshold
 
int matcher_permanent_classes_min
 
#define STRING_VAR_H(name, val, comment)
 
double classify_misfit_junk_penalty
 
int ClassAndConfigIDToFontOrShapeID(int class_id, int int_result_config) const
 
double classify_adapted_pruning_factor
 
void SetStaticClassifier(ShapeClassifier *static_classifier)
 
int matcher_sufficient_examples_for_prototyping
 
void SetAdaptiveThreshold(FLOAT32 Threshold)
 
void RemoveExtraPuncs(ADAPT_RESULTS *Results)
 
double classify_max_rating_ratio
 
int tessedit_single_match
 
void AdaptiveClassifier(TBLOB *Blob, BLOB_CHOICE_LIST *Choices)
 
int matcher_min_examples_for_prototyping
 
void ConvertMatchesToChoices(const DENORM &denorm, const TBOX &box, ADAPT_RESULTS *Results, BLOB_CHOICE_LIST *Choices)
 
int PruneClasses(const INT_TEMPLATES_STRUCT *int_templates, int num_features, int keep_this, const INT_FEATURE_STRUCT *features, const uinT8 *normalization_factors, const uinT16 *expected_num_features, GenericVector< CP_RESULT_STRUCT > *results)
 
void LearnBlob(const STRING &fontname, TBLOB *Blob, const DENORM &cn_denorm, const INT_FX_RESULT_STRUCT &fx_info, const char *blob_text)
 
void InitAdaptiveClassifier(TessdataManager *mgr)
 
bool WriteTRFile(const STRING &filename)
 
void ComputeIntFeatures(FEATURE_SET Features, INT_FEATURE_ARRAY IntFeatures)
 
#define INT_VAR_H(name, val, comment)
 
void DebugAdaptiveClassifier(TBLOB *Blob, ADAPT_RESULTS *Results)
 
double matcher_rating_margin
 
INT_TEMPLATES CreateIntTemplates(CLASSES FloatProtos, const UNICHARSET &target_unicharset)
 
#define BOOL_VAR_H(name, val, comment)
 
void ClassifyAsNoise(ADAPT_RESULTS *Results)
 
bool TempConfigReliable(CLASS_ID class_id, const TEMP_CONFIG &config)
 
FEATURE_SET ExtractPicoFeatures(TBLOB *Blob)
 
double classify_max_certainty_margin
 
void LearnWord(const char *fontname, WERD_RES *word)
 
INT_FEATURE_STRUCT INT_FEATURE_ARRAY[MAX_NUM_INT_FEATURES]
 
bool classify_enable_learning
 
UNICHAR_ID * GetAmbiguities(TBLOB *Blob, CLASS_ID CorrectClass)
 
UnicityTable< FontInfo > fontinfo_table_
 
double speckle_large_max_size
 
STRING ClassIDToDebugStr(const INT_TEMPLATES_STRUCT *templates, int class_id, int config_id) const
 
UNICHAR_ID * BaselineClassifier(TBLOB *Blob, const GenericVector< INT_FEATURE_STRUCT > &int_features, const INT_FX_RESULT_STRUCT &fx_info, ADAPT_TEMPLATES Templates, ADAPT_RESULTS *Results)
 
const UnicityTable< FontInfo > & get_fontinfo_table() const
 
FEATURE_SET ExtractOutlineFeatures(TBLOB *Blob)
 
INT_TEMPLATES PreTrainedTemplates
 
int CharNormTrainingSample(bool pruner_only, int keep_this, const TrainingSample &sample, GenericVector< UnicharRating > *results)