tesseract  4.00.00dev
trainingsample.h
Go to the documentation of this file.
1 // Copyright 2010 Google Inc. All Rights Reserved.
2 // Author: rays@google.com (Ray Smith)
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 // http://www.apache.org/licenses/LICENSE-2.0
8 // Unless required by applicable law or agreed to in writing, software
9 // distributed under the License is distributed on an "AS IS" BASIS,
10 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 // See the License for the specific language governing permissions and
12 // limitations under the License.
13 //
15 
16 #ifndef TESSERACT_TRAINING_TRAININGSAMPLE_H_
17 #define TESSERACT_TRAINING_TRAININGSAMPLE_H_
18 
19 #include "elst.h"
20 #include "featdefs.h"
21 #include "intfx.h"
22 #include "intmatcher.h"
23 #include "matrix.h"
24 #include "mf.h"
25 #include "picofeat.h"
26 #include "shapetable.h"
27 #include "unicharset.h"
28 
29 struct Pix;
30 
31 namespace tesseract {
32 
33 class IntFeatureMap;
34 class IntFeatureSpace;
35 class ShapeTable;
36 
37 // Number of elements of the cn_feature_ array held by each TrainingSample.
38 static const int kNumCNParams = 4;
39 // Number of ways to shift the features when randomizing.
40 static const int kSampleYShiftSize = 5;
41 // Number of ways to scale the features when randomizing.
42 static const int kSampleScaleSize = 3;
43 // Total number of different ways to manipulate the features when randomizing.
44 // The first and last combinations are removed to avoid an excessive
45 // top movement (first) and an identity transformation (last).
46 // WARNING: To avoid patterned duplication of samples, be sure to keep
47 // kSampleRandomSize prime!
48 // E.g. with the current values (kSampleYShiftSize = 5 and kSampleScaleSize = 3)
49 // kSampleRandomSize is 5 * 3 - 2 = 13, which is prime.
50 static const int kSampleRandomSize = kSampleYShiftSize * kSampleScaleSize - 2;
51 // ASSERT_IS_PRIME(kSampleRandomSize) !!
52 
// A single training sample, linkable into an ELIST (derives from ELIST_LINK).
// It stores the sample's identity (class id, font id, page number, bounding
// box), its features (int features, micro-features, the CN feature and the
// geometric feature) and some non-serialized cache data (weight, max distance,
// sample index, indexed/mapped features and the error flag).
// NOTE(review): this listing skips embedded line numbers 55, 63, 67, 69, 72,
// 116, 182 and 186, so the declarator lines for the constructor, for
// CopyFromFeatures, GetCNFeature, class_id(), mapped_features() and
// indexed_features() are not visible in the class body below. Their signatures
// can be read from the member index at the end of this page; the content of
// listing line 63 cannot be determined from here.
53 class TrainingSample : public ELIST_LINK {
54  public:
56  : class_id_(INVALID_UNICHAR_ID), font_id_(0), page_num_(0),
57  num_features_(0), num_micro_features_(0), outline_length_(0),
58  features_(NULL), micro_features_(NULL), weight_(1.0),
59  max_dist_(0.0), sample_index_(0),
60  features_are_indexed_(false), features_are_mapped_(false),
61  is_error_(false) {
62  }
64 
65  // Saves the given features into a TrainingSample. The features are copied,
66  // so may be deleted afterwards. Delete the return value after use.
68  const TBOX& bounding_box,
70  int num_features);
71  // Returns the cn_feature as a FEATURE_STRUCT* needed by cntraining.
73  // Constructs and returns a copy "randomized" by the method given by
74  // the randomizer index. If index is out of [0, kSampleRandomSize) then
75  // an exact copy is returned.
76  TrainingSample* RandomizedCopy(int index) const;
77  // Constructs and returns an exact copy.
78  TrainingSample* Copy() const;
79 
80  // WARNING! Serialize/DeSerialize do not save/restore the "cache" data
81  // members, which is mostly the mapped features, and the weight.
82  // It is assumed these can all be reconstructed from what is saved.
83  // Writes to the given file. Returns false in case of error.
84  bool Serialize(FILE* fp) const;
85  // Creates from the given file. Returns NULL in case of error.
86  // If swap is true, assumes a big/little-endian swap is needed.
87  static TrainingSample* DeSerializeCreate(bool swap, FILE* fp);
88  // Reads from the given file. Returns false in case of error.
89  // If swap is true, assumes a big/little-endian swap is needed.
90  bool DeSerialize(bool swap, FILE* fp);
91 
92  // Extracts the needed information from the CHAR_DESC_STRUCT.
93  void ExtractCharDesc(int feature_type, int micro_type,
94  int cn_type, int geo_type,
95  CHAR_DESC_STRUCT* char_desc);
96 
97  // Sets mapped_features_ to the indexed versions of features_, computed
98  // using the provided feature_space.
99  void IndexFeatures(const IntFeatureSpace& feature_space);
100  // Sets the mapped_features_ from the features_ using the provided
101  // feature_map.
102  void MapFeatures(const IntFeatureMap& feature_map);
103 
104  // Returns a pix representing the sample. (Int features only.)
105  Pix* RenderToPix(const UNICHARSET* unicharset) const;
106  // Displays the features in the given window with the given color.
107  void DisplayFeatures(ScrollView::Color color, ScrollView* window) const;
108 
109  // Returns a pix of the original sample image. The pix is padded all round
110  // by padding wherever possible.
111  // The returned Pix must be pixDestroyed after use.
112  // If the input page_pix is NULL, NULL is returned.
113  Pix* GetSamplePix(int padding, Pix* page_pix) const;
114 
115  // Accessors.
117  return class_id_;
118  }
119  void set_class_id(int id) {
120  class_id_ = id;
121  }
122  int font_id() const {
123  return font_id_;
124  }
125  void set_font_id(int id) {
126  font_id_ = id;
127  }
128  int page_num() const {
129  return page_num_;
130  }
131  void set_page_num(int page) {
132  page_num_ = page;
133  }
134  const TBOX& bounding_box() const {
135  return bounding_box_;
136  }
137  void set_bounding_box(const TBOX& box) {
138  bounding_box_ = box;
139  }
140  int num_features() const {
141  return num_features_;
142  }
143  const INT_FEATURE_STRUCT* features() const {
144  return features_;
145  }
146  int num_micro_features() const {
147  return num_micro_features_;
148  }
149  const MicroFeature* micro_features() const {
150  return micro_features_;
151  }
152  int outline_length() const {
153  return outline_length_;
154  }
  // No bounds check is made on index in cn_feature() or geo_feature().
155  float cn_feature(int index) const {
156  return cn_feature_[index];
157  }
158  int geo_feature(int index) const {
159  return geo_feature_[index];
160  }
161  double weight() const {
162  return weight_;
163  }
164  void set_weight(double value) {
165  weight_ = value;
166  }
167  double max_dist() const {
168  return max_dist_;
169  }
170  void set_max_dist(double value) {
171  max_dist_ = value;
172  }
173  int sample_index() const {
174  return sample_index_;
175  }
176  void set_sample_index(int value) {
177  sample_index_ = value;
178  }
179  bool features_are_mapped() const {
180  return features_are_mapped_;
181  }
  // The two accessor bodies below both return the same mapped_features_
  // vector; each first asserts (ASSERT_HOST) the flag that says which form
  // the vector currently holds. This one requires features_are_mapped_.
183  ASSERT_HOST(features_are_mapped_);
184  return mapped_features_;
185  }
  // This one requires features_are_indexed_.
187  ASSERT_HOST(features_are_indexed_);
188  return mapped_features_;
189  }
190  bool is_error() const {
191  return is_error_;
192  }
193  void set_is_error(bool value) {
194  is_error_ = value;
195  }
196 
197  private:
198  // Unichar id that this sample represents. There obviously must be a
199  // reference UNICHARSET somewhere. Usually in TrainingSampleSet.
200  UNICHAR_ID class_id_;
201  // Font id in which this sample was printed. Refers to a fontinfo_table_ in
202  // MasterTrainer.
203  int font_id_;
204  // Number of page that the sample came from.
205  int page_num_;
206  // Bounding box of sample in original image.
207  TBOX bounding_box_;
208  // Number of INT_FEATURE_STRUCT in features_ array.
209  int num_features_;
210  // Number of MicroFeature in micro_features_ array.
211  int num_micro_features_;
212  // Total length of outline in the baseline normalized coordinate space.
213  // See comment in WERD_RES class definition for a discussion of coordinate
214  // spaces.
215  int outline_length_;
216  // Array of int features; num_features_ entries.
217  INT_FEATURE_STRUCT* features_;
218  // Array of micro-features; num_micro_features_ entries.
219  MicroFeature* micro_features_;
220  // The one and only CN feature. Indexed by NORM_PARAM_NAME enum.
221  float cn_feature_[kNumCNParams];
222  // The one and only geometric feature. (Aims at replacing cn_feature_).
223  // Indexed by GeoParams enum in picofeat.h
224  int geo_feature_[GeoCount];
225 
226  // Non-serialized cache data.
227  // Weight used for boosting training.
228  double weight_;
229  // Maximum distance to other samples of same class/font used in computing
230  // the canonical sample.
231  double max_dist_;
232  // Global index of this sample.
233  int sample_index_;
234  // Indexed/mapped features, as indicated by the bools below.
235  GenericVector<int> mapped_features_;
236  bool features_are_indexed_;
237  bool features_are_mapped_;
238  // True if the last classification was an error by the current definition.
239  bool is_error_;
240 
241  // Randomizing factors.
  // Table sizes match kSampleYShiftSize and kSampleScaleSize; the values are
  // defined outside this header.
242  static const int kYShiftValues[kSampleYShiftSize];
243  static const double kScaleValues[kSampleScaleSize];
244 };
245 
247 
248 } // namespace tesseract
249 
250 #endif // TESSERACT_TRAINING_TRAININGSAMPLE_H_
void set_max_dist(double value)
UNICHAR_ID class_id() const
void MapFeatures(const IntFeatureMap &feature_map)
FEATURE_STRUCT * GetCNFeature() const
int UNICHAR_ID
Definition: unichar.h:33
bool features_are_mapped() const
void set_sample_index(int value)
void set_weight(double value)
void set_bounding_box(const TBOX &box)
TrainingSample * RandomizedCopy(int index) const
bool DeSerialize(bool swap, FILE *fp)
static TrainingSample * DeSerializeCreate(bool swap, FILE *fp)
void IndexFeatures(const IntFeatureSpace &feature_space)
#define ASSERT_HOST(x)
Definition: errcode.h:84
float MicroFeature[MFCount]
Definition: mf.h:33
Pix * GetSamplePix(int padding, Pix *page_pix) const
void set_is_error(bool value)
int geo_feature(int index) const
Definition: rect.h:30
void ExtractCharDesc(int feature_type, int micro_type, int cn_type, int geo_type, CHAR_DESC_STRUCT *char_desc)
float cn_feature(int index) const
TrainingSample * Copy() const
const INT_FEATURE_STRUCT * features() const
Pix * RenderToPix(const UNICHARSET *unicharset) const
ELISTIZEH(AmbigSpec)
const GenericVector< int > & mapped_features() const
bool Serialize(FILE *fp) const
void DisplayFeatures(ScrollView::Color color, ScrollView *window) const
const GenericVector< int > & indexed_features() const
const TBOX & bounding_box() const
static TrainingSample * CopyFromFeatures(const INT_FX_RESULT_STRUCT &fx_info, const TBOX &bounding_box, const INT_FEATURE_STRUCT *features, int num_features)
const MicroFeature * micro_features() const