tesseract  4.00.00dev
tesseract::TrainingSample Class Reference

#include <trainingsample.h>

Inheritance diagram for tesseract::TrainingSample:
ELIST_LINK

Public Member Functions

 TrainingSample ()
 
 ~TrainingSample ()
 
FEATURE_STRUCT * GetCNFeature () const
 
TrainingSample * RandomizedCopy (int index) const
 
TrainingSample * Copy () const
 
bool Serialize (FILE *fp) const
 
bool DeSerialize (bool swap, FILE *fp)
 
void ExtractCharDesc (int feature_type, int micro_type, int cn_type, int geo_type, CHAR_DESC_STRUCT *char_desc)
 
void IndexFeatures (const IntFeatureSpace &feature_space)
 
void MapFeatures (const IntFeatureMap &feature_map)
 
Pix * RenderToPix (const UNICHARSET *unicharset) const
 
void DisplayFeatures (ScrollView::Color color, ScrollView *window) const
 
Pix * GetSamplePix (int padding, Pix *page_pix) const
 
UNICHAR_ID class_id () const
 
void set_class_id (int id)
 
int font_id () const
 
void set_font_id (int id)
 
int page_num () const
 
void set_page_num (int page)
 
const TBOX & bounding_box () const
 
void set_bounding_box (const TBOX &box)
 
int num_features () const
 
const INT_FEATURE_STRUCT * features () const
 
int num_micro_features () const
 
const MicroFeature * micro_features () const
 
int outline_length () const
 
float cn_feature (int index) const
 
int geo_feature (int index) const
 
double weight () const
 
void set_weight (double value)
 
double max_dist () const
 
void set_max_dist (double value)
 
int sample_index () const
 
void set_sample_index (int value)
 
bool features_are_mapped () const
 
const GenericVector< int > & mapped_features () const
 
const GenericVector< int > & indexed_features () const
 
bool is_error () const
 
void set_is_error (bool value)
 
- Public Member Functions inherited from ELIST_LINK
 ELIST_LINK ()
 
 ELIST_LINK (const ELIST_LINK &)
 
void operator= (const ELIST_LINK &)
 

Static Public Member Functions

static TrainingSample * CopyFromFeatures (const INT_FX_RESULT_STRUCT &fx_info, const TBOX &bounding_box, const INT_FEATURE_STRUCT *features, int num_features)
 
static TrainingSample * DeSerializeCreate (bool swap, FILE *fp)
 

Detailed Description

Definition at line 53 of file trainingsample.h.

Constructor & Destructor Documentation

◆ TrainingSample()

tesseract::TrainingSample::TrainingSample ( )
inline

Definition at line 55 of file trainingsample.h.

56  : class_id_(INVALID_UNICHAR_ID), font_id_(0), page_num_(0),
57  num_features_(0), num_micro_features_(0), outline_length_(0),
58  features_(NULL), micro_features_(NULL), weight_(1.0),
59  max_dist_(0.0), sample_index_(0),
60  features_are_indexed_(false), features_are_mapped_(false),
61  is_error_(false) {
62  }

◆ ~TrainingSample()

tesseract::TrainingSample::~TrainingSample ( )

Definition at line 45 of file trainingsample.cpp.

45  {
46  delete [] features_;
47  delete [] micro_features_;
48 }

Member Function Documentation

◆ bounding_box()

const TBOX& tesseract::TrainingSample::bounding_box ( ) const
inline

Definition at line 134 of file trainingsample.h.

134  {
135  return bounding_box_;
136  }

◆ class_id()

UNICHAR_ID tesseract::TrainingSample::class_id ( ) const
inline

Definition at line 116 of file trainingsample.h.

116  {
117  return class_id_;
118  }

◆ cn_feature()

float tesseract::TrainingSample::cn_feature ( int  index) const
inline

Definition at line 155 of file trainingsample.h.

155  {
156  return cn_feature_[index];
157  }

◆ Copy()

TrainingSample * tesseract::TrainingSample::Copy ( ) const

Definition at line 183 of file trainingsample.cpp.

183  {
184  TrainingSample* sample = new TrainingSample;
185  sample->class_id_ = class_id_;
186  sample->font_id_ = font_id_;
187  sample->weight_ = weight_;
188  sample->sample_index_ = sample_index_;
189  sample->num_features_ = num_features_;
190  if (num_features_ > 0) {
191  sample->features_ = new INT_FEATURE_STRUCT[num_features_];
192  memcpy(sample->features_, features_, num_features_ * sizeof(features_[0]));
193  }
194  sample->num_micro_features_ = num_micro_features_;
195  if (num_micro_features_ > 0) {
196  sample->micro_features_ = new MicroFeature[num_micro_features_];
197  memcpy(sample->micro_features_, micro_features_,
198  num_micro_features_ * sizeof(micro_features_[0]));
199  }
200  memcpy(sample->cn_feature_, cn_feature_, sizeof(*cn_feature_) * kNumCNParams);
201  memcpy(sample->geo_feature_, geo_feature_, sizeof(*geo_feature_) * GeoCount);
202  return sample;
203 }
float MicroFeature[MFCount]
Definition: mf.h:33
Definition: cluster.h:32

◆ CopyFromFeatures()

TrainingSample * tesseract::TrainingSample::CopyFromFeatures ( const INT_FX_RESULT_STRUCT &  fx_info,
const TBOX &  bounding_box,
const INT_FEATURE_STRUCT *  features,
int  num_features 
)
static

Definition at line 124 of file trainingsample.cpp.

128  {
129  TrainingSample* sample = new TrainingSample;
130  sample->num_features_ = num_features;
131  sample->features_ = new INT_FEATURE_STRUCT[num_features];
132  sample->outline_length_ = fx_info.Length;
133  memcpy(sample->features_, features, num_features * sizeof(features[0]));
134  sample->geo_feature_[GeoBottom] = bounding_box.bottom();
135  sample->geo_feature_[GeoTop] = bounding_box.top();
136  sample->geo_feature_[GeoWidth] = bounding_box.width();
137 
138  // Generate the cn_feature_ from the fx_info.
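139  sample->cn_feature_[CharNormY] =
140  MF_SCALE_FACTOR * (fx_info.Ymean - kBlnBaselineOffset);
141  sample->cn_feature_[CharNormLength] =
142  MF_SCALE_FACTOR * fx_info.Length / LENGTH_COMPRESSION;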
143  sample->cn_feature_[CharNormRx] = MF_SCALE_FACTOR * fx_info.Rx;
144  sample->cn_feature_[CharNormRy] = MF_SCALE_FACTOR * fx_info.Ry;
145 
146  sample->features_are_indexed_ = false;
147  sample->features_are_mapped_ = false;
148  return sample;
149 }
#define LENGTH_COMPRESSION
Definition: normfeat.h:26
const int kBlnBaselineOffset
Definition: normalis.h:29
#define MF_SCALE_FACTOR
Definition: mfoutline.h:63
inT16 top() const
Definition: rect.h:54
inT16 width() const
Definition: rect.h:111
inT16 bottom() const
Definition: rect.h:61
Definition: cluster.h:32

◆ DeSerialize()

bool tesseract::TrainingSample::DeSerialize ( bool  swap,
FILE *  fp 
)

Definition at line 89 of file trainingsample.cpp.

89  {
90  if (fread(&class_id_, sizeof(class_id_), 1, fp) != 1) return false;
91  if (fread(&font_id_, sizeof(font_id_), 1, fp) != 1) return false;
92  if (fread(&page_num_, sizeof(page_num_), 1, fp) != 1) return false;
93  if (!bounding_box_.DeSerialize(swap, fp)) return false;
94  if (fread(&num_features_, sizeof(num_features_), 1, fp) != 1) return false;
95  if (fread(&num_micro_features_, sizeof(num_micro_features_), 1, fp) != 1)
96  return false;
97  if (fread(&outline_length_, sizeof(outline_length_), 1, fp) != 1)
98  return false;
99  if (swap) {
100  ReverseN(&class_id_, sizeof(class_id_));
101  ReverseN(&num_features_, sizeof(num_features_));
102  ReverseN(&num_micro_features_, sizeof(num_micro_features_));
103  ReverseN(&outline_length_, sizeof(outline_length_));
104  }
105  delete [] features_;
106  features_ = new INT_FEATURE_STRUCT[num_features_];
107  if (static_cast<int>(fread(features_, sizeof(*features_), num_features_, fp))
108  != num_features_)
109  return false;
110  delete [] micro_features_;
111  micro_features_ = new MicroFeature[num_micro_features_];
112  if (static_cast<int>(fread(micro_features_, sizeof(*micro_features_),
113  num_micro_features_,
114  fp)) != num_micro_features_)
115  return false;
116  if (fread(cn_feature_, sizeof(*cn_feature_), kNumCNParams, fp) !=
117  kNumCNParams) return false;
118  if (fread(geo_feature_, sizeof(*geo_feature_), GeoCount, fp) != GeoCount)
119  return false;
120  return true;
121 }
bool DeSerialize(bool swap, FILE *fp)
Definition: rect.cpp:192
float MicroFeature[MFCount]
Definition: mf.h:33
void ReverseN(void *ptr, int num_bytes)
Definition: helpers.h:184

◆ DeSerializeCreate()

TrainingSample * tesseract::TrainingSample::DeSerializeCreate ( bool  swap,
FILE *  fp 
)
static

Definition at line 80 of file trainingsample.cpp.

80  {
81  TrainingSample* sample = new TrainingSample;
82  if (sample->DeSerialize(swap, fp)) return sample;
83  delete sample;
84  return NULL;
85 }
Definition: cluster.h:32

◆ DisplayFeatures()

void tesseract::TrainingSample::DisplayFeatures ( ScrollView::Color  color,
ScrollView *  window 
) const

Definition at line 317 of file trainingsample.cpp.

318  {
319  #ifndef GRAPHICS_DISABLED
320  for (int f = 0; f < num_features_; ++f) {
321  RenderIntFeature(window, &features_[f], color);
322  }
323  #endif // GRAPHICS_DISABLED
324 }
void RenderIntFeature(ScrollView *window, const INT_FEATURE_STRUCT *Feature, ScrollView::Color color)
Definition: intproto.cpp:1693

◆ ExtractCharDesc()

void tesseract::TrainingSample::ExtractCharDesc ( int  feature_type,
int  micro_type,
int  cn_type,
int  geo_type,
CHAR_DESC_STRUCT *  char_desc 
)

Definition at line 206 of file trainingsample.cpp.

210  {
211  // Extract the INT features.
212  delete[] features_;
213  FEATURE_SET_STRUCT* char_features = char_desc->FeatureSets[int_feature_type];
214  if (char_features == NULL) {
215  tprintf("Error: no features to train on of type %s\n",
216  kIntFeatureType);
217  num_features_ = 0;
218  features_ = NULL;
219  } else {
220  num_features_ = char_features->NumFeatures;
221  features_ = new INT_FEATURE_STRUCT[num_features_];
222  for (int f = 0; f < num_features_; ++f) {
223  features_[f].X =
224  static_cast<uinT8>(char_features->Features[f]->Params[IntX]);
225  features_[f].Y =
226  static_cast<uinT8>(char_features->Features[f]->Params[IntY]);
227  features_[f].Theta =
228  static_cast<uinT8>(char_features->Features[f]->Params[IntDir]);
229  features_[f].CP_misses = 0;
230  }
231  }
232  // Extract the Micro features.
233  delete[] micro_features_;
234  char_features = char_desc->FeatureSets[micro_type];
235  if (char_features == NULL) {
236  tprintf("Error: no features to train on of type %s\n",
237  kMicroFeatureType);
238  num_micro_features_ = 0;
239  micro_features_ = NULL;
240  } else {
241  num_micro_features_ = char_features->NumFeatures;
242  micro_features_ = new MicroFeature[num_micro_features_];
243  for (int f = 0; f < num_micro_features_; ++f) {
244  for (int d = 0; d < MFCount; ++d) {
245  micro_features_[f][d] = char_features->Features[f]->Params[d];
246  }
247  }
248  }
249  // Extract the CN feature.
250  char_features = char_desc->FeatureSets[cn_type];
251  if (char_features == NULL) {
252  tprintf("Error: no CN feature to train on.\n");
253  } else {
254  ASSERT_HOST(char_features->NumFeatures == 1);
255  cn_feature_[CharNormY] = char_features->Features[0]->Params[CharNormY];
256  cn_feature_[CharNormLength] =
257  char_features->Features[0]->Params[CharNormLength];
258  cn_feature_[CharNormRx] = char_features->Features[0]->Params[CharNormRx];
259  cn_feature_[CharNormRy] = char_features->Features[0]->Params[CharNormRy];
260  }
261  // Extract the Geo feature.
262  char_features = char_desc->FeatureSets[geo_type];
263  if (char_features == NULL) {
264  tprintf("Error: no Geo feature to train on.\n");
265  } else {
266  ASSERT_HOST(char_features->NumFeatures == 1);
267  geo_feature_[GeoBottom] = char_features->Features[0]->Params[GeoBottom];
268  geo_feature_[GeoTop] = char_features->Features[0]->Params[GeoTop];
269  geo_feature_[GeoWidth] = char_features->Features[0]->Params[GeoWidth];
270  }
271  features_are_indexed_ = false;
272  features_are_mapped_ = false;
273 }
FEATURE Features[1]
Definition: ocrfeatures.h:72
Definition: picofeat.h:30
const char * kMicroFeatureType
Definition: featdefs.cpp:41
#define tprintf(...)
Definition: tprintf.h:31
FEATURE_SET FeatureSets[NUM_FEATURE_TYPES]
Definition: featdefs.h:44
#define ASSERT_HOST(x)
Definition: errcode.h:84
float MicroFeature[MFCount]
Definition: mf.h:33
Definition: picofeat.h:29
Definition: mf.h:30
uint8_t uinT8
Definition: host.h:35
const char * kIntFeatureType
Definition: featdefs.cpp:43
FLOAT32 Params[1]
Definition: ocrfeatures.h:65

◆ features()

const INT_FEATURE_STRUCT* tesseract::TrainingSample::features ( ) const
inline

Definition at line 143 of file trainingsample.h.

143  {
144  return features_;
145  }

◆ features_are_mapped()

bool tesseract::TrainingSample::features_are_mapped ( ) const
inline

Definition at line 179 of file trainingsample.h.

179  {
180  return features_are_mapped_;
181  }

◆ font_id()

int tesseract::TrainingSample::font_id ( ) const
inline

Definition at line 122 of file trainingsample.h.

122  {
123  return font_id_;
124  }

◆ geo_feature()

int tesseract::TrainingSample::geo_feature ( int  index) const
inline

Definition at line 158 of file trainingsample.h.

158  {
159  return geo_feature_[index];
160  }

◆ GetCNFeature()

FEATURE_STRUCT * tesseract::TrainingSample::GetCNFeature ( ) const

Definition at line 152 of file trainingsample.cpp.

152  {
153  FEATURE feature = NewFeature(&CharNormDesc);
154  for (int i = 0; i < kNumCNParams; ++i)
155  feature->Params[i] = cn_feature_[i];
156  return feature;
157 }
const FEATURE_DESC_STRUCT CharNormDesc
FEATURE NewFeature(const FEATURE_DESC_STRUCT *FeatureDesc)
Definition: ocrfeatures.cpp:88
FLOAT32 Params[1]
Definition: ocrfeatures.h:65

◆ GetSamplePix()

Pix * tesseract::TrainingSample::GetSamplePix ( int  padding,
Pix *  page_pix 
) const

Definition at line 330 of file trainingsample.cpp.

330  {
331  if (page_pix == NULL)
332  return NULL;
333  int page_width = pixGetWidth(page_pix);
334  int page_height = pixGetHeight(page_pix);
335  TBOX padded_box = bounding_box();
336  padded_box.pad(padding, padding);
337  // Clip the padded_box to the limits of the page
338  TBOX page_box(0, 0, page_width, page_height);
339  padded_box &= page_box;
340  Box* box = boxCreate(page_box.left(), page_height - page_box.top(),
341  page_box.width(), page_box.height());
342  Pix* sample_pix = pixClipRectangle(page_pix, box, NULL);
343  boxDestroy(&box);
344  return sample_pix;
345 }
void pad(int xpad, int ypad)
Definition: rect.h:127
Definition: rect.h:30
const TBOX & bounding_box() const

◆ indexed_features()

const GenericVector<int>& tesseract::TrainingSample::indexed_features ( ) const
inline

Definition at line 186 of file trainingsample.h.

186  {
187  ASSERT_HOST(features_are_indexed_);
188  return mapped_features_;
189  }
#define ASSERT_HOST(x)
Definition: errcode.h:84

◆ IndexFeatures()

void tesseract::TrainingSample::IndexFeatures ( const IntFeatureSpace &  feature_space)

Definition at line 277 of file trainingsample.cpp.

277  {
278  GenericVector<int> indexed_features;
279  feature_space.IndexAndSortFeatures(features_, num_features_,
280  &mapped_features_);
281  features_are_indexed_ = true;
282  features_are_mapped_ = false;
283 }
const GenericVector< int > & indexed_features() const

◆ is_error()

bool tesseract::TrainingSample::is_error ( ) const
inline

Definition at line 190 of file trainingsample.h.

190  {
191  return is_error_;
192  }

◆ MapFeatures()

void tesseract::TrainingSample::MapFeatures ( const IntFeatureMap &  feature_map)

Definition at line 287 of file trainingsample.cpp.

287  {
288  GenericVector<int> indexed_features;
289  feature_map.feature_space().IndexAndSortFeatures(features_, num_features_,
290  &indexed_features);
291  feature_map.MapIndexedFeatures(indexed_features, &mapped_features_);
292  features_are_indexed_ = false;
293  features_are_mapped_ = true;
294 }
const GenericVector< int > & indexed_features() const

◆ mapped_features()

const GenericVector<int>& tesseract::TrainingSample::mapped_features ( ) const
inline

Definition at line 182 of file trainingsample.h.

182  {
183  ASSERT_HOST(features_are_mapped_);
184  return mapped_features_;
185  }
#define ASSERT_HOST(x)
Definition: errcode.h:84

◆ max_dist()

double tesseract::TrainingSample::max_dist ( ) const
inline

Definition at line 167 of file trainingsample.h.

167  {
168  return max_dist_;
169  }

◆ micro_features()

const MicroFeature* tesseract::TrainingSample::micro_features ( ) const
inline

Definition at line 149 of file trainingsample.h.

149  {
150  return micro_features_;
151  }

◆ num_features()

int tesseract::TrainingSample::num_features ( ) const
inline

Definition at line 140 of file trainingsample.h.

140  {
141  return num_features_;
142  }

◆ num_micro_features()

int tesseract::TrainingSample::num_micro_features ( ) const
inline

Definition at line 146 of file trainingsample.h.

146  {
147  return num_micro_features_;
148  }

◆ outline_length()

int tesseract::TrainingSample::outline_length ( ) const
inline

Definition at line 152 of file trainingsample.h.

152  {
153  return outline_length_;
154  }

◆ page_num()

int tesseract::TrainingSample::page_num ( ) const
inline

Definition at line 128 of file trainingsample.h.

128  {
129  return page_num_;
130  }

◆ RandomizedCopy()

TrainingSample * tesseract::TrainingSample::RandomizedCopy ( int  index) const

Definition at line 162 of file trainingsample.cpp.

162  {
163  TrainingSample* sample = Copy();
164  if (index >= 0 && index < kSampleRandomSize) {
165  ++index; // Remove the first combination.
166  int yshift = kYShiftValues[index / kSampleScaleSize];
167  double scaling = kScaleValues[index % kSampleScaleSize];
168  for (int i = 0; i < num_features_; ++i) {
169  double result = (features_[i].X - kRandomizingCenter) * scaling;
170  result += kRandomizingCenter;
171  sample->features_[i].X = ClipToRange(static_cast<int>(result + 0.5), 0,
172  MAX_UINT8);
173  result = (features_[i].Y - kRandomizingCenter) * scaling;
174  result += kRandomizingCenter + yshift;
175  sample->features_[i].Y = ClipToRange(static_cast<int>(result + 0.5), 0,
176  MAX_UINT8);
177  }
178  }
179  return sample;
180 }
#define MAX_UINT8
Definition: host.h:63
T ClipToRange(const T &x, const T &lower_bound, const T &upper_bound)
Definition: helpers.h:122
TrainingSample * Copy() const
const int kRandomizingCenter
Definition: cluster.h:32

◆ RenderToPix()

Pix * tesseract::TrainingSample::RenderToPix ( const UNICHARSET *  unicharset) const

Definition at line 297 of file trainingsample.cpp.

297  {
298  Pix* pix = pixCreate(kIntFeatureExtent, kIntFeatureExtent, 1);
299  for (int f = 0; f < num_features_; ++f) {
300  int start_x = features_[f].X;
301  int start_y = kIntFeatureExtent - features_[f].Y;
302  double dx = cos((features_[f].Theta / 256.0) * 2.0 * PI - PI);
303  double dy = -sin((features_[f].Theta / 256.0) * 2.0 * PI - PI);
304  for (int i = 0; i <= 5; ++i) {
305  int x = static_cast<int>(start_x + dx * i);
306  int y = static_cast<int>(start_y + dy * i);
307  if (x >= 0 && x < 256 && y >= 0 && y < 256)
308  pixSetPixel(pix, x, y, 1);
309  }
310  }
311  if (unicharset != NULL)
312  pixSetText(pix, unicharset->id_to_unichar(class_id_));
313  return pix;
314 }
const char * id_to_unichar(UNICHAR_ID id) const
Definition: unicharset.cpp:266
#define PI
Definition: const.h:19
const int kIntFeatureExtent

◆ sample_index()

int tesseract::TrainingSample::sample_index ( ) const
inline

Definition at line 173 of file trainingsample.h.

173  {
174  return sample_index_;
175  }

◆ Serialize()

bool tesseract::TrainingSample::Serialize ( FILE *  fp) const

Definition at line 54 of file trainingsample.cpp.

54  {
55  if (fwrite(&class_id_, sizeof(class_id_), 1, fp) != 1) return false;
56  if (fwrite(&font_id_, sizeof(font_id_), 1, fp) != 1) return false;
57  if (fwrite(&page_num_, sizeof(page_num_), 1, fp) != 1) return false;
58  if (!bounding_box_.Serialize(fp)) return false;
59  if (fwrite(&num_features_, sizeof(num_features_), 1, fp) != 1) return false;
60  if (fwrite(&num_micro_features_, sizeof(num_micro_features_), 1, fp) != 1)
61  return false;
62  if (fwrite(&outline_length_, sizeof(outline_length_), 1, fp) != 1)
63  return false;
64  if (static_cast<int>(fwrite(features_, sizeof(*features_), num_features_, fp))
65  != num_features_)
66  return false;
67  if (static_cast<int>(fwrite(micro_features_, sizeof(*micro_features_),
68  num_micro_features_,
69  fp)) != num_micro_features_)
70  return false;
71  if (fwrite(cn_feature_, sizeof(*cn_feature_), kNumCNParams, fp) !=
72  kNumCNParams) return false;
73  if (fwrite(geo_feature_, sizeof(*geo_feature_), GeoCount, fp) != GeoCount)
74  return false;
75  return true;
76 }
bool Serialize(FILE *fp) const
Definition: rect.cpp:185

◆ set_bounding_box()

void tesseract::TrainingSample::set_bounding_box ( const TBOX &  box)
inline

Definition at line 137 of file trainingsample.h.

137  {
138  bounding_box_ = box;
139  }

◆ set_class_id()

void tesseract::TrainingSample::set_class_id ( int  id)
inline

Definition at line 119 of file trainingsample.h.

119  {
120  class_id_ = id;
121  }

◆ set_font_id()

void tesseract::TrainingSample::set_font_id ( int  id)
inline

Definition at line 125 of file trainingsample.h.

125  {
126  font_id_ = id;
127  }

◆ set_is_error()

void tesseract::TrainingSample::set_is_error ( bool  value)
inline

Definition at line 193 of file trainingsample.h.

193  {
194  is_error_ = value;
195  }

◆ set_max_dist()

void tesseract::TrainingSample::set_max_dist ( double  value)
inline

Definition at line 170 of file trainingsample.h.

170  {
171  max_dist_ = value;
172  }

◆ set_page_num()

void tesseract::TrainingSample::set_page_num ( int  page)
inline

Definition at line 131 of file trainingsample.h.

131  {
132  page_num_ = page;
133  }

◆ set_sample_index()

void tesseract::TrainingSample::set_sample_index ( int  value)
inline

Definition at line 176 of file trainingsample.h.

176  {
177  sample_index_ = value;
178  }

◆ set_weight()

void tesseract::TrainingSample::set_weight ( double  value)
inline

Definition at line 164 of file trainingsample.h.

164  {
165  weight_ = value;
166  }

◆ weight()

double tesseract::TrainingSample::weight ( ) const
inline

Definition at line 161 of file trainingsample.h.

161  {
162  return weight_;
163  }

The documentation for this class was generated from the following files: