tesseract  4.00.00dev
networkio.cpp
Go to the documentation of this file.
1 // File: networkio.cpp
3 // Description: Network input/output data, allowing float/int implementations.
4 // Author: Ray Smith
5 // Created: Thu Jun 19 13:01:31 PST 2014
6 //
7 // (C) Copyright 2014, Google Inc.
8 // Licensed under the Apache License, Version 2.0 (the "License");
9 // you may not use this file except in compliance with the License.
10 // You may obtain a copy of the License at
11 // http://www.apache.org/licenses/LICENSE-2.0
12 // Unless required by applicable law or agreed to in writing, software
13 // distributed under the License is distributed on an "AS IS" BASIS,
14 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 // See the License for the specific language governing permissions and
16 // limitations under the License.
18 
19 #include "networkio.h"
20 
21 #include "allheaders.h"
22 #include "functions.h"
23 #include "statistc.h"
24 #include "tprintf.h"
25 
26 namespace tesseract {
27 
28 // Minimum value to output for certainty.
29 const float kMinCertainty = -20.0f;
30 // Probability corresponding to kMinCertainty.
31 const float kMinProb = exp(kMinCertainty);
32 
33 // Resizes to a specific size as a 2-d temp buffer. No batches, no y-dim.
34 void NetworkIO::Resize2d(bool int_mode, int width, int num_features) {
35  stride_map_ = StrideMap();
36  int_mode_ = int_mode;
37  if (int_mode_) {
38  i_.ResizeNoInit(width, num_features);
39  } else {
40  f_.ResizeNoInit(width, num_features);
41  }
42 }
43 
44 // Resizes to a specific stride_map.
46  int num_features) {
47  // If this assert fails, it most likely got here through an uninitialized
48  // scratch element, ie call NetworkScratch::IO::Resizexxx() not
49  // NetworkIO::Resizexxx()!!
50  ASSERT_HOST(this != NULL);
51  stride_map_ = stride_map;
52  int_mode_ = int_mode;
53  if (int_mode_) {
54  i_.ResizeNoInit(stride_map.Width(), num_features);
55  } else {
56  f_.ResizeNoInit(stride_map.Width(), num_features);
57  }
59 }
60 
61 // Shrinks image size by x_scale,y_scale, and use given number of features.
63  int x_scale, int y_scale, int num_features) {
64  StrideMap stride_map = src.stride_map_;
65  stride_map.ScaleXY(x_scale, y_scale);
66  ResizeToMap(src.int_mode_, stride_map, num_features);
67 }
68 
69 // Resizes to just 1 x-coord, whatever the input.
70 void NetworkIO::ResizeXTo1(const NetworkIO& src, int num_features) {
71  StrideMap stride_map = src.stride_map_;
72  stride_map.ReduceWidthTo1();
73  ResizeToMap(src.int_mode_, stride_map, num_features);
74 }
75 
76 // Initialize all the array to zero.
78  int width = Width();
79  // Zero out everything. Column-by-column in case it is aligned.
80  for (int t = 0; t < width; ++t) {
81  ZeroTimeStep(t);
82  }
83 }
84 
85 // Initializes to zero all elements of the array that do not correspond to
86 // valid image positions. (If a batch of different-sized images are packed
87 // together, then there will be padding pixels.)
89  int num_features = NumFeatures();
90  int full_width = stride_map_.Size(FD_WIDTH);
91  int full_height = stride_map_.Size(FD_HEIGHT);
92  StrideMap::Index b_index(stride_map_);
93  do {
94  int end_x = b_index.MaxIndexOfDim(FD_WIDTH) + 1;
95  if (end_x < full_width) {
96  // The width is small, so fill for every valid y.
97  StrideMap::Index y_index(b_index);
98  int fill_size = num_features * (full_width - end_x);
99  do {
100  StrideMap::Index z_index(y_index);
101  z_index.AddOffset(end_x, FD_WIDTH);
102  if (int_mode_) {
103  ZeroVector(fill_size, i_[z_index.t()]);
104  } else {
105  ZeroVector(fill_size, f_[z_index.t()]);
106  }
107  } while (y_index.AddOffset(1, FD_HEIGHT));
108  }
109  int end_y = b_index.MaxIndexOfDim(FD_HEIGHT) + 1;
110  if (end_y < full_height) {
111  // The height is small, so fill in the space in one go.
112  StrideMap::Index y_index(b_index);
113  y_index.AddOffset(end_y, FD_HEIGHT);
114  int fill_size = num_features * full_width * (full_height - end_y);
115  if (int_mode_) {
116  ZeroVector(fill_size, i_[y_index.t()]);
117  } else {
118  ZeroVector(fill_size, f_[y_index.t()]);
119  }
120  }
121  } while (b_index.AddOffset(1, FD_BATCH));
122 }
123 
124 // Helper computes a black point and white point to contrast-enhance an image.
125 // The computation is based on the assumption that the image is of a single line
126 // of text, so a horizontal line through the middle of the image passes through
127 // at least some of it, so local minima and maxima are a good proxy for black
128 // and white pixel samples.
129 static void ComputeBlackWhite(Pix* pix, float* black, float* white) {
130  int width = pixGetWidth(pix);
131  int height = pixGetHeight(pix);
132  STATS mins(0, 256), maxes(0, 256);
133  if (width >= 3) {
134  int y = height / 2;
135  l_uint32* line = pixGetData(pix) + pixGetWpl(pix) * y;
136  int prev = GET_DATA_BYTE(line, 0);
137  int curr = GET_DATA_BYTE(line, 1);
138  for (int x = 1; x + 1 < width; ++x) {
139  int next = GET_DATA_BYTE(line, x + 1);
140  if ((curr < prev && curr <= next) || (curr <= prev && curr < next)) {
141  // Local minimum.
142  mins.add(curr, 1);
143  }
144  if ((curr > prev && curr >= next) || (curr >= prev && curr > next)) {
145  // Local maximum.
146  maxes.add(curr, 1);
147  }
148  prev = curr;
149  curr = next;
150  }
151  }
152  if (mins.get_total() == 0) mins.add(0, 1);
153  if (maxes.get_total() == 0) maxes.add(255, 1);
154  *black = mins.ile(0.25);
155  *white = maxes.ile(0.75);
156 }
157 
158 // Sets up the array from the given image, using the currently set int_mode_.
159 // If the image width doesn't match the shape, the image is truncated or padded
160 // with noise to match.
161 void NetworkIO::FromPix(const StaticShape& shape, const Pix* pix,
162  TRand* randomizer) {
163  std::vector<const Pix*> pixes(1, pix);
164  FromPixes(shape, pixes, randomizer);
165 }
166 
167 // Sets up the array from the given set of images, using the currently set
168 // int_mode_. If the image width doesn't match the shape, the images are
169 // truncated or padded with noise to match.
171  const std::vector<const Pix*>& pixes,
172  TRand* randomizer) {
173  int target_height = shape.height();
174  int target_width = shape.width();
175  std::vector<std::pair<int, int>> h_w_pairs;
176  for (auto pix : pixes) {
177  Pix* var_pix = const_cast<Pix*>(pix);
178  int width = pixGetWidth(var_pix);
179  if (target_width != 0) width = target_width;
180  int height = pixGetHeight(var_pix);
181  if (target_height != 0) height = target_height;
182  h_w_pairs.emplace_back(height, width);
183  }
184  stride_map_.SetStride(h_w_pairs);
185  ResizeToMap(int_mode(), stride_map_, shape.depth());
186  // Iterate over the images again to copy the data.
187  for (size_t b = 0; b < pixes.size(); ++b) {
188  Pix* pix = const_cast<Pix*>(pixes[b]);
189  float black = 0.0f, white = 255.0f;
190  if (shape.depth() != 3) ComputeBlackWhite(pix, &black, &white);
191  float contrast = (white - black) / 2.0f;
192  if (contrast <= 0.0f) contrast = 1.0f;
193  if (shape.height() == 1) {
194  Copy1DGreyImage(b, pix, black, contrast, randomizer);
195  } else {
196  Copy2DImage(b, pix, black, contrast, randomizer);
197  }
198  }
199 }
200 
201 // Copies the given pix to *this at the given batch index, stretching and
202 // clipping the pixel values so that [black, black + 2*contrast] maps to the
203 // dynamic range of *this, ie [-1,1] for a float and (-127,127) for int.
204 // This is a 2-d operation in the sense that the output depth is the number
205 // of input channels, the height is the height of the image, and the width
206 // is the width of the image, or truncated/padded with noise if the width
207 // is a fixed size.
208 void NetworkIO::Copy2DImage(int batch, Pix* pix, float black, float contrast,
209  TRand* randomizer) {
210  int width = pixGetWidth(pix);
211  int height = pixGetHeight(pix);
212  int wpl = pixGetWpl(pix);
213  StrideMap::Index index(stride_map_);
214  index.AddOffset(batch, FD_BATCH);
215  int t = index.t();
216  int target_height = stride_map_.Size(FD_HEIGHT);
217  int target_width = stride_map_.Size(FD_WIDTH);
218  int num_features = NumFeatures();
219  bool color = num_features == 3;
220  if (width > target_width) width = target_width;
221  uinT32* line = pixGetData(pix);
222  for (int y = 0; y < target_height; ++y, line += wpl) {
223  int x = 0;
224  if (y < height) {
225  for (x = 0; x < width; ++x, ++t) {
226  if (color) {
227  int f = 0;
228  for (int c = COLOR_RED; c <= COLOR_BLUE; ++c) {
229  int pixel = GET_DATA_BYTE(line + x, c);
230  SetPixel(t, f++, pixel, black, contrast);
231  }
232  } else {
233  int pixel = GET_DATA_BYTE(line, x);
234  SetPixel(t, 0, pixel, black, contrast);
235  }
236  }
237  }
238  for (; x < target_width; ++x) Randomize(t++, 0, num_features, randomizer);
239  }
240 }
241 
242 // Copies the given pix to *this at the given batch index, as Copy2DImage
243 // above, except that the output depth is the height of the input image, the
244 // output height is 1, and the output width as for Copy2DImage.
245 // The image is thus treated as a 1-d set of vertical pixel strips.
246 void NetworkIO::Copy1DGreyImage(int batch, Pix* pix, float black,
247  float contrast, TRand* randomizer) {
248  int width = pixGetWidth(pix);
249  int height = pixGetHeight(pix);
250  ASSERT_HOST(height == NumFeatures());
251  int wpl = pixGetWpl(pix);
252  StrideMap::Index index(stride_map_);
253  index.AddOffset(batch, FD_BATCH);
254  int t = index.t();
255  int target_width = stride_map_.Size(FD_WIDTH);
256  if (width > target_width) width = target_width;
257  int x;
258  for (x = 0; x < width; ++x, ++t) {
259  for (int y = 0; y < height; ++y) {
260  uinT32* line = pixGetData(pix) + wpl * y;
261  int pixel = GET_DATA_BYTE(line, x);
262  SetPixel(t, y, pixel, black, contrast);
263  }
264  }
265  for (; x < target_width; ++x) Randomize(t++, 0, height, randomizer);
266 }
267 
268 // Helper stores the pixel value in i_ or f_ according to int_mode_.
269 // t: is the index from the StrideMap corresponding to the current
270 // [batch,y,x] position
271 // f: is the index into the depth/channel
272 // pixel: the value of the pixel from the image (in one channel)
273 // black: the pixel value to map to the lowest of the range of *this
274 // contrast: the range of pixel values to stretch to half the range of *this.
275 void NetworkIO::SetPixel(int t, int f, int pixel, float black, float contrast) {
276  float float_pixel = (pixel - black) / contrast - 1.0f;
277  if (int_mode_) {
278  i_[t][f] = ClipToRange(IntCastRounded((MAX_INT8 + 1) * float_pixel),
279  -MAX_INT8, MAX_INT8);
280  } else {
281  f_[t][f] = float_pixel;
282  }
283 }
284 
285 // Converts the array to a Pix. Must be pixDestroyed after use.
286 Pix* NetworkIO::ToPix() const {
287  // Count the width of the image, and find the max multiplication factor.
288  int im_width = stride_map_.Size(FD_WIDTH);
289  int im_height = stride_map_.Size(FD_HEIGHT);
290  int num_features = NumFeatures();
291  int feature_factor = 1;
292  if (num_features == 3) {
293  // Special hack for color.
294  num_features = 1;
295  feature_factor = 3;
296  }
297  Pix* pix = pixCreate(im_width, im_height * num_features, 32);
298  StrideMap::Index index(stride_map_);
299  do {
300  int im_x = index.index(FD_WIDTH);
301  int top_im_y = index.index(FD_HEIGHT);
302  int im_y = top_im_y;
303  int t = index.t();
304  if (int_mode_) {
305  const inT8* features = i_[t];
306  for (int y = 0; y < num_features; ++y, im_y += im_height) {
307  int pixel = features[y * feature_factor];
308  // 1 or 2 features use greyscale.
309  int red = ClipToRange(pixel + 128, 0, 255);
310  int green = red, blue = red;
311  if (feature_factor == 3) {
312  // With 3 features assume RGB color.
313  green = ClipToRange(features[y * feature_factor + 1] + 128, 0, 255);
314  blue = ClipToRange(features[y * feature_factor + 2] + 128, 0, 255);
315  } else if (num_features > 3) {
316  // More than 3 features use false yellow/blue color, assuming a signed
317  // input in the range [-1,1].
318  red = abs(pixel) * 2;
319  if (pixel >= 0) {
320  green = red;
321  blue = 0;
322  } else {
323  blue = red;
324  green = red = 0;
325  }
326  }
327  pixSetPixel(pix, im_x, im_y, (red << L_RED_SHIFT) |
328  (green << L_GREEN_SHIFT) |
329  (blue << L_BLUE_SHIFT));
330  }
331  } else {
332  const float* features = f_[t];
333  for (int y = 0; y < num_features; ++y, im_y += im_height) {
334  float pixel = features[y * feature_factor];
335  // 1 or 2 features use greyscale.
336  int red = ClipToRange(IntCastRounded((pixel + 1.0f) * 127.5f), 0, 255);
337  int green = red, blue = red;
338  if (feature_factor == 3) {
339  // With 3 features assume RGB color.
340  pixel = features[y * feature_factor + 1];
341  green = ClipToRange(IntCastRounded((pixel + 1.0f) * 127.5f), 0, 255);
342  pixel = features[y * feature_factor + 2];
343  blue = ClipToRange(IntCastRounded((pixel + 1.0f) * 127.5f), 0, 255);
344  } else if (num_features > 3) {
345  // More than 3 features use false yellow/blue color, assuming a signed
346  // input in the range [-1,1].
347  red = ClipToRange(IntCastRounded(fabs(pixel) * 255), 0, 255);
348  if (pixel >= 0) {
349  green = red;
350  blue = 0;
351  } else {
352  blue = red;
353  green = red = 0;
354  }
355  }
356  pixSetPixel(pix, im_x, im_y, (red << L_RED_SHIFT) |
357  (green << L_GREEN_SHIFT) |
358  (blue << L_BLUE_SHIFT));
359  }
360  }
361  } while (index.Increment());
362  return pix;
363 }
364 
365 // Prints the first and last num timesteps of the array for each feature.
366 void NetworkIO::Print(int num) const {
367  int num_features = NumFeatures();
368  for (int y = 0; y < num_features; ++y) {
369  for (int t = 0; t < Width(); ++t) {
370  if (num == 0 || t < num || t + num >= Width()) {
371  if (int_mode_) {
372  tprintf(" %g", static_cast<float>(i_[t][y]) / MAX_INT8);
373  } else {
374  tprintf(" %g", f_[t][y]);
375  }
376  }
377  }
378  tprintf("\n");
379  }
380 }
381 
382 // Copies a single time step from src.
383 void NetworkIO::CopyTimeStepFrom(int dest_t, const NetworkIO& src, int src_t) {
384  ASSERT_HOST(int_mode_ == src.int_mode_);
385  if (int_mode_) {
386  memcpy(i_[dest_t], src.i_[src_t], i_.dim2() * sizeof(i_[0][0]));
387  } else {
388  memcpy(f_[dest_t], src.f_[src_t], f_.dim2() * sizeof(f_[0][0]));
389  }
390 }
391 
392 // Copies a part of single time step from src.
393 void NetworkIO::CopyTimeStepGeneral(int dest_t, int dest_offset,
394  int num_features, const NetworkIO& src,
395  int src_t, int src_offset) {
396  ASSERT_HOST(int_mode_ == src.int_mode_);
397  if (int_mode_) {
398  memcpy(i_[dest_t] + dest_offset, src.i_[src_t] + src_offset,
399  num_features * sizeof(i_[0][0]));
400  } else {
401  memcpy(f_[dest_t] + dest_offset, src.f_[src_t] + src_offset,
402  num_features * sizeof(f_[0][0]));
403  }
404 }
405 
406 // Zeroes a single time step.
407 void NetworkIO::ZeroTimeStepGeneral(int t, int offset, int num_features) {
408  if (int_mode_) {
409  ZeroVector(num_features, i_[t] + offset);
410  } else {
411  ZeroVector(num_features, f_[t] + offset);
412  }
413 }
414 
415 // Sets the given range to random values.
416 void NetworkIO::Randomize(int t, int offset, int num_features,
417  TRand* randomizer) {
418  if (int_mode_) {
419  inT8* line = i_[t] + offset;
420  for (int i = 0; i < num_features; ++i)
421  line[i] = IntCastRounded(randomizer->SignedRand(MAX_INT8));
422  } else {
423  // float mode.
424  float* line = f_[t] + offset;
425  for (int i = 0; i < num_features; ++i)
426  line[i] = randomizer->SignedRand(1.0);
427  }
428 }
429 
430 // Helper returns the label and score of the best choice over a range.
431 int NetworkIO::BestChoiceOverRange(int t_start, int t_end, int not_this,
432  int null_ch, float* rating,
433  float* certainty) const {
434  if (t_end <= t_start) return -1;
435  int max_char = -1;
436  float min_score = 0.0f;
437  for (int c = 0; c < NumFeatures(); ++c) {
438  if (c == not_this || c == null_ch) continue;
439  ScoresOverRange(t_start, t_end, c, null_ch, rating, certainty);
440  if (max_char < 0 || *rating < min_score) {
441  min_score = *rating;
442  max_char = c;
443  }
444  }
445  ScoresOverRange(t_start, t_end, max_char, null_ch, rating, certainty);
446  return max_char;
447 }
448 
449 // Helper returns the rating and certainty of the choice over a range in output.
450 void NetworkIO::ScoresOverRange(int t_start, int t_end, int choice, int null_ch,
451  float* rating, float* certainty) const {
452  ASSERT_HOST(!int_mode_);
453  *rating = 0.0f;
454  *certainty = 0.0f;
455  if (t_end <= t_start || t_end <= 0) return;
456  float ratings[3] = {0.0f, 0.0f, 0.0f};
457  float certs[3] = {0.0f, 0.0f, 0.0f};
458  for (int t = t_start; t < t_end; ++t) {
459  const float* line = f_[t];
460  float score = ProbToCertainty(line[choice]);
461  float zero = ProbToCertainty(line[null_ch]);
462  if (t == t_start) {
463  ratings[2] = MAX_FLOAT32;
464  ratings[1] = -score;
465  certs[1] = score;
466  } else {
467  for (int i = 2; i >= 1; --i) {
468  if (ratings[i] > ratings[i - 1]) {
469  ratings[i] = ratings[i - 1];
470  certs[i] = certs[i - 1];
471  }
472  }
473  ratings[2] -= zero;
474  if (zero < certs[2]) certs[2] = zero;
475  ratings[1] -= score;
476  if (score < certs[1]) certs[1] = score;
477  }
478  ratings[0] -= zero;
479  if (zero < certs[0]) certs[0] = zero;
480  }
481  int best_i = ratings[2] < ratings[1] ? 2 : 1;
482  *rating = ratings[best_i] + t_end - t_start;
483  *certainty = certs[best_i];
484 }
485 
486 // Returns the index (label) of the best value at the given timestep,
487 // excluding not_this and not_that, and if not null, sets the score to the
488 // log of the corresponding value.
489 int NetworkIO::BestLabel(int t, int not_this, int not_that,
490  float* score) const {
491  ASSERT_HOST(!int_mode_);
492  int best_index = -1;
493  float best_score = -MAX_FLOAT32;
494  const float* line = f_[t];
495  for (int i = 0; i < f_.dim2(); ++i) {
496  if (line[i] > best_score && i != not_this && i != not_that) {
497  best_score = line[i];
498  best_index = i;
499  }
500  }
501  if (score != NULL) *score = ProbToCertainty(best_score);
502  return best_index;
503 }
504 
505 // Returns the best start position out of [start, end) (into which all labels
506 // must fit) to obtain the highest cumulative score for the given labels.
508  int end) const {
509  int length = labels.size();
510  int last_start = end - length;
511  int best_start = -1;
512  double best_score = 0.0;
513  for (int s = start; s <= last_start; ++s) {
514  double score = ScoreOfLabels(labels, s);
515  if (score > best_score || best_start < 0) {
516  best_score = score;
517  best_start = s;
518  }
519  }
520  return best_start;
521 }
522 
523 // Returns the cumulative score of the given labels starting at start, and
524 // using one label per time-step.
526  int start) const {
527  int length = labels.size();
528  double score = 0.0;
529  for (int i = 0; i < length; ++i) {
530  score += f_(start + i, labels[i]);
531  }
532  return score;
533 }
534 
535 // Helper function sets all the outputs for a single timestep, such that
536 // label has value ok_score, and the other labels share 1 - ok_score.
537 void NetworkIO::SetActivations(int t, int label, float ok_score) {
538  ASSERT_HOST(!int_mode_);
539  int num_classes = NumFeatures();
540  float bad_score = (1.0f - ok_score) / (num_classes - 1);
541  float* targets = f_[t];
542  for (int i = 0; i < num_classes; ++i)
543  targets[i] = bad_score;
544  targets[label] = ok_score;
545 }
546 
547 // Modifies the values, only if needed, so that the given label is
548 // the winner at the given time step t.
549 void NetworkIO::EnsureBestLabel(int t, int label) {
550  ASSERT_HOST(!int_mode_);
551  if (BestLabel(t, NULL) != label) {
552  // Output value needs enhancing. Third all the other elements and add the
553  // remainder to best_label.
554  int num_classes = NumFeatures();
555  float* targets = f_[t];
556  for (int c = 0; c < num_classes; ++c) {
557  if (c == label) {
558  targets[c] += (1.0 - targets[c]) * (2 / 3.0);
559  } else {
560  targets[c] /= 3.0;
561  }
562  }
563  }
564 }
565 
566 // Helper function converts prob to certainty taking the minimum into account.
567 /* static */
568 float NetworkIO::ProbToCertainty(float prob) {
569  return prob > kMinProb ? log(prob) : kMinCertainty;
570 }
571 
572 // Returns true if there is any bad value that is suspiciously like a GT
573 // error. Assuming that *this is the difference(gradient) between target
574 // and forward output, returns true if there is a large negative value
575 // (correcting a very confident output) for which there is no corresponding
576 // positive value in an adjacent timestep for the same feature index. This
577 // allows the box-truthed samples to make fine adjustments to position while
578 // stopping other disagreements of confident output with ground truth.
579 bool NetworkIO::AnySuspiciousTruth(float confidence_thr) const {
580  int num_features = NumFeatures();
581  for (int t = 0; t < Width(); ++t) {
582  const float* features = f_[t];
583  for (int y = 0; y < num_features; ++y) {
584  float grad = features[y];
585  if (grad < -confidence_thr) {
586  // Correcting strong output. Check for movement.
587  if ((t == 0 || f_[t - 1][y] < confidence_thr / 2) &&
588  (t + 1 == Width() || f_[t + 1][y] < confidence_thr / 2)) {
589  return true; // No strong positive on either side.
590  }
591  }
592  }
593  }
594  return false;
595 }
596 
597 // Reads a single timestep to floats in the range [-1, 1].
598 void NetworkIO::ReadTimeStep(int t, double* output) const {
599  if (int_mode_) {
600  const inT8* line = i_[t];
601  for (int i = 0; i < i_.dim2(); ++i) {
602  output[i] = static_cast<double>(line[i]) / MAX_INT8;
603  }
604  } else {
605  const float* line = f_[t];
606  for (int i = 0; i < f_.dim2(); ++i) {
607  output[i] = static_cast<double>(line[i]);
608  }
609  }
610 }
611 
612 // Adds a single timestep to floats.
613 void NetworkIO::AddTimeStep(int t, double* inout) const {
614  int num_features = NumFeatures();
615  if (int_mode_) {
616  const inT8* line = i_[t];
617  for (int i = 0; i < num_features; ++i) {
618  inout[i] += static_cast<double>(line[i]) / MAX_INT8;
619  }
620  } else {
621  const float* line = f_[t];
622  for (int i = 0; i < num_features; ++i) {
623  inout[i] += line[i];
624  }
625  }
626 }
627 
628 // Adds part of a single timestep to floats.
629 void NetworkIO::AddTimeStepPart(int t, int offset, int num_features,
630  float* inout) const {
631  if (int_mode_) {
632  const inT8* line = i_[t] + offset;
633  for (int i = 0; i < num_features; ++i) {
634  inout[i] += static_cast<float>(line[i]) / MAX_INT8;
635  }
636  } else {
637  const float* line = f_[t] + offset;
638  for (int i = 0; i < num_features; ++i) {
639  inout[i] += line[i];
640  }
641  }
642 }
643 
644 // Writes a single timestep from floats in the range [-1, 1].
645 void NetworkIO::WriteTimeStep(int t, const double* input) {
646  WriteTimeStepPart(t, 0, NumFeatures(), input);
647 }
648 
649 // Writes a single timestep from floats in the range [-1, 1] writing only
650 // num_features elements of input to (*this)[t], starting at offset.
651 void NetworkIO::WriteTimeStepPart(int t, int offset, int num_features,
652  const double* input) {
653  if (int_mode_) {
654  inT8* line = i_[t] + offset;
655  for (int i = 0; i < num_features; ++i) {
656  line[i] = ClipToRange(IntCastRounded(input[i] * MAX_INT8),
657  -MAX_INT8, MAX_INT8);
658  }
659  } else {
660  float* line = f_[t] + offset;
661  for (int i = 0; i < num_features; ++i) {
662  line[i] = static_cast<float>(input[i]);
663  }
664  }
665 }
666 
667 // Maxpools a single time step from src.
668 void NetworkIO::MaxpoolTimeStep(int dest_t, const NetworkIO& src, int src_t,
669  int* max_line) {
670  ASSERT_HOST(int_mode_ == src.int_mode_);
671  if (int_mode_) {
672  int dim = i_.dim2();
673  inT8* dest_line = i_[dest_t];
674  const inT8* src_line = src.i_[src_t];
675  for (int i = 0; i < dim; ++i) {
676  if (dest_line[i] < src_line[i]) {
677  dest_line[i] = src_line[i];
678  max_line[i] = src_t;
679  }
680  }
681  } else {
682  int dim = f_.dim2();
683  float* dest_line = f_[dest_t];
684  const float* src_line = src.f_[src_t];
685  for (int i = 0; i < dim; ++i) {
686  if (dest_line[i] < src_line[i]) {
687  dest_line[i] = src_line[i];
688  max_line[i] = src_t;
689  }
690  }
691  }
692 }
693 
694 // Runs maxpool backward, using maxes to index timesteps in *this.
696  const GENERIC_2D_ARRAY<int>& maxes) {
697  ASSERT_HOST(!int_mode_);
698  Zero();
699  StrideMap::Index index(fwd.stride_map_);
700  do {
701  int t = index.t();
702  const int* max_line = maxes[t];
703  const float* fwd_line = fwd.f_[t];
704  int num_features = fwd.f_.dim2();
705  for (int i = 0; i < num_features; ++i) {
706  f_[max_line[i]][i] = fwd_line[i];
707  }
708  } while (index.Increment());
709 }
710 
711 // Returns the min over time of the maxes over features of the outputs.
712 float NetworkIO::MinOfMaxes() const {
713  float min_max = 0.0f;
714  int width = Width();
715  int num_features = NumFeatures();
716  for (int t = 0; t < width; ++t) {
717  float max_value = -MAX_FLOAT32;
718  if (int_mode_) {
719  const inT8* column = i_[t];
720  for (int i = 0; i < num_features; ++i) {
721  if (column[i] > max_value) max_value = column[i];
722  }
723  } else {
724  const float* column = f_[t];
725  for (int i = 0; i < num_features; ++i) {
726  if (column[i] > max_value) max_value = column[i];
727  }
728  }
729  if (t == 0 || max_value < min_max) min_max = max_value;
730  }
731  return min_max;
732 }
733 
734 // Computes combined results for a combiner that chooses between an existing
735 // input and itself, with an additional output to indicate the choice.
736 void NetworkIO::CombineOutputs(const NetworkIO& base_output,
737  const NetworkIO& combiner_output) {
738  int no = base_output.NumFeatures();
739  ASSERT_HOST(combiner_output.NumFeatures() == no + 1);
740  Resize(base_output, no);
741  int width = Width();
742  if (int_mode_) {
743  // Number of outputs from base and final result.
744  for (int t = 0; t < width; ++t) {
745  inT8* out_line = i_[t];
746  const inT8* base_line = base_output.i_[t];
747  const inT8* comb_line = combiner_output.i_[t];
748  float base_weight = static_cast<float>(comb_line[no]) / MAX_INT8;
749  float boost_weight = 1.0f - base_weight;
750  for (int i = 0; i < no; ++i) {
751  out_line[i] = IntCastRounded(base_line[i] * base_weight +
752  comb_line[i] * boost_weight);
753  }
754  }
755  } else {
756  for (int t = 0; t < width; ++t) {
757  float* out_line = f_[t];
758  const float* base_line = base_output.f_[t];
759  const float* comb_line = combiner_output.f_[t];
760  float base_weight = comb_line[no];
761  float boost_weight = 1.0f - base_weight;
762  for (int i = 0; i < no; ++i) {
763  out_line[i] = base_line[i] * base_weight + comb_line[i] * boost_weight;
764  }
765  }
766  }
767 }
768 
769 // Computes deltas for a combiner that chooses between 2 sets of inputs.
771  const NetworkIO& base_output) {
772  ASSERT_HOST(!int_mode_);
773  // Compute the deltas for the combiner.
774  int width = Width();
775  int no = NumFeatures() - 1;
776  ASSERT_HOST(fwd_deltas.NumFeatures() == no);
777  ASSERT_HOST(base_output.NumFeatures() == no);
778  // Number of outputs from base and final result.
779  for (int t = 0; t < width; ++t) {
780  const float* delta_line = fwd_deltas.f_[t];
781  const float* base_line = base_output.f_[t];
782  float* comb_line = f_[t];
783  float base_weight = comb_line[no];
784  float boost_weight = 1.0f - base_weight;
785  float max_base_delta = 0.0;
786  for (int i = 0; i < no; ++i) {
787  // What did the combiner actually produce?
788  float output = base_line[i] * base_weight + comb_line[i] * boost_weight;
789  // Reconstruct the target from the delta.
790  float comb_target = delta_line[i] + output;
791  comb_line[i] = comb_target - comb_line[i];
792  float base_delta = fabs(comb_target - base_line[i]);
793  if (base_delta > max_base_delta) max_base_delta = base_delta;
794  }
795  if (max_base_delta >= 0.5) {
796  // The base network got it wrong. The combiner should output the right
797  // answer and 0 for the base network.
798  comb_line[no] = 0.0 - base_weight;
799  } else {
800  // The base network was right. The combiner should flag that.
801  for (int i = 0; i < no; ++i) {
802  // All other targets are 0.
803  if (comb_line[i] > 0.0) comb_line[i] -= 1.0;
804  }
805  comb_line[no] = 1.0 - base_weight;
806  }
807  }
808 }
809 
810 // Copies the array checking that the types match.
811 void NetworkIO::CopyAll(const NetworkIO& src) {
812  ASSERT_HOST(src.int_mode_ == int_mode_);
813  f_ = src.f_;
814 }
815 
816 // Checks that both are floats and adds the src array to *this.
818  ASSERT_HOST(!int_mode_);
819  ASSERT_HOST(!src.int_mode_);
820  f_ += src.f_;
821 }
822 
823 // Subtracts the array from a float array. src must also be float.
825  ASSERT_HOST(!int_mode_);
826  ASSERT_HOST(!src.int_mode_);
827  f_ -= src.f_;
828 }
829 
830 // Copies src to *this, with maxabs normalization to match scale.
832  const NetworkIO& scale) {
833  ASSERT_HOST(!int_mode_);
834  ASSERT_HOST(!src.int_mode_);
835  ASSERT_HOST(!scale.int_mode_);
836  float src_max = src.f_.MaxAbs();
837  ASSERT_HOST(std::isfinite(src_max));
838  float scale_max = scale.f_.MaxAbs();
839  ASSERT_HOST(std::isfinite(scale_max));
840  if (src_max > 0.0f) {
841  float factor = scale_max / src_max;
842  for (int t = 0; t < src.Width(); ++t) {
843  const float* src_ptr = src.f_[t];
844  float* dest_ptr = f_[t];
845  for (int i = 0; i < src.f_.dim2(); ++i) dest_ptr[i] = src_ptr[i] * factor;
846  }
847  } else {
848  f_.Clear();
849  }
850 }
851 
852 // Copies src to *this with independent reversal of the y dimension.
854  int num_features = src.NumFeatures();
855  Resize(src, num_features);
856  StrideMap::Index b_index(src.stride_map_);
857  do {
858  int width = b_index.MaxIndexOfDim(FD_WIDTH) + 1;
859  StrideMap::Index fwd_index(b_index);
860  StrideMap::Index rev_index(b_index);
861  rev_index.AddOffset(rev_index.MaxIndexOfDim(FD_HEIGHT), FD_HEIGHT);
862  do {
863  int fwd_t = fwd_index.t();
864  int rev_t = rev_index.t();
865  for (int x = 0; x < width; ++x) CopyTimeStepFrom(rev_t++, src, fwd_t++);
866  } while (fwd_index.AddOffset(1, FD_HEIGHT) &&
867  rev_index.AddOffset(-1, FD_HEIGHT));
868  } while (b_index.AddOffset(1, FD_BATCH));
869 }
870 
871 // Copies src to *this with independent reversal of the x dimension.
873  int num_features = src.NumFeatures();
874  Resize(src, num_features);
875  StrideMap::Index b_index(src.stride_map_);
876  do {
877  StrideMap::Index y_index(b_index);
878  do {
879  StrideMap::Index fwd_index(y_index);
880  StrideMap::Index rev_index(y_index);
881  rev_index.AddOffset(rev_index.MaxIndexOfDim(FD_WIDTH), FD_WIDTH);
882  do {
883  CopyTimeStepFrom(rev_index.t(), src, fwd_index.t());
884  } while (fwd_index.AddOffset(1, FD_WIDTH) &&
885  rev_index.AddOffset(-1, FD_WIDTH));
886  } while (y_index.AddOffset(1, FD_HEIGHT));
887  } while (b_index.AddOffset(1, FD_BATCH));
888 }
889 
890 // Copies src to *this with independent transpose of the x and y dimensions.
892  int num_features = src.NumFeatures();
893  stride_map_ = src.stride_map_;
894  stride_map_.TransposeXY();
895  ResizeToMap(src.int_mode(), stride_map_, num_features);
896  StrideMap::Index src_b_index(src.stride_map_);
897  StrideMap::Index dest_b_index(stride_map_);
898  do {
899  StrideMap::Index src_y_index(src_b_index);
900  StrideMap::Index dest_x_index(dest_b_index);
901  do {
902  StrideMap::Index src_x_index(src_y_index);
903  StrideMap::Index dest_y_index(dest_x_index);
904  do {
905  CopyTimeStepFrom(dest_y_index.t(), src, src_x_index.t());
906  } while (src_x_index.AddOffset(1, FD_WIDTH) &&
907  dest_y_index.AddOffset(1, FD_HEIGHT));
908  } while (src_y_index.AddOffset(1, FD_HEIGHT) &&
909  dest_x_index.AddOffset(1, FD_WIDTH));
910  } while (src_b_index.AddOffset(1, FD_BATCH) &&
911  dest_b_index.AddOffset(1, FD_BATCH));
912 }
913 
914 // Copies src to *this, at the given feature_offset, returning the total
915 // feature offset after the copy. Multiple calls will stack outputs from
916 // multiple sources in feature space.
917 int NetworkIO::CopyPacking(const NetworkIO& src, int feature_offset) {
918  ASSERT_HOST(int_mode_ == src.int_mode_);
919  int width = src.Width();
920  ASSERT_HOST(width <= Width());
921  int num_features = src.NumFeatures();
922  ASSERT_HOST(num_features + feature_offset <= NumFeatures());
923  if (int_mode_) {
924  for (int t = 0; t < width; ++t) {
925  memcpy(i_[t] + feature_offset, src.i_[t],
926  num_features * sizeof(i_[t][0]));
927  }
928  for (int t = width; t < i_.dim1(); ++t) {
929  memset(i_[t], 0, num_features * sizeof(i_[t][0]));
930  }
931  } else {
932  for (int t = 0; t < width; ++t) {
933  memcpy(f_[t] + feature_offset, src.f_[t],
934  num_features * sizeof(f_[t][0]));
935  }
936  for (int t = width; t < f_.dim1(); ++t) {
937  memset(f_[t], 0, num_features * sizeof(f_[t][0]));
938  }
939  }
940  return num_features + feature_offset;
941 }
942 
943 // Opposite of CopyPacking, fills *this with a part of src, starting at
944 // feature_offset, and picking num_features.
945 void NetworkIO::CopyUnpacking(const NetworkIO& src, int feature_offset,
946  int num_features) {
947  Resize(src, num_features);
948  int width = src.Width();
949  ASSERT_HOST(num_features + feature_offset <= src.NumFeatures());
950  if (int_mode_) {
951  for (int t = 0; t < width; ++t) {
952  memcpy(i_[t], src.i_[t] + feature_offset,
953  num_features * sizeof(i_[t][0]));
954  }
955  } else {
956  for (int t = 0; t < width; ++t) {
957  memcpy(f_[t], src.f_[t] + feature_offset,
958  num_features * sizeof(f_[t][0]));
959  }
960  }
961 }
962 
963 // Transposes the float part of *this into dest.
965  int width = Width();
966  dest->ResizeNoInit(NumFeatures(), width);
967  for (int t = 0; t < width; ++t) dest->WriteStrided(t, f_[t]);
968 }
969 
970 // Clips the content of a single time-step to +/-range.
971 void NetworkIO::ClipVector(int t, float range) {
972  ASSERT_HOST(!int_mode_);
973  float* v = f_[t];
974  int dim = f_.dim2();
975  for (int i = 0; i < dim; ++i)
976  v[i] = ClipToRange(v[i], -range, range);
977 }
978 
979 } // namespace tesseract.
int Width() const
Definition: stridemap.h:118
void CopyUnpacking(const NetworkIO &src, int feature_offset, int num_features)
Definition: networkio.cpp:945
void CopyTimeStepFrom(int dest_t, const NetworkIO &src, int src_t)
Definition: networkio.cpp:383
void ZeroInvalidElements()
Definition: networkio.cpp:88
bool AddOffset(int offset, FlexDimensions dimension)
Definition: stridemap.cpp:62
void ReadTimeStep(int t, double *output) const
Definition: networkio.cpp:598
bool AnySuspiciousTruth(float confidence_thr) const
Definition: networkio.cpp:579
int Width() const
Definition: networkio.h:107
void Clear()
Definition: matrix.h:129
int index(FlexDimensions dimension) const
Definition: stridemap.h:60
inT32 get_total() const
Definition: statistc.h:86
float * f(int t)
Definition: networkio.h:115
void MaxpoolTimeStep(int dest_t, const NetworkIO &src, int src_t, int *max_line)
Definition: networkio.cpp:668
void Resize2d(bool int_mode, int width, int num_features)
Definition: networkio.cpp:34
T MaxAbs() const
Definition: matrix.h:350
void FromPix(const StaticShape &shape, const Pix *pix, TRand *randomizer)
Definition: networkio.cpp:161
int BestLabel(int t, float *score) const
Definition: networkio.h:161
int MaxIndexOfDim(FlexDimensions dim) const
Definition: stridemap.cpp:43
void AddTimeStep(int t, double *inout) const
Definition: networkio.cpp:613
void SubtractAllFromFloat(const NetworkIO &src)
Definition: networkio.cpp:824
double ScoreOfLabels(const GenericVector< int > &labels, int start) const
Definition: networkio.cpp:525
void SetActivations(int t, int label, float ok_score)
Definition: networkio.cpp:537
#define tprintf(...)
Definition: tprintf.h:31
void CopyWithXYTranspose(const NetworkIO &src)
Definition: networkio.cpp:891
void Resize(const NetworkIO &src, int num_features)
Definition: networkio.h:45
void WriteStrided(int t, const float *data)
Definition: weightmatrix.h:37
voidpf uLong offset
Definition: ioapi.h:42
const float kMinProb
Definition: networkio.cpp:31
int IntCastRounded(double x)
Definition: helpers.h:179
const float kMinCertainty
Definition: networkio.cpp:29
void ResizeScaled(const NetworkIO &src, int x_scale, int y_scale, int num_features)
Definition: networkio.cpp:62
int size() const
Definition: genericvector.h:72
bool int_mode() const
Definition: networkio.h:127
void ScoresOverRange(int t_start, int t_end, int choice, int null_ch, float *rating, float *certainty) const
Definition: networkio.cpp:450
int CopyPacking(const NetworkIO &src, int feature_offset)
Definition: networkio.cpp:917
#define ASSERT_HOST(x)
Definition: errcode.h:84
int dim1() const
Definition: matrix.h:201
void AddAllToFloat(const NetworkIO &src)
Definition: networkio.cpp:817
int Size(FlexDimensions dimension) const
Definition: stridemap.h:116
int dim2() const
Definition: matrix.h:202
uint32_t uinT32
Definition: host.h:39
void MaxpoolBackward(const NetworkIO &fwd, const GENERIC_2D_ARRAY< int > &maxes)
Definition: networkio.cpp:695
void EnsureBestLabel(int t, int label)
Definition: networkio.cpp:549
void Copy2DImage(int batch, Pix *pix, float black, float contrast, TRand *randomizer)
Definition: networkio.cpp:208
void CopyAll(const NetworkIO &src)
Definition: networkio.cpp:811
T ClipToRange(const T &x, const T &lower_bound, const T &upper_bound)
Definition: helpers.h:122
#define MAX_FLOAT32
Definition: host.h:66
float MinOfMaxes() const
Definition: networkio.cpp:712
void CopyTimeStepGeneral(int dest_t, int dest_offset, int num_features, const NetworkIO &src, int src_t, int src_offset)
Definition: networkio.cpp:393
void add(inT32 value, inT32 count)
Definition: statistc.cpp:101
void CopyWithNormalization(const NetworkIO &src, const NetworkIO &scale)
Definition: networkio.cpp:831
void ZeroTimeStepGeneral(int t, int offset, int num_features)
Definition: networkio.cpp:407
void ResizeNoInit(int size1, int size2)
Definition: matrix.h:86
int8_t inT8
Definition: host.h:34
void AddTimeStepPart(int t, int offset, int num_features, float *inout) const
Definition: networkio.cpp:629
void ResizeXTo1(const NetworkIO &src, int num_features)
Definition: networkio.cpp:70
static float ProbToCertainty(float prob)
Definition: networkio.cpp:568
void CombineOutputs(const NetworkIO &base_output, const NetworkIO &combiner_output)
Definition: networkio.cpp:736
void Print(int num) const
Definition: networkio.cpp:366
int NumFeatures() const
Definition: networkio.h:111
const inT8 * i(int t) const
Definition: networkio.h:123
const StrideMap & stride_map() const
Definition: networkio.h:133
void Copy1DGreyImage(int batch, Pix *pix, float black, float contrast, TRand *randomizer)
Definition: networkio.cpp:246
Definition: statistc.h:33
void CopyWithXReversal(const NetworkIO &src)
Definition: networkio.cpp:872
void Randomize(int t, int offset, int num_features, TRand *randomizer)
Definition: networkio.cpp:416
#define MAX_INT8
Definition: host.h:60
double ile(double frac) const
Definition: statistc.cpp:174
void ZeroVector(int n, T *vec)
Definition: functions.h:219
void CopyWithYReversal(const NetworkIO &src)
Definition: networkio.cpp:853
void WriteTimeStep(int t, const double *input)
Definition: networkio.cpp:645
void ComputeCombinerDeltas(const NetworkIO &fwd_deltas, const NetworkIO &base_output)
Definition: networkio.cpp:770
void Transpose(TransposedArray *dest) const
Definition: networkio.cpp:964
double SignedRand(double range)
Definition: helpers.h:60
void ResizeToMap(bool int_mode, const StrideMap &stride_map, int num_features)
Definition: networkio.cpp:45
void ClipVector(int t, float range)
Definition: networkio.cpp:971
void WriteTimeStepPart(int t, int offset, int num_features, const double *input)
Definition: networkio.cpp:651
Pix * ToPix() const
Definition: networkio.cpp:286
int PositionOfBestMatch(const GenericVector< int > &labels, int start, int end) const
Definition: networkio.cpp:507
void ScaleXY(int x_factor, int y_factor)
Definition: stridemap.cpp:144
double v[max]
void SetStride(const std::vector< std::pair< int, int >> &h_w_pairs)
Definition: stridemap.cpp:126
void SetPixel(int t, int f, int pixel, float black, float contrast)
Definition: networkio.cpp:275
void ZeroTimeStep(int t)
Definition: networkio.h:148
const char features[]
Definition: feature_tests.c:2
void FromPixes(const StaticShape &shape, const std::vector< const Pix *> &pixes, TRand *randomizer)
Definition: networkio.cpp:170
int BestChoiceOverRange(int t_start, int t_end, int not_this, int null_ch, float *rating, float *certainty) const
Definition: networkio.cpp:431