tesseract  4.00.00dev
input.cpp
Go to the documentation of this file.
1 // File: input.cpp
3 // Description: Input layer class for neural network implementations.
4 // Author: Ray Smith
5 // Created: Thu Mar 13 09:10:34 PDT 2014
6 //
7 // (C) Copyright 2014, Google Inc.
8 // Licensed under the Apache License, Version 2.0 (the "License");
9 // you may not use this file except in compliance with the License.
10 // You may obtain a copy of the License at
11 // http://www.apache.org/licenses/LICENSE-2.0
12 // Unless required by applicable law or agreed to in writing, software
13 // distributed under the License is distributed on an "AS IS" BASIS,
14 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 // See the License for the specific language governing permissions and
16 // limitations under the License.
18 
19 #include "input.h"
20 
21 #include "allheaders.h"
22 #include "imagedata.h"
23 #include "pageres.h"
24 #include "scrollview.h"
25 
26 namespace tesseract {
27 
// Upper bound on image height for variable-height inputs; passed to
// ImageData::PreScale as its max_height argument.
constexpr int kMaxInputHeight = 48;
30 
31 Input::Input(const STRING& name, int ni, int no)
32  : Network(NT_INPUT, name, ni, no), cached_x_scale_(1) {}
33 Input::Input(const STRING& name, const StaticShape& shape)
34  : Network(NT_INPUT, name, shape.height(), shape.depth()),
35  shape_(shape),
36  cached_x_scale_(1) {
37  if (shape.height() == 1) ni_ = shape.depth();
38 }
39 
41 }
42 
43 // Writes to the given file. Returns false in case of error.
44 bool Input::Serialize(TFile* fp) const {
45  if (!Network::Serialize(fp)) return false;
46  if (fp->FWrite(&shape_, sizeof(shape_), 1) != 1) return false;
47  return true;
48 }
49 
50 // Reads from the given file. Returns false in case of error.
52  return fp->FReadEndian(&shape_, sizeof(shape_), 1) == 1;
53 }
54 
55 // Returns an integer reduction factor that the network applies to the
56 // time sequence. Assumes that any 2-d is already eliminated. Used for
57 // scaling bounding boxes of truth data.
58 int Input::XScaleFactor() const {
59  return 1;
60 }
61 
62 // Provides the (minimum) x scale factor to the network (of interest only to
63 // input units) so they can determine how to scale bounding boxes.
64 void Input::CacheXScaleFactor(int factor) {
65  cached_x_scale_ = factor;
66 }
67 
68 // Runs forward propagation of activations on the input line.
69 // See Network for a detailed discussion of the arguments.
70 void Input::Forward(bool debug, const NetworkIO& input,
71  const TransposedArray* input_transpose,
72  NetworkScratch* scratch, NetworkIO* output) {
73  *output = input;
74 }
75 
76 // Runs backward propagation of errors on the deltas line.
77 // See NetworkCpp for a detailed discussion of the arguments.
78 bool Input::Backward(bool debug, const NetworkIO& fwd_deltas,
79  NetworkScratch* scratch,
80  NetworkIO* back_deltas) {
81  tprintf("Input::Backward should not be called!!\n");
82  return false;
83 }
84 
85 // Creates and returns a Pix of appropriate size for the network from the
86 // image_data. If non-null, *image_scale returns the image scale factor used.
87 // Returns nullptr on error.
88 /* static */
89 Pix* Input::PrepareLSTMInputs(const ImageData& image_data,
90  const Network* network, int min_width,
91  TRand* randomizer, float* image_scale) {
92  // Note that NumInputs() is defined as input image height.
93  int target_height = network->NumInputs();
94  int width, height;
95  Pix* pix = image_data.PreScale(target_height, kMaxInputHeight, image_scale,
96  &width, &height, nullptr);
97  if (pix == nullptr) {
98  tprintf("Bad pix from ImageData!\n");
99  return nullptr;
100  }
101  if (width <= min_width || height < min_width) {
102  tprintf("Image too small to scale!! (%dx%d vs min width of %d)\n", width,
103  height, min_width);
104  pixDestroy(&pix);
105  return nullptr;
106  }
107  return pix;
108 }
109 
110 // Converts the given pix to a NetworkIO of height and depth appropriate to the
111 // given StaticShape:
112 // If depth == 3, convert to 24 bit color, otherwise normalized grey.
113 // Scale to target height, if the shape's height is > 1, or its depth if the
114 // height == 1. If height == 0 then no scaling.
115 // NOTE: It isn't safe for multiple threads to call this on the same pix.
116 /* static */
117 void Input::PreparePixInput(const StaticShape& shape, const Pix* pix,
118  TRand* randomizer, NetworkIO* input) {
119  bool color = shape.depth() == 3;
120  Pix* var_pix = const_cast<Pix*>(pix);
121  int depth = pixGetDepth(var_pix);
122  Pix* normed_pix = nullptr;
123  // On input to BaseAPI, an image is forced to be 1, 8 or 24 bit, without
124  // colormap, so we just have to deal with depth conversion here.
125  if (color) {
126  // Force RGB.
127  if (depth == 32)
128  normed_pix = pixClone(var_pix);
129  else
130  normed_pix = pixConvertTo32(var_pix);
131  } else {
132  // Convert non-8-bit images to 8 bit.
133  if (depth == 8)
134  normed_pix = pixClone(var_pix);
135  else
136  normed_pix = pixConvertTo8(var_pix, false);
137  }
138  int height = pixGetHeight(normed_pix);
139  int target_height = shape.height();
140  if (target_height == 1) target_height = shape.depth();
141  if (target_height == 0) target_height = height;
142  float im_factor = static_cast<float>(target_height) / height;
143  if (im_factor != 1.0f) {
144  // Get the scaled image.
145  Pix* scaled_pix = pixScale(normed_pix, im_factor, im_factor);
146  pixDestroy(&normed_pix);
147  normed_pix = scaled_pix;
148  }
149  input->FromPix(shape, normed_pix, randomizer);
150  pixDestroy(&normed_pix);
151 }
152 
153 } // namespace tesseract.
Pix * PreScale(int target_height, int max_height, float *scale_factor, int *scaled_width, int *scaled_height, GenericVector< TBOX > *boxes) const
Definition: imagedata.cpp:227
virtual bool Backward(bool debug, const NetworkIO &fwd_deltas, NetworkScratch *scratch, NetworkIO *back_deltas)
Definition: input.cpp:78
const int kMaxInputHeight
Definition: input.cpp:29
void FromPix(const StaticShape &shape, const Pix *pix, TRand *randomizer)
Definition: networkio.cpp:161
static void PreparePixInput(const StaticShape &shape, const Pix *pix, TRand *randomizer, NetworkIO *input)
Definition: input.cpp:117
static Pix * PrepareLSTMInputs(const ImageData &image_data, const Network *network, int min_width, TRand *randomizer, float *image_scale)
Definition: input.cpp:89
virtual bool DeSerialize(TFile *fp)
Definition: input.cpp:51
#define tprintf(...)
Definition: tprintf.h:31
int FReadEndian(void *buffer, int size, int count)
Definition: serialis.cpp:97
virtual int XScaleFactor() const
Definition: input.cpp:58
Input(const STRING &name, int ni, int no)
Definition: input.cpp:31
virtual void CacheXScaleFactor(int factor)
Definition: input.cpp:64
virtual ~Input()
Definition: input.cpp:40
Definition: strngs.h:45
virtual bool Serialize(TFile *fp) const
Definition: input.cpp:44
int FWrite(const void *buffer, int size, int count)
Definition: serialis.cpp:148
int NumInputs() const
Definition: network.h:120
const STRING & name() const
Definition: network.h:138
virtual void Forward(bool debug, const NetworkIO &input, const TransposedArray *input_transpose, NetworkScratch *scratch, NetworkIO *output)
Definition: input.cpp:70
virtual bool Serialize(TFile *fp) const
Definition: network.cpp:153