tesseract  4.00.00dev
lstm.h
Go to the documentation of this file.
1 // File: lstm.h
3 // Description: Long short-term memory (LSTM) recurrent neural network.
4 // Author: Ray Smith
5 // Created: Wed May 01 17:33:06 PST 2013
6 //
7 // (C) Copyright 2013, Google Inc.
8 // Licensed under the Apache License, Version 2.0 (the "License");
9 // you may not use this file except in compliance with the License.
10 // You may obtain a copy of the License at
11 // http://www.apache.org/licenses/LICENSE-2.0
12 // Unless required by applicable law or agreed to in writing, software
13 // distributed under the License is distributed on an "AS IS" BASIS,
14 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 // See the License for the specific language governing permissions and
16 // limitations under the License.
18 
19 #ifndef TESSERACT_LSTM_LSTM_H_
20 #define TESSERACT_LSTM_LSTM_H_
21 
22 #include "network.h"
23 #include "fullyconnected.h"
24 
25 namespace tesseract {
26 
27 // C++ implementation of the LSTM class from lstm.py, derived from Network.
28 class LSTM : public Network {
29  public:
30  // Enum for the different weights in LSTM, to reduce some of the I/O and
31  // setup code to loops. The elements of the enum correspond to elements of an
32  // array of WeightMatrix or a corresponding array of NetworkIO.
33  enum WeightType {
34  CI, // Cell Inputs.
35  GI, // Gate at the input.
36  GF1, // Forget gate at the memory (1-d or looking back 1 timestep).
37  GO, // Gate at the output.
38  GFS, // Forget gate at the memory, looking back in the other dimension.
39 
40  WT_COUNT // Number of WeightTypes.
41  };
42 
43  // Constructor for NT_LSTM (regular 1 or 2-d LSTM), NT_LSTM_SOFTMAX (LSTM with
44  // additional softmax layer included and fed back into the input at the next
45  // timestep), or NT_LSTM_SOFTMAX_ENCODED (as LSTM_SOFTMAX, but the feedback
46  // is binary encoded instead of categorical) only.
47  // 2-d and bidi softmax LSTMs are not rejected, but are impossible to build
48  // in the conventional way because feeding the output back both forwards and
49  // backwards in time is impossible.
50  LSTM(const STRING& name, int num_inputs, int num_states, int num_outputs,
51  bool two_dimensional, NetworkType type);
52  virtual ~LSTM();
53 
54  // Returns the shape output from the network given an input shape (which may
55  // be partially unknown ie zero).
56  virtual StaticShape OutputShape(const StaticShape& input_shape) const;
57  // Returns the spec string: a type prefix with ns_, plus any softmax_ spec.
58  virtual STRING spec() const {
59  STRING spec;
60  if (type_ == NT_LSTM)
61  spec.add_str_int("Lfx", ns_);
62  else if (type_ == NT_LSTM_SUMMARY)
63  spec.add_str_int("Lfxs", ns_);
64  else if (type_ == NT_LSTM_SOFTMAX)
65  spec.add_str_int("LS", ns_);
66  else if (type_ == NT_LSTM_SOFTMAX_ENCODED)
67  spec.add_str_int("LE", ns_);
68  if (softmax_ != NULL) spec += softmax_->spec();
69  return spec;
70  }
71 
72  // Suspends/Enables training by setting the training_ flag. Serialize and
73  // DeSerialize only operate on the run-time data if state is false.
74  virtual void SetEnableTraining(TrainingState state);
75 
76  // Sets up the network for training. Initializes the weights with values of
77  // scale `range` picked according to the random number generator `randomizer`.
78  virtual int InitWeights(float range, TRand* randomizer);
79 
80  // Converts a float network to an int network.
81  virtual void ConvertToInt();
82 
83  // Provides debug output on the weights.
84  virtual void DebugWeights();
85 
86  // Writes to the given file. Returns false in case of error.
87  virtual bool Serialize(TFile* fp) const;
88  // Reads from the given file. Returns false in case of error.
89  virtual bool DeSerialize(TFile* fp);
90 
91  // Runs forward propagation of activations on the input line.
92  // See Network for a detailed discussion of the arguments.
93  virtual void Forward(bool debug, const NetworkIO& input,
94  const TransposedArray* input_transpose,
95  NetworkScratch* scratch, NetworkIO* output);
96 
97  // Runs backward propagation of errors on the deltas line.
98  // See Network for a detailed discussion of the arguments.
99  virtual bool Backward(bool debug, const NetworkIO& fwd_deltas,
100  NetworkScratch* scratch,
101  NetworkIO* back_deltas);
102  // Updates the weights using the given learning rate and momentum.
103  // num_samples is the quotient to be used in the adagrad computation iff
104  // use_ada_grad_ is true.
105  virtual void Update(float learning_rate, float momentum, int num_samples);
106  // Sums the products of weight updates in *this and other, splitting into
107  // positive (same direction) in *same and negative (different direction) in
108  // *changed.
109  virtual void CountAlternators(const Network& other, double* same,
110  double* changed) const;
111  // Prints the weights for debug purposes.
112  void PrintW();
113  // Prints the weight deltas for debug purposes.
114  void PrintDW();
115 
116  // Returns true if this is a 2-d lstm.
117  bool Is2D() const {
118  return is_2d_;
119  }
120 
121  private:
122  // Resizes forward data to cope with an input image of the given width.
123  void ResizeForward(const NetworkIO& input);
124 
125  private:
126  // Size of padded input to weight matrices = ni_ + no_ for 1-D operation
127  // and ni_ + 2 * no_ for 2-D operation. Note that there is a phantom 1 input
128  // for the bias that makes the weight matrices of size [na + 1][no].
129  inT32 na_;
130  // Number of internal states. Equal to no_ except for a softmax LSTM.
131  // ns_ is NOT serialized, but is calculated from gate_weights_.
132  inT32 ns_;
133  // Number of additional feedback states. The softmax types feed back
134  // additional output information on top of the ns_ internal states.
135  // In the case of a binary-coded (ENCODED) softmax, nf_ < no_.
136  inT32 nf_;
137  // Flag indicating 2-D operation.
138  bool is_2d_;
139 
140  // Gate weight arrays of size [na + 1, no], one per WeightType.
141  WeightMatrix gate_weights_[WT_COUNT];
142  // Used only if this is a softmax LSTM.
143  FullyConnected* softmax_;
144  // Input padded with previous output of size [width, na].
145  NetworkIO source_;
146  // Internal state used during forward operation, of size [width, ns].
147  NetworkIO state_;
148  // State of the 2-d maxpool, generated during forward, used during backward.
149  GENERIC_2D_ARRAY<inT8> which_fg_;
150  // Internal state saved from forward, but used only during backward.
151  NetworkIO node_values_[WT_COUNT];
152  // Preserved input stride_map used for Backward (NT_LSTM_SQUASHED; name to confirm).
153  StrideMap input_map_;
154  int input_width_;
155 };
156 
157 } // namespace tesseract.
158 
159 
160 #endif // TESSERACT_LSTM_LSTM_H_
virtual void CountAlternators(const Network &other, double *same, double *changed) const
Definition: lstm.cpp:651
void add_str_int(const char *str, int number)
Definition: strngs.cpp:381
bool Is2D() const
Definition: lstm.h:117
int32_t inT32
Definition: host.h:38
virtual STRING spec() const
Definition: lstm.h:58
NetworkType type() const
Definition: network.h:112
TrainingState
Definition: network.h:92
virtual void ConvertToInt()
Definition: lstm.cpp:144
virtual bool Backward(bool debug, const NetworkIO &fwd_deltas, NetworkScratch *scratch, NetworkIO *back_deltas)
Definition: lstm.cpp:400
virtual void Update(float learning_rate, float momentum, int num_samples)
Definition: lstm.cpp:632
virtual bool DeSerialize(TFile *fp)
Definition: lstm.cpp:181
Definition: strngs.h:45
LSTM(const STRING &name, int num_inputs, int num_states, int num_outputs, bool two_dimensional, NetworkType type)
Definition: lstm.cpp:70
void PrintDW()
Definition: lstm.cpp:691
virtual int InitWeights(float range, TRand *randomizer)
Definition: lstm.cpp:129
NetworkType
Definition: network.h:43
NetworkType type_
Definition: network.h:285
virtual ~LSTM()
Definition: lstm.cpp:94
virtual void SetEnableTraining(TrainingState state)
Definition: lstm.cpp:108
virtual void DebugWeights()
Definition: lstm.cpp:155
const STRING & name() const
Definition: network.h:138
virtual void Forward(bool debug, const NetworkIO &input, const TransposedArray *input_transpose, NetworkScratch *scratch, NetworkIO *output)
Definition: lstm.cpp:211
void PrintW()
Definition: lstm.cpp:665
virtual bool Serialize(TFile *fp) const
Definition: lstm.cpp:168
virtual StaticShape OutputShape(const StaticShape &input_shape) const
Definition: lstm.cpp:98