tesseract  4.00.00dev
network.h
Go to the documentation of this file.
1 // File: network.h
3 // Description: Base class for neural network implementations.
4 // Author: Ray Smith
5 // Created: Wed May 01 16:38:06 PST 2013
6 //
7 // (C) Copyright 2013, Google Inc.
8 // Licensed under the Apache License, Version 2.0 (the "License");
9 // you may not use this file except in compliance with the License.
10 // You may obtain a copy of the License at
11 // http://www.apache.org/licenses/LICENSE-2.0
12 // Unless required by applicable law or agreed to in writing, software
13 // distributed under the License is distributed on an "AS IS" BASIS,
14 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 // See the License for the specific language governing permissions and
16 // limitations under the License.
18 
19 #ifndef TESSERACT_LSTM_NETWORK_H_
20 #define TESSERACT_LSTM_NETWORK_H_
21 
22 #include <stdio.h>
23 #include <cmath>
24 
25 #include "genericvector.h"
26 #include "helpers.h"
27 #include "matrix.h"
28 #include "networkio.h"
29 #include "serialis.h"
30 #include "static_shape.h"
31 #include "tprintf.h"
32 
33 struct Pix;
34 class ScrollView;
35 class TBOX;
36 
37 namespace tesseract {
38 
39 class ImageData;
40 class NetworkScratch;
41 
// Enum to store the run-time type of a Network. Keep in sync with kTypeNames.
enum NetworkType {
  NT_NONE,         // The naked base class.
  NT_INPUT,        // Inputs from an image.
  // Plumbing networks combine other networks or rearrange the inputs.
  NT_CONVOLVE,     // Duplicates inputs in a sliding window neighborhood.
  NT_MAXPOOL,      // Chooses the max result from a rectangle.
  NT_PARALLEL,     // Runs networks in parallel.
  NT_REPLICATED,   // Runs identical networks in parallel.
  NT_PAR_RL_LSTM,  // Runs LTR and RTL LSTMs in parallel.
  NT_PAR_UD_LSTM,  // Runs Up and Down LSTMs in parallel.
  NT_PAR_2D_LSTM,  // Runs 4 LSTMs in parallel.
  NT_SERIES,       // Executes a sequence of layers.
  NT_RECONFIG,     // Scales the time/y size but makes the output deeper.
  NT_XREVERSED,    // Reverses the x direction of the inputs/outputs.
  NT_YREVERSED,    // Reverses the y-direction of the inputs/outputs.
  NT_XYTRANSPOSE,  // Transposes x and y (for just a single op).
  // Functional networks actually calculate stuff.
  NT_LSTM,          // Long-Short-Term-Memory block.
  NT_LSTM_SUMMARY,  // LSTM that only keeps its last output.
  NT_LOGISTIC,      // Fully connected logistic nonlinearity.
  NT_POSCLIP,       // Fully connected rect lin version of logistic.
  NT_SYMCLIP,       // Fully connected rect lin version of tanh.
  NT_TANH,          // Fully connected with tanh nonlinearity.
  NT_RELU,          // Fully connected with rectifier nonlinearity.
  NT_LINEAR,        // Fully connected with no nonlinearity.
  NT_SOFTMAX,       // Softmax uses exponential normalization, with CTC.
  NT_SOFTMAX_NO_CTC,  // Softmax uses exponential normalization, no CTC.
  // The SOFTMAX LSTMs both have an extra softmax layer on top, but inside, with
  // the outputs fed back to the input of the LSTM at the next timestep.
  // The ENCODED version binary encodes the softmax outputs, providing log2 of
  // the number of outputs as additional inputs, and the other version just
  // provides all the softmax outputs as additional inputs.
  NT_LSTM_SOFTMAX,          // 1-d LSTM with built-in fully connected softmax.
  NT_LSTM_SOFTMAX_ENCODED,  // 1-d LSTM with built-in binary encoded softmax.
  // A TensorFlow graph encapsulated as a Tesseract network.
  NT_TENSORFLOW,

  NT_COUNT  // Array size.
};
82 
// Enum of Network behavior flags. Can in theory be set for each individual
// network element.
enum NetworkFlags {
  // Network forward/backprop behavior.
  NF_LAYER_SPECIFIC_LR = 64,  // Separate learning rate for each layer.
  NF_ADA_GRAD = 128,          // Weight-specific learning rate.
};
90 
// State of training and desired state used in SetEnableTraining.
enum TrainingState {
  // Valid states of training_.
  TS_DISABLED,      // Disabled permanently.
  TS_ENABLED,       // Enabled for backprop and to write a training dump.
                    // Re-enable from ANY disabled state.
  TS_TEMP_DISABLE,  // Temporarily disabled to write a recognition dump.
  // Valid only for SetEnableTraining.
  TS_RE_ENABLE,  // Re-Enable from TS_TEMP_DISABLE, but not TS_DISABLED.
};
101 
102 // Base class for network types. Not quite an abstract base class, but almost.
103 // Most of the time no isolated Network exists, except prior to
104 // deserialization.
105 class Network {
106  public:
107  Network();
108  Network(NetworkType type, const STRING& name, int ni, int no);
109  virtual ~Network();
110 
111  // Accessors.
112  NetworkType type() const {
113  return type_;
114  }
115  bool IsTraining() const { return training_ == TS_ENABLED; }
116  bool needs_to_backprop() const {
117  return needs_to_backprop_;
118  }
119  int num_weights() const { return num_weights_; }
120  int NumInputs() const {
121  return ni_;
122  }
123  int NumOutputs() const {
124  return no_;
125  }
126  // Returns the required shape input to the network.
127  virtual StaticShape InputShape() const {
128  StaticShape result;
129  return result;
130  }
131  // Returns the shape output from the network given an input shape (which may
132  // be partially unknown ie zero).
133  virtual StaticShape OutputShape(const StaticShape& input_shape) const {
134  StaticShape result(input_shape);
135  result.set_depth(no_);
136  return result;
137  }
138  const STRING& name() const {
139  return name_;
140  }
141  virtual STRING spec() const {
142  return "?";
143  }
144  bool TestFlag(NetworkFlags flag) const {
145  return (network_flags_ & flag) != 0;
146  }
147 
148  // Initialization and administrative functions that are mostly provided
149  // by Plumbing.
150  // Returns true if the given type is derived from Plumbing, and thus contains
151  // multiple sub-networks that can have their own learning rate.
152  virtual bool IsPlumbingType() const { return false; }
153 
154  // Suspends/Enables/Permanently disables training by setting the training_
155  // flag. Serialize and DeSerialize only operate on the run-time data if state
156  // is TS_DISABLED or TS_TEMP_DISABLE. Specifying TS_TEMP_DISABLE will
157  // temporarily disable layers in state TS_ENABLED, allowing a trainer to
158  // serialize as if it were a recognizer.
159  // TS_RE_ENABLE will re-enable layers that were previously in any disabled
160  // state. If in TS_TEMP_DISABLE then the flag is just changed, but if in
161  // TS_DISABLED, the deltas in the weight matrices are reinitialized so that a
162  // recognizer can be converted back to a trainer.
163  virtual void SetEnableTraining(TrainingState state);
164 
165  // Sets flags that control the action of the network. See NetworkFlags enum
166  // for bit values.
167  virtual void SetNetworkFlags(uinT32 flags);
168 
169  // Sets up the network for training. Initializes weights using weights of
170  // scale `range` picked according to the random number generator `randomizer`.
171  // Note that randomizer is a borrowed pointer that should outlive the network
172  // and should not be deleted by any of the networks.
173  // Returns the number of weights initialized.
174  virtual int InitWeights(float range, TRand* randomizer);
175 
176  // Converts a float network to an int network.
177  virtual void ConvertToInt() {}
178 
179  // Provides a pointer to a TRand for any networks that care to use it.
180  // Note that randomizer is a borrowed pointer that should outlive the network
181  // and should not be deleted by any of the networks.
182  virtual void SetRandomizer(TRand* randomizer);
183 
184  // Sets needs_to_backprop_ to needs_backprop and returns true if
185  // needs_backprop || any weights in this network so the next layer forward
186  // can be told to produce backprop for this layer if needed.
187  virtual bool SetupNeedsBackprop(bool needs_backprop);
188 
189  // Returns the most recent reduction factor that the network applied to the
190  // time sequence. Assumes that any 2-d is already eliminated. Used for
191  // scaling bounding boxes of truth data and calculating result bounding boxes.
192  // WARNING: if GlobalMinimax is used to vary the scale, this will return
193  // the last used scale factor. Call it before any forward, and it will return
194  // the minimum scale factor of the paths through the GlobalMinimax.
195  virtual int XScaleFactor() const {
196  return 1;
197  }
198 
199  // Provides the (minimum) x scale factor to the network (of interest only to
200  // input units) so they can determine how to scale bounding boxes.
201  virtual void CacheXScaleFactor(int factor) {}
202 
203  // Provides debug output on the weights.
204  virtual void DebugWeights() {
205  tprintf("Must override Network::DebugWeights for type %d\n", type_);
206  }
207 
208  // Writes to the given file. Returns false in case of error.
209  // Should be overridden by subclasses, but called by their Serialize.
210  virtual bool Serialize(TFile* fp) const;
211  // Reads from the given file. Returns false in case of error.
212  // Should be overridden by subclasses, but NOT called by their DeSerialize.
213  virtual bool DeSerialize(TFile* fp);
214 
215  // Updates the weights using the given learning rate and momentum.
216  // num_samples is the quotient to be used in the adagrad computation iff
217  // use_ada_grad_ is true.
218  virtual void Update(float learning_rate, float momentum, int num_samples) {}
219  // Sums the products of weight updates in *this and other, splitting into
220  // positive (same direction) in *same and negative (different direction) in
221  // *changed.
222  virtual void CountAlternators(const Network& other, double* same,
223  double* changed) const {}
224 
225  // Reads from the given file. Returns NULL in case of error.
226  // Determines the type of the serialized class and calls its DeSerialize
227  // on a new object of the appropriate type, which is returned.
228  static Network* CreateFromFile(TFile* fp);
229 
230  // Runs forward propagation of activations on the input line.
231  // Note that input and output are both 2-d arrays.
232  // The 1st index is the time element. In a 1-d network, it might be the pixel
233  // position on the textline. In a 2-d network, the linearization is defined
234  // by the stride_map. (See networkio.h).
235  // The 2nd index of input is the network inputs/outputs, and the dimension
236  // of the input must match NumInputs() of this network.
237  // The output array will be resized as needed so that its 1st dimension is
238  // always equal to the number of output values, and its second dimension is
239  // always NumOutputs(). Note that all this detail is encapsulated away inside
240  // NetworkIO, as are the internals of the scratch memory space used by the
241  // network. See networkscratch.h for that.
242  // If input_transpose is not NULL, then it contains the transpose of input,
243  // and the caller guarantees that it will still be valid on the next call to
244  // backward. The callee is therefore at liberty to save the pointer and
245  // reference it on a call to backward. This is a bit ugly, but it makes it
246  // possible for a replicating parallel to calculate the input transpose once
247  // instead of all the replicated networks having to do it.
248  virtual void Forward(bool debug, const NetworkIO& input,
249  const TransposedArray* input_transpose,
250  NetworkScratch* scratch, NetworkIO* output) {
251  tprintf("Must override Network::Forward for type %d\n", type_);
252  }
253 
254  // Runs backward propagation of errors on fwdX_deltas.
255  // Note that fwd_deltas and back_deltas are both 2-d arrays as with Forward.
256  // Returns false if back_deltas was not set, due to there being no point in
257  // propagating further backwards. Thus most complete networks will always
258  // return false from Backward!
259  virtual bool Backward(bool debug, const NetworkIO& fwd_deltas,
260  NetworkScratch* scratch,
261  NetworkIO* back_deltas) {
262  tprintf("Must override Network::Backward for type %d\n", type_);
263  return false;
264  }
265 
266  // === Debug image display methods. ===
267  // Displays the image of the matrix to the forward window.
268  void DisplayForward(const NetworkIO& matrix);
269  // Displays the image of the matrix to the backward window.
270  void DisplayBackward(const NetworkIO& matrix);
271 
272  // Creates the window if needed, otherwise clears it.
273  static void ClearWindow(bool tess_coords, const char* window_name,
274  int width, int height, ScrollView** window);
275 
276  // Displays the pix in the given window. and returns the height of the pix.
277  // The pix is pixDestroyed.
278  static int DisplayImage(Pix* pix, ScrollView* window);
279 
280  protected:
281  // Returns a random number in [-range, range].
282  double Random(double range);
283 
284  protected:
285  NetworkType type_; // Type of the derived network class.
286  TrainingState training_; // Are we currently training?
287  bool needs_to_backprop_; // This network needs to output back_deltas.
288  inT32 network_flags_; // Behavior control flags in NetworkFlags.
289  inT32 ni_; // Number of input values.
290  inT32 no_; // Number of output values.
291  inT32 num_weights_; // Number of weights in this and sub-network.
292  STRING name_; // A unique name for this layer.
293 
294  // NOT-serialized debug data.
295  ScrollView* forward_win_; // Recognition debug display window.
296  ScrollView* backward_win_; // Training debug display window.
297  TRand* randomizer_; // Random number generator.
298 
299  // Static serialized name/type_ mapping. Keep in sync with NetworkType.
300  static char const* const kTypeNames[NT_COUNT];
301 };
302 
303 
304 } // namespace tesseract.
305 
306 #endif // TESSERACT_LSTM_NETWORK_H_
virtual int InitWeights(float range, TRand *randomizer)
Definition: network.cpp:132
virtual int XScaleFactor() const
Definition: network.h:195
virtual bool IsPlumbingType() const
Definition: network.h:152
bool needs_to_backprop_
Definition: network.h:287
int num_weights() const
Definition: network.h:119
int32_t inT32
Definition: host.h:38
virtual void SetRandomizer(TRand *randomizer)
Definition: network.cpp:140
virtual void Update(float learning_rate, float momentum, int num_samples)
Definition: network.h:218
void DisplayForward(const NetworkIO &matrix)
Definition: network.cpp:285
ScrollView * backward_win_
Definition: network.h:296
NetworkType type() const
Definition: network.h:112
#define tprintf(...)
Definition: tprintf.h:31
static Network * CreateFromFile(TFile *fp)
Definition: network.cpp:203
bool needs_to_backprop() const
Definition: network.h:116
bool IsTraining() const
Definition: network.h:115
TrainingState
Definition: network.h:92
virtual void Forward(bool debug, const NetworkIO &input, const TransposedArray *input_transpose, NetworkScratch *scratch, NetworkIO *output)
Definition: network.h:248
TRand * randomizer_
Definition: network.h:297
virtual void CacheXScaleFactor(int factor)
Definition: network.h:201
virtual void SetNetworkFlags(uinT32 flags)
Definition: network.cpp:126
inT32 network_flags_
Definition: network.h:288
virtual bool DeSerialize(TFile *fp)
Definition: network.cpp:172
uint32_t uinT32
Definition: host.h:39
virtual StaticShape InputShape() const
Definition: network.h:127
TrainingState training_
Definition: network.h:286
virtual void ConvertToInt()
Definition: network.h:177
virtual void DebugWeights()
Definition: network.h:204
STRING
Definition: strngs.h:45
static void ClearWindow(bool tess_coords, const char *window_name, int width, int height, ScrollView **window)
Definition: network.cpp:309
double Random(double range)
Definition: network.cpp:278
bool TestFlag(NetworkFlags flag) const
Definition: network.h:144
NetworkFlags
Definition: network.h:85
virtual bool Backward(bool debug, const NetworkIO &fwd_deltas, NetworkScratch *scratch, NetworkIO *back_deltas)
Definition: network.h:259
static char const *const kTypeNames[NT_COUNT]
Definition: network.h:300
virtual StaticShape OutputShape(const StaticShape &input_shape) const
Definition: network.h:133
ScrollView * forward_win_
Definition: network.h:295
TBOX
Definition: rect.h:30
NetworkType
Definition: network.h:43
NetworkType type_
Definition: network.h:285
virtual ~Network()
Definition: network.cpp:100
int NumInputs() const
Definition: network.h:120
const STRING & name() const
Definition: network.h:138
virtual STRING spec() const
Definition: network.h:141
static int DisplayImage(Pix *pix, ScrollView *window)
Definition: network.cpp:332
int NumOutputs() const
Definition: network.h:123
virtual void CountAlternators(const Network &other, double *same, double *changed) const
Definition: network.h:222
virtual bool Serialize(TFile *fp) const
Definition: network.cpp:153
void DisplayBackward(const NetworkIO &matrix)
Definition: network.cpp:296
inT32 num_weights_
Definition: network.h:291
virtual void SetEnableTraining(TrainingState state)
Definition: network.cpp:112
virtual bool SetupNeedsBackprop(bool needs_backprop)
Definition: network.cpp:147
void set_depth(int value)
Definition: static_shape.h:47