tesseract  4.00.00dev
lstmtrainer.h
Go to the documentation of this file.
1 // File: lstmtrainer.h
3 // Description: Top-level line trainer class for LSTM-based networks.
4 // Author: Ray Smith
5 // Created: Fri May 03 09:07:06 PST 2013
6 //
7 // (C) Copyright 2013, Google Inc.
8 // Licensed under the Apache License, Version 2.0 (the "License");
9 // you may not use this file except in compliance with the License.
10 // You may obtain a copy of the License at
11 // http://www.apache.org/licenses/LICENSE-2.0
12 // Unless required by applicable law or agreed to in writing, software
13 // distributed under the License is distributed on an "AS IS" BASIS,
14 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 // See the License for the specific language governing permissions and
16 // limitations under the License.
18 
19 #ifndef TESSERACT_LSTM_LSTMTRAINER_H_
20 #define TESSERACT_LSTM_LSTMTRAINER_H_
21 
22 #include "imagedata.h"
23 #include "lstmrecognizer.h"
24 #include "rect.h"
25 #include "tesscallback.h"
26 
27 namespace tesseract {
28 
29 class LSTM;
30 class LSTMTrainer;
31 class Parallel;
32 class Reversed;
33 class Softmax;
34 class Series;
35 
// Enum for the types of errors that are counted.
enum ErrorTypes {
  ET_RMS,          // RMS activation error.
  ET_DELTA,        // Number of big errors in deltas.
  ET_WORD_RECERR,  // Output text string word recall error.
  ET_CHAR_ERROR,   // Output text string total char error.
  ET_SKIP_RATIO,   // Fraction of samples skipped.
  ET_COUNT         // For array sizing.
};
45 
// Enum for the trainability_ flags.
enum Trainability {
  TRAINABLE,         // Non-zero delta error.
  PERFECT,           // Zero delta error.
  UNENCODABLE,       // Not trainable due to coding/alignment trouble.
  HI_PRECISION_ERR,  // Hi confidence disagreement.
  NOT_BOXED,         // Early in training and has no character boxes.
};
54 
// Enum to define the amount of data to get serialized.
enum SerializeAmount {
  LIGHT,            // Minimal data for remote training.
  NO_BEST_TRAINER,  // Save an empty vector in place of best_trainer_.
  FULL,             // All data including best_trainer_.
};
61 
// Enum to indicate how the sub_trainer_ training went.
enum SubTrainerResult {
  STR_NONE,     // Did nothing as not good enough.
  STR_UPDATED,  // Subtrainer was updated, but didn't replace *this.
  STR_REPLACED  // Subtrainer replaced *this.
};
68 
70 // Function to restore the trainer state from a given checkpoint.
71 // Returns false on failure.
74 // Function to save a checkpoint of the current trainer state.
75 // Returns false on failure. SerializeAmount determines the amount of the
76 // trainer to serialize, typically used for saving the best state.
77 typedef TessResultCallback3<bool, SerializeAmount, const LSTMTrainer*,
79 // Function to compute and record error rates on some external test set(s).
80 // Args are: iteration, mean errors, model, training stage.
81 // Returns a STRING containing logging information about the tests.
82 typedef TessResultCallback4<STRING, int, const double*,
84 
85 // Trainer class for LSTM networks. Most of the effort is in creating the
86 // ideal target outputs from the transcription. A box file is used if it is
87 // available, otherwise estimates of the char widths from the unicharset are
88 // used to guide a DP search for the best fit to the transcription.
89 class LSTMTrainer : public LSTMRecognizer {
90  public:
91  LSTMTrainer();
92  // Callbacks may be null, in which case defaults are used.
93  LSTMTrainer(FileReader file_reader, FileWriter file_writer,
94  CheckPointReader checkpoint_reader,
95  CheckPointWriter checkpoint_writer,
96  const char* model_base, const char* checkpoint_name,
97  int debug_interval, inT64 max_memory);
98  virtual ~LSTMTrainer();
99 
100  // Tries to deserialize a trainer from the given file and silently returns
101  // false in case of failure.
102  bool TryLoadingCheckpoint(const char* filename);
103 
104  // Initializes the character set encode/decode mechanism.
105  // train_flags control training behavior according to the TrainingFlags
106  // enum, including character set encoding.
107  // script_dir is required for TF_COMPRESS_UNICHARSET, and, if provided,
108  // fully initializes the unicharset from the universal unicharsets.
109  // Note: Call before InitNetwork!
110  void InitCharSet(const UNICHARSET& unicharset, const STRING& script_dir,
111  int train_flags);
112  // Initializes the character set encode/decode mechanism directly from a
113  // previously setup UNICHARSET and UnicharCompress.
114  // ctc_mode controls how the truth text is mapped to the network targets.
115  // Note: Call before InitNetwork!
116  void InitCharSet(const UNICHARSET& unicharset,
117  const UnicharCompress& recoder);
118 
119  // Initializes the trainer with a network_spec in the network description
120  // net_flags control network behavior according to the NetworkFlags enum.
121  // There isn't really much difference between them - only where the effects
122  // are implemented.
123  // For other args see NetworkBuilder::InitNetwork.
124  // Note: Be sure to call InitCharSet before InitNetwork!
125  bool InitNetwork(const STRING& network_spec, int append_index, int net_flags,
126  float weight_range, float learning_rate, float momentum);
127  // Initializes a trainer from a serialized TFNetworkModel proto.
128  // Returns the global step of TensorFlow graph or 0 if failed.
129  // Building a compatible TF graph: See tfnetwork.proto.
130  int InitTensorFlowNetwork(const std::string& tf_proto);
131  // Resets all the iteration counters for fine tuning or training a head,
132  // where we want the error reporting to reset.
133  void InitIterations();
134 
135  // Accessors.
136  double ActivationError() const {
137  return error_rates_[ET_DELTA];
138  }
139  double CharError() const { return error_rates_[ET_CHAR_ERROR]; }
140  const double* error_rates() const {
141  return error_rates_;
142  }
143  double best_error_rate() const {
144  return best_error_rate_;
145  }
146  int best_iteration() const {
147  return best_iteration_;
148  }
149  int learning_iteration() const { return learning_iteration_; }
150  int improvement_steps() const { return improvement_steps_; }
151  void set_perfect_delay(int delay) { perfect_delay_ = delay; }
152  const GenericVector<char>& best_trainer() const { return best_trainer_; }
153  // Returns the error that was just calculated by PrepareForBackward.
154  double NewSingleError(ErrorTypes type) const {
156  }
157  // Returns the error that was just calculated by TrainOnLine. Since
158  // TrainOnLine rolls the error buffers, this is one further back than
159  // NewSingleError.
160  double LastSingleError(ErrorTypes type) const {
161  return error_buffers_[type]
164  }
165  const DocumentCache& training_data() const {
166  return training_data_;
167  }
169 
170  // If the training sample is usable, grid searches for the optimal
171  // dict_ratio/cert_offset, and returns the results in a string of space-
172  // separated triplets of ratio,offset=worderr.
174  const ImageData* trainingdata, int iteration, double min_dict_ratio,
175  double dict_ratio_step, double max_dict_ratio, double min_cert_offset,
176  double cert_offset_step, double max_cert_offset, STRING* results);
177 
178  void SetSerializeMode(SerializeAmount serialize_amount) const {
179  serialize_amount_ = serialize_amount;
180  }
181 
182  // Provides output on the distribution of weight values.
183  void DebugNetwork();
184 
185  // Loads a set of lstmf files that were created using the lstm.train config to
186  // tesseract into memory ready for training. Returns false if nothing was
187  // loaded.
188  bool LoadAllTrainingData(const GenericVector<STRING>& filenames);
189 
190  // Keeps track of best and locally worst error rate, using internally computed
191  // values. See MaintainCheckpointsSpecific for more detail.
192  bool MaintainCheckpoints(TestCallback tester, STRING* log_msg);
193  // Keeps track of best and locally worst error_rate (whatever it is) and
194  // launches tests using rec_model, when a new min or max is reached.
195  // Writes checkpoints using train_model at appropriate times and builds and
196  // returns a log message to indicate progress. Returns false if nothing
197  // interesting happened.
198  bool MaintainCheckpointsSpecific(int iteration,
199  const GenericVector<char>* train_model,
200  const GenericVector<char>* rec_model,
201  TestCallback tester, STRING* log_msg);
202  // Builds a string containing a progress message with current error rates.
203  void PrepareLogMsg(STRING* log_msg) const;
204  // Appends <intro_str> iteration learning_iteration()/training_iteration()/
205  // sample_iteration() to the log_msg.
206  void LogIterations(const char* intro_str, STRING* log_msg) const;
207 
208  // TODO(rays) Add curriculum learning.
209  // Returns true and increments the training_stage_ if the error rate has just
210  // passed through the given threshold for the first time.
211  bool TransitionTrainingStage(float error_threshold);
212  // Returns the current training stage.
213  int CurrentTrainingStage() const { return training_stage_; }
214 
215  // Writes to the given file. Returns false in case of error.
216  virtual bool Serialize(TFile* fp) const;
217  // Reads from the given file. Returns false in case of error.
218  virtual bool DeSerialize(TFile* fp);
219 
220  // De-serializes the saved best_trainer_ into sub_trainer_, and adjusts the
221  // learning rates (by scaling reduction, or layer specific, according to
222  // NF_LAYER_SPECIFIC_LR).
223  void StartSubtrainer(STRING* log_msg);
224  // While the sub_trainer_ is behind the current training iteration and its
225  // training error is at least kSubTrainerMarginFraction better than the
226  // current training error, trains the sub_trainer_, and returns STR_UPDATED if
227  // it did anything. If it catches up, and has a better error rate than the
228  // current best, as well as a margin over the current error rate, then the
229  // trainer in *this is replaced with sub_trainer_, and STR_REPLACED is
230  // returned. STR_NONE is returned if the subtrainer wasn't good enough to
231  // receive any training iterations.
232  SubTrainerResult UpdateSubtrainer(STRING* log_msg);
233  // Reduces network learning rates, either for everything, or for layers
234  // independently, according to NF_LAYER_SPECIFIC_LR.
235  void ReduceLearningRates(LSTMTrainer* samples_trainer, STRING* log_msg);
236  // Considers reducing the learning rate independently for each layer down by
237  // factor(<1), or leaving it the same, by double-training the given number of
238  // samples and minimizing the amount of changing of sign of weight updates.
239  // Even if it looks like all weights should remain the same, an adjustment
240  // will be made to guarantee a different result when reverting to an old best.
241  // Returns the number of layer learning rates that were reduced.
242  int ReduceLayerLearningRates(double factor, int num_samples,
243  LSTMTrainer* samples_trainer);
244 
245  // Converts the string to integer class labels, with appropriate null_char_s
246  // in between if not in SimpleTextOutput mode. Returns false on failure.
247  bool EncodeString(const STRING& str, GenericVector<int>* labels) const {
248  return EncodeString(str, GetUnicharset(), IsRecoding() ? &recoder_ : NULL,
249  SimpleTextOutput(), null_char_, labels);
250  }
251  // Static version operates on supplied unicharset, encoder, simple_text.
252  static bool EncodeString(const STRING& str, const UNICHARSET& unicharset,
253  const UnicharCompress* recoder, bool simple_text,
254  int null_char, GenericVector<int>* labels);
255 
256  // Converts the network to int if not already.
257  void ConvertToInt() {
258  if ((training_flags_ & TF_INT_MODE) == 0) {
261  }
262  }
263 
264  // Performs forward-backward on the given trainingdata.
265  // Returns the sample that was used or NULL if the next sample was deemed
266  // unusable. samples_trainer could be this or an alternative trainer that
267  // holds the training samples.
268  const ImageData* TrainOnLine(LSTMTrainer* samples_trainer, bool batch) {
269  int sample_index = sample_iteration();
270  const ImageData* image =
271  samples_trainer->training_data_.GetPageBySerial(sample_index);
272  if (image != NULL) {
273  Trainability trainable = TrainOnLine(image, batch);
274  if (trainable == UNENCODABLE || trainable == NOT_BOXED) {
275  return NULL; // Sample was unusable.
276  }
277  } else {
279  }
280  return image;
281  }
282  Trainability TrainOnLine(const ImageData* trainingdata, bool batch);
283 
284  // Prepares the ground truth, runs forward, and prepares the targets.
285  // Returns a Trainability enum to indicate the suitability of the sample.
286  Trainability PrepareForBackward(const ImageData* trainingdata,
287  NetworkIO* fwd_outputs, NetworkIO* targets);
288 
289  // Writes the trainer to memory, so that the current training state can be
290  // restored.
291  bool SaveTrainingDump(SerializeAmount serialize_amount,
292  const LSTMTrainer* trainer,
293  GenericVector<char>* data) const;
294 
295  // Reads previously saved trainer from memory.
296  bool ReadTrainingDump(const GenericVector<char>& data, LSTMTrainer* trainer);
297  bool ReadSizedTrainingDump(const char* data, int size);
298 
299  // Sets up the data for MaintainCheckpoints from a light ReadTrainingDump.
300  void SetupCheckpointInfo();
301 
302  // Writes the recognizer to memory, so that it can be used for testing later.
303  void SaveRecognitionDump(GenericVector<char>* data) const;
304 
305  // Reads and returns a previously saved recognizer from memory.
307 
308  // Writes current best model to a file, unless it has already been written.
309  bool SaveBestModel(FileWriter writer) const;
310 
311  // Returns a suitable filename for a training dump, based on the model_base_,
312  // the iteration and the error rates.
313  STRING DumpFilename() const;
314 
315  // Fills the whole error buffer of the given type with the given value.
316  void FillErrorBuffer(double new_error, ErrorTypes type);
317 
318  protected:
319  // Factored sub-constructor sets up reasonable default values.
320  void EmptyConstructor();
321 
322  // Sets the unicharset properties using the given script_dir as a source of
323  // script unicharsets. If the flag TF_COMPRESS_UNICHARSET is true, also sets
324  // up the recoder_ to simplify the unicharset.
325  void SetUnicharsetProperties(const STRING& script_dir);
326 
327  // Outputs the string and periodically displays the given network inputs
328  // as an image in the given window, and the corresponding labels at the
329  // corresponding x_starts.
330  // Returns false if the truth string is empty.
331  bool DebugLSTMTraining(const NetworkIO& inputs,
332  const ImageData& trainingdata,
333  const NetworkIO& fwd_outputs,
334  const GenericVector<int>& truth_labels,
335  const NetworkIO& outputs);
336  // Displays the network targets as line a line graph.
337  void DisplayTargets(const NetworkIO& targets, const char* window_name,
338  ScrollView** window);
339 
340  // Builds a no-compromises target where the first positions should be the
341  // truth labels and the rest is padded with the null_char_.
342  bool ComputeTextTargets(const NetworkIO& outputs,
343  const GenericVector<int>& truth_labels,
344  NetworkIO* targets);
345 
346  // Builds a target using standard CTC. truth_labels should be pre-padded with
347  // nulls wherever desired. They don't have to be between all labels.
348  // outputs is input-output, as it gets clipped to minimum probability.
349  bool ComputeCTCTargets(const GenericVector<int>& truth_labels,
350  NetworkIO* outputs, NetworkIO* targets);
351 
352  // Computes network errors, and stores the results in the rolling buffers,
353  // along with the supplied text_error.
354  // Returns the delta error of the current sample (not running average.)
355  double ComputeErrorRates(const NetworkIO& deltas, double char_error,
356  double word_error);
357 
358  // Computes the network activation RMS error rate.
359  double ComputeRMSError(const NetworkIO& deltas);
360 
361  // Computes network activation winner error rate. (Number of values that are
362  // in error by >= 0.5 divided by number of time-steps.) More closely related
363  // to final character error than RMS, but still directly calculable from
364  // just the deltas. Because of the binary nature of the targets, zero winner
365  // error is a sufficient but not necessary condition for zero char error.
366  double ComputeWinnerError(const NetworkIO& deltas);
367 
368  // Computes a very simple bag of chars char error rate.
369  double ComputeCharError(const GenericVector<int>& truth_str,
370  const GenericVector<int>& ocr_str);
371  // Computes a very simple bag of words word recall error rate.
372  // NOTE that this is destructive on both input strings.
373  double ComputeWordError(STRING* truth_str, STRING* ocr_str);
374 
375  // Updates the error buffer and corresponding mean of the given type with
376  // the new_error.
377  void UpdateErrorBuffer(double new_error, ErrorTypes type);
378 
379  // Rolls error buffers and reports the current means.
380  void RollErrorBuffers();
381 
382  // Given that error_rate is either a new min or max, updates the best/worst
383  // error rates, and record of progress.
384  STRING UpdateErrorGraph(int iteration, double error_rate,
385  const GenericVector<char>& model_data,
386  TestCallback tester);
387 
388  protected:
389  // Alignment display window.
391  // CTC target display window.
393  // CTC output display window.
395  // Reconstructed image window.
397  // How often to display a debug image.
399  // Iteration at which the last checkpoint was dumped.
401  // Basename of files to save best models to.
402  STRING model_base_;
403  // Checkpoint filename.
405  // Training data.
407  // A hack to serialize less data for batch training and record file version.
408  mutable SerializeAmount serialize_amount_;
409  // Name to use when saving best_trainer_.
411  // Number of available training stages.
413  // Checkpointing callbacks.
416  // TODO(rays) These are pointers, and must be deleted. Switch to unique_ptr
417  // when we can commit to c++11.
418  CheckPointReader checkpoint_reader_;
419  CheckPointWriter checkpoint_writer_;
420 
421  // ===Serialized data to ensure that a restart produces the same results.===
422  // These members are only serialized when serialize_amount_ != LIGHT.
423  // Best error rate so far.
425  // Snapshot of all error rates at best_iteration_.
427  // Iteration of best_error_rate_.
429  // Worst error rate since best_error_rate_.
431  // Snapshot of all error rates at worst_iteration_.
433  // Iteration of worst_error_rate_.
435  // Iteration at which the process will be thought stalled.
437  // Saved recognition models for computing test error for graph points.
440  // Saved trainer for reverting back to last known best.
442  // A subsidiary trainer running with a different learning rate until either
443  // *this or sub_trainer_ hits a new best.
444  LSTMTrainer* sub_trainer_;
445  // Error rate at which last best model was dumped.
447  // Current stage of training.
449  // History of best error rate against iteration. Used for computing the
450  // number of steps to each 2% improvement.
453  // Number of iterations since the best_error_rate_ was 2% more than it is now.
455  // Number of iterations that yielded a non-zero delta error and thus provided
456  // significant learning. learning_iteration_ <= training_iteration_.
457  // learning_iteration_ is used to measure rate of learning progress.
459  // Saved value of sample_iteration_ before looking for the the next sample.
461  // How often to include a PERFECT training sample in backprop.
462  // A PERFECT training sample is used if the current
463  // training_iteration_ > last_perfect_training_iteration_ + perfect_delay_,
464  // so with perfect_delay_ == 0, all samples are used, and with
465  // perfect_delay_ == 4, at most 1 in 5 samples will be perfect.
467  // Value of training_iteration_ at which the last PERFECT training sample
468  // was used in back prop.
470  // Rolling buffers storing recent training errors are indexed by
471  // training_iteration % kRollingBufferSize_.
472  static const int kRollingBufferSize_ = 1000;
474  // Rounded mean percent trailing training errors in the buffers.
475  double error_rates_[ET_COUNT]; // RMS training error.
476 };
477 
478 } // namespace tesseract.
479 
480 #endif // TESSERACT_LSTM_LSTMTRAINER_H_
void PrepareLogMsg(STRING *log_msg) const
double best_error_rate() const
Definition: lstmtrainer.h:143
bool ComputeCTCTargets(const GenericVector< int > &truth_labels, NetworkIO *outputs, NetworkIO *targets)
const ImageData * GetPageBySerial(int serial)
Definition: imagedata.h:335
int CurrentTrainingStage() const
Definition: lstmtrainer.h:213
GenericVector< char > best_trainer_
Definition: lstmtrainer.h:441
ScrollView * ctc_win_
Definition: lstmtrainer.h:394
CheckPointWriter checkpoint_writer_
Definition: lstmtrainer.h:419
int64_t inT64
Definition: host.h:40
double worst_error_rates_[ET_COUNT]
Definition: lstmtrainer.h:432
double error_rates_[ET_COUNT]
Definition: lstmtrainer.h:475
void ReduceLearningRates(LSTMTrainer *samples_trainer, STRING *log_msg)
void InitCharSet(const UNICHARSET &unicharset, const STRING &script_dir, int train_flags)
virtual bool Serialize(TFile *fp) const
double ComputeWinnerError(const NetworkIO &deltas)
GenericVector< double > error_buffers_[ET_COUNT]
Definition: lstmtrainer.h:473
bool(* FileReader)(const STRING &filename, GenericVector< char > *data)
SerializeAmount serialize_amount_
Definition: lstmtrainer.h:408
GenericVector< double > best_error_history_
Definition: lstmtrainer.h:451
const double * error_rates() const
Definition: lstmtrainer.h:140
voidpf void uLong size
Definition: ioapi.h:39
bool InitNetwork(const STRING &network_spec, int append_index, int net_flags, float weight_range, float learning_rate, float momentum)
DocumentCache * mutable_training_data()
Definition: lstmtrainer.h:168
const UNICHARSET & GetUnicharset() const
int best_iteration() const
Definition: lstmtrainer.h:146
static const int kRollingBufferSize_
Definition: lstmtrainer.h:472
ScrollView * target_win_
Definition: lstmtrainer.h:392
void LogIterations(const char *intro_str, STRING *log_msg) const
bool TryLoadingCheckpoint(const char *filename)
void DisplayTargets(const NetworkIO &targets, const char *window_name, ScrollView **window)
DocumentCache training_data_
Definition: lstmtrainer.h:406
double ActivationError() const
Definition: lstmtrainer.h:136
bool MaintainCheckpoints(TestCallback tester, STRING *log_msg)
void set_perfect_delay(int delay)
Definition: lstmtrainer.h:151
bool SaveTrainingDump(SerializeAmount serialize_amount, const LSTMTrainer *trainer, GenericVector< char > *data) const
SubTrainerResult UpdateSubtrainer(STRING *log_msg)
bool DebugLSTMTraining(const NetworkIO &inputs, const ImageData &trainingdata, const NetworkIO &fwd_outputs, const GenericVector< int > &truth_labels, const NetworkIO &outputs)
bool ReadTrainingDump(const GenericVector< char > &data, LSTMTrainer *trainer)
LSTMTrainer * sub_trainer_
Definition: lstmtrainer.h:444
float error_rate_of_last_saved_best_
Definition: lstmtrainer.h:446
ScrollView * recon_win_
Definition: lstmtrainer.h:396
virtual void ConvertToInt()
Definition: network.h:177
double ComputeErrorRates(const NetworkIO &deltas, double char_error, double word_error)
void StartSubtrainer(STRING *log_msg)
Definition: strngs.h:45
double NewSingleError(ErrorTypes type) const
Definition: lstmtrainer.h:154
const DocumentCache & training_data() const
Definition: lstmtrainer.h:165
TessResultCallback3< bool, SerializeAmount, const LSTMTrainer *, GenericVector< char > * > * CheckPointWriter
Definition: lstmtrainer.h:78
void SetSerializeMode(SerializeAmount serialize_amount) const
Definition: lstmtrainer.h:178
bool SaveBestModel(FileWriter writer) const
const ImageData * TrainOnLine(LSTMTrainer *samples_trainer, bool batch)
Definition: lstmtrainer.h:268
CheckPointReader checkpoint_reader_
Definition: lstmtrainer.h:418
int learning_iteration() const
Definition: lstmtrainer.h:149
int InitTensorFlowNetwork(const std::string &tf_proto)
void SetUnicharsetProperties(const STRING &script_dir)
STRING DumpFilename() const
void FillErrorBuffer(double new_error, ErrorTypes type)
double learning_rate() const
virtual bool DeSerialize(TFile *fp)
GenericVector< char > best_model_data_
Definition: lstmtrainer.h:438
double ComputeRMSError(const NetworkIO &deltas)
const GenericVector< char > & best_trainer() const
Definition: lstmtrainer.h:152
bool LoadAllTrainingData(const GenericVector< STRING > &filenames)
bool ReadSizedTrainingDump(const char *data, int size)
static LSTMRecognizer * ReadRecognitionDump(const GenericVector< char > &data)
double ComputeCharError(const GenericVector< int > &truth_str, const GenericVector< int > &ocr_str)
Trainability PrepareForBackward(const ImageData *trainingdata, NetworkIO *fwd_outputs, NetworkIO *targets)
bool(* FileWriter)(const GenericVector< char > &data, const STRING &filename)
double LastSingleError(ErrorTypes type) const
Definition: lstmtrainer.h:160
double best_error_rates_[ET_COUNT]
Definition: lstmtrainer.h:426
STRING UpdateErrorGraph(int iteration, double error_rate, const GenericVector< char > &model_data, TestCallback tester)
GenericVector< char > worst_model_data_
Definition: lstmtrainer.h:439
TessResultCallback2< bool, const GenericVector< char > &, LSTMTrainer * > * CheckPointReader
Definition: lstmtrainer.h:69
const char * filename
Definition: ioapi.h:38
typedef int(ZCALLBACK *close_file_func) OF((voidpf opaque
GenericVector< int > best_error_iterations_
Definition: lstmtrainer.h:452
double ComputeWordError(STRING *truth_str, STRING *ocr_str)
bool ComputeTextTargets(const NetworkIO &outputs, const GenericVector< int > &truth_labels, NetworkIO *targets)
double CharError() const
Definition: lstmtrainer.h:139
void UpdateErrorBuffer(double new_error, ErrorTypes type)
bool TransitionTrainingStage(float error_threshold)
int improvement_steps() const
Definition: lstmtrainer.h:150
bool EncodeString(const STRING &str, GenericVector< int > *labels) const
Definition: lstmtrainer.h:247
int ReduceLayerLearningRates(double factor, int num_samples, LSTMTrainer *samples_trainer)
bool MaintainCheckpointsSpecific(int iteration, const GenericVector< char > *train_model, const GenericVector< char > *rec_model, TestCallback tester, STRING *log_msg)
ScrollView * align_win_
Definition: lstmtrainer.h:390
TessResultCallback4< STRING, int, const double *, const GenericVector< char > &, int > * TestCallback
Definition: lstmtrainer.h:83
void SaveRecognitionDump(GenericVector< char > *data) const
Trainability GridSearchDictParams(const ImageData *trainingdata, int iteration, double min_dict_ratio, double dict_ratio_step, double max_dict_ratio, double min_cert_offset, double cert_offset_step, double max_cert_offset, STRING *results)