tesseract/a01049_source.html

 // File:        lstmrecognizer.h
 // Description: Top-level line recognizer class for LSTM-based networks.
 // Author:      Ray Smith
 // Created:     Thu May 02 08:57:06 PST 2013
 //
 // (C) Copyright 2013, Google Inc.
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
 // http://www.apache.org/licenses/LICENSE-2.0
 // Unless required by applicable law or agreed to in writing, software
 // distributed under the License is distributed on an "AS IS" BASIS,
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.

 #ifndef TESSERACT_LSTM_LSTMRECOGNIZER_H_
 #define TESSERACT_LSTM_LSTMRECOGNIZER_H_

 #include "ccutil.h"
 #include "helpers.h"
 #include "imagedata.h"
 #include "matrix.h"
 #include "network.h"
 #include "networkscratch.h"
 #include "recodebeam.h"
 #include "series.h"
 #include "strngs.h"
 #include "unicharcompress.h"

 class BLOB_CHOICE_IT;
 struct Pix;
 class ROW_RES;
 class ScrollView;
 class TBOX;
 class WERD_RES;

 namespace tesseract {

 class Dict;
 class ImageData;

 // Bit flags controlling the training mode. Each enumerator occupies a single
 // bit so that several can be combined in one integer and tested with '&'.
 // Bits 2, 3 and 5 are not assigned by this enum.
 enum TrainingFlags {
   // Bit 0: tested by LSTMRecognizer::IsIntMode().
   TF_INT_MODE = 1 << 0,
   // Bit 1: tested by LSTMRecognizer::IsHardening().
   TF_AUTO_HARDEN = 1 << 1,
   // Bit 4: selects CS_ROUND_ROBIN rather than CS_SEQUENTIAL in
   // LSTMRecognizer::CacheStrategy().
   TF_ROUND_ROBIN_TRAINING = 1 << 4,
   // Bit 6: tested by LSTMRecognizer::IsRecoding().
   TF_COMPRESS_UNICHARSET = 1 << 6,
 };

 // Top-level line recognizer class for LSTM-based networks.
 // Note that a sub-class, LSTMTrainer is used for training.
 // NOTE(review): the class holds raw pointers (network_, dict_, search_,
 // debug_win_) and declares a destructor but no copy operations. Presumably
 // the destructor releases them, in which case instances must not be copied --
 // confirm against lstmrecognizer.cpp.
 class LSTMRecognizer {
  public:
   LSTMRecognizer();
   ~LSTMRecognizer();

   // Returns the number of outputs reported by the network.
   // Precondition: network_ is not null (it is dereferenced unconditionally).
   int NumOutputs() const {
     return network_->NumOutputs();
   }
   // Returns the count of backward training steps (training_iteration_).
   int training_iteration() const {
     return training_iteration_;
   }
   // Returns the index into the training sample set (sample_iteration_).
   int sample_iteration() const {
     return sample_iteration_;
   }
   // Returns the global learning rate, widened from float to double.
   double learning_rate() const {
     return learning_rate_;
   }
   // True if TF_AUTO_HARDEN is set in the training flags.
   bool IsHardening() const {
     return (training_flags_ & TF_AUTO_HARDEN) != 0;
   }
   // Returns the loss type of the network's output shape, or LT_NONE if there
   // is no network.
   LossType OutputLossType() const {
     if (network_ == nullptr) return LT_NONE;
     StaticShape shape;
     shape = network_->OutputShape(shape);
     return shape.loss_type();
   }
   // True if the network output loss type is LT_SOFTMAX.
   bool SimpleTextOutput() const { return OutputLossType() == LT_SOFTMAX; }
   // True if TF_INT_MODE is set in the training flags.
   bool IsIntMode() const { return (training_flags_ & TF_INT_MODE) != 0; }
   // True if recoder_ is active to re-encode text to a smaller space.
   bool IsRecoding() const {
     return (training_flags_ & TF_COMPRESS_UNICHARSET) != 0;
   }
   // Returns the cache strategy for the DocumentCache: CS_ROUND_ROBIN if
   // TF_ROUND_ROBIN_TRAINING is set, otherwise CS_SEQUENTIAL.
   CachingStrategy CacheStrategy() const {
     return (training_flags_ & TF_ROUND_ROBIN_TRAINING) != 0 ? CS_ROUND_ROBIN
                                                             : CS_SEQUENTIAL;
   }
   // Returns true if the network is a TensorFlow network. Returns false when
   // there is no network, matching the null handling in OutputLossType().
   bool IsTensorFlow() const {
     return network_ != nullptr && network_->type() == NT_TENSORFLOW;
   }
   // Returns a vector of layer ids that can be passed to other layer functions
   // to access a specific layer.
   // Asserts that the network exists and is of type NT_SERIES.
   GenericVector<STRING> EnumerateLayers() const {
     ASSERT_HOST(network_ != nullptr && network_->type() == NT_SERIES);
     Series* series = static_cast<Series*>(network_);
     GenericVector<STRING> layers;
     series->EnumerateLayers(nullptr, &layers);
     return layers;
   }
   // Returns a specific layer from its id (from EnumerateLayers).
   // Asserts that the network is an NT_SERIES and that id has more than one
   // character and starts with ':'. The leading ':' is skipped before the
   // lookup.
   Network* GetLayer(const STRING& id) const {
     ASSERT_HOST(network_ != nullptr && network_->type() == NT_SERIES);
     ASSERT_HOST(id.length() > 1 && id[0] == ':');
     Series* series = static_cast<Series*>(network_);
     return series->GetLayer(&id[1]);
   }
   // Returns the learning rate of the layer from its id. If the network does
   // not have NF_LAYER_SPECIFIC_LR set, the global learning rate is returned
   // and the id is not inspected.
   float GetLayerLearningRate(const STRING& id) const {
     ASSERT_HOST(network_ != nullptr && network_->type() == NT_SERIES);
     if (!network_->TestFlag(NF_LAYER_SPECIFIC_LR)) {
       return learning_rate_;
     }
     ASSERT_HOST(id.length() > 1 && id[0] == ':');
     Series* series = static_cast<Series*>(network_);
     return series->LayerLearningRate(&id[1]);
   }
   // Multiplies all the learning rate(s) by the given factor: always the
   // global rate, and additionally every enumerated layer when the network
   // has NF_LAYER_SPECIFIC_LR set.
   void ScaleLearningRate(double factor) {
     ASSERT_HOST(network_ != nullptr && network_->type() == NT_SERIES);
     learning_rate_ *= factor;
     if (network_->TestFlag(NF_LAYER_SPECIFIC_LR)) {
       GenericVector<STRING> layers = EnumerateLayers();
       for (int i = 0; i < layers.size(); ++i) {
         ScaleLayerLearningRate(layers[i], factor);
       }
     }
   }
   // Multiplies the learning rate of the layer with id, by the given factor.
   // The id must have the ':'-prefixed form produced by EnumerateLayers.
   void ScaleLayerLearningRate(const STRING& id, double factor) {
     ASSERT_HOST(network_ != nullptr && network_->type() == NT_SERIES);
     ASSERT_HOST(id.length() > 1 && id[0] == ':');
     Series* series = static_cast<Series*>(network_);
     series->ScaleLayerLearningRate(&id[1], factor);
   }

   // True if the network is using adagrad to train. Returns false when there
   // is no network, matching the null handling in OutputLossType().
   bool IsUsingAdaGrad() const {
     return network_ != nullptr && network_->TestFlag(NF_ADA_GRAD);
   }
   // Provides access to the UNICHARSET that this classifier works with.
   const UNICHARSET& GetUnicharset() const { return ccutil_.unicharset; }
   // Provides access to the Dict that this classifier works with.
   const Dict* GetDict() const { return dict_; }
   // Sets the sample iteration to the given value. The sample_iteration_
   // determines the seed for the random number generator. The training
   // iteration is incremented only by a successful training iteration.
   void SetIteration(int iteration) {
     sample_iteration_ = iteration;
   }
   // Accessors for textline image normalization.
   // Precondition for NumInputs: network_ is not null (it is dereferenced
   // unconditionally).
   int NumInputs() const {
     return network_->NumInputs();
   }
   // Returns the index in the softmax of the null character.
   int null_char() const { return null_char_; }

   // Writes to the given file. Returns false in case of error.
   bool Serialize(TFile* fp) const;
   // Reads from the given file. Returns false in case of error.
   bool DeSerialize(TFile* fp);
   // Loads the dictionary if possible from the traineddata file.
   // Prints a warning message, and returns false but otherwise fails silently
   // and continues to work without it if loading fails.
   // Note that dictionary load is independent from DeSerialize, but dependent
   // on the unicharset matching. This enables training to deserialize a model
   // from checkpoint or restore without having to go back and reload the
   // dictionary.
   bool LoadDictionary(const char* lang, TessdataManager* mgr);

   // Recognizes the line image, contained within image_data, returning the
   // ratings matrix and matching box_word for each WERD_RES in the output.
   // If invert, tries inverted as well if the normal interpretation doesn't
   // produce a good enough result. If use_alternates, the ratings matrix is
   // filled with segmentation and classifier alternatives that may be searched
   // using the standard beam search, otherwise, just a diagonal and prebuilt
   // best_choice. The line_box is used for computing the box_word in the
   // output words. Score_ratio is used to determine the classifier alternates.
   // If one_word, then a single WERD_RES is formed, regardless of the spaces
   // found during recognition.
   // If not nullptr, we attempt to translate the output to target_unicharset,
   // but do not guarantee success, due to mismatches. In that case the output
   // words are marked with our UNICHARSET, not the caller's.
   void RecognizeLine(const ImageData& image_data, bool invert, bool debug,
                      double worst_dict_cert, bool use_alternates,
                      const UNICHARSET* target_unicharset, const TBOX& line_box,
                      float score_ratio, bool one_word,
                      PointerVector<WERD_RES>* words);
   // Builds a set of tesseract-compatible WERD_RESs aligned to line_box,
   // corresponding to the network output in outputs, labels, label_coords.
   // one_word generates a single word output, that may include spaces inside.
   // use_alternates generates alternative BLOB_CHOICEs and segmentation paths,
   // with cut-offs determined by scale_factor.
   // If not nullptr, we attempt to translate the output to target_unicharset,
   // but do not guarantee success, due to mismatches. In that case the output
   // words are marked with our UNICHARSET, not the caller's.
   // NOTE(review): label_coords is taken by value (copied on every call),
   // unlike labels. Switching it to a const reference requires changing the
   // out-of-line definition at the same time, so it is left as declared.
   void WordsFromOutputs(const NetworkIO& outputs,
                         const GenericVector<int>& labels,
                         const GenericVector<int> label_coords,
                         const TBOX& line_box, bool debug, bool use_alternates,
                         bool one_word, float score_ratio, float scale_factor,
                         const UNICHARSET* target_unicharset,
                         PointerVector<WERD_RES>* words);

   // Helper computes min and mean best results in the output.
   // NOTE(review): sd presumably receives the standard deviation -- confirm
   // against the definition.
   void OutputStats(const NetworkIO& outputs,
                    float* min_output, float* mean_output, float* sd);
   // Recognizes the image_data, returning the labels,
   // scores, and corresponding pairs of start, end x-coords in coords.
   // If label_threshold is positive, uses it for making the labels, otherwise
   // uses standard ctc. Returned in scale_factor is the reduction factor
   // between the image and the output coords, for computing bounding boxes.
   // If re_invert is true, the input is inverted back to its original
   // photometric interpretation if inversion is attempted but fails to
   // improve the results. This ensures that outputs contains the correct
   // forward outputs for the best photometric interpretation.
   // inputs is filled with the used inputs to the network, and if not null,
   // target boxes is filled with scaled truth boxes if present in image_data.
   bool RecognizeLine(const ImageData& image_data, bool invert, bool debug,
                      bool re_invert, float label_threshold, float* scale_factor,
                      NetworkIO* inputs, NetworkIO* outputs);
   // Returns a tesseract-compatible WERD_RES from the line recognizer outputs.
   // line_box should be the bounding box of the line image in the main image,
   // outputs the output of the network,
   // [word_start, word_end) the interval over which to convert,
   // score_ratio for choosing alternate classifier choices,
   // use_alternates to control generation of alternative segmentations,
   // labels, label_coords, scale_factor from RecognizeLine above.
   // If target_unicharset is not nullptr, attempts to translate the internal
   // unichar_ids to the target_unicharset, but falls back to untranslated ids
   // if the translation should fail.
   WERD_RES* WordFromOutput(const TBOX& line_box, const NetworkIO& outputs,
                            int word_start, int word_end, float score_ratio,
                            float space_certainty, bool debug,
                            bool use_alternates,
                            const UNICHARSET* target_unicharset,
                            const GenericVector<int>& labels,
                            const GenericVector<int>& label_coords,
                            float scale_factor);
   // Sets up a word with the ratings matrix and fake blobs with boxes in the
   // right places.
   WERD_RES* InitializeWord(const TBOX& line_box, int word_start, int word_end,
                            float space_certainty, bool use_alternates,
                            const UNICHARSET* target_unicharset,
                            const GenericVector<int>& labels,
                            const GenericVector<int>& label_coords,
                            float scale_factor);

   // Converts an array of labels to utf-8, whether or not the labels are
   // augmented with character boundaries.
   STRING DecodeLabels(const GenericVector<int>& labels);

   // Displays the forward results in a window with the characters and
   // boundaries as determined by the labels and label_coords.
   void DisplayForward(const NetworkIO& inputs,
                       const GenericVector<int>& labels,
                       const GenericVector<int>& label_coords,
                       const char* window_name,
                       ScrollView** window);

  protected:
   // Sets the random seed from the sample_iteration_. One random value is
   // drawn and discarded immediately after seeding.
   void SetRandomSeed() {
     inT64 seed = static_cast<inT64>(sample_iteration_) * 0x10000001;
     randomizer_.set_seed(seed);
     randomizer_.IntRand();
   }

   // Displays the labels and cuts at the corresponding xcoords.
   // Size of labels should match xcoords.
   void DisplayLSTMOutput(const GenericVector<int>& labels,
                          const GenericVector<int>& xcoords,
                          int height, ScrollView* window);

   // Prints debug output detailing the activation path that is implied by the
   // xcoords.
   void DebugActivationPath(const NetworkIO& outputs,
                            const GenericVector<int>& labels,
                            const GenericVector<int>& xcoords);

   // Prints debug output detailing activations and 2nd choice over a range
   // of positions.
   void DebugActivationRange(const NetworkIO& outputs, const char* label,
                             int best_choice, int x_start, int x_end);

   // Converts the network output to a sequence of labels. Outputs labels, scores
   // and start xcoords of each char, and each null_char_, with an additional
   // final xcoord for the end of the output.
   // The conversion method is determined by internal state.
   void LabelsFromOutputs(const NetworkIO& outputs, float null_thr,
                          GenericVector<int>* labels,
                          GenericVector<int>* xcoords);
   // Converts the network output to a sequence of labels, using a threshold
   // on the null_char_ to determine character boundaries. Outputs labels, scores
   // and start xcoords of each char, and each null_char_, with an additional
   // final xcoord for the end of the output.
   // The label output is the one with the highest score in the interval between
   // null_chars_.
   void LabelsViaThreshold(const NetworkIO& output,
                           float null_threshold,
                           GenericVector<int>* labels,
                           GenericVector<int>* xcoords);
   // Converts the network output to a sequence of labels, with scores and
   // start x-coords of the character labels. Retains the null_char_ character as
   // the end x-coord, where already present, otherwise the start of the next
   // character is the end.
   // The number of labels, scores, and xcoords is always matched, except that
   // there is always an additional xcoord for the last end position.
   void LabelsViaCTC(const NetworkIO& output,
                     GenericVector<int>* labels,
                     GenericVector<int>* xcoords);
   // As LabelsViaCTC except that this function constructs the best path that
   // contains only legal sequences of subcodes for recoder_.
   void LabelsViaReEncode(const NetworkIO& output, GenericVector<int>* labels,
                          GenericVector<int>* xcoords);
   // Converts the network output to a sequence of labels, with scores, using
   // the simple character model (each position is a char, and the null_char_ is
   // mainly intended for tail padding.)
   void LabelsViaSimpleText(const NetworkIO& output,
                            GenericVector<int>* labels,
                            GenericVector<int>* xcoords);

   // Helper returns a BLOB_CHOICE_LIST for the choices in a given x-range.
   // Handles either LSTM labels or direct unichar-ids.
   // Score ratio determines the worst ratio between top choice and remainder.
   // If target_unicharset is not nullptr, attempts to translate to the target
   // unicharset, returning nullptr on failure.
   BLOB_CHOICE_LIST* GetBlobChoices(int col, int row, bool debug,
                                    const NetworkIO& output,
                                    const UNICHARSET* target_unicharset,
                                    int x_start, int x_end, float score_ratio);

   // Adds to the given iterator, the blob choices for the target_unicharset
   // that correspond to the given LSTM unichar_id.
   // Returns false if unicharset translation failed.
   bool AddBlobChoices(int unichar_id, float rating, float certainty, int col,
                       int row, const UNICHARSET* target_unicharset,
                       BLOB_CHOICE_IT* bc_it);

   // Returns a string corresponding to the label starting at start. Sets *end
   // to the next start and if non-null, *decoded to the unichar id.
   const char* DecodeLabel(const GenericVector<int>& labels, int start, int* end,
                           int* decoded);

   // Returns a string corresponding to a given single label id, falling back to
   // a default of ".." for part of a multi-label unichar-id.
   const char* DecodeSingleLabel(int label);

  protected:
   // The network hierarchy. May be null: OutputLossType() checks for it and
   // the layer functions assert on it.
   Network* network_;
   // The unicharset. Only the unicharset element is serialized.
   // Has to be a CCUtil, so Dict can point to it.
   CCUtil ccutil_;
   // For backward compatibility, recoder_ is serialized iff
   // training_flags_ & TF_COMPRESS_UNICHARSET.
   // Further encode/decode ccutil_.unicharset's ids to simplify the unicharset.
   UnicharCompress recoder_;

   // ==Training parameters that are serialized to provide a record of them.==
   STRING network_str_;
   // Flags used to determine the training method of the network.
   // See enum TrainingFlags above.
   inT32 training_flags_;
   // Number of actual backward training steps used.
   inT32 training_iteration_;
   // Index into training sample set. sample_iteration >= training_iteration_.
   inT32 sample_iteration_;
   // Index in softmax of null character. May take the value UNICHAR_BROKEN or
   // ccutil_.unicharset.size().
   inT32 null_char_;
   // Range used for the initial random numbers in the weights.
   float weight_range_;
   // Learning rate and momentum multipliers of deltas in backprop.
   float learning_rate_;
   float momentum_;

   // === NOT SERIALIZED.
   TRand randomizer_;
   NetworkScratch scratch_space_;
   // Language model (optional) to use with the beam search.
   Dict* dict_;
   // Beam search held between uses to optimize memory allocation/use.
   RecodeBeamSearch* search_;

   // == Debugging parameters.==
   // Recognition debug display window.
   ScrollView* debug_win_;
 };

 }  // namespace tesseract.

 #endif  // TESSERACT_LSTM_LSTMRECOGNIZER_H_
helpers.h

tesseract::LSTMRecognizer::GetLayerLearningRate
float GetLayerLearningRate(const STRING &id) const
Definition: lstmrecognizer.h:111

tesseract::TF_AUTO_HARDEN
Definition: lstmrecognizer.h:48

tesseract::StaticShape::loss_type
LossType loss_type() const
Definition: static_shape.h:48

tesseract::NetworkScratch
Definition: networkscratch.h:36

UNICHARSET
Definition: unicharset.h:139

tesseract::LSTMRecognizer::training_flags_
inT32 training_flags_
Definition: lstmrecognizer.h:364

tesseract::TF_COMPRESS_UNICHARSET
Definition: lstmrecognizer.h:50

tesseract::Network
Definition: network.h:105

inT64
int64_t inT64
Definition: host.h:40

inT32
int32_t inT32
Definition: host.h:38

tesseract::CCUtil
Definition: ccutil.h:51

tesseract::LSTMRecognizer::GetBlobChoices
BLOB_CHOICE_LIST * GetBlobChoices(int col, int row, bool debug, const NetworkIO &output, const UNICHARSET *target_unicharset, int x_start, int x_end, float score_ratio)
Definition: lstmrecognizer.cpp:689

tesseract::LSTMRecognizer::RecognizeLine
void RecognizeLine(const ImageData &image_data, bool invert, bool debug, double worst_dict_cert, bool use_alternates, const UNICHARSET *target_unicharset, const TBOX &line_box, float score_ratio, bool one_word, PointerVector< WERD_RES > *words)
Definition: lstmrecognizer.cpp:146

tesseract-c_api-demo.lang
string lang
Definition: tesseract-c_api-demo.py:28

tesseract::LT_NONE
Definition: static_shape.h:28

tesseract::TRand::IntRand
inT32 IntRand()
Definition: helpers.h:55

tesseract::LSTMRecognizer::OutputLossType
LossType OutputLossType() const
Definition: lstmrecognizer.h:75

tesseract::LSTMRecognizer::null_char_
inT32 null_char_
Definition: lstmrecognizer.h:371

tesseract::CS_ROUND_ROBIN
Definition: imagedata.h:52

tesseract::LSTMRecognizer::network_str_
STRING network_str_
Definition: lstmrecognizer.h:361

tesseract::CS_SEQUENTIAL
Definition: imagedata.h:47

tesseract::LSTMRecognizer::weight_range_
float weight_range_
Definition: lstmrecognizer.h:373

tesseract::ImageData
Definition: imagedata.h:103

tesseract::LSTMRecognizer::IsTensorFlow
bool IsTensorFlow() const
Definition: lstmrecognizer.h:93

tesseract::Network::type
NetworkType type() const
Definition: network.h:112

tesseract::LSTMRecognizer::NumInputs
int NumInputs() const
Definition: lstmrecognizer.h:153

unicharcompress.h

tesseract::LSTMRecognizer::WordFromOutput
WERD_RES * WordFromOutput(const TBOX &line_box, const NetworkIO &outputs, int word_start, int word_end, float score_ratio, float space_certainty, bool debug, bool use_alternates, const UNICHARSET *target_unicharset, const GenericVector< int > &labels, const GenericVector< int > &label_coords, float scale_factor)
Definition: lstmrecognizer.cpp:332

tesseract::LSTMRecognizer::GetUnicharset
const UNICHARSET & GetUnicharset() const
Definition: lstmrecognizer.h:143

tesseract::LSTMRecognizer::randomizer_
TRand randomizer_
Definition: lstmrecognizer.h:379

ccutil.h

tesseract::Plumbing::GetLayer
Network * GetLayer(const char *id) const
Definition: plumbing.cpp:148

tesseract::LSTMRecognizer::scratch_space_
NetworkScratch scratch_space_
Definition: lstmrecognizer.h:380

tesseract::LSTMRecognizer::learning_rate_
float learning_rate_
Definition: lstmrecognizer.h:375

tesseract::LSTMRecognizer::DeSerialize
bool DeSerialize(TFile *fp)
Definition: lstmrecognizer.cpp:91

tesseract::LSTMRecognizer::IsHardening
bool IsHardening() const
Definition: lstmrecognizer.h:72

strngs.h

tesseract::LSTMRecognizer::IsUsingAdaGrad
bool IsUsingAdaGrad() const
Definition: lstmrecognizer.h:141

tesseract::LSTMRecognizer::ScaleLearningRate
void ScaleLearningRate(double factor)
Definition: lstmrecognizer.h:122

tesseract::LSTMRecognizer::dict_
Dict * dict_
Definition: lstmrecognizer.h:382

tesseract::LSTMRecognizer::EnumerateLayers
GenericVector< STRING > EnumerateLayers() const
Definition: lstmrecognizer.h:96

tesseract::LSTMRecognizer::sample_iteration_
inT32 sample_iteration_
Definition: lstmrecognizer.h:368

tesseract::TessdataManager
Definition: tessdatamanager.h:121

GenericVector::size
int size() const
Definition: genericvector.h:72

tesseract
Definition: baseapi.cpp:82

tesseract::LSTMRecognizer::ccutil_
CCUtil ccutil_
Definition: lstmrecognizer.h:354

tesseract::LSTMRecognizer::momentum_
float momentum_
Definition: lstmrecognizer.h:376

imagedata.h

tesseract::LSTMRecognizer::LoadDictionary
bool LoadDictionary(const char *lang, TessdataManager *mgr)
Definition: lstmrecognizer.cpp:131

ASSERT_HOST
#define ASSERT_HOST(x)
Definition: errcode.h:84

tesseract::TF_INT_MODE
Definition: lstmrecognizer.h:47

GenericVector< STRING >

network.h

tesseract::LSTMRecognizer::WordsFromOutputs
void WordsFromOutputs(const NetworkIO &outputs, const GenericVector< int > &labels, const GenericVector< int > label_coords, const TBOX &line_box, bool debug, bool use_alternates, bool one_word, float score_ratio, float scale_factor, const UNICHARSET *target_unicharset, PointerVector< WERD_RES > *words)
Definition: lstmrecognizer.cpp:185

networkscratch.h

tesseract::LSTMRecognizer::CacheStrategy
CachingStrategy CacheStrategy() const
Definition: lstmrecognizer.h:88

series.h

tesseract::LSTMRecognizer::sample_iteration
int sample_iteration() const
Definition: lstmrecognizer.h:66

tesseract::LSTMRecognizer::LSTMRecognizer
LSTMRecognizer()
Definition: lstmrecognizer.cpp:52

tesseract::TRand
Definition: helpers.h:41

tesseract::NT_TENSORFLOW
Definition: network.h:78

tesseract::LSTMRecognizer::DecodeSingleLabel
const char * DecodeSingleLabel(int label)
Definition: lstmrecognizer.cpp:804

STRING
Definition: strngs.h:45

tesseract::LSTMRecognizer::LabelsViaReEncode
void LabelsViaReEncode(const NetworkIO &output, GenericVector< int > *labels, GenericVector< int > *xcoords)
Definition: lstmrecognizer.cpp:653

tesseract::NF_ADA_GRAD
Definition: network.h:88

tesseract::LT_SOFTMAX
Definition: static_shape.h:30

tesseract::LSTMRecognizer
Definition: lstmrecognizer.h:55

tesseract::LSTMRecognizer::NumOutputs
int NumOutputs() const
Definition: lstmrecognizer.h:60

tesseract::LSTMRecognizer::GetDict
const Dict * GetDict() const
Definition: lstmrecognizer.h:145

tesseract::LSTMRecognizer::LabelsViaSimpleText
void LabelsViaSimpleText(const NetworkIO &output, GenericVector< int > *labels, GenericVector< int > *xcoords)
Definition: lstmrecognizer.cpp:667

tesseract::Network::TestFlag
bool TestFlag(NetworkFlags flag) const
Definition: network.h:144

tesseract::LSTMRecognizer::LabelsFromOutputs
void LabelsFromOutputs(const NetworkIO &outputs, float null_thr, GenericVector< int > *labels, GenericVector< int > *xcoords)
Definition: lstmrecognizer.cpp:572

tesseract::LSTMRecognizer::GetLayer
Network * GetLayer(const STRING &id) const
Definition: lstmrecognizer.h:104

tesseract::LSTMRecognizer::DebugActivationRange
void DebugActivationRange(const NetworkIO &outputs, const char *label, int best_choice, int x_start, int x_end)
Definition: lstmrecognizer.cpp:531

WERD_RES
Definition: pageres.h:155

tesseract::CCUtil::unicharset
UNICHARSET unicharset
Definition: ccutil.h:68

tesseract::LSTMRecognizer::null_char
int null_char() const
Definition: lstmrecognizer.h:156

tesseract::Network::OutputShape
virtual StaticShape OutputShape(const StaticShape &input_shape) const
Definition: network.h:133

recodebeam.h

tesseract::NetworkIO
Definition: networkio.h:39

tesseract::LSTMRecognizer::learning_rate
double learning_rate() const
Definition: lstmrecognizer.h:69

tesseract::Plumbing::ScaleLayerLearningRate
void ScaleLayerLearningRate(const char *id, double factor)
Definition: plumbing.h:108

tesseract::RecodeBeamSearch
Definition: recodebeam.h:176

tesseract::NT_SERIES
Definition: network.h:54

tesseract::LSTMRecognizer::SetRandomSeed
void SetRandomSeed()
Definition: lstmrecognizer.h:263

tesseract::LSTMRecognizer::network_
Network * network_
Definition: lstmrecognizer.h:351

TBOX
Definition: rect.h:30

tesseract::TrainingFlags
TrainingFlags
Definition: lstmrecognizer.h:46

tesseract::LSTMRecognizer::LabelsViaCTC
void LabelsViaCTC(const NetworkIO &output, GenericVector< int > *labels, GenericVector< int > *xcoords)
Definition: lstmrecognizer.cpp:633

tesseract::TFile
Definition: serialis.h:51

tesseract::LSTMRecognizer::DisplayForward
void DisplayForward(const NetworkIO &inputs, const GenericVector< int > &labels, const GenericVector< int > &label_coords, const char *window_name, ScrollView **window)
Definition: lstmrecognizer.cpp:461

tesseract::LSTMRecognizer::DebugActivationPath
void DebugActivationPath(const NetworkIO &outputs, const GenericVector< int > &labels, const GenericVector< int > &xcoords)
Definition: lstmrecognizer.cpp:504

tesseract::LSTMRecognizer::debug_win_
ScrollView * debug_win_
Definition: lstmrecognizer.h:388

tesseract::CachingStrategy
CachingStrategy
Definition: imagedata.h:40

tesseract::UnicharCompress
Definition: unicharcompress.h:139

tesseract::Network::NumInputs
int NumInputs() const
Definition: network.h:120

tesseract::LSTMRecognizer::IsRecoding
bool IsRecoding() const
Definition: lstmrecognizer.h:84

tesseract::LSTMRecognizer::SetIteration
void SetIteration(int iteration)
Definition: lstmrecognizer.h:149

tesseract::StaticShape
Definition: static_shape.h:36

tesseract::LSTMRecognizer::OutputStats
void OutputStats(const NetworkIO &outputs, float *min_output, float *mean_output, float *sd)
Definition: lstmrecognizer.cpp:236

tesseract::LSTMRecognizer::AddBlobChoices
bool AddBlobChoices(int unichar_id, float rating, float certainty, int col, int row, const UNICHARSET *target_unicharset, BLOB_CHOICE_IT *bc_it)
Definition: lstmrecognizer.cpp:739

tesseract::LSTMRecognizer::LabelsViaThreshold
void LabelsViaThreshold(const NetworkIO &output, float null_threshold, GenericVector< int > *labels, GenericVector< int > *xcoords)
Definition: lstmrecognizer.cpp:592

tesseract::Dict
Definition: dict.h:87

tesseract::PointerVector< WERD_RES >

tesseract::TRand::set_seed
void set_seed(uinT64 seed)
Definition: helpers.h:45

tesseract::LSTMRecognizer::recoder_
UnicharCompress recoder_
Definition: lstmrecognizer.h:358

tesseract::LSTMRecognizer::~LSTMRecognizer
~LSTMRecognizer()
Definition: lstmrecognizer.cpp:65

tesseract::LSTMRecognizer::DecodeLabels
STRING DecodeLabels(const GenericVector< int > &labels)
Definition: lstmrecognizer.cpp:446

tesseract::TF_ROUND_ROBIN_TRAINING
Definition: lstmrecognizer.h:49

tesseract::LossType
LossType
Definition: static_shape.h:27

tesseract::Network::NumOutputs
int NumOutputs() const
Definition: network.h:123

tesseract::Series
Definition: series.h:27

ROW_RES
Definition: pageres.h:125

tesseract::LSTMRecognizer::DisplayLSTMOutput
void DisplayLSTMOutput(const GenericVector< int > &labels, const GenericVector< int > &xcoords, int height, ScrollView *window)
Definition: lstmrecognizer.cpp:477

tesseract::LSTMRecognizer::training_iteration
int training_iteration() const
Definition: lstmrecognizer.h:63

tesseract::LSTMRecognizer::training_iteration_
inT32 training_iteration_
Definition: lstmrecognizer.h:366

tesseract::LSTMRecognizer::InitializeWord
WERD_RES * InitializeWord(const TBOX &line_box, int word_start, int word_end, float space_certainty, bool use_alternates, const UNICHARSET *target_unicharset, const GenericVector< int > &labels, const GenericVector< int > &label_coords, float scale_factor)
Definition: lstmrecognizer.cpp:398

tesseract::LSTMRecognizer::SimpleTextOutput
bool SimpleTextOutput() const
Definition: lstmrecognizer.h:81

tesseract::LSTMRecognizer::Serialize
bool Serialize(TFile *fp) const
Definition: lstmrecognizer.cpp:72

tesseract::LSTMRecognizer::IsIntMode
bool IsIntMode() const
Definition: lstmrecognizer.h:82

matrix.h

tesseract::LSTMRecognizer::DecodeLabel
const char * DecodeLabel(const GenericVector< int > &labels, int start, int *end, int *decoded)
Definition: lstmrecognizer.cpp:762

tesseract::LSTMRecognizer::search_
RecodeBeamSearch * search_
Definition: lstmrecognizer.h:384

ScrollView
Definition: scrollview.h:102

tesseract::NF_LAYER_SPECIFIC_LR
Definition: network.h:87

tesseract::Plumbing::LayerLearningRate
float LayerLearningRate(const char *id) const
Definition: plumbing.h:102

tesseract::LSTMRecognizer::ScaleLayerLearningRate
void ScaleLayerLearningRate(const STRING &id, double factor)
Definition: lstmrecognizer.h:133