tesseract  4.00.00dev
tesseract::LSTMTrainer Class Reference

#include <lstmtrainer.h>

Inheritance diagram for tesseract::LSTMTrainer:
tesseract::LSTMRecognizer

Public Member Functions

 LSTMTrainer ()
 
 LSTMTrainer (FileReader file_reader, FileWriter file_writer, CheckPointReader checkpoint_reader, CheckPointWriter checkpoint_writer, const char *model_base, const char *checkpoint_name, int debug_interval, inT64 max_memory)
 
virtual ~LSTMTrainer ()
 
bool TryLoadingCheckpoint (const char *filename)
 
void InitCharSet (const UNICHARSET &unicharset, const STRING &script_dir, int train_flags)
 
void InitCharSet (const UNICHARSET &unicharset, const UnicharCompress &recoder)
 
bool InitNetwork (const STRING &network_spec, int append_index, int net_flags, float weight_range, float learning_rate, float momentum)
 
int InitTensorFlowNetwork (const std::string &tf_proto)
 
void InitIterations ()
 
double ActivationError () const
 
double CharError () const
 
const double * error_rates () const
 
double best_error_rate () const
 
int best_iteration () const
 
int learning_iteration () const
 
int improvement_steps () const
 
void set_perfect_delay (int delay)
 
const GenericVector< char > & best_trainer () const
 
double NewSingleError (ErrorTypes type) const
 
double LastSingleError (ErrorTypes type) const
 
const DocumentCache & training_data () const
 
DocumentCache * mutable_training_data ()
 
Trainability GridSearchDictParams (const ImageData *trainingdata, int iteration, double min_dict_ratio, double dict_ratio_step, double max_dict_ratio, double min_cert_offset, double cert_offset_step, double max_cert_offset, STRING *results)
 
void SetSerializeMode (SerializeAmount serialize_amount) const
 
void DebugNetwork ()
 
bool LoadAllTrainingData (const GenericVector< STRING > &filenames)
 
bool MaintainCheckpoints (TestCallback tester, STRING *log_msg)
 
bool MaintainCheckpointsSpecific (int iteration, const GenericVector< char > *train_model, const GenericVector< char > *rec_model, TestCallback tester, STRING *log_msg)
 
void PrepareLogMsg (STRING *log_msg) const
 
void LogIterations (const char *intro_str, STRING *log_msg) const
 
bool TransitionTrainingStage (float error_threshold)
 
int CurrentTrainingStage () const
 
virtual bool Serialize (TFile *fp) const
 
virtual bool DeSerialize (TFile *fp)
 
void StartSubtrainer (STRING *log_msg)
 
SubTrainerResult UpdateSubtrainer (STRING *log_msg)
 
void ReduceLearningRates (LSTMTrainer *samples_trainer, STRING *log_msg)
 
int ReduceLayerLearningRates (double factor, int num_samples, LSTMTrainer *samples_trainer)
 
bool EncodeString (const STRING &str, GenericVector< int > *labels) const
 
void ConvertToInt ()
 
const ImageData * TrainOnLine (LSTMTrainer *samples_trainer, bool batch)
 
Trainability TrainOnLine (const ImageData *trainingdata, bool batch)
 
Trainability PrepareForBackward (const ImageData *trainingdata, NetworkIO *fwd_outputs, NetworkIO *targets)
 
bool SaveTrainingDump (SerializeAmount serialize_amount, const LSTMTrainer *trainer, GenericVector< char > *data) const
 
bool ReadTrainingDump (const GenericVector< char > &data, LSTMTrainer *trainer)
 
bool ReadSizedTrainingDump (const char *data, int size)
 
void SetupCheckpointInfo ()
 
void SaveRecognitionDump (GenericVector< char > *data) const
 
bool SaveBestModel (FileWriter writer) const
 
STRING DumpFilename () const
 
void FillErrorBuffer (double new_error, ErrorTypes type)
 
- Public Member Functions inherited from tesseract::LSTMRecognizer
 LSTMRecognizer ()
 
 ~LSTMRecognizer ()
 
int NumOutputs () const
 
int training_iteration () const
 
int sample_iteration () const
 
double learning_rate () const
 
bool IsHardening () const
 
LossType OutputLossType () const
 
bool SimpleTextOutput () const
 
bool IsIntMode () const
 
bool IsRecoding () const
 
CachingStrategy CacheStrategy () const
 
bool IsTensorFlow () const
 
GenericVector< STRING > EnumerateLayers () const
 
Network * GetLayer (const STRING &id) const
 
float GetLayerLearningRate (const STRING &id) const
 
void ScaleLearningRate (double factor)
 
void ScaleLayerLearningRate (const STRING &id, double factor)
 
bool IsUsingAdaGrad () const
 
const UNICHARSET & GetUnicharset () const
 
const Dict * GetDict () const
 
void SetIteration (int iteration)
 
int NumInputs () const
 
int null_char () const
 
bool Serialize (TFile *fp) const
 
bool DeSerialize (TFile *fp)
 
bool LoadDictionary (const char *lang, TessdataManager *mgr)
 
void RecognizeLine (const ImageData &image_data, bool invert, bool debug, double worst_dict_cert, bool use_alternates, const UNICHARSET *target_unicharset, const TBOX &line_box, float score_ratio, bool one_word, PointerVector< WERD_RES > *words)
 
void WordsFromOutputs (const NetworkIO &outputs, const GenericVector< int > &labels, const GenericVector< int > label_coords, const TBOX &line_box, bool debug, bool use_alternates, bool one_word, float score_ratio, float scale_factor, const UNICHARSET *target_unicharset, PointerVector< WERD_RES > *words)
 
void OutputStats (const NetworkIO &outputs, float *min_output, float *mean_output, float *sd)
 
bool RecognizeLine (const ImageData &image_data, bool invert, bool debug, bool re_invert, float label_threshold, float *scale_factor, NetworkIO *inputs, NetworkIO *outputs)
 
WERD_RES * WordFromOutput (const TBOX &line_box, const NetworkIO &outputs, int word_start, int word_end, float score_ratio, float space_certainty, bool debug, bool use_alternates, const UNICHARSET *target_unicharset, const GenericVector< int > &labels, const GenericVector< int > &label_coords, float scale_factor)
 
WERD_RES * InitializeWord (const TBOX &line_box, int word_start, int word_end, float space_certainty, bool use_alternates, const UNICHARSET *target_unicharset, const GenericVector< int > &labels, const GenericVector< int > &label_coords, float scale_factor)
 
STRING DecodeLabels (const GenericVector< int > &labels)
 
void DisplayForward (const NetworkIO &inputs, const GenericVector< int > &labels, const GenericVector< int > &label_coords, const char *window_name, ScrollView **window)
 

Static Public Member Functions

static bool EncodeString (const STRING &str, const UNICHARSET &unicharset, const UnicharCompress *recoder, bool simple_text, int null_char, GenericVector< int > *labels)
 
static LSTMRecognizer * ReadRecognitionDump (const GenericVector< char > &data)
 

Protected Member Functions

void EmptyConstructor ()
 
void SetUnicharsetProperties (const STRING &script_dir)
 
bool DebugLSTMTraining (const NetworkIO &inputs, const ImageData &trainingdata, const NetworkIO &fwd_outputs, const GenericVector< int > &truth_labels, const NetworkIO &outputs)
 
void DisplayTargets (const NetworkIO &targets, const char *window_name, ScrollView **window)
 
bool ComputeTextTargets (const NetworkIO &outputs, const GenericVector< int > &truth_labels, NetworkIO *targets)
 
bool ComputeCTCTargets (const GenericVector< int > &truth_labels, NetworkIO *outputs, NetworkIO *targets)
 
double ComputeErrorRates (const NetworkIO &deltas, double char_error, double word_error)
 
double ComputeRMSError (const NetworkIO &deltas)
 
double ComputeWinnerError (const NetworkIO &deltas)
 
double ComputeCharError (const GenericVector< int > &truth_str, const GenericVector< int > &ocr_str)
 
double ComputeWordError (STRING *truth_str, STRING *ocr_str)
 
void UpdateErrorBuffer (double new_error, ErrorTypes type)
 
void RollErrorBuffers ()
 
STRING UpdateErrorGraph (int iteration, double error_rate, const GenericVector< char > &model_data, TestCallback tester)
 
- Protected Member Functions inherited from tesseract::LSTMRecognizer
void SetRandomSeed ()
 
void DisplayLSTMOutput (const GenericVector< int > &labels, const GenericVector< int > &xcoords, int height, ScrollView *window)
 
void DebugActivationPath (const NetworkIO &outputs, const GenericVector< int > &labels, const GenericVector< int > &xcoords)
 
void DebugActivationRange (const NetworkIO &outputs, const char *label, int best_choice, int x_start, int x_end)
 
void LabelsFromOutputs (const NetworkIO &outputs, float null_thr, GenericVector< int > *labels, GenericVector< int > *xcoords)
 
void LabelsViaThreshold (const NetworkIO &output, float null_threshold, GenericVector< int > *labels, GenericVector< int > *xcoords)
 
void LabelsViaCTC (const NetworkIO &output, GenericVector< int > *labels, GenericVector< int > *xcoords)
 
void LabelsViaReEncode (const NetworkIO &output, GenericVector< int > *labels, GenericVector< int > *xcoords)
 
void LabelsViaSimpleText (const NetworkIO &output, GenericVector< int > *labels, GenericVector< int > *xcoords)
 
BLOB_CHOICE_LIST * GetBlobChoices (int col, int row, bool debug, const NetworkIO &output, const UNICHARSET *target_unicharset, int x_start, int x_end, float score_ratio)
 
bool AddBlobChoices (int unichar_id, float rating, float certainty, int col, int row, const UNICHARSET *target_unicharset, BLOB_CHOICE_IT *bc_it)
 
const char * DecodeLabel (const GenericVector< int > &labels, int start, int *end, int *decoded)
 
const char * DecodeSingleLabel (int label)
 

Protected Attributes

ScrollView * align_win_
 
ScrollView * target_win_
 
ScrollView * ctc_win_
 
ScrollView * recon_win_
 
int debug_interval_
 
int checkpoint_iteration_
 
STRING model_base_
 
STRING checkpoint_name_
 
DocumentCache training_data_
 
SerializeAmount serialize_amount_
 
STRING best_model_name_
 
int num_training_stages_
 
FileReader file_reader_
 
FileWriter file_writer_
 
CheckPointReader checkpoint_reader_
 
CheckPointWriter checkpoint_writer_
 
double best_error_rate_
 
double best_error_rates_ [ET_COUNT]
 
int best_iteration_
 
double worst_error_rate_
 
double worst_error_rates_ [ET_COUNT]
 
int worst_iteration_
 
int stall_iteration_
 
GenericVector< char > best_model_data_
 
GenericVector< char > worst_model_data_
 
GenericVector< char > best_trainer_
 
LSTMTrainer * sub_trainer_
 
float error_rate_of_last_saved_best_
 
int training_stage_
 
GenericVector< double > best_error_history_
 
GenericVector< int > best_error_iterations_
 
int improvement_steps_
 
int learning_iteration_
 
int prev_sample_iteration_
 
int perfect_delay_
 
int last_perfect_training_iteration_
 
GenericVector< double > error_buffers_ [ET_COUNT]
 
double error_rates_ [ET_COUNT]
 
- Protected Attributes inherited from tesseract::LSTMRecognizer
Network * network_
 
CCUtil ccutil_
 
UnicharCompress recoder_
 
STRING network_str_
 
inT32 training_flags_
 
inT32 training_iteration_
 
inT32 sample_iteration_
 
inT32 null_char_
 
float weight_range_
 
float learning_rate_
 
float momentum_
 
TRand randomizer_
 
NetworkScratch scratch_space_
 
Dict * dict_
 
RecodeBeamSearch * search_
 
ScrollView * debug_win_
 

Static Protected Attributes

static const int kRollingBufferSize_ = 1000
 

Detailed Description

Definition at line 89 of file lstmtrainer.h.

Constructor & Destructor Documentation

◆ LSTMTrainer() [1/2]

tesseract::LSTMTrainer::LSTMTrainer ( )

Definition at line 73 of file lstmtrainer.cpp.

74  : training_data_(0),
81  sub_trainer_(NULL) {
83  debug_interval_ = 0;
84 }
CheckPointWriter checkpoint_writer_
Definition: lstmtrainer.h:419
_ConstTessMemberResultCallback_0_0< false, R, T1 >::base * NewPermanentTessCallback(const T1 *obj, R(T2::*member)() const)
Definition: tesscallback.h:116
bool SaveDataToFile(const GenericVector< char > &data, const STRING &filename)
DocumentCache training_data_
Definition: lstmtrainer.h:406
bool SaveTrainingDump(SerializeAmount serialize_amount, const LSTMTrainer *trainer, GenericVector< char > *data) const
bool ReadTrainingDump(const GenericVector< char > &data, LSTMTrainer *trainer)
LSTMTrainer * sub_trainer_
Definition: lstmtrainer.h:444
bool LoadDataFromFile(const char *filename, GenericVector< char > *data)
CheckPointReader checkpoint_reader_
Definition: lstmtrainer.h:418

◆ LSTMTrainer() [2/2]

tesseract::LSTMTrainer::LSTMTrainer ( FileReader  file_reader,
FileWriter  file_writer,
CheckPointReader  checkpoint_reader,
CheckPointWriter  checkpoint_writer,
const char *  model_base,
const char *  checkpoint_name,
int  debug_interval,
inT64  max_memory 
)

Definition at line 86 of file lstmtrainer.cpp.

91  : training_data_(max_memory),
92  file_reader_(file_reader),
93  file_writer_(file_writer),
94  checkpoint_reader_(checkpoint_reader),
95  checkpoint_writer_(checkpoint_writer),
96  sub_trainer_(NULL) {
100  if (checkpoint_reader_ == NULL) {
103  }
104  if (checkpoint_writer_ == NULL) {
107  }
108  debug_interval_ = debug_interval;
109  model_base_ = model_base;
110  checkpoint_name_ = checkpoint_name;
111 }
CheckPointWriter checkpoint_writer_
Definition: lstmtrainer.h:419
_ConstTessMemberResultCallback_0_0< false, R, T1 >::base * NewPermanentTessCallback(const T1 *obj, R(T2::*member)() const)
Definition: tesscallback.h:116
bool SaveDataToFile(const GenericVector< char > &data, const STRING &filename)
DocumentCache training_data_
Definition: lstmtrainer.h:406
bool SaveTrainingDump(SerializeAmount serialize_amount, const LSTMTrainer *trainer, GenericVector< char > *data) const
bool ReadTrainingDump(const GenericVector< char > &data, LSTMTrainer *trainer)
LSTMTrainer * sub_trainer_
Definition: lstmtrainer.h:444
bool LoadDataFromFile(const char *filename, GenericVector< char > *data)
CheckPointReader checkpoint_reader_
Definition: lstmtrainer.h:418

◆ ~LSTMTrainer()

tesseract::LSTMTrainer::~LSTMTrainer ( )
virtual

Definition at line 113 of file lstmtrainer.cpp.

113  {
114  delete align_win_;
115  delete target_win_;
116  delete ctc_win_;
117  delete recon_win_;
118  delete checkpoint_reader_;
119  delete checkpoint_writer_;
120  delete sub_trainer_;
121 }
ScrollView * ctc_win_
Definition: lstmtrainer.h:394
CheckPointWriter checkpoint_writer_
Definition: lstmtrainer.h:419
ScrollView * target_win_
Definition: lstmtrainer.h:392
LSTMTrainer * sub_trainer_
Definition: lstmtrainer.h:444
ScrollView * recon_win_
Definition: lstmtrainer.h:396
CheckPointReader checkpoint_reader_
Definition: lstmtrainer.h:418
ScrollView * align_win_
Definition: lstmtrainer.h:390

Member Function Documentation

◆ ActivationError()

double tesseract::LSTMTrainer::ActivationError ( ) const
inline

Definition at line 136 of file lstmtrainer.h.

136  {
137  return error_rates_[ET_DELTA];
138  }
double error_rates_[ET_COUNT]
Definition: lstmtrainer.h:475

◆ best_error_rate()

double tesseract::LSTMTrainer::best_error_rate ( ) const
inline

Definition at line 143 of file lstmtrainer.h.

143  {
144  return best_error_rate_;
145  }

◆ best_iteration()

int tesseract::LSTMTrainer::best_iteration ( ) const
inline

Definition at line 146 of file lstmtrainer.h.

146  {
147  return best_iteration_;
148  }

◆ best_trainer()

const GenericVector<char>& tesseract::LSTMTrainer::best_trainer ( ) const
inline

Definition at line 152 of file lstmtrainer.h.

152 { return best_trainer_; }
GenericVector< char > best_trainer_
Definition: lstmtrainer.h:441

◆ CharError()

double tesseract::LSTMTrainer::CharError ( ) const
inline

Definition at line 139 of file lstmtrainer.h.

◆ ComputeCharError()

double tesseract::LSTMTrainer::ComputeCharError ( const GenericVector< int > &  truth_str,
const GenericVector< int > &  ocr_str 
)
protected

Definition at line 1186 of file lstmtrainer.cpp.

1187  {
1188  GenericVector<int> label_counts;
1189  label_counts.init_to_size(NumOutputs(), 0);
1190  int truth_size = 0;
1191  for (int i = 0; i < truth_str.size(); ++i) {
1192  if (truth_str[i] != null_char_) {
1193  ++label_counts[truth_str[i]];
1194  ++truth_size;
1195  }
1196  }
1197  for (int i = 0; i < ocr_str.size(); ++i) {
1198  if (ocr_str[i] != null_char_) {
1199  --label_counts[ocr_str[i]];
1200  }
1201  }
1202  int char_errors = 0;
1203  for (int i = 0; i < label_counts.size(); ++i) {
1204  char_errors += abs(label_counts[i]);
1205  }
1206  if (truth_size == 0) {
1207  return (char_errors == 0) ? 0.0 : 1.0;
1208  }
1209  return static_cast<double>(char_errors) / truth_size;
1210 }
void init_to_size(int size, T t)
int size() const
Definition: genericvector.h:72

◆ ComputeCTCTargets()

bool tesseract::LSTMTrainer::ComputeCTCTargets ( const GenericVector< int > &  truth_labels,
NetworkIO *  outputs,
NetworkIO *  targets 
)
protected

Definition at line 1118 of file lstmtrainer.cpp.

1119  {
1120  // Bottom-clip outputs to a minimum probability.
1121  CTC::NormalizeProbs(outputs);
1122  return CTC::ComputeCTCTargets(truth_labels, null_char_,
1123  outputs->float_array(), targets);
1124 }
static void NormalizeProbs(NetworkIO *probs)
Definition: ctc.h:36
static bool ComputeCTCTargets(const GenericVector< int > &truth_labels, int null_char, const GENERIC_2D_ARRAY< float > &outputs, NetworkIO *targets)
Definition: ctc.cpp:53

◆ ComputeErrorRates()

double tesseract::LSTMTrainer::ComputeErrorRates ( const NetworkIO &  deltas,
double  char_error,
double  word_error 
)
protected

Definition at line 1129 of file lstmtrainer.cpp.

1130  {
1132  // Delta error is the fraction of timesteps with >0.5 error in the top choice
1133  // score. If zero, then the top choice characters are guaranteed correct,
1134  // even when there is residue in the RMS error.
1135  double delta_error = ComputeWinnerError(deltas);
1136  UpdateErrorBuffer(delta_error, ET_DELTA);
1137  UpdateErrorBuffer(word_error, ET_WORD_RECERR);
1138  UpdateErrorBuffer(char_error, ET_CHAR_ERROR);
1139  // Skip ratio measures the difference between sample_iteration_ and
1140  // training_iteration_, which reflects the number of unusable samples,
1141  // usually due to unencodable truth text, or the text not fitting in the
1142  // space for the output.
1143  double skip_count = sample_iteration_ - prev_sample_iteration_;
1144  UpdateErrorBuffer(skip_count, ET_SKIP_RATIO);
1145  return delta_error;
1146 }
double ComputeWinnerError(const NetworkIO &deltas)
double ComputeRMSError(const NetworkIO &deltas)
void UpdateErrorBuffer(double new_error, ErrorTypes type)

◆ ComputeRMSError()

double tesseract::LSTMTrainer::ComputeRMSError ( const NetworkIO &  deltas)
protected

Definition at line 1149 of file lstmtrainer.cpp.

1149  {
1150  double total_error = 0.0;
1151  int width = deltas.Width();
1152  int num_classes = deltas.NumFeatures();
1153  for (int t = 0; t < width; ++t) {
1154  const float* class_errs = deltas.f(t);
1155  for (int c = 0; c < num_classes; ++c) {
1156  double error = class_errs[c];
1157  total_error += error * error;
1158  }
1159  }
1160  return sqrt(total_error / (width * num_classes));
1161 }

◆ ComputeTextTargets()

bool tesseract::LSTMTrainer::ComputeTextTargets ( const NetworkIO &  outputs,
const GenericVector< int > &  truth_labels,
NetworkIO *  targets 
)
protected

Definition at line 1098 of file lstmtrainer.cpp.

1100  {
1101  if (truth_labels.size() > targets->Width()) {
1102  tprintf("Error: transcription %s too long to fit into target of width %d\n",
1103  DecodeLabels(truth_labels).string(), targets->Width());
1104  return false;
1105  }
1106  for (int i = 0; i < truth_labels.size() && i < targets->Width(); ++i) {
1107  targets->SetActivations(i, truth_labels[i], 1.0);
1108  }
1109  for (int i = truth_labels.size(); i < targets->Width(); ++i) {
1110  targets->SetActivations(i, null_char_, 1.0);
1111  }
1112  return true;
1113 }
#define tprintf(...)
Definition: tprintf.h:31
int size() const
Definition: genericvector.h:72
STRING DecodeLabels(const GenericVector< int > &labels)

◆ ComputeWinnerError()

double tesseract::LSTMTrainer::ComputeWinnerError ( const NetworkIO &  deltas)
protected

Definition at line 1168 of file lstmtrainer.cpp.

1168  {
1169  int num_errors = 0;
1170  int width = deltas.Width();
1171  int num_classes = deltas.NumFeatures();
1172  for (int t = 0; t < width; ++t) {
1173  const float* class_errs = deltas.f(t);
1174  for (int c = 0; c < num_classes; ++c) {
1175  float abs_delta = fabs(class_errs[c]);
1176  // TODO(rays) Filtering cases where the delta is very large to cut out
1177  // GT errors doesn't work. Find a better way or get better truth.
1178  if (0.5 <= abs_delta)
1179  ++num_errors;
1180  }
1181  }
1182  return static_cast<double>(num_errors) / width;
1183 }

◆ ComputeWordError()

double tesseract::LSTMTrainer::ComputeWordError ( STRING *  truth_str,
STRING *  ocr_str 
)
protected

Definition at line 1214 of file lstmtrainer.cpp.

1214  {
1215  typedef std::unordered_map<std::string, int, std::hash<std::string> > StrMap;
1216  GenericVector<STRING> truth_words, ocr_words;
1217  truth_str->split(' ', &truth_words);
1218  if (truth_words.empty()) return 0.0;
1219  ocr_str->split(' ', &ocr_words);
1220  StrMap word_counts;
1221  for (int i = 0; i < truth_words.size(); ++i) {
1222  std::string truth_word(truth_words[i].string());
1223  StrMap::iterator it = word_counts.find(truth_word);
1224  if (it == word_counts.end())
1225  word_counts.insert(std::make_pair(truth_word, 1));
1226  else
1227  ++it->second;
1228  }
1229  for (int i = 0; i < ocr_words.size(); ++i) {
1230  std::string ocr_word(ocr_words[i].string());
1231  StrMap::iterator it = word_counts.find(ocr_word);
1232  if (it == word_counts.end())
1233  word_counts.insert(std::make_pair(ocr_word, -1));
1234  else
1235  --it->second;
1236  }
1237  int word_recall_errs = 0;
1238  for (StrMap::const_iterator it = word_counts.begin(); it != word_counts.end();
1239  ++it) {
1240  if (it->second > 0) word_recall_errs += it->second;
1241  }
1242  return static_cast<double>(word_recall_errs) / truth_words.size();
1243 }
bool empty() const
Definition: genericvector.h:90
int size() const
Definition: genericvector.h:72
void split(const char c, GenericVector< STRING > *splited)
Definition: strngs.cpp:286

◆ ConvertToInt()

void tesseract::LSTMTrainer::ConvertToInt ( )
inline

Definition at line 257 of file lstmtrainer.h.

257  {
258  if ((training_flags_ & TF_INT_MODE) == 0) {
261  }
262  }
virtual void ConvertToInt()
Definition: network.h:177

◆ CurrentTrainingStage()

int tesseract::LSTMTrainer::CurrentTrainingStage ( ) const
inline

Definition at line 213 of file lstmtrainer.h.

213 { return training_stage_; }

◆ DebugLSTMTraining()

bool tesseract::LSTMTrainer::DebugLSTMTraining ( const NetworkIO &  inputs,
const ImageData &  trainingdata,
const NetworkIO &  fwd_outputs,
const GenericVector< int > &  truth_labels,
const NetworkIO &  outputs 
)
protected

Definition at line 1028 of file lstmtrainer.cpp.

1032  {
1033  const STRING& truth_text = DecodeLabels(truth_labels);
1034  if (truth_text.string() == NULL || truth_text.length() <= 0) {
1035  tprintf("Empty truth string at decode time!\n");
1036  return false;
1037  }
1038  if (debug_interval_ != 0) {
1039  // Get class labels, xcoords and string.
1040  GenericVector<int> labels;
1041  GenericVector<int> xcoords;
1042  LabelsFromOutputs(outputs, 0.0f, &labels, &xcoords);
1043  STRING text = DecodeLabels(labels);
1044  tprintf("Iteration %d: ALIGNED TRUTH : %s\n",
1045  training_iteration(), text.string());
1046  if (debug_interval_ > 0 && training_iteration() % debug_interval_ == 0) {
1047  tprintf("TRAINING activation path for truth string %s\n",
1048  truth_text.string());
1049  DebugActivationPath(outputs, labels, xcoords);
1050  DisplayForward(inputs, labels, xcoords, "LSTMTraining", &align_win_);
1051  if (OutputLossType() == LT_CTC) {
1052  DisplayTargets(fwd_outputs, "CTC Outputs", &ctc_win_);
1053  DisplayTargets(outputs, "CTC Targets", &target_win_);
1054  }
1055  }
1056  }
1057  return true;
1058 }
ScrollView * ctc_win_
Definition: lstmtrainer.h:394
LossType OutputLossType() const
#define tprintf(...)
Definition: tprintf.h:31
const char * string() const
Definition: strngs.cpp:198
ScrollView * target_win_
Definition: lstmtrainer.h:392
inT32 length() const
Definition: strngs.cpp:193
void DisplayTargets(const NetworkIO &targets, const char *window_name, ScrollView **window)
Definition: strngs.h:45
void LabelsFromOutputs(const NetworkIO &outputs, float null_thr, GenericVector< int > *labels, GenericVector< int > *xcoords)
void DisplayForward(const NetworkIO &inputs, const GenericVector< int > &labels, const GenericVector< int > &label_coords, const char *window_name, ScrollView **window)
void DebugActivationPath(const NetworkIO &outputs, const GenericVector< int > &labels, const GenericVector< int > &xcoords)
STRING DecodeLabels(const GenericVector< int > &labels)
ScrollView * align_win_
Definition: lstmtrainer.h:390

◆ DebugNetwork()

void tesseract::LSTMTrainer::DebugNetwork ( )

Definition at line 298 of file lstmtrainer.cpp.

298  {
300 }
virtual void DebugWeights()
Definition: network.h:204

◆ DeSerialize()

bool tesseract::LSTMTrainer::DeSerialize ( TFile *  fp)
virtual

Definition at line 483 of file lstmtrainer.cpp.

483  {
484  if (!LSTMRecognizer::DeSerialize(fp)) return false;
485  if (fp->FRead(&learning_iteration_, sizeof(learning_iteration_), 1) != 1) {
486  // Special case. If we successfully decoded the recognizer, but fail here
487  // then it means we were just given a recognizer, so issue a warning and
488  // allow it.
489  tprintf("Warning: LSTMTrainer deserialized an LSTMRecognizer!\n");
492  return true;
493  }
494  if (fp->FReadEndian(&prev_sample_iteration_, sizeof(prev_sample_iteration_),
495  1) != 1)
496  return false;
497  if (fp->FReadEndian(&perfect_delay_, sizeof(perfect_delay_), 1) != 1)
498  return false;
499  if (fp->FReadEndian(&last_perfect_training_iteration_,
500  sizeof(last_perfect_training_iteration_), 1) != 1)
501  return false;
502  for (int i = 0; i < ET_COUNT; ++i) {
503  if (!error_buffers_[i].DeSerialize(fp)) return false;
504  }
505  if (fp->FRead(&error_rates_, sizeof(error_rates_), 1) != 1) return false;
506  if (fp->FReadEndian(&training_stage_, sizeof(training_stage_), 1) != 1)
507  return false;
508  uinT8 amount;
509  if (fp->FRead(&amount, sizeof(amount), 1) != 1) return false;
510  if (amount == LIGHT) return true; // Don't read the rest.
511  if (fp->FReadEndian(&best_error_rate_, sizeof(best_error_rate_), 1) != 1)
512  return false;
513  if (fp->FReadEndian(&best_error_rates_, sizeof(best_error_rates_), 1) != 1)
514  return false;
515  if (fp->FReadEndian(&best_iteration_, sizeof(best_iteration_), 1) != 1)
516  return false;
517  if (fp->FReadEndian(&worst_error_rate_, sizeof(worst_error_rate_), 1) != 1)
518  return false;
519  if (fp->FReadEndian(&worst_error_rates_, sizeof(worst_error_rates_), 1) != 1)
520  return false;
521  if (fp->FReadEndian(&worst_iteration_, sizeof(worst_iteration_), 1) != 1)
522  return false;
523  if (fp->FReadEndian(&stall_iteration_, sizeof(stall_iteration_), 1) != 1)
524  return false;
525  if (!best_model_data_.DeSerialize(fp)) return false;
526  if (!worst_model_data_.DeSerialize(fp)) return false;
527  if (amount != NO_BEST_TRAINER && !best_trainer_.DeSerialize(fp)) return false;
528  GenericVector<char> sub_data;
529  if (!sub_data.DeSerialize(fp)) return false;
530  delete sub_trainer_;
531  if (sub_data.empty()) {
532  sub_trainer_ = NULL;
533  } else {
534  sub_trainer_ = new LSTMTrainer();
535  if (!ReadTrainingDump(sub_data, sub_trainer_)) return false;
536  }
537  if (!best_error_history_.DeSerialize(fp)) return false;
538  if (!best_error_iterations_.DeSerialize(fp)) return false;
539  if (fp->FReadEndian(&improvement_steps_, sizeof(improvement_steps_), 1) != 1)
540  return false;
541  return true;
542 }
bool DeSerialize(bool swap, FILE *fp)
GenericVector< char > best_trainer_
Definition: lstmtrainer.h:441
double worst_error_rates_[ET_COUNT]
Definition: lstmtrainer.h:432
double error_rates_[ET_COUNT]
Definition: lstmtrainer.h:475
GenericVector< double > error_buffers_[ET_COUNT]
Definition: lstmtrainer.h:473
GenericVector< double > best_error_history_
Definition: lstmtrainer.h:451
#define tprintf(...)
Definition: tprintf.h:31
bool empty() const
Definition: genericvector.h:90
bool ReadTrainingDump(const GenericVector< char > &data, LSTMTrainer *trainer)
LSTMTrainer * sub_trainer_
Definition: lstmtrainer.h:444
virtual bool DeSerialize(TFile *fp)
GenericVector< char > best_model_data_
Definition: lstmtrainer.h:438
double best_error_rates_[ET_COUNT]
Definition: lstmtrainer.h:426
GenericVector< char > worst_model_data_
Definition: lstmtrainer.h:439
GenericVector< int > best_error_iterations_
Definition: lstmtrainer.h:452
uint8_t uinT8
Definition: host.h:35
virtual void SetEnableTraining(TrainingState state)
Definition: network.cpp:112

◆ DisplayTargets()

void tesseract::LSTMTrainer::DisplayTargets ( const NetworkIO &  targets,
const char *  window_name,
ScrollView **  window 
)
protected

Definition at line 1061 of file lstmtrainer.cpp.

1062  {
1063 #ifndef GRAPHICS_DISABLED // do nothing if there's no graphics.
1064  int width = targets.Width();
1065  int num_features = targets.NumFeatures();
1066  Network::ClearWindow(true, window_name, width * kTargetXScale, kTargetYScale,
1067  window);
1068  for (int c = 0; c < num_features; ++c) {
1069  int color = c % (ScrollView::GREEN_YELLOW - 1) + 2;
1070  (*window)->Pen(static_cast<ScrollView::Color>(color));
1071  int start_t = -1;
1072  for (int t = 0; t < width; ++t) {
1073  double target = targets.f(t)[c];
1074  target *= kTargetYScale;
1075  if (target >= 1) {
1076  if (start_t < 0) {
1077  (*window)->SetCursor(t - 1, 0);
1078  start_t = t;
1079  }
1080  (*window)->DrawTo(t, target);
1081  } else if (start_t >= 0) {
1082  (*window)->DrawTo(t, 0);
1083  (*window)->DrawTo(start_t - 1, 0);
1084  start_t = -1;
1085  }
1086  }
1087  if (start_t >= 0) {
1088  (*window)->DrawTo(width, 0);
1089  (*window)->DrawTo(start_t - 1, 0);
1090  }
1091  }
1092  (*window)->Update();
1093 #endif // GRAPHICS_DISABLED
1094 }
const int kTargetYScale
Definition: lstmtrainer.cpp:71
static void ClearWindow(bool tess_coords, const char *window_name, int width, int height, ScrollView **window)
Definition: network.cpp:309
const int kTargetXScale
Definition: lstmtrainer.cpp:70

◆ DumpFilename()

STRING tesseract::LSTMTrainer::DumpFilename ( ) const

Definition at line 951 of file lstmtrainer.cpp.

951  {
954  filename.add_str_int("_", best_iteration_);
955  filename += ".lstm";
956  return filename;
957 }
void add_str_int(const char *str, int number)
Definition: strngs.cpp:381
const char * string() const
Definition: strngs.cpp:198
Definition: strngs.h:45
void add_str_double(const char *str, double number)
Definition: strngs.cpp:391
const char * filename
Definition: ioapi.h:38

◆ EmptyConstructor()

void tesseract::LSTMTrainer::EmptyConstructor ( )
protected

Definition at line 967 of file lstmtrainer.cpp.

967  {
968  align_win_ = NULL;
969  target_win_ = NULL;
970  ctc_win_ = NULL;
971  recon_win_ = NULL;
974  training_stage_ = 0;
976  InitIterations();
977 }
ScrollView * ctc_win_
Definition: lstmtrainer.h:394
SerializeAmount serialize_amount_
Definition: lstmtrainer.h:408
ScrollView * target_win_
Definition: lstmtrainer.h:392
ScrollView * recon_win_
Definition: lstmtrainer.h:396
ScrollView * align_win_
Definition: lstmtrainer.h:390

◆ EncodeString() [1/2]

bool tesseract::LSTMTrainer::EncodeString ( const STRING str,
GenericVector< int > *  labels 
) const
inline

Definition at line 247 of file lstmtrainer.h.

247  {
248  return EncodeString(str, GetUnicharset(), IsRecoding() ? &recoder_ : NULL,
249  SimpleTextOutput(), null_char_, labels);
250  }
const UNICHARSET & GetUnicharset() const
bool EncodeString(const STRING &str, GenericVector< int > *labels) const
Definition: lstmtrainer.h:247

◆ EncodeString() [2/2]

bool tesseract::LSTMTrainer::EncodeString ( const STRING str,
const UNICHARSET unicharset,
const UnicharCompress recoder,
bool  simple_text,
int  null_char,
GenericVector< int > *  labels 
)
static

Definition at line 748 of file lstmtrainer.cpp.

750  {
751  if (str.string() == NULL || str.length() <= 0) {
752  tprintf("Empty truth string!\n");
753  return false;
754  }
755  int err_index;
756  GenericVector<int> internal_labels;
757  labels->truncate(0);
758  if (!simple_text) labels->push_back(null_char);
759  if (unicharset.encode_string(str.string(), true, &internal_labels, NULL,
760  &err_index)) {
761  bool success = true;
762  for (int i = 0; i < internal_labels.size(); ++i) {
763  if (recoder != NULL) {
764  // Re-encode labels via recoder.
765  RecodedCharID code;
766  int len = recoder->EncodeUnichar(internal_labels[i], &code);
767  if (len > 0) {
768  for (int j = 0; j < len; ++j) {
769  labels->push_back(code(j));
770  if (!simple_text) labels->push_back(null_char);
771  }
772  } else {
773  success = false;
774  err_index = 0;
775  break;
776  }
777  } else {
778  labels->push_back(internal_labels[i]);
779  if (!simple_text) labels->push_back(null_char);
780  }
781  }
782  if (success) return true;
783  }
784  tprintf("Encoding of string failed! Failure bytes:");
785  while (err_index < str.length()) {
786  tprintf(" %x", str[err_index++]);
787  }
788  tprintf("\n");
789  return false;
790 }
int push_back(T object)
#define tprintf(...)
Definition: tprintf.h:31
const char * string() const
Definition: strngs.cpp:198
void truncate(int size)
inT32 length() const
Definition: strngs.cpp:193
int size() const
Definition: genericvector.h:72
bool encode_string(const char *str, bool give_up_on_failure, GenericVector< UNICHAR_ID > *encoding, GenericVector< char > *lengths, int *encoded_length) const
Definition: unicharset.cpp:234

◆ error_rates()

const double* tesseract::LSTMTrainer::error_rates ( ) const
inline

Definition at line 140 of file lstmtrainer.h.

140  {
141  return error_rates_;
142  }
double error_rates_[ET_COUNT]
Definition: lstmtrainer.h:475

◆ FillErrorBuffer()

void tesseract::LSTMTrainer::FillErrorBuffer ( double  new_error,
ErrorTypes  type 
)

Definition at line 960 of file lstmtrainer.cpp.

960  {
961  for (int i = 0; i < kRollingBufferSize_; ++i)
962  error_buffers_[type][i] = new_error;
963  error_rates_[type] = 100.0 * new_error;
964 }
double error_rates_[ET_COUNT]
Definition: lstmtrainer.h:475
GenericVector< double > error_buffers_[ET_COUNT]
Definition: lstmtrainer.h:473
static const int kRollingBufferSize_
Definition: lstmtrainer.h:472

◆ GridSearchDictParams()

Trainability tesseract::LSTMTrainer::GridSearchDictParams ( const ImageData trainingdata,
int  iteration,
double  min_dict_ratio,
double  dict_ratio_step,
double  max_dict_ratio,
double  min_cert_offset,
double  cert_offset_step,
double  max_cert_offset,
STRING results 
)

Definition at line 248 of file lstmtrainer.cpp.

251  {
252  sample_iteration_ = iteration;
253  NetworkIO fwd_outputs, targets;
254  Trainability result =
255  PrepareForBackward(trainingdata, &fwd_outputs, &targets);
256  if (result == UNENCODABLE || result == HI_PRECISION_ERR || dict_ == NULL)
257  return result;
258 
259  // Encode/decode the truth to get the normalization.
260  GenericVector<int> truth_labels, ocr_labels, xcoords;
261  ASSERT_HOST(EncodeString(trainingdata->transcription(), &truth_labels));
262  // NO-dict error.
263  RecodeBeamSearch base_search(recoder_, null_char_, SimpleTextOutput(), NULL);
264  base_search.Decode(fwd_outputs, 1.0, 0.0, RecodeBeamSearch::kMinCertainty,
265  NULL);
266  base_search.ExtractBestPathAsLabels(&ocr_labels, &xcoords);
267  STRING truth_text = DecodeLabels(truth_labels);
268  STRING ocr_text = DecodeLabels(ocr_labels);
269  double baseline_error = ComputeWordError(&truth_text, &ocr_text);
270  results->add_str_double("0,0=", baseline_error);
271 
272  RecodeBeamSearch search(recoder_, null_char_, SimpleTextOutput(), dict_);
273  for (double r = min_dict_ratio; r < max_dict_ratio; r += dict_ratio_step) {
274  for (double c = min_cert_offset; c < max_cert_offset;
275  c += cert_offset_step) {
276  search.Decode(fwd_outputs, r, c, RecodeBeamSearch::kMinCertainty, NULL);
277  search.ExtractBestPathAsLabels(&ocr_labels, &xcoords);
278  truth_text = DecodeLabels(truth_labels);
279  ocr_text = DecodeLabels(ocr_labels);
280  // This is destructive on both strings.
281  double word_error = ComputeWordError(&truth_text, &ocr_text);
282  if ((r == min_dict_ratio && c == min_cert_offset) ||
283  !std::isfinite(word_error)) {
284  STRING t = DecodeLabels(truth_labels);
285  STRING o = DecodeLabels(ocr_labels);
286  tprintf("r=%g, c=%g, truth=%s, ocr=%s, wderr=%g, truth[0]=%d\n", r, c,
287  t.string(), o.string(), word_error, truth_labels[0]);
288  }
289  results->add_str_double(" ", r);
290  results->add_str_double(",", c);
291  results->add_str_double("=", word_error);
292  }
293  }
294  return result;
295 }
static const float kMinCertainty
Definition: recodebeam.h:213
#define tprintf(...)
Definition: tprintf.h:31
const char * string() const
Definition: strngs.cpp:198
#define ASSERT_HOST(x)
Definition: errcode.h:84
LIST search(LIST list, void *key, int_compare is_equal)
Definition: oldlist.cpp:406
Definition: strngs.h:45
void add_str_double(const char *str, double number)
Definition: strngs.cpp:391
Trainability PrepareForBackward(const ImageData *trainingdata, NetworkIO *fwd_outputs, NetworkIO *targets)
double ComputeWordError(STRING *truth_str, STRING *ocr_str)
STRING DecodeLabels(const GenericVector< int > &labels)
bool EncodeString(const STRING &str, GenericVector< int > *labels) const
Definition: lstmtrainer.h:247

◆ improvement_steps()

int tesseract::LSTMTrainer::improvement_steps ( ) const
inline

Definition at line 150 of file lstmtrainer.h.

150 { return improvement_steps_; }

◆ InitCharSet() [1/2]

void tesseract::LSTMTrainer::InitCharSet ( const UNICHARSET unicharset,
const STRING script_dir,
int  train_flags 
)

Definition at line 138 of file lstmtrainer.cpp.

139  {
141  training_flags_ = train_flags;
142  ccutil_.unicharset.CopyFrom(unicharset);
144  : GetUnicharset().size();
145  SetUnicharsetProperties(script_dir);
146 }
const UNICHARSET & GetUnicharset() const
bool has_special_codes() const
Definition: unicharset.h:682
UNICHARSET unicharset
Definition: ccutil.h:68
void SetUnicharsetProperties(const STRING &script_dir)
int size() const
Definition: unicharset.h:299
void CopyFrom(const UNICHARSET &src)
Definition: unicharset.cpp:423

◆ InitCharSet() [2/2]

void tesseract::LSTMTrainer::InitCharSet ( const UNICHARSET unicharset,
const UnicharCompress recoder 
)

Definition at line 152 of file lstmtrainer.cpp.

153  {
155  int flags = TF_COMPRESS_UNICHARSET;
156  training_flags_ = static_cast<TrainingFlags>(flags);
157  ccutil_.unicharset.CopyFrom(unicharset);
158  recoder_ = recoder;
160  : GetUnicharset().size();
161  RecodedCharID code;
163  null_char_ = code(0);
164  // Space should encode as itself.
166  ASSERT_HOST(code(0) == UNICHAR_SPACE);
167 }
int EncodeUnichar(int unichar_id, RecodedCharID *code) const
const UNICHARSET & GetUnicharset() const
#define ASSERT_HOST(x)
Definition: errcode.h:84
bool has_special_codes() const
Definition: unicharset.h:682
UNICHARSET unicharset
Definition: ccutil.h:68
int size() const
Definition: unicharset.h:299
void CopyFrom(const UNICHARSET &src)
Definition: unicharset.cpp:423

◆ InitIterations()

void tesseract::LSTMTrainer::InitIterations ( )

Definition at line 223 of file lstmtrainer.cpp.

223  {
224  sample_iteration_ = 0;
228  best_error_rate_ = 100.0;
229  best_iteration_ = 0;
230  worst_error_rate_ = 0.0;
231  worst_iteration_ = 0;
234  perfect_delay_ = 0;
236  for (int i = 0; i < ET_COUNT; ++i) {
237  best_error_rates_[i] = 100.0;
238  worst_error_rates_[i] = 0.0;
240  error_rates_[i] = 100.0;
241  }
243 }
double worst_error_rates_[ET_COUNT]
Definition: lstmtrainer.h:432
double error_rates_[ET_COUNT]
Definition: lstmtrainer.h:475
void init_to_size(int size, T t)
GenericVector< double > error_buffers_[ET_COUNT]
Definition: lstmtrainer.h:473
const int kMinStallIterations
Definition: lstmtrainer.cpp:47
static const int kRollingBufferSize_
Definition: lstmtrainer.h:472
float error_rate_of_last_saved_best_
Definition: lstmtrainer.h:446
double best_error_rates_[ET_COUNT]
Definition: lstmtrainer.h:426
const int kMinStartedErrorRate
Definition: lstmtrainer.cpp:60

◆ InitNetwork()

bool tesseract::LSTMTrainer::InitNetwork ( const STRING network_spec,
int  append_index,
int  net_flags,
float  weight_range,
float  learning_rate,
float  momentum 
)

Definition at line 175 of file lstmtrainer.cpp.

177  {
178  // Call after InitCharSet.
180  weight_range_ = weight_range;
182  momentum_ = momentum;
183  int num_outputs = null_char_ == GetUnicharset().size()
184  ? null_char_ + 1
185  : GetUnicharset().size();
186  if (IsRecoding()) num_outputs = recoder_.code_range();
187  if (!NetworkBuilder::InitNetwork(num_outputs, network_spec, append_index,
188  net_flags, weight_range, &randomizer_,
189  &network_)) {
190  return false;
191  }
192  network_str_ += network_spec;
193  tprintf("Built network:%s from request %s\n",
194  network_->spec().string(), network_spec.string());
195  tprintf("Training parameters:\n Debug interval = %d,"
196  " weights = %g, learning rate = %g, momentum=%g\n",
198  return true;
199 }
voidpf void uLong size
Definition: ioapi.h:39
const UNICHARSET & GetUnicharset() const
#define tprintf(...)
Definition: tprintf.h:31
const char * string() const
Definition: strngs.cpp:198
#define ASSERT_HOST(x)
Definition: errcode.h:84
static bool InitNetwork(int num_outputs, STRING network_spec, int append_index, int net_flags, float weight_range, TRand *randomizer, Network **network)
double learning_rate() const
int size() const
Definition: unicharset.h:299
virtual STRING spec() const
Definition: network.h:141

◆ InitTensorFlowNetwork()

int tesseract::LSTMTrainer::InitTensorFlowNetwork ( const std::string &  tf_proto)

Definition at line 203 of file lstmtrainer.cpp.

203  {
204 #ifdef INCLUDE_TENSORFLOW
205  delete network_;
206  TFNetwork* tf_net = new TFNetwork("TensorFlow");
207  training_iteration_ = tf_net->InitFromProtoStr(tf_proto);
208  if (training_iteration_ == 0) {
209  tprintf("InitFromProtoStr failed!!\n");
210  return 0;
211  }
212  network_ = tf_net;
213  ASSERT_HOST(recoder_.code_range() == tf_net->num_classes());
214  return training_iteration_;
215 #else
216  tprintf("TensorFlow not compiled in! -DINCLUDE_TENSORFLOW\n");
217  return 0;
218 #endif
219 }
#define tprintf(...)
Definition: tprintf.h:31
#define ASSERT_HOST(x)
Definition: errcode.h:84

◆ LastSingleError()

double tesseract::LSTMTrainer::LastSingleError ( ErrorTypes  type) const
inline

Definition at line 160 of file lstmtrainer.h.

160  {
161  return error_buffers_[type]
164  }
GenericVector< double > error_buffers_[ET_COUNT]
Definition: lstmtrainer.h:473
static const int kRollingBufferSize_
Definition: lstmtrainer.h:472

◆ learning_iteration()

int tesseract::LSTMTrainer::learning_iteration ( ) const
inline

Definition at line 149 of file lstmtrainer.h.

149 { return learning_iteration_; }

◆ LoadAllTrainingData()

bool tesseract::LSTMTrainer::LoadAllTrainingData ( const GenericVector< STRING > &  filenames)

Definition at line 305 of file lstmtrainer.cpp.

305  {
308 }
DocumentCache training_data_
Definition: lstmtrainer.h:406
CachingStrategy CacheStrategy() const
bool LoadDocuments(const GenericVector< STRING > &filenames, CachingStrategy cache_strategy, FileReader reader)
Definition: imagedata.cpp:572

◆ LogIterations()

void tesseract::LSTMTrainer::LogIterations ( const char *  intro_str,
STRING log_msg 
) const

Definition at line 414 of file lstmtrainer.cpp.

414  {
415  *log_msg += intro_str;
416  log_msg->add_str_int(" iteration ", learning_iteration());
417  log_msg->add_str_int("/", training_iteration());
418  log_msg->add_str_int("/", sample_iteration());
419 }
void add_str_int(const char *str, int number)
Definition: strngs.cpp:381
int learning_iteration() const
Definition: lstmtrainer.h:149

◆ MaintainCheckpoints()

bool tesseract::LSTMTrainer::MaintainCheckpoints ( TestCallback  tester,
STRING log_msg 
)

Definition at line 314 of file lstmtrainer.cpp.

314  {
315  PrepareLogMsg(log_msg);
316  double error_rate = CharError();
317  int iteration = learning_iteration();
318  if (iteration >= stall_iteration_ &&
319  error_rate > best_error_rate_ * (1.0 + kSubTrainerMarginFraction) &&
321  // It hasn't got any better in a long while, and is a margin worse than the
322  // best, so go back to the best model and try a different learning rate.
323  StartSubtrainer(log_msg);
324  }
325  SubTrainerResult sub_trainer_result = STR_NONE;
326  if (sub_trainer_ != NULL) {
327  sub_trainer_result = UpdateSubtrainer(log_msg);
328  if (sub_trainer_result == STR_REPLACED) {
329  // Reset the inputs, as we have overwritten *this.
330  error_rate = CharError();
331  iteration = learning_iteration();
332  PrepareLogMsg(log_msg);
333  }
334  }
335  bool result = true; // Something interesting happened.
336  GenericVector<char> rec_model_data;
337  if (error_rate < best_error_rate_) {
338  SaveRecognitionDump(&rec_model_data);
339  log_msg->add_str_double(" New best char error = ", error_rate);
340  *log_msg += UpdateErrorGraph(iteration, error_rate, rec_model_data, tester);
341  // If sub_trainer_ is not NULL, either *this beat it to a new best, or it
342  // just overwrote *this. In either case, we have finished with it.
343  delete sub_trainer_;
344  sub_trainer_ = NULL;
347  log_msg->add_str_int(" Transitioned to stage ", CurrentTrainingStage());
348  }
351  STRING best_model_name = DumpFilename();
352  if (!(*file_writer_)(best_trainer_, best_model_name)) {
353  *log_msg += " failed to write best model:";
354  } else {
355  *log_msg += " wrote best model:";
357  }
358  *log_msg += best_model_name;
359  }
360  } else if (error_rate > worst_error_rate_) {
361  SaveRecognitionDump(&rec_model_data);
362  log_msg->add_str_double(" New worst char error = ", error_rate);
363  *log_msg += UpdateErrorGraph(iteration, error_rate, rec_model_data, tester);
366  // Error rate has ballooned. Go back to the best model.
367  *log_msg += "\nDivergence! ";
368  // Copy best_trainer_ before reading it, as it will get overwritten.
369  GenericVector<char> revert_data(best_trainer_);
370  if (checkpoint_reader_->Run(revert_data, this)) {
371  LogIterations("Reverted to", log_msg);
372  ReduceLearningRates(this, log_msg);
373  } else {
374  LogIterations("Failed to Revert at", log_msg);
375  }
376  // If it fails again, we will wait twice as long before reverting again.
377  stall_iteration_ = iteration + 2 * (iteration - learning_iteration());
378  // Re-save the best trainer with the new learning rates and stall
379  // iteration.
381  }
382  } else {
383  // Something interesting happened only if the sub_trainer_ was trained.
384  result = sub_trainer_result != STR_NONE;
385  }
386  if (checkpoint_writer_ != NULL && file_writer_ != NULL &&
387  checkpoint_name_.length() > 0) {
388  // Write a current checkpoint.
389  GenericVector<char> checkpoint;
390  if (!checkpoint_writer_->Run(FULL, this, &checkpoint) ||
391  !(*file_writer_)(checkpoint, checkpoint_name_)) {
392  *log_msg += " failed to write checkpoint.";
393  } else {
394  *log_msg += " wrote checkpoint.";
395  }
396  }
397  *log_msg += "\n";
398  return result;
399 }
void PrepareLogMsg(STRING *log_msg) const
void add_str_int(const char *str, int number)
Definition: strngs.cpp:381
int CurrentTrainingStage() const
Definition: lstmtrainer.h:213
GenericVector< char > best_trainer_
Definition: lstmtrainer.h:441
CheckPointWriter checkpoint_writer_
Definition: lstmtrainer.h:419
void ReduceLearningRates(LSTMTrainer *samples_trainer, STRING *log_msg)
const int kMinStallIterations
Definition: lstmtrainer.cpp:47
const double kMinDivergenceRate
Definition: lstmtrainer.cpp:45
virtual R Run(A1, A2)=0
bool empty() const
Definition: genericvector.h:90
void LogIterations(const char *intro_str, STRING *log_msg) const
inT32 length() const
Definition: strngs.cpp:193
const double kSubTrainerMarginFraction
Definition: lstmtrainer.cpp:50
const double kStageTransitionThreshold
Definition: lstmtrainer.cpp:62
SubTrainerResult UpdateSubtrainer(STRING *log_msg)
LSTMTrainer * sub_trainer_
Definition: lstmtrainer.h:444
float error_rate_of_last_saved_best_
Definition: lstmtrainer.h:446
void StartSubtrainer(STRING *log_msg)
Definition: strngs.h:45
void add_str_double(const char *str, double number)
Definition: strngs.cpp:391
CheckPointReader checkpoint_reader_
Definition: lstmtrainer.h:418
int learning_iteration() const
Definition: lstmtrainer.h:149
STRING DumpFilename() const
virtual R Run(A1, A2, A3)=0
STRING UpdateErrorGraph(int iteration, double error_rate, const GenericVector< char > &model_data, TestCallback tester)
const int kMinStartedErrorRate
Definition: lstmtrainer.cpp:60
double CharError() const
Definition: lstmtrainer.h:139
bool TransitionTrainingStage(float error_threshold)
const double kBestCheckpointFraction
Definition: lstmtrainer.cpp:68
void SaveRecognitionDump(GenericVector< char > *data) const

◆ MaintainCheckpointsSpecific()

bool tesseract::LSTMTrainer::MaintainCheckpointsSpecific ( int  iteration,
const GenericVector< char > *  train_model,
const GenericVector< char > *  rec_model,
TestCallback  tester,
STRING log_msg 
)

◆ mutable_training_data()

DocumentCache* tesseract::LSTMTrainer::mutable_training_data ( )
inline

Definition at line 168 of file lstmtrainer.h.

168 { return &training_data_; }
DocumentCache training_data_
Definition: lstmtrainer.h:406

◆ NewSingleError()

double tesseract::LSTMTrainer::NewSingleError ( ErrorTypes  type) const
inline

Definition at line 154 of file lstmtrainer.h.

154  {
156  }
GenericVector< double > error_buffers_[ET_COUNT]
Definition: lstmtrainer.h:473
static const int kRollingBufferSize_
Definition: lstmtrainer.h:472

◆ PrepareForBackward()

Trainability tesseract::LSTMTrainer::PrepareForBackward ( const ImageData trainingdata,
NetworkIO fwd_outputs,
NetworkIO targets 
)

Definition at line 827 of file lstmtrainer.cpp.

829  {
830  if (trainingdata == NULL) {
831  tprintf("Null trainingdata.\n");
832  return UNENCODABLE;
833  }
834  // Ensure repeatability of random elements even across checkpoints.
835  bool debug = debug_interval_ > 0 &&
837  GenericVector<int> truth_labels;
838  if (!EncodeString(trainingdata->transcription(), &truth_labels)) {
839  tprintf("Can't encode transcription: %s\n",
840  trainingdata->transcription().string());
841  return UNENCODABLE;
842  }
843  int w = 0;
844  while (w < truth_labels.size() &&
845  (truth_labels[w] == UNICHAR_SPACE || truth_labels[w] == null_char_))
846  ++w;
847  if (w == truth_labels.size()) {
848  tprintf("Blank transcription: %s\n",
849  trainingdata->transcription().string());
850  return UNENCODABLE;
851  }
852  float image_scale;
853  NetworkIO inputs;
854  bool invert = trainingdata->boxes().empty();
855  if (!RecognizeLine(*trainingdata, invert, debug, invert, 0.0f, &image_scale,
856  &inputs, fwd_outputs)) {
857  tprintf("Image not trainable\n");
858  return UNENCODABLE;
859  }
860  targets->Resize(*fwd_outputs, network_->NumOutputs());
861  LossType loss_type = OutputLossType();
862  if (loss_type == LT_SOFTMAX) {
863  if (!ComputeTextTargets(*fwd_outputs, truth_labels, targets)) {
864  tprintf("Compute simple targets failed!\n");
865  return UNENCODABLE;
866  }
867  } else if (loss_type == LT_CTC) {
868  if (!ComputeCTCTargets(truth_labels, fwd_outputs, targets)) {
869  tprintf("Compute CTC targets failed!\n");
870  return UNENCODABLE;
871  }
872  } else {
873  tprintf("Logistic outputs not implemented yet!\n");
874  return UNENCODABLE;
875  }
876  GenericVector<int> ocr_labels;
877  GenericVector<int> xcoords;
878  LabelsFromOutputs(*fwd_outputs, 0.0f, &ocr_labels, &xcoords);
879  // CTC does not produce correct target labels to begin with.
880  if (loss_type != LT_CTC) {
881  LabelsFromOutputs(*targets, 0.0f, &truth_labels, &xcoords);
882  }
883  if (!DebugLSTMTraining(inputs, *trainingdata, *fwd_outputs, truth_labels,
884  *targets)) {
885  tprintf("Input width was %d\n", inputs.Width());
886  return UNENCODABLE;
887  }
888  STRING ocr_text = DecodeLabels(ocr_labels);
889  STRING truth_text = DecodeLabels(truth_labels);
890  targets->SubtractAllFromFloat(*fwd_outputs);
891  if (debug_interval_ != 0) {
892  tprintf("Iteration %d: BEST OCR TEXT : %s\n", training_iteration(),
893  ocr_text.string());
894  }
895  double char_error = ComputeCharError(truth_labels, ocr_labels);
896  double word_error = ComputeWordError(&truth_text, &ocr_text);
897  double delta_error = ComputeErrorRates(*targets, char_error, word_error);
898  if (debug_interval_ != 0) {
899  tprintf("File %s page %d %s:\n", trainingdata->imagefilename().string(),
900  trainingdata->page_number(), delta_error == 0.0 ? "(Perfect)" : "");
901  }
902  if (delta_error == 0.0) return PERFECT;
903  if (targets->AnySuspiciousTruth(kHighConfidence)) return HI_PRECISION_ERR;
904  return TRAINABLE;
905 }
bool ComputeCTCTargets(const GenericVector< int > &truth_labels, NetworkIO *outputs, NetworkIO *targets)
void RecognizeLine(const ImageData &image_data, bool invert, bool debug, double worst_dict_cert, bool use_alternates, const UNICHARSET *target_unicharset, const TBOX &line_box, float score_ratio, bool one_word, PointerVector< WERD_RES > *words)
LossType OutputLossType() const
const double kHighConfidence
Definition: lstmtrainer.cpp:64
#define tprintf(...)
Definition: tprintf.h:31
const char * string() const
Definition: strngs.cpp:198
int size() const
Definition: genericvector.h:72
bool DebugLSTMTraining(const NetworkIO &inputs, const ImageData &trainingdata, const NetworkIO &fwd_outputs, const GenericVector< int > &truth_labels, const NetworkIO &outputs)
double ComputeErrorRates(const NetworkIO &deltas, double char_error, double word_error)
Definition: strngs.h:45
void LabelsFromOutputs(const NetworkIO &outputs, float null_thr, GenericVector< int > *labels, GenericVector< int > *xcoords)
double ComputeCharError(const GenericVector< int > &truth_str, const GenericVector< int > &ocr_str)
double ComputeWordError(STRING *truth_str, STRING *ocr_str)
bool ComputeTextTargets(const NetworkIO &outputs, const GenericVector< int > &truth_labels, NetworkIO *targets)
STRING DecodeLabels(const GenericVector< int > &labels)
int NumOutputs() const
Definition: network.h:123
bool EncodeString(const STRING &str, GenericVector< int > *labels) const
Definition: lstmtrainer.h:247

◆ PrepareLogMsg()

void tesseract::LSTMTrainer::PrepareLogMsg ( STRING log_msg) const

Definition at line 402 of file lstmtrainer.cpp.

402  {
403  LogIterations("At", log_msg);
404  log_msg->add_str_double(", Mean rms=", error_rates_[ET_RMS]);
405  log_msg->add_str_double("%, delta=", error_rates_[ET_DELTA]);
406  log_msg->add_str_double("%, char train=", error_rates_[ET_CHAR_ERROR]);
407  log_msg->add_str_double("%, word train=", error_rates_[ET_WORD_RECERR]);
408  log_msg->add_str_double("%, skip ratio=", error_rates_[ET_SKIP_RATIO]);
409  *log_msg += "%, ";
410 }
double error_rates_[ET_COUNT]
Definition: lstmtrainer.h:475
void LogIterations(const char *intro_str, STRING *log_msg) const
void add_str_double(const char *str, double number)
Definition: strngs.cpp:391

◆ ReadRecognitionDump()

LSTMRecognizer * tesseract::LSTMTrainer::ReadRecognitionDump ( const GenericVector< char > &  data)
static

Definition at line 940 of file lstmtrainer.cpp.

941  {
942  TFile fp;
943  fp.Open(&data[0], data.size());
944  LSTMRecognizer* recognizer = new LSTMRecognizer;
945  ASSERT_HOST(recognizer->DeSerialize(&fp));
946  return recognizer;
947 }
int size() const
Definition: genericvector.h:72
#define ASSERT_HOST(x)
Definition: errcode.h:84

◆ ReadSizedTrainingDump()

bool tesseract::LSTMTrainer::ReadSizedTrainingDump ( const char *  data,
int  size 
)

Definition at line 924 of file lstmtrainer.cpp.

924  {
925  TFile fp;
926  fp.Open(data, size);
927  return DeSerialize(&fp);
928 }
voidpf void uLong size
Definition: ioapi.h:39
virtual bool DeSerialize(TFile *fp)

◆ ReadTrainingDump()

bool tesseract::LSTMTrainer::ReadTrainingDump ( const GenericVector< char > &  data,
LSTMTrainer trainer 
)

Definition at line 919 of file lstmtrainer.cpp.

920  {
921  return trainer->ReadSizedTrainingDump(&data[0], data.size());
922 }
int size() const
Definition: genericvector.h:72

◆ ReduceLayerLearningRates()

int tesseract::LSTMTrainer::ReduceLayerLearningRates ( double  factor,
int  num_samples,
LSTMTrainer samples_trainer 
)

Definition at line 639 of file lstmtrainer.cpp.

640  {
641  enum WhichWay {
642  LR_DOWN, // Learning rate will go down by factor.
643  LR_SAME, // Learning rate will stay the same.
644  LR_COUNT // Size of arrays.
645  };
646  // Epsilon is so small that it may as well be zero, but still positive.
647  const double kEpsilon = 1.0e-30;
649  int num_layers = layers.size();
650  GenericVector<int> num_weights;
651  num_weights.init_to_size(num_layers, 0);
652  GenericVector<double> bad_sums[LR_COUNT];
653  GenericVector<double> ok_sums[LR_COUNT];
654  for (int i = 0; i < LR_COUNT; ++i) {
655  bad_sums[i].init_to_size(num_layers, 0.0);
656  ok_sums[i].init_to_size(num_layers, 0.0);
657  }
658  double momentum_factor = 1.0 / (1.0 - momentum_);
659  GenericVector<char> orig_trainer;
660  SaveTrainingDump(LIGHT, this, &orig_trainer);
661  for (int i = 0; i < num_layers; ++i) {
662  Network* layer = GetLayer(layers[i]);
663  num_weights[i] = layer->IsTraining() ? layer->num_weights() : 0;
664  }
665  int iteration = sample_iteration();
666  for (int s = 0; s < num_samples; ++s) {
667  // Which way will we modify the learning rate?
668  for (int ww = 0; ww < LR_COUNT; ++ww) {
669  // Transfer momentum to learning rate and adjust by the ww factor.
670  float ww_factor = momentum_factor;
671  if (ww == LR_DOWN) ww_factor *= factor;
672  // Make a copy of *this, so we can mess about without damaging anything.
673  LSTMTrainer copy_trainer;
674  copy_trainer.ReadTrainingDump(orig_trainer, &copy_trainer);
675  // Clear the updates, doing nothing else.
676  copy_trainer.network_->Update(0.0, 0.0, 0);
677  // Adjust the learning rate in each layer.
678  for (int i = 0; i < num_layers; ++i) {
679  if (num_weights[i] == 0) continue;
680  copy_trainer.ScaleLayerLearningRate(layers[i], ww_factor);
681  }
682  copy_trainer.SetIteration(iteration);
683  // Train on the sample, but keep the update in updates_ instead of
684  // applying to the weights.
685  const ImageData* trainingdata =
686  copy_trainer.TrainOnLine(samples_trainer, true);
687  if (trainingdata == NULL) continue;
688  // We'll now use this trainer again for each layer.
689  GenericVector<char> updated_trainer;
690  SaveTrainingDump(LIGHT, &copy_trainer, &updated_trainer);
691  for (int i = 0; i < num_layers; ++i) {
692  if (num_weights[i] == 0) continue;
693  LSTMTrainer layer_trainer;
694  layer_trainer.ReadTrainingDump(updated_trainer, &layer_trainer);
695  Network* layer = layer_trainer.GetLayer(layers[i]);
696  // Update the weights in just the layer, and also zero the updates
697  // matrix (to epsilon).
698  layer->Update(0.0, kEpsilon, 0);
699  // Train again on the same sample, again holding back the updates.
700  layer_trainer.TrainOnLine(trainingdata, true);
701  // Count the sign changes in the updates in layer vs in copy_trainer.
702  float before_bad = bad_sums[ww][i];
703  float before_ok = ok_sums[ww][i];
704  layer->CountAlternators(*copy_trainer.GetLayer(layers[i]),
705  &ok_sums[ww][i], &bad_sums[ww][i]);
706  float bad_frac =
707  bad_sums[ww][i] + ok_sums[ww][i] - before_bad - before_ok;
708  if (bad_frac > 0.0f)
709  bad_frac = (bad_sums[ww][i] - before_bad) / bad_frac;
710  }
711  }
712  ++iteration;
713  }
714  int num_lowered = 0;
715  for (int i = 0; i < num_layers; ++i) {
716  if (num_weights[i] == 0) continue;
717  Network* layer = GetLayer(layers[i]);
718  float lr = GetLayerLearningRate(layers[i]);
719  double total_down = bad_sums[LR_DOWN][i] + ok_sums[LR_DOWN][i];
720  double total_same = bad_sums[LR_SAME][i] + ok_sums[LR_SAME][i];
721  double frac_down = bad_sums[LR_DOWN][i] / total_down;
722  double frac_same = bad_sums[LR_SAME][i] / total_same;
723  tprintf("Layer %d=%s: lr %g->%g%%, lr %g->%g%%", i, layer->name().string(),
724  lr * factor, 100.0 * frac_down, lr, 100.0 * frac_same);
725  if (frac_down < frac_same * kImprovementFraction) {
726  tprintf(" REDUCED\n");
727  ScaleLayerLearningRate(layers[i], factor);
728  ++num_lowered;
729  } else {
730  tprintf(" SAME\n");
731  }
732  }
733  if (num_lowered == 0) {
734  // Just lower everything to make sure.
735  for (int i = 0; i < num_layers; ++i) {
736  if (num_weights[i] > 0) {
737  ScaleLayerLearningRate(layers[i], factor);
738  ++num_lowered;
739  }
740  }
741  }
742  return num_lowered;
743 }
float GetLayerLearningRate(const STRING &id) const
void init_to_size(int size, T t)
#define tprintf(...)
Definition: tprintf.h:31
GenericVector< STRING > EnumerateLayers() const
const double kImprovementFraction
Definition: lstmtrainer.cpp:66
int size() const
Definition: genericvector.h:72
bool SaveTrainingDump(SerializeAmount serialize_amount, const LSTMTrainer *trainer, GenericVector< char > *data) const
Network * GetLayer(const STRING &id) const
void ScaleLayerLearningRate(const STRING &id, double factor)

◆ ReduceLearningRates()

void tesseract::LSTMTrainer::ReduceLearningRates ( LSTMTrainer samples_trainer,
STRING log_msg 
)

Definition at line 620 of file lstmtrainer.cpp.

621  {
623  int num_reduced = ReduceLayerLearningRates(
624  kLearningRateDecay, kNumAdjustmentIterations, samples_trainer);
625  log_msg->add_str_int("\nReduced learning rate on layers: ", num_reduced);
626  } else {
628  log_msg->add_str_double("\nReduced learning rate to :", learning_rate_);
629  }
630  *log_msg += "\n";
631 }
void add_str_int(const char *str, int number)
Definition: strngs.cpp:381
const int kNumAdjustmentIterations
Definition: lstmtrainer.cpp:54
void ScaleLearningRate(double factor)
const double kLearningRateDecay
Definition: lstmtrainer.cpp:52
void add_str_double(const char *str, double number)
Definition: strngs.cpp:391
bool TestFlag(NetworkFlags flag) const
Definition: network.h:144
int ReduceLayerLearningRates(double factor, int num_samples, LSTMTrainer *samples_trainer)

◆ RollErrorBuffers()

void tesseract::LSTMTrainer::RollErrorBuffers ( )
protected

Definition at line 1260 of file lstmtrainer.cpp.

1260  {
1262  if (NewSingleError(ET_DELTA) > 0.0)
1264  else
1267  if (debug_interval_ != 0) {
1268  tprintf("Mean rms=%g%%, delta=%g%%, train=%g%%(%g%%), skip ratio=%g%%\n",
1272  }
1273 }
double error_rates_[ET_COUNT]
Definition: lstmtrainer.h:475
#define tprintf(...)
Definition: tprintf.h:31
double NewSingleError(ErrorTypes type) const
Definition: lstmtrainer.h:154

◆ SaveBestModel()

bool tesseract::LSTMTrainer::SaveBestModel ( FileWriter  writer) const

◆ SaveRecognitionDump()

void tesseract::LSTMTrainer::SaveRecognitionDump ( GenericVector< char > *  data) const

Definition at line 931 of file lstmtrainer.cpp.

931  {
932  TFile fp;
933  fp.OpenWrite(data);
937 }
#define ASSERT_HOST(x)
Definition: errcode.h:84
bool Serialize(TFile *fp) const
virtual void SetEnableTraining(TrainingState state)
Definition: network.cpp:112

◆ SaveTrainingDump()

bool tesseract::LSTMTrainer::SaveTrainingDump ( SerializeAmount  serialize_amount,
const LSTMTrainer trainer,
GenericVector< char > *  data 
) const

Definition at line 909 of file lstmtrainer.cpp.

911  {
912  TFile fp;
913  fp.OpenWrite(data);
914  trainer->serialize_amount_ = serialize_amount;
915  return trainer->Serialize(&fp);
916 }

◆ Serialize()

bool tesseract::LSTMTrainer::Serialize ( TFile fp) const
virtual

Definition at line 433 of file lstmtrainer.cpp.

433  {
434  if (!LSTMRecognizer::Serialize(fp)) return false;
435  if (fp->FWrite(&learning_iteration_, sizeof(learning_iteration_), 1) != 1)
436  return false;
437  if (fp->FWrite(&prev_sample_iteration_, sizeof(prev_sample_iteration_), 1) !=
438  1)
439  return false;
440  if (fp->FWrite(&perfect_delay_, sizeof(perfect_delay_), 1) != 1) return false;
441  if (fp->FWrite(&last_perfect_training_iteration_,
442  sizeof(last_perfect_training_iteration_), 1) != 1)
443  return false;
444  for (int i = 0; i < ET_COUNT; ++i) {
445  if (!error_buffers_[i].Serialize(fp)) return false;
446  }
447  if (fp->FWrite(&error_rates_, sizeof(error_rates_), 1) != 1) return false;
448  if (fp->FWrite(&training_stage_, sizeof(training_stage_), 1) != 1)
449  return false;
450  uinT8 amount = serialize_amount_;
451  if (fp->FWrite(&amount, sizeof(amount), 1) != 1) return false;
452  if (amount == LIGHT) return true; // We are done.
453  if (fp->FWrite(&best_error_rate_, sizeof(best_error_rate_), 1) != 1)
454  return false;
455  if (fp->FWrite(&best_error_rates_, sizeof(best_error_rates_), 1) != 1)
456  return false;
457  if (fp->FWrite(&best_iteration_, sizeof(best_iteration_), 1) != 1)
458  return false;
459  if (fp->FWrite(&worst_error_rate_, sizeof(worst_error_rate_), 1) != 1)
460  return false;
461  if (fp->FWrite(&worst_error_rates_, sizeof(worst_error_rates_), 1) != 1)
462  return false;
463  if (fp->FWrite(&worst_iteration_, sizeof(worst_iteration_), 1) != 1)
464  return false;
465  if (fp->FWrite(&stall_iteration_, sizeof(stall_iteration_), 1) != 1)
466  return false;
467  if (!best_model_data_.Serialize(fp)) return false;
468  if (!worst_model_data_.Serialize(fp)) return false;
469  if (amount != NO_BEST_TRAINER && !best_trainer_.Serialize(fp)) return false;
470  GenericVector<char> sub_data;
471  if (sub_trainer_ != NULL && !SaveTrainingDump(LIGHT, sub_trainer_, &sub_data))
472  return false;
473  if (!sub_data.Serialize(fp)) return false;
474  if (!best_error_history_.Serialize(fp)) return false;
475  if (!best_error_iterations_.Serialize(fp)) return false;
476  if (fp->FWrite(&improvement_steps_, sizeof(improvement_steps_), 1) != 1)
477  return false;
478  return true;
479 }
GenericVector< char > best_trainer_
Definition: lstmtrainer.h:441
double worst_error_rates_[ET_COUNT]
Definition: lstmtrainer.h:432
double error_rates_[ET_COUNT]
Definition: lstmtrainer.h:475
virtual bool Serialize(TFile *fp) const
GenericVector< double > error_buffers_[ET_COUNT]
Definition: lstmtrainer.h:473
SerializeAmount serialize_amount_
Definition: lstmtrainer.h:408
GenericVector< double > best_error_history_
Definition: lstmtrainer.h:451
bool SaveTrainingDump(SerializeAmount serialize_amount, const LSTMTrainer *trainer, GenericVector< char > *data) const
LSTMTrainer * sub_trainer_
Definition: lstmtrainer.h:444
GenericVector< char > best_model_data_
Definition: lstmtrainer.h:438
double best_error_rates_[ET_COUNT]
Definition: lstmtrainer.h:426
GenericVector< char > worst_model_data_
Definition: lstmtrainer.h:439
GenericVector< int > best_error_iterations_
Definition: lstmtrainer.h:452
uint8_t uinT8
Definition: host.h:35
bool Serialize(FILE *fp) const
bool Serialize(TFile *fp) const

◆ set_perfect_delay()

void tesseract::LSTMTrainer::set_perfect_delay ( int  delay)
inline

Definition at line 151 of file lstmtrainer.h.

151 { perfect_delay_ = delay; }

◆ SetSerializeMode()

void tesseract::LSTMTrainer::SetSerializeMode ( SerializeAmount  serialize_amount) const
inline

Definition at line 178 of file lstmtrainer.h.

178  {
179  serialize_amount_ = serialize_amount;
180  }
SerializeAmount serialize_amount_
Definition: lstmtrainer.h:408

◆ SetUnicharsetProperties()

void tesseract::LSTMTrainer::SetUnicharsetProperties ( const STRING script_dir)
protected

Definition at line 982 of file lstmtrainer.cpp.

982  {
983  tprintf("Setting unichar properties\n");
984  for (int s = 0; s < GetUnicharset().get_script_table_size(); ++s) {
985  if (strcmp("NULL", GetUnicharset().get_script_from_script_id(s)) == 0)
986  continue;
987  // Load the unicharset for the script if available.
988  STRING filename = script_dir + "/" +
990  ".unicharset";
991  UNICHARSET script_set;
992  GenericVector<char> data;
993  if ((*file_reader_)(filename, &data) &&
994  script_set.load_from_inmemory_file(&data[0], data.size())) {
995  tprintf("Setting properties for script %s\n",
996  GetUnicharset().get_script_from_script_id(s));
998  }
999  }
1000  if (IsRecoding()) {
1001  STRING filename = script_dir + "/radical-stroke.txt";
1002  GenericVector<char> data;
1003  if ((*file_reader_)(filename, &data)) {
1004  data += '\0';
1005  STRING stroke_table = &data[0];
1007  &stroke_table)) {
1008  RecodedCharID code;
1010  null_char_ = code(0);
1011  // Space should encode as itself.
1013  ASSERT_HOST(code(0) == UNICHAR_SPACE);
1014  return;
1015  }
1016  } else {
1017  tprintf("Failed to load radical-stroke info from: %s\n",
1018  filename.string());
1019  }
1021  }
1022 }
int EncodeUnichar(int unichar_id, RecodedCharID *code) const
const char * get_script_from_script_id(int id) const
Definition: unicharset.h:814
const UNICHARSET & GetUnicharset() const
#define tprintf(...)
Definition: tprintf.h:31
void SetPropertiesFromOther(const UNICHARSET &src)
Definition: unicharset.h:505
const char * string() const
Definition: strngs.cpp:198
int size() const
Definition: genericvector.h:72
#define ASSERT_HOST(x)
Definition: errcode.h:84
bool ComputeEncoding(const UNICHARSET &unicharset, int null_id, STRING *radical_stroke_table)
int get_script_table_size() const
Definition: unicharset.h:809
Definition: strngs.h:45
UNICHARSET unicharset
Definition: ccutil.h:68
const char * filename
Definition: ioapi.h:38
bool load_from_inmemory_file(const char *const memory, int mem_size, bool skip_fragments)
Definition: unicharset.cpp:724

◆ SetupCheckpointInfo()

void tesseract::LSTMTrainer::SetupCheckpointInfo ( )

◆ StartSubtrainer()

void tesseract::LSTMTrainer::StartSubtrainer ( STRING log_msg)

Definition at line 547 of file lstmtrainer.cpp.

547  {
548  delete sub_trainer_;
549  sub_trainer_ = new LSTMTrainer();
551  *log_msg += " Failed to revert to previous best for trial!";
552  delete sub_trainer_;
553  sub_trainer_ = NULL;
554  } else {
555  log_msg->add_str_int(" Trial sub_trainer_ from iteration ",
557  // Reduce learning rate so it doesn't diverge this time.
558  sub_trainer_->ReduceLearningRates(this, log_msg);
559  // If it fails again, we will wait twice as long before reverting again.
560  int stall_offset =
562  stall_iteration_ = learning_iteration() + 2 * stall_offset;
564  // Re-save the best trainer with the new learning rates and stall iteration.
566  }
567 }
void add_str_int(const char *str, int number)
Definition: strngs.cpp:381
GenericVector< char > best_trainer_
Definition: lstmtrainer.h:441
CheckPointWriter checkpoint_writer_
Definition: lstmtrainer.h:419
void ReduceLearningRates(LSTMTrainer *samples_trainer, STRING *log_msg)
virtual R Run(A1, A2)=0
LSTMTrainer * sub_trainer_
Definition: lstmtrainer.h:444
CheckPointReader checkpoint_reader_
Definition: lstmtrainer.h:418
int learning_iteration() const
Definition: lstmtrainer.h:149
virtual R Run(A1, A2, A3)=0

◆ training_data()

const DocumentCache& tesseract::LSTMTrainer::training_data ( ) const
inline

Definition at line 165 of file lstmtrainer.h.

165  {
166  return training_data_;
167  }
DocumentCache training_data_
Definition: lstmtrainer.h:406

◆ TrainOnLine() [1/2]

const ImageData* tesseract::LSTMTrainer::TrainOnLine ( LSTMTrainer samples_trainer,
bool  batch 
)
inline

Definition at line 268 of file lstmtrainer.h.

268  {
269  int sample_index = sample_iteration();
270  const ImageData* image =
271  samples_trainer->training_data_.GetPageBySerial(sample_index);
272  if (image != NULL) {
273  Trainability trainable = TrainOnLine(image, batch);
274  if (trainable == UNENCODABLE || trainable == NOT_BOXED) {
275  return NULL; // Sample was unusable.
276  }
277  } else {
279  }
280  return image;
281  }
const ImageData * TrainOnLine(LSTMTrainer *samples_trainer, bool batch)
Definition: lstmtrainer.h:268

◆ TrainOnLine() [2/2]

Trainability tesseract::LSTMTrainer::TrainOnLine ( const ImageData trainingdata,
bool  batch 
)

Definition at line 794 of file lstmtrainer.cpp.

795  {
796  NetworkIO fwd_outputs, targets;
797  Trainability trainable =
798  PrepareForBackward(trainingdata, &fwd_outputs, &targets);
800  if (trainable == UNENCODABLE || trainable == NOT_BOXED) {
801  return trainable; // Sample was unusable.
802  }
803  bool debug = debug_interval_ > 0 &&
805  // Run backprop on the output.
806  NetworkIO bp_deltas;
807  if (network_->IsTraining() &&
808  (trainable != PERFECT ||
811  network_->Backward(debug, targets, &scratch_space_, &bp_deltas);
812  network_->Update(learning_rate_, batch ? -1.0f : momentum_,
813  training_iteration_ + 1);
814  }
815 #ifndef GRAPHICS_DISABLED
816  if (debug_interval_ == 1 && debug_win_ != NULL) {
818  }
819 #endif // GRAPHICS_DISABLED
820  // Roll the memory of past means.
822  return trainable;
823 }
virtual void Update(float learning_rate, float momentum, int num_samples)
Definition: network.h:218
NetworkScratch scratch_space_
bool IsTraining() const
Definition: network.h:115
virtual bool Backward(bool debug, const NetworkIO &fwd_deltas, NetworkScratch *scratch, NetworkIO *back_deltas)
Definition: network.h:259
Trainability PrepareForBackward(const ImageData *trainingdata, NetworkIO *fwd_outputs, NetworkIO *targets)
SVEvent * AwaitEvent(SVEventType type)
Definition: scrollview.cpp:449

◆ TransitionTrainingStage()

bool tesseract::LSTMTrainer::TransitionTrainingStage ( float  error_threshold)

Definition at line 423 of file lstmtrainer.cpp.

423  {
424  if (best_error_rate_ < error_threshold &&
426  ++training_stage_;
427  return true;
428  }
429  return false;
430 }

◆ TryLoadingCheckpoint()

bool tesseract::LSTMTrainer::TryLoadingCheckpoint ( const char *  filename)

Definition at line 125 of file lstmtrainer.cpp.

125  {
126  GenericVector<char> data;
127  if (!(*file_reader_)(filename, &data)) return false;
128  tprintf("Loaded file %s, unpacking...\n", filename);
129  return checkpoint_reader_->Run(data, this);
130 }
virtual R Run(A1, A2)=0
#define tprintf(...)
Definition: tprintf.h:31
CheckPointReader checkpoint_reader_
Definition: lstmtrainer.h:418
const char * filename
Definition: ioapi.h:38

◆ UpdateErrorBuffer()

void tesseract::LSTMTrainer::UpdateErrorBuffer ( double  new_error,
ErrorTypes  type 
)
protected

Definition at line 1247 of file lstmtrainer.cpp.

1247  {
1249  error_buffers_[type][index] = new_error;
1250  // Compute the mean error.
1251  int mean_count = MIN(training_iteration_ + 1, error_buffers_[type].size());
1252  double buffer_sum = 0.0;
1253  for (int i = 0; i < mean_count; ++i) buffer_sum += error_buffers_[type][i];
1254  double mean = buffer_sum / mean_count;
1255  // Trim precision to 1/1000 of 1%.
1256  error_rates_[type] = IntCastRounded(100000.0 * mean) / 1000.0;
1257 }
double error_rates_[ET_COUNT]
Definition: lstmtrainer.h:475
GenericVector< double > error_buffers_[ET_COUNT]
Definition: lstmtrainer.h:473
voidpf void uLong size
Definition: ioapi.h:39
static const int kRollingBufferSize_
Definition: lstmtrainer.h:472
int IntCastRounded(double x)
Definition: helpers.h:179
#define MIN(x, y)
Definition: ndminx.h:28

◆ UpdateErrorGraph()

STRING tesseract::LSTMTrainer::UpdateErrorGraph ( int  iteration,
double  error_rate,
const GenericVector< char > &  model_data,
TestCallback  tester 
)
protected

Definition at line 1279 of file lstmtrainer.cpp.

1281  {
1282  if (error_rate > best_error_rate_
1283  && iteration < best_iteration_ + kErrorGraphInterval) {
1284  // Too soon to record a new point.
1285  if (tester != NULL)
1286  return tester->Run(worst_iteration_, NULL, worst_model_data_,
1288  else
1289  return "";
1290  }
1291  STRING result;
1292  // NOTE: there are 2 asymmetries here:
1293  // 1. We are computing the global minimum, but the local maximum in between.
1294  // 2. If the tester returns an empty string, indicating that it is busy,
1295  // call it repeatedly on new local maxima to test the previous min, but
1296  // not the other way around, as there is little point testing the maxima
1297  // between very frequent minima.
1298  if (error_rate < best_error_rate_) {
1299  // This is a new (global) minimum.
1300  if (tester != NULL) {
1301  result = tester->Run(worst_iteration_, worst_error_rates_,
1304  best_model_data_ = model_data;
1305  }
1306  best_error_rate_ = error_rate;
1307  memcpy(best_error_rates_, error_rates_, sizeof(error_rates_));
1308  best_iteration_ = iteration;
1309  best_error_history_.push_back(error_rate);
1310  best_error_iterations_.push_back(iteration);
1311  // Compute 2% decay time.
1312  double two_percent_more = error_rate + 2.0;
1313  int i;
1314  for (i = best_error_history_.size() - 1;
1315  i >= 0 && best_error_history_[i] < two_percent_more; --i) {
1316  }
1317  int old_iteration = i >= 0 ? best_error_iterations_[i] : 0;
1318  improvement_steps_ = iteration - old_iteration;
1319  tprintf("2 Percent improvement time=%d, best error was %g @ %d\n",
1320  improvement_steps_, i >= 0 ? best_error_history_[i] : 100.0,
1321  old_iteration);
1322  } else if (error_rate > best_error_rate_) {
1323  // This is a new (local) maximum.
1324  if (tester != NULL) {
1325  if (best_model_data_.empty()) {
1326  // Allow for multiple data points with "worst" error rate.
1327  result = tester->Run(worst_iteration_, worst_error_rates_,
1329  } else {
1330  result = tester->Run(best_iteration_, best_error_rates_,
1332  }
1333  if (result.length() > 0)
1335  worst_model_data_ = model_data;
1336  }
1337  }
1338  worst_error_rate_ = error_rate;
1339  memcpy(worst_error_rates_, error_rates_, sizeof(error_rates_));
1340  worst_iteration_ = iteration;
1341  return result;
1342 }
int CurrentTrainingStage() const
Definition: lstmtrainer.h:213
double worst_error_rates_[ET_COUNT]
Definition: lstmtrainer.h:432
double error_rates_[ET_COUNT]
Definition: lstmtrainer.h:475
GenericVector< double > best_error_history_
Definition: lstmtrainer.h:451
int push_back(T object)
#define tprintf(...)
Definition: tprintf.h:31
bool empty() const
Definition: genericvector.h:90
void truncate(int size)
inT32 length() const
Definition: strngs.cpp:193
int size() const
Definition: genericvector.h:72
Definition: strngs.h:45
GenericVector< char > best_model_data_
Definition: lstmtrainer.h:438
double best_error_rates_[ET_COUNT]
Definition: lstmtrainer.h:426
GenericVector< char > worst_model_data_
Definition: lstmtrainer.h:439
GenericVector< int > best_error_iterations_
Definition: lstmtrainer.h:452
const int kErrorGraphInterval
Definition: lstmtrainer.cpp:56

◆ UpdateSubtrainer()

SubTrainerResult tesseract::LSTMTrainer::UpdateSubtrainer ( STRING log_msg)

Definition at line 577 of file lstmtrainer.cpp.

577  {
578  double training_error = CharError();
579  double sub_error = sub_trainer_->CharError();
580  double sub_margin = (training_error - sub_error) / sub_error;
581  if (sub_margin >= kSubTrainerMarginFraction) {
582  log_msg->add_str_double(" sub_trainer=", sub_error);
583  log_msg->add_str_double(" margin=", 100.0 * sub_margin);
584  *log_msg += "\n";
585  // Catch up to current iteration.
586  int end_iteration = training_iteration();
587  while (sub_trainer_->training_iteration() < end_iteration &&
588  sub_margin >= kSubTrainerMarginFraction) {
589  int target_iteration =
591  while (sub_trainer_->training_iteration() < target_iteration) {
592  sub_trainer_->TrainOnLine(this, false);
593  }
594  STRING batch_log = "Sub:";
595  sub_trainer_->PrepareLogMsg(&batch_log);
596  batch_log += "\n";
597  tprintf("UpdateSubtrainer:%s", batch_log.string());
598  *log_msg += batch_log;
599  sub_error = sub_trainer_->CharError();
600  sub_margin = (training_error - sub_error) / sub_error;
601  }
602  if (sub_error < best_error_rate_ &&
603  sub_margin >= kSubTrainerMarginFraction) {
604  // The sub_trainer_ has won the race to a new best. Switch to it.
605  GenericVector<char> updated_trainer;
606  SaveTrainingDump(LIGHT, sub_trainer_, &updated_trainer);
607  ReadTrainingDump(updated_trainer, this);
608  log_msg->add_str_int(" Sub trainer wins at iteration ",
610  *log_msg += "\n";
611  return STR_REPLACED;
612  }
613  return STR_UPDATED;
614  }
615  return STR_NONE;
616 }
void PrepareLogMsg(STRING *log_msg) const
#define tprintf(...)
Definition: tprintf.h:31
const char * string() const
Definition: strngs.cpp:198
const double kSubTrainerMarginFraction
Definition: lstmtrainer.cpp:50
bool SaveTrainingDump(SerializeAmount serialize_amount, const LSTMTrainer *trainer, GenericVector< char > *data) const
bool ReadTrainingDump(const GenericVector< char > &data, LSTMTrainer *trainer)
LSTMTrainer * sub_trainer_
Definition: lstmtrainer.h:444
Definition: strngs.h:45
void add_str_double(const char *str, double number)
Definition: strngs.cpp:391
const ImageData * TrainOnLine(LSTMTrainer *samples_trainer, bool batch)
Definition: lstmtrainer.h:268
const int kNumPagesPerBatch
Definition: lstmtrainer.cpp:58
double CharError() const
Definition: lstmtrainer.h:139

Member Data Documentation

◆ align_win_

ScrollView* tesseract::LSTMTrainer::align_win_
protected

Definition at line 390 of file lstmtrainer.h.

◆ best_error_history_

GenericVector<double> tesseract::LSTMTrainer::best_error_history_
protected

Definition at line 451 of file lstmtrainer.h.

◆ best_error_iterations_

GenericVector<int> tesseract::LSTMTrainer::best_error_iterations_
protected

Definition at line 452 of file lstmtrainer.h.

◆ best_error_rate_

double tesseract::LSTMTrainer::best_error_rate_
protected

Definition at line 424 of file lstmtrainer.h.

◆ best_error_rates_

double tesseract::LSTMTrainer::best_error_rates_[ET_COUNT]
protected

Definition at line 426 of file lstmtrainer.h.

◆ best_iteration_

int tesseract::LSTMTrainer::best_iteration_
protected

Definition at line 428 of file lstmtrainer.h.

◆ best_model_data_

GenericVector<char> tesseract::LSTMTrainer::best_model_data_
protected

Definition at line 438 of file lstmtrainer.h.

◆ best_model_name_

STRING tesseract::LSTMTrainer::best_model_name_
protected

Definition at line 410 of file lstmtrainer.h.

◆ best_trainer_

GenericVector<char> tesseract::LSTMTrainer::best_trainer_
protected

Definition at line 441 of file lstmtrainer.h.

◆ checkpoint_iteration_

int tesseract::LSTMTrainer::checkpoint_iteration_
protected

Definition at line 400 of file lstmtrainer.h.

◆ checkpoint_name_

STRING tesseract::LSTMTrainer::checkpoint_name_
protected

Definition at line 404 of file lstmtrainer.h.

◆ checkpoint_reader_

CheckPointReader tesseract::LSTMTrainer::checkpoint_reader_
protected

Definition at line 418 of file lstmtrainer.h.

◆ checkpoint_writer_

CheckPointWriter tesseract::LSTMTrainer::checkpoint_writer_
protected

Definition at line 419 of file lstmtrainer.h.

◆ ctc_win_

ScrollView* tesseract::LSTMTrainer::ctc_win_
protected

Definition at line 394 of file lstmtrainer.h.

◆ debug_interval_

int tesseract::LSTMTrainer::debug_interval_
protected

Definition at line 398 of file lstmtrainer.h.

◆ error_buffers_

GenericVector<double> tesseract::LSTMTrainer::error_buffers_[ET_COUNT]
protected

Definition at line 473 of file lstmtrainer.h.

◆ error_rate_of_last_saved_best_

float tesseract::LSTMTrainer::error_rate_of_last_saved_best_
protected

Definition at line 446 of file lstmtrainer.h.

◆ error_rates_

double tesseract::LSTMTrainer::error_rates_[ET_COUNT]
protected

Definition at line 475 of file lstmtrainer.h.

◆ file_reader_

FileReader tesseract::LSTMTrainer::file_reader_
protected

Definition at line 414 of file lstmtrainer.h.

◆ file_writer_

FileWriter tesseract::LSTMTrainer::file_writer_
protected

Definition at line 415 of file lstmtrainer.h.

◆ improvement_steps_

int tesseract::LSTMTrainer::improvement_steps_
protected

Definition at line 454 of file lstmtrainer.h.

◆ kRollingBufferSize_

const int tesseract::LSTMTrainer::kRollingBufferSize_ = 1000
staticprotected

Definition at line 472 of file lstmtrainer.h.

◆ last_perfect_training_iteration_

int tesseract::LSTMTrainer::last_perfect_training_iteration_
protected

Definition at line 469 of file lstmtrainer.h.

◆ learning_iteration_

int tesseract::LSTMTrainer::learning_iteration_
protected

Definition at line 458 of file lstmtrainer.h.

◆ model_base_

STRING tesseract::LSTMTrainer::model_base_
protected

Definition at line 402 of file lstmtrainer.h.

◆ num_training_stages_

int tesseract::LSTMTrainer::num_training_stages_
protected

Definition at line 412 of file lstmtrainer.h.

◆ perfect_delay_

int tesseract::LSTMTrainer::perfect_delay_
protected

Definition at line 466 of file lstmtrainer.h.

◆ prev_sample_iteration_

int tesseract::LSTMTrainer::prev_sample_iteration_
protected

Definition at line 460 of file lstmtrainer.h.

◆ recon_win_

ScrollView* tesseract::LSTMTrainer::recon_win_
protected

Definition at line 396 of file lstmtrainer.h.

◆ serialize_amount_

SerializeAmount tesseract::LSTMTrainer::serialize_amount_
mutableprotected

Definition at line 408 of file lstmtrainer.h.

◆ stall_iteration_

int tesseract::LSTMTrainer::stall_iteration_
protected

Definition at line 436 of file lstmtrainer.h.

◆ sub_trainer_

LSTMTrainer* tesseract::LSTMTrainer::sub_trainer_
protected

Definition at line 444 of file lstmtrainer.h.

◆ target_win_

ScrollView* tesseract::LSTMTrainer::target_win_
protected

Definition at line 392 of file lstmtrainer.h.

◆ training_data_

DocumentCache tesseract::LSTMTrainer::training_data_
protected

Definition at line 406 of file lstmtrainer.h.

◆ training_stage_

int tesseract::LSTMTrainer::training_stage_
protected

Definition at line 448 of file lstmtrainer.h.

◆ worst_error_rate_

double tesseract::LSTMTrainer::worst_error_rate_
protected

Definition at line 430 of file lstmtrainer.h.

◆ worst_error_rates_

double tesseract::LSTMTrainer::worst_error_rates_[ET_COUNT]
protected

Definition at line 432 of file lstmtrainer.h.

◆ worst_iteration_

int tesseract::LSTMTrainer::worst_iteration_
protected

Definition at line 434 of file lstmtrainer.h.

◆ worst_model_data_

GenericVector<char> tesseract::LSTMTrainer::worst_model_data_
protected

Definition at line 439 of file lstmtrainer.h.


The documentation for this class was generated from the following files: