tesseract  4.00.00dev
lstmtester.h
Go to the documentation of this file.
1 // File: lstmtester.h
3 // Description: Top-level line evaluation class for LSTM-based networks.
4 // Author: Ray Smith
5 // Created: Wed Nov 23 11:05:06 PST 2016
6 //
7 // (C) Copyright 2016, Google Inc.
8 // Licensed under the Apache License, Version 2.0 (the "License");
9 // you may not use this file except in compliance with the License.
10 // You may obtain a copy of the License at
11 // http://www.apache.org/licenses/LICENSE-2.0
12 // Unless required by applicable law or agreed to in writing, software
13 // distributed under the License is distributed on an "AS IS" BASIS,
14 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 // See the License for the specific language governing permissions and
16 // limitations under the License.
18 
19 #ifndef TESSERACT_TRAINING_LSTMTESTER_H_
20 #define TESSERACT_TRAINING_LSTMTESTER_H_
21 
22 #include "genericvector.h"
23 #include "lstmtrainer.h"
24 #include "strngs.h"
25 #include "svutil.h"
26 
27 namespace tesseract {
28 
// Top-level line evaluation class for LSTM-based networks. Holds a set of
// evaluation data and runs a serialized model against it, either synchronously
// (RunEvalSync) or on a helper thread (RunEvalAsync).
29 class LSTMTester {
30  public:
    // NOTE(review): max_memory is presumably the memory budget for the cached
    // evaluation data (test_data_) -- confirm in lstmtester.cpp.
31  LSTMTester(inT64 max_memory);
32 
33  // Loads a set of lstmf files that were created using the lstm.train config to
34  // tesseract into memory ready for testing. Returns false if nothing was
35  // loaded. The arg is a filename of a file that lists the filenames, with one
36  // name per line. Conveniently, tesstrain.sh generates such a file, along
37  // with the files themselves.
38  bool LoadAllEvalData(const STRING& filenames_file);
39  // Loads a set of lstmf files that were created using the lstm.train config to
40  // tesseract into memory ready for testing. Returns false if nothing was
41  // loaded. The arg is the list of lstmf filenames to load.
42  bool LoadAllEvalData(const GenericVector<STRING>& filenames);
43 
44  // Runs an evaluation asynchronously on the stored eval data and returns a
45  // string describing the results of the previous test. Args match TestCallback
46  // declared in lstmtrainer.h:
47  // iteration: Current learning iteration number.
48  // training_errors: If not null, is an array of size ET_COUNT, indexed by
49  // the ErrorTypes enum and indicates the current errors measured by the
50  // trainer, and this is a serious request to run an evaluation. If null,
51  // then the caller is just polling for the results of the previous eval.
52  // model_data: is the model to evaluate, which should be a serialized
53  // LSTMTrainer.
54  // training_stage: an arbitrary number on the progress of training.
55  STRING RunEvalAsync(int iteration, const double* training_errors,
56  const GenericVector<char>& model_data,
57  int training_stage);
58  // Runs an evaluation synchronously on the stored eval data and returns a
59  // string describing the results. Args as RunEvalAsync.
60  STRING RunEvalSync(int iteration, const double* training_errors,
61  const GenericVector<char>& model_data, int training_stage);
62 
63  private:
64  // Static helper thread function for RunEvalAsync, with a specific signature
65  // required by SVSync::StartThread. Actually a member function pretending to
66  // be static: its arg is a this pointer that it will cast back to LSTMTester*
67  // to call RunEvalSync using the stored args that RunEvalAsync saves in *this.
68  // LockIfNotRunning must have returned true before calling ThreadFunc, and
69  // it will call UnlockRunning to release the lock after RunEvalSync completes.
70  static void* ThreadFunc(void* lstmtester_void);
71  // Returns true if there is currently nothing running, and takes the lock
72  // if there is nothing running.
73  bool LockIfNotRunning();
74  // Releases the running lock.
75  void UnlockRunning();
76 
77  // The data to test with.
78  DocumentCache test_data_;
    // NOTE(review): presumably the total number of pages in the loaded eval
    // data -- confirm in lstmtester.cpp.
79  int total_pages_;
80  // Flag that indicates an asynchronous test is currently running.
81  // Protected by running_mutex_.
82  bool async_running_;
83  SVMutex running_mutex_;
84  // Stored copies of the args for use while running asynchronously.
    // NOTE(review): test_training_errors_ is a raw pointer, so only the pointer
    // is stored, not the array it points to (test_model_data_, by contrast, is
    // held by value). Verify how the array's lifetime is handled during an
    // async run.
85  int test_iteration_;
86  const double* test_training_errors_;
87  GenericVector<char> test_model_data_;
88  int test_training_stage_;
    // NOTE(review): presumably the result string of the most recent evaluation,
    // as handed back by RunEvalAsync -- confirm in lstmtester.cpp.
89  STRING test_result_;
90 };
91 
92 } // namespace tesseract
93 
94 #endif // TESSERACT_TRAINING_LSTMTESTER_H_
Definition: svutil.h:90
int64_t inT64
Definition: host.h:40
LSTMTester(inT64 max_memory)
Definition: lstmtester.cpp:24
Definition: strngs.h:45
STRING RunEvalSync(int iteration, const double *training_errors, const GenericVector< char > &model_data, int training_stage)
Definition: lstmtester.cpp:82
bool LoadAllEvalData(const STRING &filenames_file)
Definition: lstmtester.cpp:30
STRING RunEvalAsync(int iteration, const double *training_errors, const GenericVector< char > &model_data, int training_stage)
Definition: lstmtester.cpp:52