tesseract  4.00.00dev
unicharset_training_utils.h
Go to the documentation of this file.
1 // File: unicharset_training_utils.h
3 // Description: Training utilities for UNICHARSET.
4 // Author: Ray Smith
5 // Created: Fri Oct 17 17:14:01 PDT 2014
6 //
7 // (C) Copyright 2014, Google Inc.
8 // Licensed under the Apache License, Version 2.0 (the "License");
9 // you may not use this file except in compliance with the License.
10 // You may obtain a copy of the License at
11 // http://www.apache.org/licenses/LICENSE-2.0
12 // Unless required by applicable law or agreed to in writing, software
13 // distributed under the License is distributed on an "AS IS" BASIS,
14 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 // See the License for the specific language governing permissions and
16 // limitations under the License.
17 //
19 
20 #ifndef TESSERACT_TRAINING_UNICHARSET_TRAINING_UTILS_H_
21 #define TESSERACT_TRAINING_UNICHARSET_TRAINING_UTILS_H_
22 
23 #include <string>
24 
25 #ifdef USE_STD_NAMESPACE
26 using std::string;
27 #endif
28 
29 class STATS;
30 class UNICHARSET;
31 
32 namespace tesseract {
33 
34 // Helper sets the character attribute properties and sets up the script table
35 // for the given unicharset. Tops and bottoms are NOT set by this function.
36 void SetupBasicProperties(bool report_errors, bool decompose,
37  UNICHARSET* unicharset);
38 // Convenience overload that forwards with decompose = false: the default is
39 // to compose until decomposition is proven to benefit at least one language.
40 inline void SetupBasicProperties(bool report_errors, UNICHARSET* unicharset) {
41  SetupBasicProperties(report_errors, /*decompose=*/false, unicharset);
42 }
43 
44 // Helper to set the properties for an input unicharset file; the result is
45 // written to the output file. If an appropriate script unicharset can be
46 // found in the script_dir directory, then the tops and bottoms are expanded
47 // using the script unicharset. If output_xheights_file is non-empty, xheight
48 // data for the fonts is written to that file.
49 void SetPropertiesForInputFile(const std::string& script_dir,
50  const std::string& input_unicharset_file,
51  const std::string& output_unicharset_file,
52  const std::string& output_xheights_file);
53 
54 } // namespace tesseract.
55 
56 #endif // TESSERACT_TRAINING_UNICHARSET_TRAINING_UTILS_H_
void SetupBasicProperties(bool report_errors, bool decompose, UNICHARSET *unicharset)
void SetPropertiesForInputFile(const string &script_dir, const string &input_unicharset_file, const string &output_unicharset_file, const string &output_xheights_file)
Definition: statistc.h:33