tesseract  4.00.00dev
pango_font_info.h
Go to the documentation of this file.
1 /**********************************************************************
2  * File: pango_font_info.h
3  * Description: Font-related objects and helper functions
4  * Author: Ranjith Unnikrishnan
5  * Created: Mon Nov 18 2013
6  *
7  * (C) Copyright 2013, Google Inc.
8  * Licensed under the Apache License, Version 2.0 (the "License");
9  * you may not use this file except in compliance with the License.
10  * You may obtain a copy of the License at
11  * http://www.apache.org/licenses/LICENSE-2.0
12  * Unless required by applicable law or agreed to in writing, software
13  * distributed under the License is distributed on an "AS IS" BASIS,
14  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15  * See the License for the specific language governing permissions and
16  * limitations under the License.
17  *
18  **********************************************************************/
19 
20 #ifndef TESSERACT_TRAINING_PANGO_FONT_INFO_H_
21 #define TESSERACT_TRAINING_PANGO_FONT_INFO_H_
22 
23 #include <string>
24 #include <unordered_map>
25 #include <utility>
26 #include <vector>
27 
28 #include "commandlineflags.h"
29 #include "host.h"
30 #include "pango/pango-font.h"
31 #include "pango/pango.h"
32 #include "pango/pangocairo.h"
33 #include "util.h"
34 
35 DECLARE_STRING_PARAM_FLAG(fonts_dir); // Directory FontConfig is pointed at for fonts (see SoftInitFontConfig).
36 DECLARE_STRING_PARAM_FLAG(fontconfig_tmpdir); // Directory used for the FontConfig cache (see SoftInitFontConfig).
37 
38 typedef signed int char32; // Key type of the ch_map character tables in FontUtils; presumably a Unicode codepoint value.
39 
40 namespace tesseract {
41 
42 // Data holder class for a font, intended to avoid having to work with Pango or
43 // FontConfig-specific objects directly.
45  public:
46  enum FontTypeEnum {
51  };
52  PangoFontInfo();
54  // Initialize from parsing a font description name, defined as a string of the
55  // format:
56  // "FamilyName [FaceName] [PointSize]"
57  // where a missing FaceName implies the default regular face.
58  // eg. "Arial Italic 12", "Verdana"
59  //
60  // FaceName is a combination of:
61  // [StyleName] [Variant] [Weight] [Stretch]
62  // with (all optional) Pango-defined values of:
63  // StyleName: Oblique, Italic
64  // Variant : Small-Caps
65  // Weight : Ultra-Light, Light, Medium, Semi-Bold, Bold, Ultra-Bold, Heavy
66  // Stretch : Ultra-Condensed, Extra-Condensed, Condensed, Semi-Condensed,
67  // Semi-Expanded, Expanded, Extra-Expanded, Ultra-Expanded.
68  explicit PangoFontInfo(const string& name);
69  bool ParseFontDescriptionName(const string& name);
70 
71  // Returns true if the font has codepoint coverage for the specified text.
72  bool CoversUTF8Text(const char* utf8_text, int byte_length) const;
73  // Modifies string to remove unicode points that are not covered by the
74  // font. Returns the number of characters dropped.
75  int DropUncoveredChars(string* utf8_text) const;
76 
77  // Returns true if the entire string can be rendered by the font with full
78  // character coverage and no unknown glyph or dotted-circle glyph
79  // substitutions on encountering a badly formed unicode sequence.
80  // If true, returns individual graphemes. Any whitespace characters in the
81  // original string are also included in the list.
82  bool CanRenderString(const char* utf8_word, int len,
83  std::vector<string>* graphemes) const;
84  bool CanRenderString(const char* utf8_word, int len) const;
85 
86  // Retrieves the x_bearing and x_advance for the given utf8 character in the
87  // font. Returns false if the glyph for the character could not be found in
88  // the font.
89  // Ref: http://freetype.sourceforge.net/freetype2/docs/glyphs/glyphs-3.html
90  bool GetSpacingProperties(const string& utf8_char,
91  int* x_bearing, int* x_advance) const;
92 
93  // If not already initialized, initializes FontConfig by setting its
94  // environment variable and creating a fonts.conf file that points to the
95  // FLAGS_fonts_dir and the cache to FLAGS_fontconfig_tmpdir.
96  static void SoftInitFontConfig();
97  // Re-initializes font config, whether or not already initialized.
98  // If already initialized, any existing cache is deleted, just to be sure.
99  static void HardInitFontConfig(const string& fonts_dir,
100  const string& cache_dir);
101 
102  // Accessors
103  string DescriptionName() const;
104  // Font Family name eg. "Arial"
105  const string& family_name() const { return family_name_; }
106  // Size in points (1/72"), rounded to the nearest integer.
107  int font_size() const { return font_size_; }
108  bool is_bold() const { return is_bold_; }
109  bool is_italic() const { return is_italic_; }
110  bool is_smallcaps() const { return is_smallcaps_; }
111  bool is_monospace() const { return is_monospace_; }
112  bool is_fraktur() const { return is_fraktur_; }
113  FontTypeEnum font_type() const { return font_type_; }
114 
115  int resolution() const { return resolution_; }
116  void set_resolution(const int resolution) {
117  resolution_ = resolution;
118  }
119 
120  private:
121  friend class FontUtils;
122  void Clear();
123  bool ParseFontDescription(const PangoFontDescription* desc);
124  // Returns the PangoFont structure corresponding to the closest available font
125  // in the font map.
126  PangoFont* ToPangoFont() const;
127 
128  // Font properties set automatically from parsing the font description name.
129  string family_name_;
130  int font_size_;
131  bool is_bold_;
132  bool is_italic_;
133  bool is_smallcaps_;
134  bool is_monospace_;
135  bool is_fraktur_;
136  FontTypeEnum font_type_;
137  // The Pango description that was used to initialize the instance.
138  PangoFontDescription* desc_;
139  // Default output resolution to assume for GetSpacingProperties() and any
140  // other methods that return pixel values.
141  int resolution_;
142  // Fontconfig operates through an environment variable, so it intrinsically
143  // cannot be thread-friendly, but you can serialize multiple independent
144  // font configurations by calling HardInitFontConfig(fonts_dir, cache_dir).
145  // These hold the last initialized values set by HardInitFontConfig or
146  // the first call to SoftInitFontConfig.
147  // Directory to be scanned for font files.
148  static string fonts_dir_;
149  // Directory to store the cache of font information. (Can be the same as
150  // fonts_dir_)
151  static string cache_dir_;
152 
153  private:
155  void operator=(const PangoFontInfo&);
156 };
157 
158 // Static utility methods for querying font availability and font-selection
159 // based on codepoint coverage. All members are static.
160 class FontUtils {
161  public:
162  // Returns true if the font of the given description name is available in the
163  // target directory specified by --fonts_dir (no best-match output).
164  static bool IsAvailableFont(const char* font_desc) {
165  return IsAvailableFont(font_desc, nullptr);
166  }
167  // Returns true if the font of the given description name is available in the
168  // target directory specified by --fonts_dir. If false is returned, and
169  // best_match is not nullptr, the closest matching font is returned there.
170  static bool IsAvailableFont(const char* font_desc, string* best_match);
171  // Returns the description names of the available fonts.
172  static const std::vector<string>& ListAvailableFonts();
173 
174  // Picks font among available fonts that covers and can render the given word,
175  // and returns the font description name and the decomposition of the word to
176  // graphemes. Returns false if no suitable font was found.
177  static bool SelectFont(const char* utf8_word, const int utf8_len,
178  string* font_name, std::vector<string>* graphemes);
179 
180  // Picks font among all_fonts that covers and can render the given word,
181  // and returns the font description name and the decomposition of the word to
182  // graphemes. Returns false if no suitable font was found.
183  static bool SelectFont(const char* utf8_word, const int utf8_len,
184  const std::vector<string>& all_fonts,
185  string* font_name, std::vector<string>* graphemes);
186 
187  // Fills *unichar_bitmap with a bitmask where the value of true at index 'n'
188  // implies that unicode value 'n' is renderable by at least one available font.
189  static void GetAllRenderableCharacters(std::vector<bool>* unichar_bitmap);
190  // Variants of the above function that inspect only the provided font name(s).
191  static void GetAllRenderableCharacters(const std::vector<string>& font_names,
192  std::vector<bool>* unichar_bitmap);
193  static void GetAllRenderableCharacters(const string& font_name,
194  std::vector<bool>* unichar_bitmap);
195 
196  // NOTE: The following utilities were written to be backward compatible with
197  // StringRender.
198 
199  // BestFonts returns a font name and a bit vector of the characters it
200  // can render for the fonts that score within some fraction of the best
201  // font on the characters in the given hash map.
202  // In the flags vector, each flag is set according to whether the
203  // corresponding character (in order of iterating ch_map) can be rendered.
204  // The return string is a list of the acceptable fonts that were used.
205  static string BestFonts(
206  const std::unordered_map<char32, inT64>& ch_map,
207  std::vector<std::pair<const char*, std::vector<bool> > >* font_flag);
208 
209  // FontScore returns the weighted renderability score of the given
210  // hash map character table in the given font. The unweighted score
211  // is also returned in raw_score.
212  // The values in the bool vector ch_flags correspond to whether the
213  // corresponding character (in order of iterating ch_map) can be rendered.
214  static int FontScore(const std::unordered_map<char32, inT64>& ch_map,
215  const string& fontname, int* raw_score,
216  std::vector<bool>* ch_flags);
217 
218  // PangoFontInfo is reinitialized, so clear the static list of fonts.
219  static void ReInit();
220 
221  private:
222  static std::vector<string> available_fonts_; // cache list; presumably the list cleared by ReInit() -- confirm in the .cpp
223 };
224 } // namespace tesseract
225 
226 #endif // TESSERACT_TRAINING_PANGO_FONT_INFO_H_
int DropUncoveredChars(string *utf8_text) const
bool CanRenderString(const char *utf8_word, int len, std::vector< string > *graphemes) const
static void HardInitFontConfig(const string &fonts_dir, const string &cache_dir)
bool GetSpacingProperties(const string &utf8_char, int *x_bearing, int *x_advance) const
bool CoversUTF8Text(const char *utf8_text, int byte_length) const
DECLARE_STRING_PARAM_FLAG(fonts_dir)
static bool IsAvailableFont(const char *font_desc)
void set_resolution(const int resolution)
const string & family_name() const
signed int char32
bool ParseFontDescriptionName(const string &name)
FontTypeEnum font_type() const