22 #include "config_auto.h" 25 #if (defined __MINGW32__) || (defined __CYGWIN__) 27 #undef __STRICT_ANSI__ 29 #if (defined __MINGW32__) 31 #elif !defined(_GNU_SOURCE) 36 #elif defined(_MSC_VER) 38 #define strncasecmp _strnicmp 39 #define strcasecmp _stricmp 46 #include <sys/param.h> 57 #include "pango/pango.h" 58 #include "pango/pangocairo.h" 59 #include "pango/pangofc-font.h" 62 "Overrides fontconfig default temporary dir");
64 #ifndef USE_STD_NAMESPACE 65 #include "ocr/trainingdata/typesetting/legacy_fonts.h" 67 "Overrides --fonts_dir and sets the known universe of fonts to" 68 "the list in legacy_fonts.h");
71 "Overrides system default font location");
75 "If empty it use system default. Otherwise it overrides" 76 " system default font location");
85 string PangoFontInfo::fonts_dir_;
86 string PangoFontInfo::cache_dir_;
89 : desc_(nullptr), resolution_(kDefaultResolution) {
94 : desc_(nullptr), resolution_(kDefaultResolution) {
96 tprintf(
"ERROR: Could not parse %s\n", desc.c_str());
101 void PangoFontInfo::Clear() {
105 is_smallcaps_ =
false;
106 is_monospace_ =
false;
107 family_name_.clear();
110 pango_font_description_free(desc_);
118 if (!desc_)
return "";
119 char* desc_str = pango_font_description_to_string(desc_);
120 string desc_name(desc_str);
130 if (fonts_dir_.empty()) {
132 FLAGS_fontconfig_tmpdir.c_str());
140 const string& cache_dir) {
141 if (!cache_dir_.empty()) {
145 const int MAX_FONTCONF_FILESIZE = 1024;
146 char fonts_conf_template[MAX_FONTCONF_FILESIZE];
147 cache_dir_ = cache_dir;
148 fonts_dir_ = fonts_dir;
149 snprintf(fonts_conf_template, MAX_FONTCONF_FILESIZE,
150 "<?xml version=\"1.0\"?>\n" 151 "<!DOCTYPE fontconfig SYSTEM \"fonts.dtd\">\n" 154 "<cachedir>%s</cachedir>\n" 155 "<config></config>\n" 157 fonts_dir.c_str(), cache_dir_.c_str());
158 string fonts_conf_file =
File::JoinPath(cache_dir_.c_str(),
"fonts.conf");
161 std::string env(
"FONTCONFIG_PATH=");
162 env.append(cache_dir_.c_str());
164 putenv(
"LANG=en_US.utf8");
166 setenv(
"FONTCONFIG_PATH", cache_dir_.c_str(),
true);
168 setenv(
"LANG",
"en_US.utf8",
true);
171 if (FcInitReinitialize() != FcTrue) {
172 tprintf(
"FcInitiReinitialize failed!!\n");
176 pango_cairo_font_map_set_default(
nullptr);
179 static void ListFontFamilies(PangoFontFamily*** families,
182 PangoFontMap* font_map = pango_cairo_font_map_get_default();
184 pango_font_map_list_families(font_map, families, n_families);
189 static bool IsMonospaceFontFamily(
const char*
family_name) {
190 PangoFontFamily** families = 0;
193 ListFontFamilies(&families, &n_families);
196 for (
int i = 0; i < n_families; ++i) {
197 if (!strcasecmp(family_name, pango_font_family_get_name(families[i]))) {
198 is_monospace = pango_font_family_is_monospace(families[i]);
204 tlog(1,
"Could not find monospace property of family %s\n", family_name);
210 bool PangoFontInfo::ParseFontDescription(
const PangoFontDescription *desc) {
212 const char* family = pango_font_description_get_family(desc);
214 char* desc_str = pango_font_description_to_string(desc);
215 tprintf(
"WARNING: Could not parse family name from description: '%s'\n",
220 family_name_ = string(family);
221 desc_ = pango_font_description_copy(desc);
222 is_monospace_ = IsMonospaceFontFamily(family);
225 font_size_ = pango_font_description_get_size(desc);
226 if (!pango_font_description_get_size_is_absolute(desc)) {
227 font_size_ /= PANGO_SCALE;
230 PangoStyle style = pango_font_description_get_style(desc);
231 is_italic_ = (PANGO_STYLE_ITALIC == style ||
232 PANGO_STYLE_OBLIQUE == style);
233 is_smallcaps_ = (pango_font_description_get_variant(desc)
234 == PANGO_VARIANT_SMALL_CAPS);
236 is_bold_ = (pango_font_description_get_weight(desc) >= PANGO_WEIGHT_BOLD);
240 is_fraktur_ = (
strcasestr(family,
"Fraktur") !=
nullptr);
245 PangoFontDescription *desc = pango_font_description_from_string(name.c_str());
246 bool success = ParseFontDescription(desc);
247 pango_font_description_free(desc);
254 PangoFont* PangoFontInfo::ToPangoFont()
const {
256 PangoFontMap* font_map = pango_cairo_font_map_get_default();
257 PangoContext* context = pango_context_new();
258 pango_cairo_context_set_resolution(context, resolution_);
259 pango_context_set_font_map(context, font_map);
260 PangoFont* font =
nullptr;
263 font = pango_font_map_load_font(font_map, context, desc_);
265 g_object_unref(context);
270 PangoFont* font = ToPangoFont();
271 PangoCoverage* coverage = pango_font_get_coverage(font,
nullptr);
277 if (pango_coverage_get(coverage, *it) != PANGO_COVERAGE_EXACT) {
279 int len = it.get_utf8(tmp);
281 tlog(2,
"'%s' (U+%x) not covered by font\n", tmp, *it);
290 static char* my_strnmove(
char* dest,
const char* src,
size_t n) {
299 }
while (n && src[0]);
311 PangoFont* font = ToPangoFont();
312 PangoCoverage* coverage = pango_font_get_coverage(font,
nullptr);
313 int num_dropped_chars = 0;
317 char* out =
const_cast<char*
>(utf8_text->c_str());
324 if (!it.is_legal()) {
330 const char* utf8_char = it.utf8_data();
333 if (!
IsWhitespace(unicode) && !pango_is_zero_width(unicode) &&
334 pango_coverage_get(coverage, unicode) != PANGO_COVERAGE_EXACT) {
338 tlog(2,
"'%s' (U+%x) not covered by font\n", str, unicode);
344 my_strnmove(out, utf8_char, utf8_len);
347 utf8_text->resize(out - utf8_text->c_str());
348 return num_dropped_chars;
352 int* x_bearing,
int* x_advance)
const {
354 PangoFont* font = ToPangoFont();
356 int total_advance = 0;
366 PangoGlyph glyph_index = pango_fc_font_get_glyph(
367 reinterpret_cast<PangoFcFont*>(font), *it);
373 PangoRectangle ink_rect, logical_rect;
374 pango_font_get_glyph_extents(font, glyph_index, &ink_rect, &logical_rect);
375 pango_extents_to_pixels(&ink_rect,
nullptr);
376 pango_extents_to_pixels(&logical_rect,
nullptr);
378 int bearing = total_advance + PANGO_LBEARING(ink_rect);
379 if (it == it_begin || bearing < min_bearing) {
380 min_bearing = bearing;
382 total_advance += PANGO_RBEARING(logical_rect);
384 *x_bearing = min_bearing;
385 *x_advance = total_advance;
390 std::vector<string> graphemes;
395 std::vector<string>* graphemes)
const {
396 if (graphemes) graphemes->clear();
405 const char32 kDottedCircleGlyph = 9676;
406 bool bad_glyph =
false;
407 PangoFontMap* font_map = pango_cairo_font_map_get_default();
408 PangoContext* context = pango_context_new();
409 pango_context_set_font_map(context, font_map);
414 layout = pango_layout_new(context);
417 pango_layout_set_font_description(layout, desc_);
419 PangoFontDescription *desc = pango_font_description_from_string(
421 pango_layout_set_font_description(layout, desc);
422 pango_font_description_free(desc);
424 pango_layout_set_text(layout, utf8_word, len);
425 PangoLayoutIter* run_iter =
nullptr;
428 run_iter = pango_layout_get_iter(layout);
431 PangoLayoutRun* run = pango_layout_iter_get_run_readonly(run_iter);
433 tlog(2,
"Found end of line nullptr run marker\n");
436 PangoGlyph dotted_circle_glyph;
437 PangoFont* font = run->item->analysis.font;
439 #ifdef _WIN32 // Fixme! Leaks memory and breaks unittests. 440 PangoGlyphString* glyphs = pango_glyph_string_new();
441 char s[] =
"\xc2\xa7";
442 pango_shape(s,
sizeof(s), &(run->item->analysis), glyphs);
443 dotted_circle_glyph = glyphs->glyphs[0].glyph;
445 dotted_circle_glyph = pango_fc_font_get_glyph(
446 reinterpret_cast<PangoFcFont*>(font), kDottedCircleGlyph);
450 PangoFontDescription* desc = pango_font_describe(font);
451 char* desc_str = pango_font_description_to_string(desc);
452 tlog(2,
"Desc of font in run: %s\n", desc_str);
454 pango_font_description_free(desc);
457 PangoGlyphItemIter cluster_iter;
458 gboolean have_cluster;
459 for (have_cluster = pango_glyph_item_iter_init_start(&cluster_iter,
461 have_cluster && !bad_glyph;
462 have_cluster = pango_glyph_item_iter_next_cluster(&cluster_iter)) {
463 const int start_byte_index = cluster_iter.start_index;
464 const int end_byte_index = cluster_iter.end_index;
465 int start_glyph_index = cluster_iter.start_glyph;
466 int end_glyph_index = cluster_iter.end_glyph;
467 string cluster_text = string(utf8_word + start_byte_index,
468 end_byte_index - start_byte_index);
469 if (graphemes) graphemes->push_back(cluster_text);
471 tlog(2,
"Skipping whitespace\n");
475 printf(
"start_byte=%d end_byte=%d start_glyph=%d end_glyph=%d ",
476 start_byte_index, end_byte_index,
477 start_glyph_index, end_glyph_index);
479 for (
int i = start_glyph_index,
480 step = (end_glyph_index > start_glyph_index) ? 1 : -1;
481 !bad_glyph && i != end_glyph_index; i+= step) {
482 const bool unknown_glyph =
483 (cluster_iter.glyph_item->glyphs->glyphs[i].glyph &
484 PANGO_GLYPH_UNKNOWN_FLAG);
485 const bool illegal_glyph =
486 (cluster_iter.glyph_item->glyphs->glyphs[i].glyph ==
487 dotted_circle_glyph);
488 bad_glyph = unknown_glyph || illegal_glyph;
490 printf(
"(%d=%d)", cluster_iter.glyph_item->glyphs->glyphs[i].glyph,
495 printf(
" '%s'\n", cluster_text.c_str());
498 tlog(1,
"Found illegal glyph!\n");
500 }
while (!bad_glyph && pango_layout_iter_next_run(run_iter));
502 pango_layout_iter_free(run_iter);
503 g_object_unref(context);
504 g_object_unref(layout);
505 if (bad_glyph && graphemes) graphemes->clear();
511 std::vector<string> FontUtils::available_fonts_;
524 string* best_match) {
525 string query_desc(input_query_desc);
526 #if (PANGO_VERSION <= 12005) 528 query_desc.erase(std::remove(query_desc.begin(), query_desc.end(),
','),
530 const string kMediumStr =
" Medium";
531 std::size_t found = query_desc.find(kMediumStr);
532 if (found != std::string::npos) {
533 query_desc.erase(found, kMediumStr.length());
536 PangoFontDescription *desc = pango_font_description_from_string(
538 PangoFont* selected_font =
nullptr;
541 PangoFontMap* font_map = pango_cairo_font_map_get_default();
542 PangoContext* context = pango_context_new();
543 pango_context_set_font_map(context, font_map);
546 selected_font = pango_font_map_load_font(font_map, context, desc);
548 g_object_unref(context);
550 if (selected_font ==
nullptr) {
551 pango_font_description_free(desc);
554 PangoFontDescription* selected_desc = pango_font_describe(selected_font);
556 bool equal = pango_font_description_equal(desc, selected_desc);
557 tlog(3,
"query weight = %d \t selected weight =%d\n",
558 pango_font_description_get_weight(desc),
559 pango_font_description_get_weight(selected_desc));
561 char* selected_desc_str = pango_font_description_to_string(selected_desc);
562 tlog(2,
"query_desc: '%s' Selected: '%s'\n", query_desc.c_str(),
564 if (!equal && best_match !=
nullptr) {
565 *best_match = selected_desc_str;
568 int len = best_match->size();
569 if (len > 2 && best_match->at(len - 1) ==
'0' &&
570 best_match->at(len - 2) ==
' ') {
571 *best_match = best_match->substr(0, len - 2);
574 g_free(selected_desc_str);
575 pango_font_description_free(selected_desc);
576 g_object_unref(selected_font);
577 pango_font_description_free(desc);
581 static bool ShouldIgnoreFontFamilyName(
const char* query) {
582 static const char* kIgnoredFamilyNames[] = {
"Sans",
"Serif",
"Monospace",
584 const char** list = kIgnoredFamilyNames;
585 for (; *list !=
nullptr; ++list) {
586 if (!strcmp(*list, query))
595 if (!available_fonts_.empty()) {
596 return available_fonts_;
598 #ifndef USE_STD_NAMESPACE 599 if (FLAGS_use_only_legacy_fonts) {
601 tprintf(
"Using list of legacy fonts only\n");
602 const int kNumFontLists = 4;
603 for (
int i = 0; i < kNumFontLists; ++i) {
604 for (
int j = 0; kFontlists[i][j] !=
nullptr; ++j) {
605 available_fonts_.push_back(kFontlists[i][j]);
608 return available_fonts_;
612 PangoFontFamily** families = 0;
614 ListFontFamilies(&families, &n_families);
615 for (
int i = 0; i < n_families; ++i) {
616 const char* family_name = pango_font_family_get_name(families[i]);
617 tlog(2,
"Listing family %s\n", family_name);
618 if (ShouldIgnoreFontFamilyName(family_name)) {
623 PangoFontFace** faces =
nullptr;
624 pango_font_family_list_faces(families[i], &faces, &n_faces);
625 for (
int j = 0; j < n_faces; ++j) {
626 PangoFontDescription* desc = pango_font_face_describe(faces[j]);
627 char* desc_str = pango_font_description_to_string(desc);
628 if (IsAvailableFont(desc_str)) {
629 available_fonts_.push_back(desc_str);
631 pango_font_description_free(desc);
637 std::sort(available_fonts_.begin(), available_fonts_.end());
638 return available_fonts_;
642 static void CharCoverageMapToBitmap(PangoCoverage* coverage,
643 std::vector<bool>* unichar_bitmap) {
644 const int kMinUnicodeValue = 33;
645 const int kMaxUnicodeValue = 0x10FFFF;
646 unichar_bitmap->resize(kMaxUnicodeValue + 1,
false);
648 for (
int i = kMinUnicodeValue; i <= kMaxUnicodeValue; ++i) {
651 = (pango_coverage_get(coverage, i) == PANGO_COVERAGE_EXACT);
658 const std::vector<string>& all_fonts = ListAvailableFonts();
659 return GetAllRenderableCharacters(all_fonts, unichar_bitmap);
664 std::vector<bool>* unichar_bitmap) {
666 PangoCoverage* coverage =
667 pango_font_get_coverage(font_info.ToPangoFont(),
nullptr);
668 CharCoverageMapToBitmap(coverage, unichar_bitmap);
673 std::vector<bool>* unichar_bitmap) {
675 PangoCoverage* all_coverage = pango_coverage_new();
676 tlog(1,
"Processing %u fonts\n", static_cast<unsigned>(fonts.size()));
677 for (
unsigned i = 0; i < fonts.size(); ++i) {
679 PangoCoverage* coverage =
680 pango_font_get_coverage(font_info.ToPangoFont(),
nullptr);
682 pango_coverage_max(all_coverage, coverage);
684 CharCoverageMapToBitmap(all_coverage, unichar_bitmap);
685 pango_coverage_unref(all_coverage);
693 const string& fontname,
int* raw_score,
694 std::vector<bool>* ch_flags) {
697 tprintf(
"ERROR: Could not parse %s\n", fontname.c_str());
699 PangoFont* font = font_info.ToPangoFont();
700 PangoCoverage* coverage = pango_font_get_coverage(font,
nullptr);
704 ch_flags->reserve(ch_map.size());
708 for (std::unordered_map<char32, inT64>::const_iterator it = ch_map.begin();
709 it != ch_map.end(); ++it) {
711 (pango_coverage_get(coverage, it->first)
712 == PANGO_COVERAGE_EXACT));
715 ok_chars += it->second;
718 ch_flags->push_back(covered);
727 const std::unordered_map<char32, inT64>& ch_map,
728 std::vector<std::pair<
const char*, std::vector<bool> > >* fonts) {
729 const double kMinOKFraction = 0.99;
732 const double kMinWeightedFraction = 0.99995;
735 std::vector<std::vector<bool> > font_flags;
736 std::vector<int> font_scores;
737 std::vector<int> raw_scores;
738 int most_ok_chars = 0;
739 int best_raw_score = 0;
741 for (
unsigned i = 0; i < font_names.size(); ++i) {
742 std::vector<bool> ch_flags;
744 int ok_chars = FontScore(ch_map, font_names[i], &raw_score, &ch_flags);
745 most_ok_chars =
MAX(ok_chars, most_ok_chars);
746 best_raw_score =
MAX(raw_score, best_raw_score);
748 font_flags.push_back(ch_flags);
749 font_scores.push_back(ok_chars);
750 raw_scores.push_back(raw_score);
761 int least_good_enough =
static_cast<int>(most_ok_chars * kMinOKFraction);
762 int least_raw_enough =
static_cast<int>(best_raw_score * kMinOKFraction);
763 int override_enough =
static_cast<int>(most_ok_chars * kMinWeightedFraction);
766 for (
unsigned i = 0; i < font_names.size(); ++i) {
767 int score = font_scores[i];
768 int raw_score = raw_scores[i];
769 if ((score >= least_good_enough && raw_score >= least_raw_enough) ||
770 score >= override_enough) {
771 fonts->push_back(std::make_pair(font_names[i].c_str(), font_flags[i]));
772 tlog(1,
"OK font %s = %.4f%%, raw = %d = %.2f%%\n",
773 font_names[i].c_str(),
774 100.0 * score / most_ok_chars,
775 raw_score, 100.0 * raw_score / best_raw_score);
776 font_list += font_names[i];
778 }
else if (score >= least_good_enough || raw_score >= least_raw_enough) {
779 tlog(1,
"Runner-up font %s = %.4f%%, raw = %d = %.2f%%\n",
780 font_names[i].c_str(),
781 100.0 * score / most_ok_chars,
782 raw_score, 100.0 * raw_score / best_raw_score);
790 string* font_name, std::vector<string>* graphemes) {
791 return SelectFont(utf8_word, utf8_len, ListAvailableFonts(), font_name,
797 const std::vector<string>& all_fonts,
798 string* font_name, std::vector<string>* graphemes) {
799 if (font_name) font_name->clear();
800 if (graphemes) graphemes->clear();
801 for (
unsigned i = 0; i < all_fonts.size(); ++i) {
803 std::vector<string> found_graphemes;
805 "Could not parse font desc name %s\n",
806 all_fonts[i].c_str());
808 if (graphemes) graphemes->swap(found_graphemes);
809 if (font_name) *font_name = all_fonts[i];
static const_iterator begin(const char *utf8_str, const int byte_length)
static void WriteStringToFileOrDie(const string &str, const string &filename)
static void SoftInitFontConfig()
#define ASSERT_HOST_MSG(x,...)
int DropUncoveredChars(string *utf8_text) const
static bool DeleteMatchingFiles(const char *pattern)
static const std::vector< string > & ListAvailableFonts()
static int FontScore(const std::unordered_map< char32, inT64 > &ch_map, const string &fontname, int *raw_score, std::vector< bool > *ch_flags)
STRING_PARAM_FLAG(fontconfig_tmpdir, "/tmp", "Overrides fontconfig default temporary dir")
static const_iterator end(const char *utf8_str, const int byte_length)
bool CanRenderString(const char *utf8_word, int len, std::vector< string > *graphemes) const
static string BestFonts(const std::unordered_map< char32, inT64 > &ch_map, std::vector< std::pair< const char *, std::vector< bool > > > *font_flag)
#define TLOG_IS_ON(level)
static void HardInitFontConfig(const string &fonts_dir, const string &cache_dir)
char * strcasestr(const char *haystack, const char *needle)
Locate a substring within a string, ignoring case.
bool is_monospace() const
BOOL_PARAM_FLAG(use_only_legacy_fonts, false, "Overrides --fonts_dir and sets the known universe of fonts to" "the list in legacy_fonts.h")
#define DISABLE_HEAP_LEAK_CHECK
static string JoinPath(const string &prefix, const string &suffix)
bool GetSpacingProperties(const string &utf8_char, int *x_bearing, int *x_advance) const
static bool SelectFont(const char *utf8_word, const int utf8_len, string *font_name, std::vector< string > *graphemes)
bool CoversUTF8Text(const char *utf8_text, int byte_length) const
static void GetAllRenderableCharacters(std::vector< bool > *unichar_bitmap)
string DescriptionName() const
static bool IsAvailableFont(const char *font_desc)
bool IsWhitespace(const char32 ch)
const string & family_name() const
bool IsUTF8Whitespace(const char *text)
bool ParseFontDescriptionName(const string &name)
const int kDefaultResolution
bool IsInterchangeValid(const char32 ch)