42 for (
int r = 0; r < results.
size(); ++r) {
59 for (
int r = 0; r < results.
size(); ++r) {
60 if (results[r].unichar_id == unichar_id)
68 if (fwrite(&unichar_id,
sizeof(unichar_id), 1, fp) != 1)
return false;
69 if (!font_ids.Serialize(fp))
return false;
75 if (fp->
FReadEndian(&unichar_id,
sizeof(unichar_id), 1) != 1)
return false;
76 if (!font_ids.DeSerialize(fp))
return false;
89 uinT8 sorted = unichars_sorted_;
90 if (fwrite(&sorted,
sizeof(sorted), 1, fp) != 1)
92 if (!unichars_.SerializeClasses(fp))
return false;
99 if (fp->
FRead(&sorted,
sizeof(sorted), 1) != 1)
return false;
100 unichars_sorted_ = sorted != 0;
101 return unichars_.DeSerializeClasses(fp);
107 for (
int c = 0; c < unichars_.size(); ++c) {
108 if (unichars_[c].unichar_id == unichar_id) {
111 for (
int f = 0; f < font_list.
size(); ++f) {
112 if (font_list[f] == font_id)
121 unichars_sorted_ = unichars_.size() <= 1;
126 for (
int c = 0; c < other.unichars_.size(); ++c) {
127 for (
int f = 0; f < other.unichars_[c].font_ids.size(); ++f) {
128 AddToShape(other.unichars_[c].unichar_id,
129 other.unichars_[c].font_ids[f]);
132 unichars_sorted_ = unichars_.size() <= 1;
137 for (
int c = 0; c < unichars_.size(); ++c) {
138 if (unichars_[c].unichar_id == unichar_id) {
141 for (
int f = 0; f < font_list.
size(); ++f) {
142 if (font_list[f] == font_id)
153 for (
int c = 0; c < unichars_.size(); ++c) {
154 if (unichars_[c].unichar_id == unichar_id) {
163 for (
int c = 0; c < unichars_.size(); ++c) {
165 for (
int f = 0; f < font_list.
size(); ++f) {
166 if (font_list[f] == font_id)
175 uinT32 properties)
const {
176 for (
int c = 0; c < unichars_.size(); ++c) {
178 for (
int f = 0; f < font_list.
size(); ++f) {
179 if (font_table.
get(font_list[f]).properties == properties)
189 uinT32 properties = font_table.
get(unichars_[0].font_ids[0]).properties;
190 for (
int c = 0; c < unichars_.size(); ++c) {
192 for (
int f = 0; f < font_list.
size(); ++f) {
193 if (font_table.
get(font_list[f]).properties != properties)
203 return IsSubsetOf(other) && other.
IsSubsetOf(*
this);
208 for (
int c = 0; c < unichars_.size(); ++c) {
209 int unichar_id = unichars_[c].unichar_id;
211 for (
int f = 0; f < font_list.
size(); ++f) {
223 if (unichars_.size() != other->unichars_.size())
return false;
224 if (!unichars_sorted_) SortUnichars();
225 if (!other->unichars_sorted_) other->SortUnichars();
226 for (
int c = 0; c < unichars_.size(); ++c) {
227 if (unichars_[c].unichar_id != other->unichars_[c].unichar_id)
234 void Shape::SortUnichars() {
236 unichars_sorted_ =
true;
242 : unicharset_(&unicharset), num_fonts_(0) {
247 if (!shape_table_.Serialize(fp))
return false;
253 if (!shape_table_.DeSerialize(fp))
return false;
261 if (num_fonts_ <= 0) {
262 for (
int shape_id = 0; shape_id < shape_table_.size(); ++shape_id) {
263 const Shape& shape = *shape_table_[shape_id];
264 for (
int c = 0; c < shape.
size(); ++c) {
265 for (
int f = 0; f < shape[c].font_ids.
size(); ++f) {
266 if (shape[c].font_ids[f] >= num_fonts_)
267 num_fonts_ = shape[c].font_ids[f] + 1;
278 for (
int shape_id = 0; shape_id < shape_table_.size(); ++shape_id) {
279 Shape* shape = shape_table_[shape_id];
280 for (
int c = 0; c < shape->
size(); ++c) {
281 shape->
SetUnicharId(c, unicharset_map[(*shape)[c].unichar_id]);
288 if (shape_id < 0 || shape_id >= shape_table_.size())
289 return STRING(
"INVALID_UNICHAR_ID");
293 if (shape.
size() > 100) {
297 for (
int c = 0; c < shape.
size(); ++c) {
301 if (shape.
size() < 10) {
303 result +=
" fonts =";
304 int num_fonts = shape[c].font_ids.
size();
305 if (num_fonts > 10) {
307 result.
add_str_int(
" ... ", shape[c].font_ids[num_fonts - 1]);
309 for (
int f = 0; f < num_fonts; ++f) {
320 int max_unichars = 0;
321 int num_multi_shapes = 0;
322 int num_master_shapes = 0;
323 for (
int s = 0; s < shape_table_.size(); ++s) {
329 if (shape_size > max_unichars)
330 max_unichars = shape_size;
333 result.
add_str_int(
"Number of shapes = ", num_master_shapes);
334 result.
add_str_int(
" max unichars = ", max_unichars);
335 result.
add_str_int(
" number with multiple unichars = ", num_multi_shapes);
343 int index = shape_table_.size();
346 shape_table_.push_back(shape);
347 num_fonts_ =
MAX(num_fonts_, font_id + 1);
355 for (index = 0; index < shape_table_.size() &&
356 !(other == *shape_table_[index]); ++index)
358 if (index == shape_table_.size()) {
360 shape_table_.push_back(shape);
368 delete shape_table_[shape_id];
369 shape_table_[shape_id] = NULL;
370 shape_table_.remove(shape_id);
376 Shape& shape = *shape_table_[shape_id];
378 num_fonts_ =
MAX(num_fonts_, font_id + 1);
383 Shape& shape = *shape_table_[shape_id];
393 for (
int s = 0; s < shape_table_.size(); ++s) {
395 for (
int c = 0; c < shape.
size(); ++c) {
396 if (shape[c].unichar_id == unichar_id) {
399 for (
int f = 0; f < shape[c].font_ids.
size(); ++f) {
400 if (shape[c].font_ids[f] == font_id)
411 int* unichar_id,
int* font_id)
const {
412 const UnicharAndFonts& unichar_and_fonts = (*shape_table_[shape_id])[0];
414 *font_id = unichar_and_fonts.
font_ids[0];
422 for (
int u_ind = 0; u_ind < shape.
size(); ++u_ind) {
423 for (
int f_ind = 0; f_ind < shape[u_ind].font_ids.
size(); ++f_ind) {
424 int c = shape[u_ind].unichar_id;
425 int f = shape[u_ind].font_ids[f_ind];
426 int master_id = master_shapes.
FindShape(c, f);
427 if (master_id >= 0) {
428 shape_map.SetBit(master_id);
435 for (
int s = 0; s < master_shapes.
NumShapes(); ++s) {
452 for (
int s1 = 0; s1 < num_shapes; ++s1) {
462 int max_num_unichars = 0;
464 for (
int s = 0; s < num_shapes; ++s) {
468 return max_num_unichars;
475 for (
int s1 = start; s1 < end; ++s1) {
477 int unichar_id =
GetShape(s1)[0].unichar_id;
478 for (
int s2 = s1 + 1; s2 < end; ++s2) {
480 unichar_id ==
GetShape(s2)[0].unichar_id) {
502 for (
int c = 0; c < shape.
size(); ++c) {
503 font_count += shape[c].font_ids.
size();
513 Shape combined_shape(*shape_table_[master_id1]);
514 combined_shape.
AddShape(*shape_table_[master_id2]);
515 return combined_shape.
size();
523 shape_table_[master_id2]->set_destination_index(master_id1);
525 shape_table_[master_id1]->AddShape(*shape_table_[master_id2]);
530 Shape* tmp = shape_table_[shape_id1];
531 shape_table_[shape_id1] = shape_table_[shape_id2];
532 shape_table_[shape_id2] = tmp;
538 int dest_id = shape_table_[shape_id]->destination_index();
539 if (dest_id == shape_id || dest_id < 0)
541 int master_id = shape_table_[dest_id]->destination_index();
542 if (master_id == dest_id || master_id < 0)
553 for (c1 = 0; c1 < shape1.
size(); ++c1) {
554 int unichar_id1 = shape1[c1].unichar_id;
558 for (c2 = 0; c2 < shape2.
size(); ++c2) {
559 int unichar_id2 = shape2[c2].unichar_id;
563 return c1 == shape1.
size() || c2 == shape2.
size();
568 int shape_id)
const {
573 for (cs = 0; cs < shape.
size(); ++cs) {
574 int unichar_id = shape[cs].unichar_id;
579 for (cm1 = 0; cm1 < merge1.
size(); ++cm1) {
580 int unichar_id1 = merge1[cm1].unichar_id;
584 for (cm2 = 0; cm2 < merge2.
size(); ++cm2) {
585 int unichar_id2 = merge2[cm2].unichar_id;
589 return cs == shape.
size() || (cm1 == merge1.
size() && cm2 == merge2.
size());
596 for (
int c1 = 0; c1 < shape1.
size(); ++c1) {
597 int unichar_id1 = shape1[c1].unichar_id;
601 for (
int c2 = 0; c2 < shape2.
size(); ++c2) {
602 int unichar_id2 = shape2[c2].unichar_id;
611 int shape_id)
const {
615 for (
int cs = 0; cs < shape.
size(); ++cs) {
616 int unichar_id = shape[cs].unichar_id;
621 for (
int cm1 = 0; cm1 < merge1.
size(); ++cm1) {
622 int unichar_id1 = merge1[cm1].unichar_id;
626 for (
int cm2 = 0; cm2 < merge2.
size(); ++cm2) {
627 int unichar_id2 = merge2[cm2].unichar_id;
638 for (
int c1 = 0; c1 < shape1.
size(); ++c1) {
639 int unichar_id1 = shape1[c1].unichar_id;
650 for (
int c1 = 0; c1 < shape1.
size(); ++c1) {
652 for (
int f = 0; f < font_list1.
size(); ++f) {
664 if (shape_map != NULL)
666 for (
int s = 0; s < other.shape_table_.size(); ++s) {
667 if (other.shape_table_[s]->destination_index() < 0) {
668 int index =
AddShape(*other.shape_table_[s]);
669 if (shape_map != NULL)
670 (*shape_map)[s] = index;
678 for (
int s = 0; s < shape_table_.size(); ++s) {
679 if (shape_table_[s]->destination_index() < 0)
696 if (shape_rating.
joined) {
700 if (shape_rating.
broken) {
705 for (
int u = 0;
u < shape.
size(); ++
u) {
706 int result_index = AddUnicharToResults(shape[
u].unichar_id,
708 unichar_map, results);
709 for (
int f = 0; f < shape[
u].font_ids.
size(); ++f) {
710 (*results)[result_index].fonts.push_back(
719 int ShapeTable::AddUnicharToResults(
722 int result_index = unichar_map->
get(unichar_id);
723 if (result_index < 0) {
725 result_index = results->
push_back(result);
726 (*unichar_map)[unichar_id] = result_index;
bool operator==(const Shape &other) const
const UNICHARSET & unicharset() const
bool IsEqualUnichars(Shape *other)
void add_str_int(const char *str, int number)
void AppendMasterShapes(const ShapeTable &other, GenericVector< int > *shape_map)
static int SortByUnicharId(const void *v1, const void *v2)
bool Serialize(FILE *fp) const
bool DeSerialize(TFile *fp)
int BuildFromShape(const Shape &shape, const ShapeTable &master_shapes)
int NumMasterShapes() const
int MasterUnicharCount(int shape_id) const
static int FirstResultWithUnichar(const GenericVector< ShapeRating > &results, const ShapeTable &shape_table, UNICHAR_ID unichar_id)
bool DeSerialize(TFile *fp)
bool SubsetUnichar(int shape_id1, int shape_id2) const
void init_to_size(int size, T t)
STRING SummaryStr() const
bool Serialize(FILE *fp) const
bool CommonFont(int shape_id1, int shape_id2) const
int MergedUnicharCount(int shape_id1, int shape_id2) const
bool AnyMultipleUnichars() const
STRING DebugStr(int shape_id) const
int MaxNumUnichars() const
void MergeShapes(int shape_id1, int shape_id2)
bool EqualUnichars(int shape_id1, int shape_id2) const
bool IsSubsetOf(const Shape &other) const
bool ContainsFontProperties(const FontInfoTable &font_table, uinT32 properties) const
bool ContainsMultipleFontProperties(const FontInfoTable &font_table) const
bool CommonUnichars(int shape_id1, int shape_id2) const
bool MergeEqualUnichars(int merge_id1, int merge_id2, int shape_id) const
bool DeSerialize(TFile *fp)
int FReadEndian(void *buffer, int size, int count)
int MasterFontCount(int shape_id) const
int IntCastRounded(double x)
void GetFirstUnicharAndFont(int shape_id, int *unichar_id, int *font_id) const
bool MergeSubsetUnichar(int merge_id1, int merge_id2, int shape_id) const
const char * id_to_unichar(UNICHAR_ID id) const
void AddToShape(int shape_id, int unichar_id, int font_id)
int MasterDestinationIndex(int shape_id) const
void AddShapeToShape(int shape_id, const Shape &other)
const Shape & GetShape(int shape_id) const
static int FirstResultWithUnichar(const GenericVector< UnicharRating > &results, UNICHAR_ID unichar_id)
void ForceFontMerges(int start, int end)
bool ContainsFont(int font_id) const
void ReMapClassIds(const GenericVector< int > &unicharset_map)
void AddToShape(int unichar_id, int font_id)
GenericVector< inT32 > font_ids
void AddShape(const Shape &other)
bool ContainsUnicharAndFont(int unichar_id, int font_id) const
bool ContainsUnichar(int unichar_id) const
int AddShape(int unichar_id, int font_id)
void AddShapeToResults(const ShapeRating &shape_rating, GenericVector< int > *unichar_map, GenericVector< UnicharRating > *results) const
void SetUnicharId(int index, int unichar_id)
int FindShape(int unichar_id, int font_id) const
bool Serialize(FILE *fp) const
bool AlreadyMerged(int shape_id1, int shape_id2) const
void SwapShapes(int shape_id1, int shape_id2)
void DeleteShape(int shape_id)
int FRead(void *buffer, int size, int count)