tesseract  4.00.00dev
adaptive.cpp
Go to the documentation of this file.
1 /******************************************************************************
2  ** Filename: adaptive.cpp
3  ** Purpose: Adaptive matcher.
4  ** Author: Dan Johnson
5  ** History: Fri Mar 8 10:00:21 1991, DSJ, Created.
6  **
7  ** (c) Copyright Hewlett-Packard Company, 1988.
8  ** Licensed under the Apache License, Version 2.0 (the "License");
9  ** you may not use this file except in compliance with the License.
10  ** You may obtain a copy of the License at
11  ** http://www.apache.org/licenses/LICENSE-2.0
12  ** Unless required by applicable law or agreed to in writing, software
13  ** distributed under the License is distributed on an "AS IS" BASIS,
14  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15  ** See the License for the specific language governing permissions and
16  ** limitations under the License.
17  ******************************************************************************/
18 
19 /*----------------------------------------------------------------------------
20  Include Files and Type Defines
21 ----------------------------------------------------------------------------*/
22 #include "adaptive.h"
23 #include "emalloc.h"
24 #include "globals.h"
25 #include "classify.h"
26 
27 #ifdef __UNIX__
28 #include <assert.h>
29 #endif
30 #include <stdio.h>
31 
32 using tesseract::TFile;
33 
34 /*----------------------------------------------------------------------------
35  Public Code
36 ----------------------------------------------------------------------------*/
37 /*---------------------------------------------------------------------------*/
51  ADAPT_CLASS Class,
52  CLASS_ID ClassId) {
53  INT_CLASS IntClass;
54 
55  assert (Templates != NULL);
56  assert (Class != NULL);
57  assert (LegalClassId (ClassId));
58  assert (UnusedClassIdIn (Templates->Templates, ClassId));
59  assert (Class->NumPermConfigs == 0);
60 
61  IntClass = NewIntClass (1, 1);
62  AddIntClass (Templates->Templates, ClassId, IntClass);
63 
64  assert (Templates->Class[ClassId] == NULL);
65  Templates->Class[ClassId] = Class;
66 
67 } /* AddAdaptedClass */
68 
69 
70 /*---------------------------------------------------------------------------*/
82  assert (Config != NULL);
83  FreeBitVector (Config->Protos);
84  free(Config);
85 } /* FreeTempConfig */
86 
87 /*---------------------------------------------------------------------------*/
88 void FreeTempProto(void *arg) {
89  PROTO proto = (PROTO) arg;
90 
91  free(proto);
92 }
93 
95  assert(Config != NULL);
96  delete [] Config->Ambigs;
97  free(Config);
98 }
99 
100 /*---------------------------------------------------------------------------*/
112  ADAPT_CLASS Class;
113  int i;
114 
115  Class = (ADAPT_CLASS) Emalloc (sizeof (ADAPT_CLASS_STRUCT));
116  Class->NumPermConfigs = 0;
117  Class->MaxNumTimesSeen = 0;
118  Class->TempProtos = NIL_LIST;
119 
124 
125  for (i = 0; i < MAX_NUM_CONFIGS; i++)
126  TempConfigFor (Class, i) = NULL;
127 
128  return (Class);
129 
130 } /* NewAdaptedClass */
131 
132 
133 /*-------------------------------------------------------------------------*/
134 void free_adapted_class(ADAPT_CLASS adapt_class) {
135  int i;
136 
137  for (i = 0; i < MAX_NUM_CONFIGS; i++) {
138  if (ConfigIsPermanent (adapt_class, i)
139  && PermConfigFor (adapt_class, i) != NULL)
140  FreePermConfig (PermConfigFor (adapt_class, i));
141  else if (!ConfigIsPermanent (adapt_class, i)
142  && TempConfigFor (adapt_class, i) != NULL)
143  FreeTempConfig (TempConfigFor (adapt_class, i));
144  }
145  FreeBitVector (adapt_class->PermProtos);
146  FreeBitVector (adapt_class->PermConfigs);
147  destroy_nodes (adapt_class->TempProtos, FreeTempProto);
148  Efree(adapt_class);
149 }
150 
151 
152 /*---------------------------------------------------------------------------*/
153 namespace tesseract {
166  ADAPT_TEMPLATES Templates;
167  int i;
168 
169  Templates = (ADAPT_TEMPLATES) Emalloc (sizeof (ADAPT_TEMPLATES_STRUCT));
170 
171  Templates->Templates = NewIntTemplates ();
172  Templates->NumPermClasses = 0;
173  Templates->NumNonEmptyClasses = 0;
174 
175  /* Insert an empty class for each unichar id in unicharset */
176  for (i = 0; i < MAX_NUM_CLASSES; i++) {
177  Templates->Class[i] = NULL;
178  if (InitFromUnicharset && i < unicharset.size()) {
179  AddAdaptedClass(Templates, NewAdaptedClass(), i);
180  }
181  }
182 
183  return (Templates);
184 
185 } /* NewAdaptedTemplates */
186 
187 // Returns FontinfoId of the given config of the given adapted class.
189  return (ConfigIsPermanent(Class, ConfigId) ?
190  PermConfigFor(Class, ConfigId)->FontinfoId :
191  TempConfigFor(Class, ConfigId)->FontinfoId);
192 }
193 
194 } // namespace tesseract
195 
196 /*----------------------------------------------------------------------------*/
198 
199  if (templates != NULL) {
200  int i;
201  for (i = 0; i < (templates->Templates)->NumClasses; i++)
202  free_adapted_class (templates->Class[i]);
203  free_int_templates (templates->Templates);
204  Efree(templates);
205  }
206 }
207 
208 
209 /*---------------------------------------------------------------------------*/
221 TEMP_CONFIG NewTempConfig(int MaxProtoId, int FontinfoId) {
222  int NumProtos = MaxProtoId + 1;
223 
224  TEMP_CONFIG Config = (TEMP_CONFIG) malloc(sizeof(TEMP_CONFIG_STRUCT));
225  Config->Protos = NewBitVector (NumProtos);
226 
227  Config->NumTimesSeen = 1;
228  Config->MaxProtoId = MaxProtoId;
229  Config->ProtoVectorSize = WordsInVectorOfSize (NumProtos);
230  zero_all_bits (Config->Protos, Config->ProtoVectorSize);
231  Config->FontinfoId = FontinfoId;
232 
233  return (Config);
234 
235 } /* NewTempConfig */
236 
237 
238 /*---------------------------------------------------------------------------*/
249  return (TEMP_PROTO) malloc(sizeof(TEMP_PROTO_STRUCT));
250 } /* NewTempProto */
251 
252 
253 /*---------------------------------------------------------------------------*/
254 namespace tesseract {
267  int i;
268  INT_CLASS IClass;
269  ADAPT_CLASS AClass;
270 
271  fprintf (File, "\n\nSUMMARY OF ADAPTED TEMPLATES:\n\n");
272  fprintf (File, "Num classes = %d; Num permanent classes = %d\n\n",
273  Templates->NumNonEmptyClasses, Templates->NumPermClasses);
274  fprintf (File, " Id NC NPC NP NPP\n");
275  fprintf (File, "------------------------\n");
276 
277  for (i = 0; i < (Templates->Templates)->NumClasses; i++) {
278  IClass = Templates->Templates->Class[i];
279  AClass = Templates->Class[i];
280  if (!IsEmptyAdaptedClass (AClass)) {
281  fprintf (File, "%5d %s %3d %3d %3d %3d\n",
283  IClass->NumConfigs, AClass->NumPermConfigs,
284  IClass->NumProtos,
285  IClass->NumProtos - count (AClass->TempProtos));
286  }
287  }
288  fprintf (File, "\n");
289 
290 } /* PrintAdaptedTemplates */
291 } // namespace tesseract
292 
293 
294 /*---------------------------------------------------------------------------*/
307  int NumTempProtos;
308  int NumConfigs;
309  int i;
310  ADAPT_CLASS Class;
311 
312  /* first read high level adapted class structure */
313  Class = (ADAPT_CLASS) Emalloc (sizeof (ADAPT_CLASS_STRUCT));
314  fp->FRead(Class, sizeof(ADAPT_CLASS_STRUCT), 1);
315 
316  /* then read in the definitions of the permanent protos and configs */
319  fp->FRead(Class->PermProtos, sizeof(uinT32),
321  fp->FRead(Class->PermConfigs, sizeof(uinT32),
323 
324  /* then read in the list of temporary protos */
325  fp->FRead(&NumTempProtos, sizeof(int), 1);
326  Class->TempProtos = NIL_LIST;
327  for (i = 0; i < NumTempProtos; i++) {
328  TEMP_PROTO TempProto = (TEMP_PROTO) malloc(sizeof(TEMP_PROTO_STRUCT));
329  fp->FRead(TempProto, sizeof(TEMP_PROTO_STRUCT), 1);
330  Class->TempProtos = push_last (Class->TempProtos, TempProto);
331  }
332 
333  /* then read in the adapted configs */
334  fp->FRead(&NumConfigs, sizeof(int), 1);
335  for (i = 0; i < NumConfigs; i++)
336  if (test_bit (Class->PermConfigs, i))
337  Class->Config[i].Perm = ReadPermConfig(fp);
338  else
339  Class->Config[i].Temp = ReadTempConfig(fp);
340 
341  return (Class);
342 
343 } /* ReadAdaptedClass */
344 
345 
346 /*---------------------------------------------------------------------------*/
347 namespace tesseract {
360  int i;
361  ADAPT_TEMPLATES Templates;
362 
363  /* first read the high level adaptive template struct */
364  Templates = (ADAPT_TEMPLATES) Emalloc (sizeof (ADAPT_TEMPLATES_STRUCT));
365  fp->FRead(Templates, sizeof(ADAPT_TEMPLATES_STRUCT), 1);
366 
367  /* then read in the basic integer templates */
368  Templates->Templates = ReadIntTemplates(fp);
369 
370  /* then read in the adaptive info for each class */
371  for (i = 0; i < (Templates->Templates)->NumClasses; i++) {
372  Templates->Class[i] = ReadAdaptedClass(fp);
373  }
374  return (Templates);
375 
376 } /* ReadAdaptedTemplates */
377 } // namespace tesseract
378 
379 
380 /*---------------------------------------------------------------------------*/
393  PERM_CONFIG Config = (PERM_CONFIG) malloc(sizeof(PERM_CONFIG_STRUCT));
394  uinT8 NumAmbigs;
395  fp->FRead(&NumAmbigs, sizeof(uinT8), 1);
396  Config->Ambigs = new UNICHAR_ID[NumAmbigs + 1];
397  fp->FRead(Config->Ambigs, sizeof(UNICHAR_ID), NumAmbigs);
398  Config->Ambigs[NumAmbigs] = -1;
399  fp->FRead(&(Config->FontinfoId), sizeof(int), 1);
400 
401  return (Config);
402 
403 } /* ReadPermConfig */
404 
405 
406 /*---------------------------------------------------------------------------*/
419  TEMP_CONFIG Config = (TEMP_CONFIG) malloc(sizeof(TEMP_CONFIG_STRUCT));
420  fp->FRead(Config, sizeof(TEMP_CONFIG_STRUCT), 1);
421 
422  Config->Protos = NewBitVector (Config->ProtoVectorSize * BITSINLONG);
423  fp->FRead(Config->Protos, sizeof(uinT32), Config->ProtoVectorSize);
424 
425  return (Config);
426 
427 } /* ReadTempConfig */
428 
429 
430 /*---------------------------------------------------------------------------*/
443 void WriteAdaptedClass(FILE *File, ADAPT_CLASS Class, int NumConfigs) {
444  int NumTempProtos;
445  LIST TempProtos;
446  int i;
447 
448  /* first write high level adapted class structure */
449  fwrite ((char *) Class, sizeof (ADAPT_CLASS_STRUCT), 1, File);
450 
451  /* then write out the definitions of the permanent protos and configs */
452  fwrite ((char *) Class->PermProtos, sizeof (uinT32),
454  fwrite ((char *) Class->PermConfigs, sizeof (uinT32),
456 
457  /* then write out the list of temporary protos */
458  NumTempProtos = count (Class->TempProtos);
459  fwrite ((char *) &NumTempProtos, sizeof (int), 1, File);
460  TempProtos = Class->TempProtos;
461  iterate (TempProtos) {
462  void* proto = first_node(TempProtos);
463  fwrite ((char *) proto, sizeof (TEMP_PROTO_STRUCT), 1, File);
464  }
465 
466  /* then write out the adapted configs */
467  fwrite ((char *) &NumConfigs, sizeof (int), 1, File);
468  for (i = 0; i < NumConfigs; i++)
469  if (test_bit (Class->PermConfigs, i))
470  WritePermConfig (File, Class->Config[i].Perm);
471  else
472  WriteTempConfig (File, Class->Config[i].Temp);
473 
474 } /* WriteAdaptedClass */
475 
476 
477 /*---------------------------------------------------------------------------*/
478 namespace tesseract {
490  int i;
491 
492  /* first write the high level adaptive template struct */
493  fwrite ((char *) Templates, sizeof (ADAPT_TEMPLATES_STRUCT), 1, File);
494 
495  /* then write out the basic integer templates */
496  WriteIntTemplates (File, Templates->Templates, unicharset);
497 
498  /* then write out the adaptive info for each class */
499  for (i = 0; i < (Templates->Templates)->NumClasses; i++) {
500  WriteAdaptedClass (File, Templates->Class[i],
501  Templates->Templates->Class[i]->NumConfigs);
502  }
503 } /* WriteAdaptedTemplates */
504 } // namespace tesseract
505 
506 
507 /*---------------------------------------------------------------------------*/
519 void WritePermConfig(FILE *File, PERM_CONFIG Config) {
520  uinT8 NumAmbigs = 0;
521 
522  assert (Config != NULL);
523  while (Config->Ambigs[NumAmbigs] > 0) ++NumAmbigs;
524 
525  fwrite((char *) &NumAmbigs, sizeof(uinT8), 1, File);
526  fwrite(Config->Ambigs, sizeof(UNICHAR_ID), NumAmbigs, File);
527  fwrite(&(Config->FontinfoId), sizeof(int), 1, File);
528 } /* WritePermConfig */
529 
530 
531 /*---------------------------------------------------------------------------*/
543 void WriteTempConfig(FILE *File, TEMP_CONFIG Config) {
544  assert (Config != NULL);
545 
546  fwrite ((char *) Config, sizeof (TEMP_CONFIG_STRUCT), 1, File);
547  fwrite ((char *) Config->Protos, sizeof (uinT32),
548  Config->ProtoVectorSize, File);
549 
550 } /* WriteTempConfig */
PERM_CONFIG ReadPermConfig(TFile *fp)
Definition: adaptive.cpp:392
ADAPTED_CONFIG Config[MAX_NUM_CONFIGS]
Definition: adaptive.h:70
void FreeBitVector(BIT_VECTOR BitVector)
Definition: bitvec.cpp:54
void WriteIntTemplates(FILE *File, INT_TEMPLATES Templates, const UNICHARSET &target_unicharset)
Definition: intproto.cpp:1067
BIT_VECTOR PermConfigs
Definition: adaptive.h:68
int UNICHAR_ID
Definition: unichar.h:33
#define BITSINLONG
Definition: bitvec.h:27
#define MAX_NUM_PROTOS
Definition: intproto.h:47
uinT8 NumConfigs
Definition: intproto.h:110
TEMP_CONFIG NewTempConfig(int MaxProtoId, int FontinfoId)
Definition: adaptive.cpp:221
UNICHAR_ID CLASS_ID
Definition: matchdefs.h:35
#define UnusedClassIdIn(T, c)
Definition: intproto.h:180
#define WordsInVectorOfSize(NumBits)
Definition: bitvec.h:63
uinT8 ProtoVectorSize
Definition: adaptive.h:42
ADAPT_CLASS NewAdaptedClass()
Definition: adaptive.cpp:111
void * Emalloc(int Size)
Definition: emalloc.cpp:47
ADAPT_TEMPLATES NewAdaptedTemplates(bool InitFromUnicharset)
Definition: adaptive.cpp:165
void FreeTempConfig(TEMP_CONFIG Config)
Definition: adaptive.cpp:81
PERM_CONFIG_STRUCT * PERM_CONFIG
Definition: adaptive.h:54
BIT_VECTOR Protos
Definition: adaptive.h:44
ADAPT_CLASS ReadAdaptedClass(TFile *fp)
Definition: adaptive.cpp:306
#define PermConfigFor(Class, ConfigId)
Definition: adaptive.h:104
LIST push_last(LIST list, void *item)
Definition: oldlist.cpp:332
ADAPT_CLASS_STRUCT * ADAPT_CLASS
Definition: adaptive.h:72
uinT16 NumProtos
Definition: intproto.h:108
#define LegalClassId(c)
Definition: intproto.h:179
void AddAdaptedClass(ADAPT_TEMPLATES Templates, ADAPT_CLASS Class, CLASS_ID ClassId)
Definition: adaptive.cpp:50
ADAPT_TEMPLATES ReadAdaptedTemplates(TFile *File)
Definition: adaptive.cpp:359
#define NIL_LIST
Definition: oldlist.h:126
CLUSTERCONFIG Config
void PrintAdaptedTemplates(FILE *File, ADAPT_TEMPLATES Templates)
Definition: adaptive.cpp:266
TEMP_PROTO_STRUCT * TEMP_PROTO
Definition: adaptive.h:37
BIT_VECTOR NewBitVector(int NumBits)
Definition: bitvec.cpp:89
uinT8 MaxNumTimesSeen
Definition: adaptive.h:65
void WritePermConfig(FILE *File, PERM_CONFIG Config)
Definition: adaptive.cpp:519
const char * id_to_unichar(UNICHAR_ID id) const
Definition: unicharset.cpp:266
TEMP_CONFIG Temp
Definition: adaptive.h:58
INT_TEMPLATES Templates
Definition: adaptive.h:76
PERM_CONFIG Perm
Definition: adaptive.h:59
PROTO_ID MaxProtoId
Definition: adaptive.h:43
uint32_t uinT32
Definition: host.h:39
PROTO_STRUCT * PROTO
Definition: protos.h:52
void free_adapted_templates(ADAPT_TEMPLATES templates)
Definition: adaptive.cpp:197
INT_TEMPLATES ReadIntTemplates(TFile *fp)
Definition: intproto.cpp:761
#define zero_all_bits(array, length)
Definition: bitvec.h:33
void WriteTempConfig(FILE *File, TEMP_CONFIG Config)
Definition: adaptive.cpp:543
void free_adapted_class(ADAPT_CLASS adapt_class)
Definition: adaptive.cpp:134
void WriteAdaptedClass(FILE *File, ADAPT_CLASS Class, int NumConfigs)
Definition: adaptive.cpp:443
ADAPT_TEMPLATES_STRUCT * ADAPT_TEMPLATES
Definition: adaptive.h:82
#define TempConfigFor(Class, ConfigId)
Definition: adaptive.h:101
TEMP_CONFIG_STRUCT * TEMP_CONFIG
Definition: adaptive.h:47
INT_TEMPLATES NewIntTemplates()
Definition: intproto.cpp:723
INT_CLASS Class[MAX_NUM_CLASSES]
Definition: intproto.h:124
int GetFontinfoId(ADAPT_CLASS Class, uinT8 ConfigId)
Definition: adaptive.cpp:188
UNICHARSET unicharset
Definition: ccutil.h:68
#define first_node(l)
Definition: oldlist.h:139
uinT8 NumTimesSeen
Definition: adaptive.h:41
TEMP_PROTO NewTempProto()
Definition: adaptive.cpp:248
void AddIntClass(INT_TEMPLATES Templates, CLASS_ID ClassId, INT_CLASS Class)
Definition: intproto.cpp:238
void destroy_nodes(LIST list, void_dest destructor)
Definition: oldlist.cpp:199
void FreeTempProto(void *arg)
Definition: adaptive.cpp:88
void WriteAdaptedTemplates(FILE *File, ADAPT_TEMPLATES Templates)
Definition: adaptive.cpp:489
TEMP_CONFIG ReadTempConfig(TFile *fp)
Definition: adaptive.cpp:418
#define test_bit(array, bit)
Definition: bitvec.h:61
BIT_VECTOR PermProtos
Definition: adaptive.h:67
void FreePermConfig(PERM_CONFIG Config)
Definition: adaptive.cpp:94
ADAPT_CLASS Class[MAX_NUM_CLASSES]
Definition: adaptive.h:80
uint8_t uinT8
Definition: host.h:35
int size() const
Definition: unicharset.h:299
uinT8 NumPermConfigs
Definition: adaptive.h:64
#define IsEmptyAdaptedClass(Class)
Definition: adaptive.h:89
#define MAX_NUM_CONFIGS
Definition: intproto.h:46
void Efree(void *ptr)
Definition: emalloc.cpp:79
void free_int_templates(INT_TEMPLATES templates)
Definition: intproto.cpp:739
#define MAX_NUM_CLASSES
Definition: matchdefs.h:31
#define ConfigIsPermanent(Class, ConfigId)
Definition: adaptive.h:92
#define iterate(l)
Definition: oldlist.h:159
int count(LIST var_list)
Definition: oldlist.cpp:103
UNICHAR_ID * Ambigs
Definition: adaptive.h:51
INT_CLASS NewIntClass(int MaxNumProtos, int MaxNumConfigs)
Definition: intproto.cpp:664
int FRead(void *buffer, int size, int count)
Definition: serialis.cpp:108