tesseract  4.00.00dev
clusttool.cpp File Reference
#include "clusttool.h"
#include "const.h"
#include "danerror.h"
#include "emalloc.h"
#include "scanutils.h"
#include <stdio.h>
#include <math.h>

Go to the source code of this file.

Macros

#define TOKENSIZE   80
 
#define QUOTED_TOKENSIZE   "79"
 
#define MAXSAMPLESIZE   65535
 

Functions

uinT16 ReadSampleSize (TFile *fp)
 
PARAM_DESC * ReadParamDesc (TFile *fp, uinT16 N)
 
PROTOTYPE * ReadPrototype (TFile *fp, uinT16 N)
 
PROTOSTYLE ReadProtoStyle (const char *shape)
 
FLOAT32 * ReadNFloats (TFile *fp, uinT16 N, FLOAT32 Buffer[])
 
void WriteParamDesc (FILE *File, uinT16 N, const PARAM_DESC ParamDesc[])
 
void WritePrototype (FILE *File, uinT16 N, PROTOTYPE *Proto)
 
void WriteNFloats (FILE *File, uinT16 N, FLOAT32 Array[])
 
void WriteProtoStyle (FILE *File, PROTOSTYLE ProtoStyle)
 
void WriteProtoList (FILE *File, uinT16 N, PARAM_DESC ParamDesc[], LIST ProtoList, BOOL8 WriteSigProtos, BOOL8 WriteInsigProtos)
 

Macro Definition Documentation

◆ MAXSAMPLESIZE

#define MAXSAMPLESIZE   65535

Definition at line 33 of file clusttool.cpp.

◆ QUOTED_TOKENSIZE

#define QUOTED_TOKENSIZE   "79"

Definition at line 32 of file clusttool.cpp.

◆ TOKENSIZE

#define TOKENSIZE   80

Definition at line 31 of file clusttool.cpp.

Function Documentation

◆ ReadNFloats()

FLOAT32* ReadNFloats ( TFile *  fp,
uinT16  N,
FLOAT32  Buffer[] 
)

This routine reads N floats from the specified text file and places them into Buffer. If Buffer is NULL, a buffer is created and passed back to the caller. If EOF is encountered before any floats can be read, NULL is returned.

Parameters
fp: open text file to read floats from
N: number of floats to read
Buffer: pointer to buffer to place floats into (may be NULL)
Returns
Pointer to buffer holding floats or NULL if EOF
Note
Globals: None
Exceptions: None; ILLEGALFLOAT is not raised here — a failed read is reported with tprintf and NULL is returned.
History: 6/6/89, DSJ, Created.

Definition at line 220 of file clusttool.cpp.

220  {  // Parse N floats from the next text line of fp into Buffer (allocated here if NULL).
221  const int kMaxLineSize = 1024;
222  char line[kMaxLineSize];
223  if (fp->FGets(line, kMaxLineSize) == nullptr) {
224  tprintf("Hit EOF in ReadNFloats!\n");
225  return nullptr;  // no line could be read: report it and signal failure
226  }
227  bool needs_free = false;  // set only when Buffer is allocated in this call
228 
229  if (Buffer == NULL) {
230  Buffer = static_cast<FLOAT32*>(Emalloc(N * sizeof(FLOAT32)));
231  needs_free = true;
232  }
233 
234  char *startptr = line;
235  for (int i = 0; i < N; i++) {
236  char *endptr;
237  Buffer[i] = strtof(startptr, &endptr);
238  if (endptr == startptr) {  // strtof consumed nothing: no float at this position
239  tprintf("Read of %d floats failed!\n", N);
240  if (needs_free) Efree(Buffer);  // a caller-supplied buffer is never freed here
241  return nullptr;
242  }
243  startptr = endptr;  // resume parsing right after the float just read
244  }
245  return Buffer;
246 }
void * Emalloc(int Size)
Definition: emalloc.cpp:47
#define tprintf(...)
Definition: tprintf.h:31
float FLOAT32
Definition: host.h:42
void Efree(void *ptr)
Definition: emalloc.cpp:79
char * FGets(char *buffer, int buffer_size)
Definition: serialis.cpp:86

◆ ReadParamDesc()

PARAM_DESC* ReadParamDesc ( TFile *  fp,
uinT16  N 
)

This routine reads textual descriptions of sets of parameters which describe the characteristics of feature dimensions.

Exceptions:

  • ILLEGALCIRCULARSPEC
  • ILLEGALESSENTIALSPEC
  • ILLEGALMINMAXSPEC
    Parameters
    fp: open text file to read N parameter descriptions from
    N: number of parameter descriptions to read
    Returns
    Pointer to an array of parameter descriptors.
    Note
    Globals: None
    History: 6/6/89, DSJ, Created.

Definition at line 73 of file clusttool.cpp.

73  {  // Read N parameter descriptions, one text line each, into a newly allocated array.
74  PARAM_DESC *ParamDesc;
75  char linear_token[TOKENSIZE], essential_token[TOKENSIZE];
76 
77  ParamDesc = (PARAM_DESC *) Emalloc (N * sizeof (PARAM_DESC));
78  for (int i = 0; i < N; i++) {
79  const int kMaxLineSize = TOKENSIZE * 4;
80  char line[kMaxLineSize];
81  if (fp->FGets(line, kMaxLineSize) == nullptr ||
82  sscanf(line, "%" QUOTED_TOKENSIZE "s %" QUOTED_TOKENSIZE "s %f %f",
83  linear_token, essential_token, &ParamDesc[i].Min,
84  &ParamDesc[i].Max) != 4)  // line format: <token> <token> <min> <max>
85  DoError(ILLEGALCIRCULARSPEC, "Illegal Parameter specification");
86  if (linear_token[0] == 'c')  // first token: "circular" vs "linear" (as WriteParamDesc emits)
87  ParamDesc[i].Circular = TRUE;
88  else
89  ParamDesc[i].Circular = FALSE;
90 
91  if (essential_token[0] == 'e')  // second token: "essential" vs "non-essential"; testing linear_token here could never match
92  ParamDesc[i].NonEssential = FALSE;
93  else
94  ParamDesc[i].NonEssential = TRUE;
95  ParamDesc[i].Range = ParamDesc[i].Max - ParamDesc[i].Min;
96  ParamDesc[i].HalfRange = ParamDesc[i].Range / 2;
97  ParamDesc[i].MidRange = (ParamDesc[i].Max + ParamDesc[i].Min) / 2;
98  }
99  return (ParamDesc);  // caller receives the allocated array
100 }
#define TRUE
Definition: capi.h:45
#define QUOTED_TOKENSIZE
Definition: clusttool.cpp:32
FLOAT32 MidRange
Definition: ocrfeatures.h:53
void * Emalloc(int Size)
Definition: emalloc.cpp:47
FLOAT32 Range
Definition: ocrfeatures.h:51
inT8 Circular
Definition: ocrfeatures.h:47
FLOAT32 Min
Definition: ocrfeatures.h:49
#define ILLEGALCIRCULARSPEC
Definition: clusttool.h:56
inT8 NonEssential
Definition: ocrfeatures.h:48
#define FALSE
Definition: capi.h:46
#define TOKENSIZE
Definition: clusttool.cpp:31
void DoError(int Error, const char *Message)
Definition: danerror.cpp:42
FLOAT32 Max
Definition: ocrfeatures.h:50
char * FGets(char *buffer, int buffer_size)
Definition: serialis.cpp:86
FLOAT32 HalfRange
Definition: ocrfeatures.h:52

◆ ReadProtoStyle()

PROTOSTYLE ReadProtoStyle ( const char *  shape)

This routine interprets a single token, passed in as a string, as a prototype style specification. Only the first character of the token is examined.

Parameters
shape: token holding the prototype style specification
Returns
Prototype style read from text file
Note
Globals: None
Exceptions: None; ILLEGALSTYLESPEC is not raised here — an invalid prototype style specification is reported with tprintf and elliptical is returned.
History: 6/8/89, DSJ, Created.

Definition at line 191 of file clusttool.cpp.

191  {  // Map the first character of shape to a PROTOSTYLE value.
192  switch (shape[0]) {
193  case 's':
194  return spherical;
195  case 'e':
196  return elliptical;
197  case 'a':
198  return automatic;
199  default:
200  break;  // unrecognized: handled after the switch
201  }
202  tprintf("Invalid prototype style specification:%s\n", shape);
203  return elliptical;  // fallback value after reporting the bad token
204 }
#define tprintf(...)
Definition: tprintf.h:31

◆ ReadPrototype()

PROTOTYPE* ReadPrototype ( TFile *  fp,
uinT16  N 
)

This routine reads a textual description of a prototype from the specified file.

Exceptions:

  • ILLEGALSIGNIFICANCESPEC
  • ILLEGALSAMPLECOUNT
  • ILLEGALMEANSPEC
  • ILLEGALVARIANCESPEC
  • ILLEGALDISTRIBUTION
    Parameters
    fp: open text file to read prototype from
    N: number of dimensions used in prototype
    Returns
    Pointer to the prototype read, or NULL if the prototype line is invalid or its style is not spherical or elliptical
    Note
    Globals: None
    History: 6/6/89, DSJ, Created.

Definition at line 118 of file clusttool.cpp.

118  {
119  char sig_token[TOKENSIZE], shape_token[TOKENSIZE];
120  PROTOTYPE *Proto;
121  int SampleCount;
122  int i;
123 
124  const int kMaxLineSize = TOKENSIZE * 4;
125  char line[kMaxLineSize];
126  if (fp->FGets(line, kMaxLineSize) == nullptr ||
127  sscanf(line, "%" QUOTED_TOKENSIZE "s %" QUOTED_TOKENSIZE "s %d",
128  sig_token, shape_token, &SampleCount) != 3) {
129  tprintf("Invalid prototype: %s\n", line);
130  return nullptr;
131  }
132  Proto = (PROTOTYPE *)Emalloc(sizeof(PROTOTYPE));
133  Proto->Cluster = NULL;
134  if (sig_token[0] == 's')
135  Proto->Significant = TRUE;
136  else
137  Proto->Significant = FALSE;
138 
139  Proto->Style = ReadProtoStyle(shape_token);
140 
141  if (SampleCount < 0) DoError(ILLEGALSAMPLECOUNT, "Illegal sample count");
142  Proto->NumSamples = SampleCount;
143 
144  Proto->Mean = ReadNFloats(fp, N, NULL);
145  if (Proto->Mean == NULL) DoError(ILLEGALMEANSPEC, "Illegal prototype mean");
146 
147  switch (Proto->Style) {
148  case spherical:
149  if (ReadNFloats(fp, 1, &(Proto->Variance.Spherical)) == NULL)
150  DoError(ILLEGALVARIANCESPEC, "Illegal prototype variance");
151  Proto->Magnitude.Spherical =
152  1.0 / sqrt((double)(2.0 * PI * Proto->Variance.Spherical));
153  Proto->TotalMagnitude = pow(Proto->Magnitude.Spherical, (float)N);
154  Proto->LogMagnitude = log((double)Proto->TotalMagnitude);
155  Proto->Weight.Spherical = 1.0 / Proto->Variance.Spherical;
156  Proto->Distrib = NULL;
157  break;
158  case elliptical:
159  Proto->Variance.Elliptical = ReadNFloats(fp, N, NULL);
160  if (Proto->Variance.Elliptical == NULL)
161  DoError(ILLEGALVARIANCESPEC, "Illegal prototype variance");
162  Proto->Magnitude.Elliptical = (FLOAT32 *)Emalloc(N * sizeof(FLOAT32));
163  Proto->Weight.Elliptical = (FLOAT32 *)Emalloc(N * sizeof(FLOAT32));
164  Proto->TotalMagnitude = 1.0;
165  for (i = 0; i < N; i++) {
166  Proto->Magnitude.Elliptical[i] =
167  1.0 / sqrt((double)(2.0 * PI * Proto->Variance.Elliptical[i]));
168  Proto->Weight.Elliptical[i] = 1.0 / Proto->Variance.Elliptical[i];
169  Proto->TotalMagnitude *= Proto->Magnitude.Elliptical[i];
170  }
171  Proto->LogMagnitude = log((double)Proto->TotalMagnitude);
172  Proto->Distrib = NULL;
173  break;
174  default:
175  Efree(Proto);
176  tprintf("Invalid prototype style\n");
177  return nullptr;
178  }
179  return Proto;
180 }
DISTRIBUTION * Distrib
Definition: cluster.h:77
FLOAT32 LogMagnitude
Definition: cluster.h:80
#define TRUE
Definition: capi.h:45
FLOAT32 * Elliptical
Definition: cluster.h:64
#define QUOTED_TOKENSIZE
Definition: clusttool.cpp:32
#define ILLEGALSAMPLECOUNT
Definition: clusttool.h:60
void * Emalloc(int Size)
Definition: emalloc.cpp:47
#define tprintf(...)
Definition: tprintf.h:31
FLOATUNION Weight
Definition: cluster.h:83
FLOATUNION Variance
Definition: cluster.h:81
unsigned Style
Definition: cluster.h:74
#define PI
Definition: const.h:19
FLOAT32 * ReadNFloats(TFile *fp, uinT16 N, FLOAT32 Buffer[])
Definition: clusttool.cpp:220
#define FALSE
Definition: capi.h:46
FLOAT32 * Mean
Definition: cluster.h:78
#define TOKENSIZE
Definition: clusttool.cpp:31
PROTOSTYLE ReadProtoStyle(const char *shape)
Definition: clusttool.cpp:191
float FLOAT32
Definition: host.h:42
FLOAT32 Spherical
Definition: cluster.h:63
void DoError(int Error, const char *Message)
Definition: danerror.cpp:42
unsigned NumSamples
Definition: cluster.h:75
unsigned Significant
Definition: cluster.h:68
CLUSTER * Cluster
Definition: cluster.h:76
#define ILLEGALVARIANCESPEC
Definition: clusttool.h:62
void Efree(void *ptr)
Definition: emalloc.cpp:79
#define ILLEGALMEANSPEC
Definition: clusttool.h:61
char * FGets(char *buffer, int buffer_size)
Definition: serialis.cpp:86
FLOATUNION Magnitude
Definition: cluster.h:82
FLOAT32 TotalMagnitude
Definition: cluster.h:79

◆ ReadSampleSize()

uinT16 ReadSampleSize ( TFile *  fp)

This routine reads a single integer from the specified file and checks to ensure that it is between 0 and MAXSAMPLESIZE.

Parameters
fp: open text file to read sample size from
Returns
Sample size
Note
Globals: None
Exceptions: ILLEGALSAMPLESIZE illegal format or range
History: 6/6/89, DSJ, Created.

Definition at line 47 of file clusttool.cpp.

47  {  // Read one line from fp and parse it as an integer sample size.
48  int SampleSize = 0;
49 
50  const int kMaxLineSize = 100;
51  char line[kMaxLineSize];
52  if (fp->FGets(line, kMaxLineSize) == nullptr ||
53  sscanf(line, "%d", &SampleSize) != 1 || (SampleSize < 0) ||
54  (SampleSize > MAXSAMPLESIZE))
55  DoError (ILLEGALSAMPLESIZE, "Illegal sample size");  // no line, not an integer, or outside [0, MAXSAMPLESIZE]
56  return (SampleSize);
57 }
#define MAXSAMPLESIZE
Definition: clusttool.cpp:33
#define ILLEGALSAMPLESIZE
Definition: clusttool.h:55
void DoError(int Error, const char *Message)
Definition: danerror.cpp:42
char * FGets(char *buffer, int buffer_size)
Definition: serialis.cpp:86

◆ WriteNFloats()

void WriteNFloats ( FILE *  File,
uinT16  N,
FLOAT32  Array[] 
)

This routine writes a text representation of N floats from an array to a file. All of the floats are placed on one line.

Parameters
File: open text file to write N floats to
N: number of floats to write
Array: array of floats to write
Returns
None
Note
Globals: None
Exceptions: None
History: 6/6/89, DSJ, Created.

Definition at line 338 of file clusttool.cpp.

338  {  // Write the first N floats of Array on one line of File.
339  for (int i = 0; i < N; i++)
340  fprintf(File, " %9.6f", Array[i]);  // each value is preceded by a space
341  fprintf(File, "\n");  // terminate the line, even when N is 0
342 }

◆ WriteParamDesc()

void WriteParamDesc ( FILE *  File,
uinT16  N,
const PARAM_DESC  ParamDesc[] 
)

This routine writes an array of dimension descriptors to the specified text file.

Parameters
File: open text file to write param descriptors to
N: number of param descriptors to write
ParamDesc: array of param descriptors to write
Returns
None
Note
Globals: None
Exceptions: None
History: 6/6/89, DSJ, Created.

Definition at line 259 of file clusttool.cpp.

259  {  // Write one text line per descriptor: <circular|linear> <non-essential|essential> <Min> <Max>.
260  int i;
261 
262  for (i = 0; i < N; i++) {
263  if (ParamDesc[i].Circular)
264  fprintf (File, "circular ");
265  else
266  fprintf (File, "linear ");
267 
268  if (ParamDesc[i].NonEssential)
269  fprintf (File, "non-essential ");
270  else
271  fprintf (File, "essential ");
272 
273  fprintf (File, "%10.6f %10.6f\n", ParamDesc[i].Min, ParamDesc[i].Max);  // ends this descriptor's line
274  }
275 }

◆ WriteProtoList()

void WriteProtoList ( FILE *  File,
uinT16  N,
PARAM_DESC  ParamDesc[],
LIST  ProtoList,
BOOL8  WriteSigProtos,
BOOL8  WriteInsigProtos 
)

This routine writes a textual description of each prototype in the prototype list to the specified file. It also writes a file header which includes the number of dimensions in feature space and the descriptions for each dimension.

Parameters
File: open text file to write prototypes to
N: number of dimensions in feature space
ParamDesc: descriptions for each dimension
ProtoList: list of prototypes to be written
WriteSigProtos: TRUE to write out significant prototypes
WriteInsigProtos: TRUE to write out insignificants
Note
Globals: None
Returns
None
Note
Exceptions: None
History: 6/12/89, DSJ, Created.

Definition at line 389 of file clusttool.cpp.

391  {  // Write the header (N and the param descriptors), then the selected prototypes.
392  PROTOTYPE *Proto;
393 
394  /* write file header */
395  fprintf(File,"%0d\n",N);
396  WriteParamDesc(File,N,ParamDesc);
397 
398  /* write prototypes */
399  iterate(ProtoList)
400  {
401  Proto = (PROTOTYPE *) first_node ( ProtoList );
402  if ((Proto->Significant && WriteSigProtos) ||
403  (!Proto->Significant && WriteInsigProtos))  // write only the kinds the caller asked for
404  WritePrototype(File, N, Proto);
405  }
406 }
void WriteParamDesc(FILE *File, uinT16 N, const PARAM_DESC ParamDesc[])
Definition: clusttool.cpp:259
void WritePrototype(FILE *File, uinT16 N, PROTOTYPE *Proto)
Definition: clusttool.cpp:288
#define first_node(l)
Definition: oldlist.h:139
unsigned Significant
Definition: cluster.h:68
#define iterate(l)
Definition: oldlist.h:159

◆ WriteProtoStyle()

void WriteProtoStyle ( FILE *  File,
PROTOSTYLE  ProtoStyle 
)

This routine writes to the specified text file a word which represents the ProtoStyle. It does not append a carriage return to the end.

Parameters
File: open text file to write prototype style to
ProtoStyle: prototype style to write
Returns
None
Note
Globals: None
Exceptions: None
History: 6/8/89, DSJ, Created.

Definition at line 355 of file clusttool.cpp.

355  {  // Write the word naming ProtoStyle to File; no newline is written.
356  switch (ProtoStyle) {  // no default case: any other value writes nothing
357  case spherical:
358  fprintf (File, "spherical");
359  break;
360  case elliptical:
361  fprintf (File, "elliptical");
362  break;
363  case mixed:
364  fprintf (File, "mixed");
365  break;
366  case automatic:
367  fprintf (File, "automatic");
368  break;
369  }
370 }
Definition: cluster.h:45

◆ WritePrototype()

void WritePrototype ( FILE *  File,
uinT16  N,
PROTOTYPE *  Proto 
)

This routine writes a textual description of a prototype to the specified text file.

Parameters
File: open text file to write prototype to
N: number of dimensions in feature space
Proto: prototype to write out
Returns
None
Note
Globals: None
Exceptions: None
History: 6/12/89, DSJ, Created.

Definition at line 288 of file clusttool.cpp.

288  {  // Write one prototype: header line, mean line, then style-dependent variance data.
289  int i;
290 
291  if (Proto->Significant)
292  fprintf (File, "significant ");
293  else
294  fprintf (File, "insignificant ");
295  WriteProtoStyle (File, (PROTOSTYLE) Proto->Style);
296  fprintf (File, "%6d\n\t", Proto->NumSamples);  // ends the header line and indents the mean line
297  WriteNFloats (File, N, Proto->Mean);
298  fprintf (File, "\t");  // indent for the variance data that follows
299 
300  switch (Proto->Style) {  // no case for automatic and no default: nothing more is written for those
301  case spherical:
302  WriteNFloats (File, 1, &(Proto->Variance.Spherical));
303  break;
304  case elliptical:
305  WriteNFloats (File, N, Proto->Variance.Elliptical);
306  break;
307  case mixed:
308  for (i = 0; i < N; i++)  // the loop body is only the switch below
309  switch (Proto->Distrib[i]) {
310  case normal:
311  fprintf (File, " %9s", "normal");
312  break;
313  case uniform:
314  fprintf (File, " %9s", "uniform");
315  break;
316  case D_random:
317  fprintf (File, " %9s", "random");
318  break;
319  case DISTRIBUTION_COUNT:
320  ASSERT_HOST(!"Distribution count not allowed!");
321  }
322  fprintf (File, "\n\t");  // runs once, after the loop: end the distribution line
323  WriteNFloats (File, N, Proto->Variance.Elliptical);
324  }
325 }
DISTRIBUTION * Distrib
Definition: cluster.h:77
void WriteProtoStyle(FILE *File, PROTOSTYLE ProtoStyle)
Definition: clusttool.cpp:355
FLOAT32 * Elliptical
Definition: cluster.h:64
#define ASSERT_HOST(x)
Definition: errcode.h:84
FLOATUNION Variance
Definition: cluster.h:81
unsigned Style
Definition: cluster.h:74
void WriteNFloats(FILE *File, uinT16 N, FLOAT32 Array[])
Definition: clusttool.cpp:338
PROTOSTYLE
Definition: cluster.h:44
FLOAT32 * Mean
Definition: cluster.h:78
Definition: cluster.h:45
FLOAT32 Spherical
Definition: cluster.h:63
Definition: cluster.h:59
unsigned NumSamples
Definition: cluster.h:75
unsigned Significant
Definition: cluster.h:68