tesseract/a00674_source.html

 /******************************************************************************
  ** Filename: cluster.c
  ** Purpose:  Routines for clustering points in N-D space
  ** Author:   Dan Johnson
  ** History:  5/29/89, DSJ, Created.
  **
  ** (c) Copyright Hewlett-Packard Company, 1988.
  ** Licensed under the Apache License, Version 2.0 (the "License");
  ** you may not use this file except in compliance with the License.
  ** You may obtain a copy of the License at
  ** http://www.apache.org/licenses/LICENSE-2.0
  ** Unless required by applicable law or agreed to in writing, software
  ** distributed under the License is distributed on an "AS IS" BASIS,
  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  ** See the License for the specific language governing permissions and
  ** limitations under the License.
  ******************************************************************************/
 #include "const.h"
 #include "cluster.h"
 #include "emalloc.h"
 #include "genericheap.h"
 #include "helpers.h"
 #include "kdpair.h"
 #include "matrix.h"
 #include "tprintf.h"
 #include "danerror.h"
 #include <math.h>

 // Compile-time switch: when non-zero, MakePrototype() first tries
 // TestEllipticalProto() for clusters configured with the elliptical style.
 #define HOTELLING 1  // If true use Hotelling's test to decide where to split.
 #define FTABLE_X 10  // Size of FTable: number of columns (inner dimension).
 #define FTABLE_Y 100  // Size of FTable: number of rows (outer dimension).

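 // Table of critical values of the F-distribution at the 1% significance
 // level (99% confidence), stored as FTABLE_Y rows of FTABLE_X columns.
 // The values appear to be indexed as
 // [denominator degrees of freedom - 1][numerator degrees of freedom - 1].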
 const double FTable[FTABLE_Y][FTABLE_X] = {
  {4052.19, 4999.52, 5403.34, 5624.62, 5763.65, 5858.97, 5928.33, 5981.10, 6022.50, 6055.85,},
   {98.502,  99.000,  99.166,  99.249,  99.300,  99.333,  99.356,  99.374,  99.388,  99.399,},
   {34.116,  30.816,  29.457,  28.710,  28.237,  27.911,  27.672,  27.489,  27.345,  27.229,},
   {21.198,  18.000,  16.694,  15.977,  15.522,  15.207,  14.976,  14.799,  14.659,  14.546,},
   {16.258,  13.274,  12.060,  11.392,  10.967,  10.672,  10.456,  10.289,  10.158,  10.051,},
   {13.745,  10.925,   9.780,   9.148,   8.746,   8.466,   8.260,   8.102,   7.976,   7.874,},
   {12.246,   9.547,   8.451,   7.847,   7.460,   7.191,   6.993,   6.840,   6.719,   6.620,},
   {11.259,   8.649,   7.591,   7.006,   6.632,   6.371,   6.178,   6.029,   5.911,   5.814,},
   {10.561,   8.022,   6.992,   6.422,   6.057,   5.802,   5.613,   5.467,   5.351,   5.257,},
   {10.044,   7.559,   6.552,   5.994,   5.636,   5.386,   5.200,   5.057,   4.942,   4.849,},
   { 9.646,   7.206,   6.217,   5.668,   5.316,   5.069,   4.886,   4.744,   4.632,   4.539,},
   { 9.330,   6.927,   5.953,   5.412,   5.064,   4.821,   4.640,   4.499,   4.388,   4.296,},
   { 9.074,   6.701,   5.739,   5.205,   4.862,   4.620,   4.441,   4.302,   4.191,   4.100,},
   { 8.862,   6.515,   5.564,   5.035,   4.695,   4.456,   4.278,   4.140,   4.030,   3.939,},
   { 8.683,   6.359,   5.417,   4.893,   4.556,   4.318,   4.142,   4.004,   3.895,   3.805,},
   { 8.531,   6.226,   5.292,   4.773,   4.437,   4.202,   4.026,   3.890,   3.780,   3.691,},
   { 8.400,   6.112,   5.185,   4.669,   4.336,   4.102,   3.927,   3.791,   3.682,   3.593,},
   { 8.285,   6.013,   5.092,   4.579,   4.248,   4.015,   3.841,   3.705,   3.597,   3.508,},
   { 8.185,   5.926,   5.010,   4.500,   4.171,   3.939,   3.765,   3.631,   3.523,   3.434,},
   { 8.096,   5.849,   4.938,   4.431,   4.103,   3.871,   3.699,   3.564,   3.457,   3.368,},
   { 8.017,   5.780,   4.874,   4.369,   4.042,   3.812,   3.640,   3.506,   3.398,   3.310,},
   { 7.945,   5.719,   4.817,   4.313,   3.988,   3.758,   3.587,   3.453,   3.346,   3.258,},
   { 7.881,   5.664,   4.765,   4.264,   3.939,   3.710,   3.539,   3.406,   3.299,   3.211,},
   { 7.823,   5.614,   4.718,   4.218,   3.895,   3.667,   3.496,   3.363,   3.256,   3.168,},
   { 7.770,   5.568,   4.675,   4.177,   3.855,   3.627,   3.457,   3.324,   3.217,   3.129,},
   { 7.721,   5.526,   4.637,   4.140,   3.818,   3.591,   3.421,   3.288,   3.182,   3.094,},
   { 7.677,   5.488,   4.601,   4.106,   3.785,   3.558,   3.388,   3.256,   3.149,   3.062,},
   { 7.636,   5.453,   4.568,   4.074,   3.754,   3.528,   3.358,   3.226,   3.120,   3.032,},
   { 7.598,   5.420,   4.538,   4.045,   3.725,   3.499,   3.330,   3.198,   3.092,   3.005,},
   { 7.562,   5.390,   4.510,   4.018,   3.699,   3.473,   3.305,   3.173,   3.067,   2.979,},
   { 7.530,   5.362,   4.484,   3.993,   3.675,   3.449,   3.281,   3.149,   3.043,   2.955,},
   { 7.499,   5.336,   4.459,   3.969,   3.652,   3.427,   3.258,   3.127,   3.021,   2.934,},
   { 7.471,   5.312,   4.437,   3.948,   3.630,   3.406,   3.238,   3.106,   3.000,   2.913,},
   { 7.444,   5.289,   4.416,   3.927,   3.611,   3.386,   3.218,   3.087,   2.981,   2.894,},
   { 7.419,   5.268,   4.396,   3.908,   3.592,   3.368,   3.200,   3.069,   2.963,   2.876,},
   { 7.396,   5.248,   4.377,   3.890,   3.574,   3.351,   3.183,   3.052,   2.946,   2.859,},
   { 7.373,   5.229,   4.360,   3.873,   3.558,   3.334,   3.167,   3.036,   2.930,   2.843,},
   { 7.353,   5.211,   4.343,   3.858,   3.542,   3.319,   3.152,   3.021,   2.915,   2.828,},
   { 7.333,   5.194,   4.327,   3.843,   3.528,   3.305,   3.137,   3.006,   2.901,   2.814,},
   { 7.314,   5.179,   4.313,   3.828,   3.514,   3.291,   3.124,   2.993,   2.888,   2.801,},
   { 7.296,   5.163,   4.299,   3.815,   3.501,   3.278,   3.111,   2.980,   2.875,   2.788,},
   { 7.280,   5.149,   4.285,   3.802,   3.488,   3.266,   3.099,   2.968,   2.863,   2.776,},
   { 7.264,   5.136,   4.273,   3.790,   3.476,   3.254,   3.087,   2.957,   2.851,   2.764,},
   { 7.248,   5.123,   4.261,   3.778,   3.465,   3.243,   3.076,   2.946,   2.840,   2.754,},
   { 7.234,   5.110,   4.249,   3.767,   3.454,   3.232,   3.066,   2.935,   2.830,   2.743,},
   { 7.220,   5.099,   4.238,   3.757,   3.444,   3.222,   3.056,   2.925,   2.820,   2.733,},
   { 7.207,   5.087,   4.228,   3.747,   3.434,   3.213,   3.046,   2.916,   2.811,   2.724,},
   { 7.194,   5.077,   4.218,   3.737,   3.425,   3.204,   3.037,   2.907,   2.802,   2.715,},
   { 7.182,   5.066,   4.208,   3.728,   3.416,   3.195,   3.028,   2.898,   2.793,   2.706,},
   { 7.171,   5.057,   4.199,   3.720,   3.408,   3.186,   3.020,   2.890,   2.785,   2.698,},
   { 7.159,   5.047,   4.191,   3.711,   3.400,   3.178,   3.012,   2.882,   2.777,   2.690,},
   { 7.149,   5.038,   4.182,   3.703,   3.392,   3.171,   3.005,   2.874,   2.769,   2.683,},
   { 7.139,   5.030,   4.174,   3.695,   3.384,   3.163,   2.997,   2.867,   2.762,   2.675,},
   { 7.129,   5.021,   4.167,   3.688,   3.377,   3.156,   2.990,   2.860,   2.755,   2.668,},
   { 7.119,   5.013,   4.159,   3.681,   3.370,   3.149,   2.983,   2.853,   2.748,   2.662,},
   { 7.110,   5.006,   4.152,   3.674,   3.363,   3.143,   2.977,   2.847,   2.742,   2.655,},
   { 7.102,   4.998,   4.145,   3.667,   3.357,   3.136,   2.971,   2.841,   2.736,   2.649,},
   { 7.093,   4.991,   4.138,   3.661,   3.351,   3.130,   2.965,   2.835,   2.730,   2.643,},
   { 7.085,   4.984,   4.132,   3.655,   3.345,   3.124,   2.959,   2.829,   2.724,   2.637,},
   { 7.077,   4.977,   4.126,   3.649,   3.339,   3.119,   2.953,   2.823,   2.718,   2.632,},
   { 7.070,   4.971,   4.120,   3.643,   3.333,   3.113,   2.948,   2.818,   2.713,   2.626,},
   { 7.062,   4.965,   4.114,   3.638,   3.328,   3.108,   2.942,   2.813,   2.708,   2.621,},
   { 7.055,   4.959,   4.109,   3.632,   3.323,   3.103,   2.937,   2.808,   2.703,   2.616,},
   { 7.048,   4.953,   4.103,   3.627,   3.318,   3.098,   2.932,   2.803,   2.698,   2.611,},
   { 7.042,   4.947,   4.098,   3.622,   3.313,   3.093,   2.928,   2.798,   2.693,   2.607,},
   { 7.035,   4.942,   4.093,   3.618,   3.308,   3.088,   2.923,   2.793,   2.689,   2.602,},
   { 7.029,   4.937,   4.088,   3.613,   3.304,   3.084,   2.919,   2.789,   2.684,   2.598,},
   { 7.023,   4.932,   4.083,   3.608,   3.299,   3.080,   2.914,   2.785,   2.680,   2.593,},
   { 7.017,   4.927,   4.079,   3.604,   3.295,   3.075,   2.910,   2.781,   2.676,   2.589,},
   { 7.011,   4.922,   4.074,   3.600,   3.291,   3.071,   2.906,   2.777,   2.672,   2.585,},
   { 7.006,   4.917,   4.070,   3.596,   3.287,   3.067,   2.902,   2.773,   2.668,   2.581,},
   { 7.001,   4.913,   4.066,   3.591,   3.283,   3.063,   2.898,   2.769,   2.664,   2.578,},
   { 6.995,   4.908,   4.062,   3.588,   3.279,   3.060,   2.895,   2.765,   2.660,   2.574,},
   { 6.990,   4.904,   4.058,   3.584,   3.275,   3.056,   2.891,   2.762,   2.657,   2.570,},
   { 6.985,   4.900,   4.054,   3.580,   3.272,   3.052,   2.887,   2.758,   2.653,   2.567,},
   { 6.981,   4.896,   4.050,   3.577,   3.268,   3.049,   2.884,   2.755,   2.650,   2.563,},
   { 6.976,   4.892,   4.047,   3.573,   3.265,   3.046,   2.881,   2.751,   2.647,   2.560,},
   { 6.971,   4.888,   4.043,   3.570,   3.261,   3.042,   2.877,   2.748,   2.644,   2.557,},
   { 6.967,   4.884,   4.040,   3.566,   3.258,   3.039,   2.874,   2.745,   2.640,   2.554,},
   { 6.963,   4.881,   4.036,   3.563,   3.255,   3.036,   2.871,   2.742,   2.637,   2.551,},
   { 6.958,   4.877,   4.033,   3.560,   3.252,   3.033,   2.868,   2.739,   2.634,   2.548,},
   { 6.954,   4.874,   4.030,   3.557,   3.249,   3.030,   2.865,   2.736,   2.632,   2.545,},
   { 6.950,   4.870,   4.027,   3.554,   3.246,   3.027,   2.863,   2.733,   2.629,   2.542,},
   { 6.947,   4.867,   4.024,   3.551,   3.243,   3.025,   2.860,   2.731,   2.626,   2.539,},
   { 6.943,   4.864,   4.021,   3.548,   3.240,   3.022,   2.857,   2.728,   2.623,   2.537,},
   { 6.939,   4.861,   4.018,   3.545,   3.238,   3.019,   2.854,   2.725,   2.621,   2.534,},
   { 6.935,   4.858,   4.015,   3.543,   3.235,   3.017,   2.852,   2.723,   2.618,   2.532,},
   { 6.932,   4.855,   4.012,   3.540,   3.233,   3.014,   2.849,   2.720,   2.616,   2.529,},
   { 6.928,   4.852,   4.010,   3.538,   3.230,   3.012,   2.847,   2.718,   2.613,   2.527,},
   { 6.925,   4.849,   4.007,   3.535,   3.228,   3.009,   2.845,   2.715,   2.611,   2.524,},
   { 6.922,   4.846,   4.004,   3.533,   3.225,   3.007,   2.842,   2.713,   2.609,   2.522,},
   { 6.919,   4.844,   4.002,   3.530,   3.223,   3.004,   2.840,   2.711,   2.606,   2.520,},
   { 6.915,   4.841,   3.999,   3.528,   3.221,   3.002,   2.838,   2.709,   2.604,   2.518,},
   { 6.912,   4.838,   3.997,   3.525,   3.218,   3.000,   2.835,   2.706,   2.602,   2.515,},
   { 6.909,   4.836,   3.995,   3.523,   3.216,   2.998,   2.833,   2.704,   2.600,   2.513,},
   { 6.906,   4.833,   3.992,   3.521,   3.214,   2.996,   2.831,   2.702,   2.598,   2.511,},
   { 6.904,   4.831,   3.990,   3.519,   3.212,   2.994,   2.829,   2.700,   2.596,   2.509,},
   { 6.901,   4.829,   3.988,   3.517,   3.210,   2.992,   2.827,   2.698,   2.594,   2.507,},
   { 6.898,   4.826,   3.986,   3.515,   3.208,   2.990,   2.825,   2.696,   2.592,   2.505,},
   { 6.895,   4.824,   3.984,   3.513,   3.206,   2.988,   2.823,   2.694,   2.590,   2.503}
 };

 // NOTE(review): presumably a lower bound applied to computed variances;
 // its use is not visible in this part of the file -- confirm.
 #define MINVARIANCE     0.0004

 // Sample-count thresholds.  MINSAMPLES is the smallest entry of kCountTable
 // and is derived from the minimum bucket count (MINBUCKETS).
 #define MINSAMPLESPERBUCKET 5
 #define MINSAMPLES    (MINBUCKETS * MINSAMPLESPERBUCKET)
 #define MINSAMPLESNEEDED  1

 // BUCKETTABLESIZE is the length of the BUCKETS::Bucket mapping table.
 // NORMALEXTENT is the number of standard deviations covered on each side of
 // the mean by that table (see kNormalStdDev).
 #define BUCKETTABLESIZE   1024
 #define NORMALEXTENT    3.0

 // A "potential" cluster: an existing cluster paired with the cluster that
 // is currently its nearest neighbor in the kd-tree.
 struct TEMPCLUSTER {
   CLUSTER *Cluster;   // the cluster this candidate is built around
   CLUSTER *Neighbor;  // nearest neighbor found for Cluster
 };

 // Heap entry pairing a float key (the distance written by
 // FindNearestNeighbor) with a pointer to the candidate it belongs to.
 typedef tesseract::KDPairInc<float, TEMPCLUSTER*> ClusterPair;
 // Heap of candidate pairs used while building the cluster tree.
 typedef tesseract::GenericHeap<ClusterPair> ClusterHeap;

 // Statistics describing one cluster.  Produced by ComputeStatistics() and
 // released with FreeStatistics().
 struct STATISTICS {
   FLOAT32 AvgVariance;           // NOTE(review): presumably the mean variance
                                  // over all dimensions -- confirm
   FLOAT32 *CoVariance;           // covariance data handed to Independent()
   FLOAT32 *Min;                  // largest negative distance from the mean
   FLOAT32 *Max;                  // largest positive distance from the mean
 };

 // Histogram used to test samples against a given distribution.  Instances
 // are obtained through GetBuckets()/MakeBuckets(), cached per clusterer in
 // bucket_cache, and released with FreeBuckets().
 struct BUCKETS {
   DISTRIBUTION Distribution;     // distribution being tested for
   uinT32 SampleCount;            // # of samples in histogram
   FLOAT64 Confidence;            // confidence level of test
   FLOAT64 ChiSquared;            // test threshold
   uinT16 NumberOfBuckets;        // number of cells in histogram
   uinT16 Bucket[BUCKETTABLESIZE];// mapping to histogram buckets
   uinT32 *Count;                 // frequency of occurrence histogram
   FLOAT32 *ExpectedCount;        // expected histogram
 };

 // Record of a chi-squared computation.  Created by NewChiStruct() from a
 // degrees-of-freedom / alpha pair and passed to ChiArea() and AlphaMatch().
 // NOTE(review): field meanings below are inferred from their names -- confirm.
 struct CHISTRUCT{
   uinT16 DegreesOfFreedom;  // degrees of freedom of the distribution
   FLOAT64 Alpha;            // presumably the tail probability being solved for
   FLOAT64 ChiSquared;       // presumably the chi-squared value for Alpha
 };

 // For use with KDWalk / MakePotentialClusters
 // Carries the state that the MakePotentialClusters callback needs while
 // CreateClusterTree walks the kd-tree.
 struct ClusteringContext {
   ClusterHeap *heap;  // heap used to hold temp clusters, "best" on top
   TEMPCLUSTER *candidates;  // array of potential clusters
   KDTREE *tree;  // kd-tree to be searched for neighbors
   inT32 next;  // next candidate to be used
 };

 // Pointer to a density function taking an integer position; NormalDensity()
 // and UniformDensity() have this shape.
 typedef FLOAT64 (*DENSITYFUNC) (inT32);
 // Pointer to a function evaluated by Solve(): takes chi-squared parameters
 // and an x value.
 typedef FLOAT64 (*SOLVEFUNC) (CHISTRUCT *, double);

 // Non-zero when N is odd (the result of N % 2).
 #define Odd(N) ((N)%2)
 // Reflects index N within a range of R entries: 0 <-> R-1, 1 <-> R-2, ...
 #define Mirror(N,R) ((R) - (N) - 1)
 // Absolute value of N.  N is evaluated more than once, so avoid arguments
 // with side effects.
 #define Abs(N) ( ( (N) < 0 ) ? ( -(N) ) : (N) )

 //--------------Global Data Definitions and Declarations----------------------
 // Square root of 2*pi, used to normalize the normal density.
 #define SqrtOf2Pi     2.506628275
 // Parameters of a normal curve laid over a table of BUCKETTABLESIZE entries
 // that spans NORMALEXTENT standard deviations on each side of its centre.
 // Standard deviation, expressed in table entries.
 static const FLOAT64 kNormalStdDev = BUCKETTABLESIZE / (2.0 * NORMALEXTENT);
 // Variance: the square of kNormalStdDev.
 static const FLOAT64 kNormalVariance =
     (BUCKETTABLESIZE * BUCKETTABLESIZE) / (4.0 * NORMALEXTENT * NORMALEXTENT);
 // Peak scale factor: 1 / (sqrt(2*pi) * kNormalStdDev).
 static const FLOAT64 kNormalMagnitude =
     (2.0 * NORMALEXTENT) / (SqrtOf2Pi * BUCKETTABLESIZE);
 // Mean: the centre of the table.
 static const FLOAT64 kNormalMean = BUCKETTABLESIZE / 2;

 // Number of entries in the two parallel lookup tables below.
 #define LOOKUPTABLESIZE   8
 #define MAXDEGREESOFFREEDOM MAXBUCKETS

 // Sample-count breakpoints; entry i pairs with kBucketsTable[i].
 // NOTE(review): presumably consulted by OptimumNumberOfBuckets() -- confirm.
 static const uinT32 kCountTable[LOOKUPTABLESIZE] = {
   MINSAMPLES, 200, 400, 600, 800, 1000, 1500, 2000
 };  // number of samples

 // Bucket counts matching the sample counts in kCountTable.
 static const uinT16 kBucketsTable[LOOKUPTABLESIZE] = {
   MINBUCKETS, 16, 20, 24, 27, 30, 35, MAXBUCKETS
 };  // number of buckets

 /*-------------------------------------------------------------------------
           Private Function Prototypes
 --------------------------------------------------------------------------*/
 // Builds the binary cluster tree from the samples in the clusterer's
 // kd-tree and stores its root in Clusterer->Root.
 void CreateClusterTree(CLUSTERER *Clusterer);

 // KDWalk callback: records Cluster together with its nearest neighbor as a
 // candidate pair and pushes it on the heap held in context.
 void MakePotentialClusters(ClusteringContext *context, CLUSTER *Cluster,
                            inT32 Level);

 // Returns the closest cluster in Tree that is not Cluster itself (NULL if
 // there is none) and writes its distance to *Distance.
 CLUSTER *FindNearestNeighbor(KDTREE *Tree,
                              CLUSTER *Cluster,
                              FLOAT32 *Distance);

 // Allocates a parent cluster for the pair held in TempCluster and puts it
 // into the kd-tree in place of its two children.
 CLUSTER *MakeNewCluster(CLUSTERER *Clusterer, TEMPCLUSTER *TempCluster);

 // Computes the sample-count-weighted mean of two clusters.
 //   N          number of dimensions
 //   ParamDesc  description of each dimension (circular dimensions wrap)
 //   n1, n2     number of samples in each of the two clusters
 //   m          output: merged mean, one entry per dimension
 //   m1, m2     means of the two clusters being merged
 // Returns the combined sample count n1 + n2.
 // The "register" specifiers were dropped: the keyword is deprecated since
 // C++11 and no longer valid in C++17, and the definition does not use it.
 inT32 MergeClusters(inT16 N,
                     PARAM_DESC ParamDesc[],
                     inT32 n1,
                     inT32 n2,
                     FLOAT32 m[],
                     FLOAT32 m1[], FLOAT32 m2[]);

 // Walks the cluster tree from the root, turning each cluster into a
 // prototype or, failing that, descending into its two children.
 void ComputePrototypes(CLUSTERER *Clusterer, CLUSTERCONFIG *Config);

 // Tries to build a prototype for Cluster; returns NULL when the cluster is
 // rejected.
 PROTOTYPE *MakePrototype(CLUSTERER *Clusterer,
                          CLUSTERCONFIG *Config,
                          CLUSTER *Cluster);

 // Called by MakePrototype to handle degenerate clusters that need no
 // further analysis; a non-NULL result is used directly as the prototype.
 PROTOTYPE *MakeDegenerateProto(uinT16 N,
                                CLUSTER *Cluster,
                                STATISTICS *Statistics,
                                PROTOSTYLE Style,
                                inT32 MinSamples);

 // Tried first by MakePrototype for the elliptical style when HOTELLING is
 // enabled; a non-NULL result is used directly as the prototype.
 PROTOTYPE *TestEllipticalProto(CLUSTERER *Clusterer,
                                CLUSTERCONFIG *Config,
                                CLUSTER *Cluster,
                                STATISTICS *Statistics);

 // Style-specific prototype builders that take the histogram buckets used
 // for testing.
 PROTOTYPE *MakeSphericalProto(CLUSTERER *Clusterer,
                               CLUSTER *Cluster,
                               STATISTICS *Statistics,
                               BUCKETS *Buckets);

 PROTOTYPE *MakeEllipticalProto(CLUSTERER *Clusterer,
                                CLUSTER *Cluster,
                                STATISTICS *Statistics,
                                BUCKETS *Buckets);

 PROTOTYPE *MakeMixedProto(CLUSTERER *Clusterer,
                           CLUSTER *Cluster,
                           STATISTICS *Statistics,
                           BUCKETS *NormalBuckets,
                           FLOAT64 Confidence);

 // NOTE(review): presumably switch dimension i of Proto to a random /
 // uniform distribution -- confirm against the definitions.
 void MakeDimRandom(uinT16 i, PROTOTYPE *Proto, PARAM_DESC *ParamDesc);

 void MakeDimUniform(uinT16 i, PROTOTYPE *Proto, STATISTICS *Statistics);

 // Computes the covariance matrix and ranges for a cluster; release the
 // result with FreeStatistics().
 STATISTICS *ComputeStatistics (inT16 N,
 PARAM_DESC ParamDesc[], CLUSTER * Cluster);

 // Allocators for the individual prototype styles.
 PROTOTYPE *NewSphericalProto(uinT16 N,
                              CLUSTER *Cluster,
                              STATISTICS *Statistics);

 PROTOTYPE *NewEllipticalProto(inT16 N,
                               CLUSTER *Cluster,
                               STATISTICS *Statistics);

 PROTOTYPE *NewMixedProto(inT16 N, CLUSTER *Cluster, STATISTICS *Statistics);

 PROTOTYPE *NewSimpleProto(inT16 N, CLUSTER *Cluster);

 // Used by MakePrototype to check that all dimensions are independent.
 BOOL8 Independent (PARAM_DESC ParamDesc[],
 inT16 N, FLOAT32 * CoVariance, FLOAT32 Independence);

 // Returns the histogram structure used to evaluate a distribution for the
 // given sample count and confidence (the clusterer keeps a cache of these).
 BUCKETS *GetBuckets(CLUSTERER* clusterer,
                     DISTRIBUTION Distribution,
                     uinT32 SampleCount,
                     FLOAT64 Confidence);

 BUCKETS *MakeBuckets(DISTRIBUTION Distribution,
                      uinT32 SampleCount,
                      FLOAT64 Confidence);

 uinT16 OptimumNumberOfBuckets(uinT32 SampleCount);

 FLOAT64 ComputeChiSquared(uinT16 DegreesOfFreedom, FLOAT64 Alpha);

 // Density functions with the DENSITYFUNC signature.
 FLOAT64 NormalDensity(inT32 x);

 FLOAT64 UniformDensity(inT32 x);

 FLOAT64 Integral(FLOAT64 f1, FLOAT64 f2, FLOAT64 Dx);

 // Histogram filling and bucket lookup helpers.
 void FillBuckets(BUCKETS *Buckets,
                  CLUSTER *Cluster,
                  uinT16 Dim,
                  PARAM_DESC *ParamDesc,
                  FLOAT32 Mean,
                  FLOAT32 StdDev);

 uinT16 NormalBucket(PARAM_DESC *ParamDesc,
                     FLOAT32 x,
                     FLOAT32 Mean,
                     FLOAT32 StdDev);

 uinT16 UniformBucket(PARAM_DESC *ParamDesc,
                      FLOAT32 x,
                      FLOAT32 Mean,
                      FLOAT32 StdDev);

 BOOL8 DistributionOK(BUCKETS *Buckets);

 // Deallocation helpers.
 void FreeStatistics(STATISTICS *Statistics);

 void FreeBuckets(BUCKETS *Buckets);

 void FreeCluster(CLUSTER *Cluster);

 uinT16 DegreesOfFreedom(DISTRIBUTION Distribution, uinT16 HistogramBuckets);

 // Match callbacks taking untyped arguments; the trailing comments give the
 // real argument types.
 int NumBucketsMatch(void *arg1,   // BUCKETS *Histogram,
                     void *arg2);  // uinT16 *DesiredNumberOfBuckets);

 int ListEntryMatch(void *arg1, void *arg2);

 void AdjustBuckets(BUCKETS *Buckets, uinT32 NewSampleCount);

 void InitBuckets(BUCKETS *Buckets);

 int AlphaMatch(void *arg1,   // CHISTRUCT *ChiStruct,
                void *arg2);  // CHISTRUCT *SearchKey);

 CHISTRUCT *NewChiStruct(uinT16 DegreesOfFreedom, FLOAT64 Alpha);

 // Numeric helpers.
 FLOAT64 Solve(SOLVEFUNC Function,
               void *FunctionParams,
               FLOAT64 InitialGuess,
               FLOAT64 Accuracy);

 FLOAT64 ChiArea(CHISTRUCT *ChiParams, FLOAT64 x);

 // Used by MakePrototype as a filter; a TRUE result makes it reject the
 // cluster.
 BOOL8 MultipleCharSamples(CLUSTERER *Clusterer,
                           CLUSTER *Cluster,
                           FLOAT32 MaxIllegal);

 double InvertMatrix(const float* input, int size, float* inv);

 //--------------------------Public Code--------------------------------------
 // Creates an empty clusterer for samples with SampleSize dimensions.
 //   SampleSize  number of dimensions in each sample
 //   ParamDesc   description of each dimension (SampleSize entries)
 // Returns a newly allocated clusterer; release it with FreeClusterer().
 CLUSTERER *
 MakeClusterer (inT16 SampleSize, const PARAM_DESC ParamDesc[]) {
   // allocate the clusterer and reset its simple bookkeeping fields
   CLUSTERER *NewClusterer = (CLUSTERER *) Emalloc (sizeof (CLUSTERER));
   NewClusterer->SampleSize = SampleSize;
   NewClusterer->NumberOfSamples = 0;
   NewClusterer->NumChar = 0;

   // nothing has been clustered yet, so there is no tree and no prototypes
   NewClusterer->Root = NULL;
   NewClusterer->ProtoList = NIL_LIST;

   // keep a private copy of the parameter descriptors, filling in the
   // derived range fields as we go
   NewClusterer->ParamDesc =
     (PARAM_DESC *) Emalloc (SampleSize * sizeof (PARAM_DESC));
   for (int dim = 0; dim < SampleSize; ++dim) {
     const PARAM_DESC &Src = ParamDesc[dim];
     PARAM_DESC &Dst = NewClusterer->ParamDesc[dim];
     Dst.Circular = Src.Circular;
     Dst.NonEssential = Src.NonEssential;
     Dst.Min = Src.Min;
     Dst.Max = Src.Max;
     Dst.Range = Src.Max - Src.Min;
     Dst.HalfRange = Dst.Range / 2;
     Dst.MidRange = (Src.Max + Src.Min) / 2;
   }

   // the samples themselves live in a kd-tree until they are clustered
   NewClusterer->KDTree = MakeKDTree (SampleSize, ParamDesc);

   // start with an empty cache of histogram buckets
   for (int dist = 0; dist < DISTRIBUTION_COUNT; ++dist) {
     for (int slot = 0; slot < MAXBUCKETS + 1 - MINBUCKETS; ++slot) {
       NewClusterer->bucket_cache[dist][slot] = NULL;
     }
   }

   return NewClusterer;
 }                                // MakeClusterer

 // Creates a sample from Feature and adds it to the clusterer's kd-tree.
 //   Clusterer  clusterer to receive the sample
 //   Feature    feature values, Clusterer->SampleSize entries
 //   CharID     id of the character the sample came from
 // Reports ALREADYCLUSTERED through DoError if the clusterer already has a
 // cluster tree.  Returns the new sample.
 SAMPLE* MakeSample(CLUSTERER * Clusterer, const FLOAT32* Feature,
                    inT32 CharID) {
   // samples may only be added before clustering has taken place
   if (Clusterer->Root != NULL)
     DoError (ALREADYCLUSTERED,
       "Can't add samples after they have been clustered");

   // SAMPLE already has room for one mean value; allocate space for the rest
   SAMPLE *NewSample = (SAMPLE *) Emalloc (
       sizeof (SAMPLE) + (Clusterer->SampleSize - 1) * sizeof (FLOAT32));
   NewSample->Clustered = FALSE;
   NewSample->Prototype = FALSE;
   NewSample->SampleCount = 1;
   NewSample->Left = NULL;
   NewSample->Right = NULL;
   NewSample->CharID = CharID;

   // a single sample is its own mean
   for (int dim = 0; dim < Clusterer->SampleSize; ++dim) {
     NewSample->Mean[dim] = Feature[dim];
   }

   // count the sample and file it in the kd-tree under its feature vector
   Clusterer->NumberOfSamples++;
   KDStore (Clusterer->KDTree, NewSample->Mean, (char *) NewSample);

   // NumChar always stays one above the largest character id seen
   if (CharID >= Clusterer->NumChar)
     Clusterer->NumChar = CharID + 1;

   // execute hook for monitoring clustering operation
   // (*SampleCreationHook)( Sample );

   return NewSample;
 }                                // MakeSample

 // Clusters the samples held by Clusterer and returns the resulting list of
 // prototypes.  The cluster tree is built on the first call only; later
 // calls reuse it and just recompute the prototypes with the new Config.
 LIST ClusterSamples(CLUSTERER *Clusterer, CLUSTERCONFIG *Config) {
   // the cluster tree only has to be built once
   if (Clusterer->Root == NULL) {
     CreateClusterTree(Clusterer);
   }

   // throw away any prototypes left over from a previous run
   FreeProtoList (&Clusterer->ProtoList);
   Clusterer->ProtoList = NIL_LIST;

   // derive a fresh set of prototypes from the cluster tree
   ComputePrototypes(Clusterer, Config);

   // The prototypes no longer need to point back at their clusters; clearing
   // the pointers makes it safe to delete the clusterer afterwards.
   LIST remaining = Clusterer->ProtoList;
   iterate(remaining) {
     reinterpret_cast<PROTOTYPE *>(first_node(remaining))->Cluster = NULL;
   }
   return Clusterer->ProtoList;
 }                                // ClusterSamples

 // Releases a clusterer together with its parameter descriptors, kd-tree,
 // cluster tree and cached histogram buckets.  A NULL clusterer is ignored.
 // The prototype list is not touched.
 void FreeClusterer(CLUSTERER *Clusterer) {
   if (Clusterer == NULL)
     return;

   free(Clusterer->ParamDesc);
   if (Clusterer->KDTree != NULL)
     FreeKDTree (Clusterer->KDTree);
   if (Clusterer->Root != NULL)
     FreeCluster (Clusterer->Root);

   // release every bucket structure that was cached along the way
   for (int dist = 0; dist < DISTRIBUTION_COUNT; ++dist) {
     for (int slot = 0; slot < MAXBUCKETS + 1 - MINBUCKETS; ++slot) {
       BUCKETS *Cached = Clusterer->bucket_cache[dist][slot];
       if (Cached != NULL)
         FreeBuckets(Cached);
     }
   }

   free(Clusterer);
 }                                // FreeClusterer

 // Destroys every node of *ProtoList, releasing each prototype with
 // FreePrototype.  *ProtoList itself is not modified; callers reset it.
 void FreeProtoList(LIST *ProtoList) {
   LIST Doomed = *ProtoList;
   destroy_nodes(Doomed, FreePrototype);
 }                                // FreeProtoList

 void FreePrototype(void *arg) {  //PROTOTYPE     *Prototype)
   PROTOTYPE *Prototype = (PROTOTYPE *) arg;

   // unmark the corresponding cluster (if there is one
   if (Prototype->Cluster != NULL)
     Prototype->Cluster->Prototype = FALSE;

   // deallocate the prototype statistics and then the prototype itself
   free (Prototype->Distrib);
   free (Prototype->Mean);
   if (Prototype->Style != spherical) {
     free (Prototype->Variance.Elliptical);
     free (Prototype->Magnitude.Elliptical);
     free (Prototype->Weight.Elliptical);
   }
   free(Prototype);
 }                                // FreePrototype

 // Returns the next leaf of a cluster tree that is being traversed.
 //   SearchState  stack of subtrees still to be visited; updated in place
 // Returns NULL once the stack is empty.
 CLUSTER *NextSample(LIST *SearchState) {
   if (*SearchState == NIL_LIST)
     return NULL;

   // take the most recently saved subtree off the stack
   CLUSTER *Node = (CLUSTER *) first_node (*SearchState);
   *SearchState = pop (*SearchState);

   // descend along left children, saving each right child for later
   while (Node->Left != NULL) {
     *SearchState = push (*SearchState, Node->Right);
     Node = Node->Left;
   }
   return Node;
 }                                // NextSample

 // Returns the mean of Proto in the given dimension.
 FLOAT32 Mean(PROTOTYPE *Proto, uinT16 Dimension) {
   const FLOAT32 DimMean = Proto->Mean[Dimension];
   return DimMean;
 }                                // Mean

 // Returns the spread of Proto in the given dimension.
 // For spherical and elliptical prototypes, and for normal dimensions of
 // mixed prototypes, this is the square root of the stored variance.  For
 // uniform and random dimensions of mixed prototypes the stored value is
 // returned unchanged.  Anything else yields 0.
 FLOAT32 StandardDeviation(PROTOTYPE *Proto, uinT16 Dimension) {
   if (Proto->Style == spherical) {
     return (FLOAT32) sqrt ((double) Proto->Variance.Spherical);
   }
   if (Proto->Style == elliptical) {
     return (FLOAT32) sqrt ((double) Proto->Variance.Elliptical[Dimension]);
   }
   if (Proto->Style == mixed) {
     // the distribution of the dimension decides how the value is stored
     if (Proto->Distrib[Dimension] == normal) {
       return (FLOAT32) sqrt ((double) Proto->Variance.Elliptical[Dimension]);
     }
     if (Proto->Distrib[Dimension] == uniform ||
         Proto->Distrib[Dimension] == D_random) {
       return Proto->Variance.Elliptical[Dimension];
     }
     if (Proto->Distrib[Dimension] == DISTRIBUTION_COUNT) {
       ASSERT_HOST(!"Distribution count not allowed!");
     }
   }
   return 0.0f;
 }                                // StandardDeviation


 /*---------------------------------------------------------------------------
             Private Code
 ----------------------------------------------------------------------------*/
 // Builds the cluster tree for Clusterer by repeatedly merging the closest
 // pair of clusters in the kd-tree.  On return Clusterer->Root is the root of
 // the tree and Clusterer->KDTree has been freed and set to NULL.
 void CreateClusterTree(CLUSTERER *Clusterer) {
   // Pair every sample with its nearest neighbor as a "potential" cluster
   // and collect the pairs in a heap that hands back the best pair first.
   ClusteringContext context;
   context.tree = Clusterer->KDTree;
   context.candidates = (TEMPCLUSTER *)
     Emalloc(Clusterer->NumberOfSamples * sizeof(TEMPCLUSTER));
   context.next = 0;
   context.heap = new ClusterHeap(Clusterer->NumberOfSamples);
   KDWalk(context.tree, (void_proc)MakePotentialClusters, &context);

   // Turn potential clusters into real ones, always taking the best first.
   ClusterPair Entry;
   while (context.heap->Pop(&Entry)) {
     TEMPCLUSTER *Candidate = Entry.data;

     // A candidate whose main cluster has already been absorbed into another
     // cluster is of no further interest.
     if (Candidate->Cluster->Clustered)
       continue;

     // If the neighbor is still free, the pair becomes a permanent cluster
     // which takes the place of the candidate's main cluster.  If the
     // neighbor has been taken, the main cluster is kept as it is.
     if (!Candidate->Neighbor->Clustered) {
       Candidate->Cluster = MakeNewCluster(Clusterer, Candidate);
     }

     // Either way the main cluster now needs a new nearest neighbor; the
     // candidate goes back on the heap as long as one exists.
     Candidate->Neighbor =
         FindNearestNeighbor(context.tree, Candidate->Cluster, &Entry.key);
     if (Candidate->Neighbor != NULL) {
       context.heap->Push(&Entry);
     }
   }

   // the root node in the cluster tree is now the only node in the kd-tree
   Clusterer->Root = (CLUSTER *) RootOf(Clusterer->KDTree);

   // the kd-tree, the heap and the candidate array are no longer needed
   FreeKDTree(context.tree);
   Clusterer->KDTree = NULL;
   delete context.heap;
   free(context.candidates);
 }                                // CreateClusterTree

 // KDWalk callback.  Fills the next free candidate slot with Cluster and its
 // nearest neighbor; if a neighbor exists the candidate is pushed on the
 // heap and the slot is consumed, otherwise the slot stays free for the next
 // call.  Level is not used.
 void MakePotentialClusters(ClusteringContext *context,
                            CLUSTER *Cluster, inT32 Level) {
   TEMPCLUSTER *Slot = &context->candidates[context->next];
   ClusterPair Entry;

   Slot->Cluster = Cluster;
   Entry.data = Slot;
   Slot->Neighbor =
       FindNearestNeighbor(context->tree, Slot->Cluster, &Entry.key);
   if (Slot->Neighbor == NULL)
     return;

   context->heap->Push(&Entry);
   context->next++;
 }                                // MakePotentialClusters

 // Finds the cluster in Tree that is closest to Cluster without being
 // Cluster itself.
 //   Tree      kd-tree to search
 //   Cluster   cluster whose neighbor is wanted
 //   Distance  output: distance to the neighbor, MAXDISTANCE if none is found
 // Returns the neighbor, or NULL if none is found.
 CLUSTER *
 FindNearestNeighbor(KDTREE * Tree, CLUSTER * Cluster, FLOAT32 * Distance)
 #define MAXNEIGHBORS  2
 #define MAXDISTANCE   MAX_FLOAT32
 {
   CLUSTER *Nearby[MAXNEIGHBORS];
   FLOAT32 NearbyDist[MAXNEIGHBORS];
   int Found;

   // ask for the two closest entries, since one of them may be Cluster itself
   KDNearestNeighborSearch(Tree, Cluster->Mean, MAXNEIGHBORS, MAXDISTANCE,
                           &Found, (void **)Nearby, NearbyDist);

   // keep the closest result that is not the cluster we started from
   CLUSTER *Closest = NULL;
   *Distance = MAXDISTANCE;
   for (inT32 k = 0; k < Found; k++) {
     if (Nearby[k] == Cluster)
       continue;
     if (NearbyDist[k] < *Distance) {
       *Distance = NearbyDist[k];
       Closest = Nearby[k];
     }
   }
   return Closest;
 }                                // FindNearestNeighbor

 // Creates the parent cluster of the two clusters held in TempCluster.
 // Both children are marked as clustered and removed from the kd-tree, and
 // the parent is stored in the kd-tree in their place.  Returns the parent.
 CLUSTER *MakeNewCluster(CLUSTERER *Clusterer, TEMPCLUSTER *TempCluster) {
   // CLUSTER already has room for one mean value; allocate space for the rest
   CLUSTER *Parent = (CLUSTER *) Emalloc(
       sizeof(CLUSTER) + (Clusterer->SampleSize - 1) * sizeof(FLOAT32));
   CLUSTER *LeftChild = TempCluster->Cluster;
   CLUSTER *RightChild = TempCluster->Neighbor;

   Parent->Clustered = FALSE;
   Parent->Prototype = FALSE;
   Parent->Left = LeftChild;
   Parent->Right = RightChild;
   Parent->CharID = -1;

   // the children are now spoken for and must leave the kd-tree
   LeftChild->Clustered = TRUE;
   RightChild->Clustered = TRUE;
   KDDelete(Clusterer->KDTree, LeftChild->Mean, LeftChild);
   KDDelete(Clusterer->KDTree, RightChild->Mean, RightChild);

   // the parent's mean and sample count follow from those of its children
   Parent->SampleCount =
       MergeClusters(Clusterer->SampleSize, Clusterer->ParamDesc,
                     LeftChild->SampleCount, RightChild->SampleCount,
                     Parent->Mean, LeftChild->Mean, RightChild->Mean);

   // the parent takes the children's place in the kd-tree
   KDStore(Clusterer->KDTree, Parent->Mean, Parent);
   return Parent;
 }                                // MakeNewCluster

 // Computes the sample-count-weighted mean of two clusters.
 //   N          number of dimensions
 //   ParamDesc  description of each dimension
 //   n1, n2     number of samples in each cluster
 //   m          output: merged mean (N entries)
 //   m1, m2     means of the two clusters (N entries each)
 // For circular dimensions whose means lie more than half a range apart,
 // the larger mean is shifted down by one full range before averaging, and
 // the result is moved back up if it falls below the dimension's minimum.
 // Returns n1 + n2.
 inT32 MergeClusters(inT16 N,
                     PARAM_DESC ParamDesc[],
                     inT32 n1,
                     inT32 n2,
                     FLOAT32 m[],
                     FLOAT32 m1[], FLOAT32 m2[]) {
   const inT32 Total = n1 + n2;

   for (inT32 dim = 0; dim < N; ++dim) {
     const PARAM_DESC &Desc = ParamDesc[dim];
     const FLOAT32 First = m1[dim];
     const FLOAT32 Second = m2[dim];

     if (Desc.Circular && (Second - First) > Desc.HalfRange) {
       // second mean is the upper one: take it down by one "rotation"
       m[dim] = (n1 * First + n2 * (Second - Desc.Range)) / Total;
       if (m[dim] < Desc.Min)
         m[dim] += Desc.Range;
     } else if (Desc.Circular && (First - Second) > Desc.HalfRange) {
       // first mean is the upper one: take it down by one "rotation"
       m[dim] = (n1 * (First - Desc.Range) + n2 * Second) / Total;
       if (m[dim] < Desc.Min)
         m[dim] += Desc.Range;
     } else {
       // plain weighted average
       m[dim] = (n1 * First + n2 * Second) / Total;
     }
   }
   return Total;
 }                                // MergeClusters

 // Derives prototypes from the cluster tree of Clusterer.  Starting at the
 // root, each cluster is handed to MakePrototype; a resulting prototype is
 // pushed onto Clusterer->ProtoList, otherwise the cluster's two children
 // are examined instead.
 void ComputePrototypes(CLUSTERER *Clusterer, CLUSTERCONFIG *Config) {
   // clusters still waiting to be examined; starts with just the root
   LIST Pending = NIL_LIST;
   if (Clusterer->Root != NULL)
     Pending = push (NIL_LIST, Clusterer->Root);

   while (Pending != NIL_LIST) {
     // take the next cluster off the stack
     CLUSTER *Current = (CLUSTER *) first_node (Pending);
     Pending = pop (Pending);

     PROTOTYPE *Proto = MakePrototype(Clusterer, Config, Current);
     if (Proto == NULL) {
       // no prototype: split the cluster, left child to be examined first
       Pending = push (Pending, Current->Right);
       Pending = push (Pending, Current->Left);
       continue;
     }
     Clusterer->ProtoList = push (Clusterer->ProtoList, Proto);
   }
 }                                // ComputePrototypes

 // Attempts to build a prototype for Cluster using the style and thresholds
 // in Config.  Returns the new prototype, or NULL when the cluster fails one
 // of the tests (the caller then analyzes its sub-clusters instead).
 PROTOTYPE *MakePrototype(CLUSTERER *Clusterer,
                          CLUSTERCONFIG *Config,
                          CLUSTER *Cluster) {
   // filter out clusters which contain samples from the same character
   if (MultipleCharSamples (Clusterer, Cluster, Config->MaxIllegal))
     return NULL;

   // covariance matrix and ranges for the cluster; released before returning
   STATISTICS *Stats =
       ComputeStatistics(Clusterer->SampleSize, Clusterer->ParamDesc, Cluster);

   // degenerate clusters need no further analysis.  The MinSamples test
   // assumes clusters with multiple character samples were removed above.
   PROTOTYPE *Result = MakeDegenerateProto(
       Clusterer->SampleSize, Cluster, Stats, Config->ProtoStyle,
       (inT32) (Config->MinSamples * Clusterer->NumChar));

   if (Result == NULL) {
     // all dimensions must be independent for any further test to apply
     if (!Independent(Clusterer->ParamDesc, Clusterer->SampleSize,
                      Stats->CoVariance, Config->Independence)) {
       FreeStatistics(Stats);
       return NULL;
     }

     if (HOTELLING && Config->ProtoStyle == elliptical)
       Result = TestEllipticalProto(Clusterer, Config, Cluster, Stats);

     if (Result == NULL) {
       // histogram data structure used to evaluate distributions
       BUCKETS *Histogram = GetBuckets(Clusterer, normal, Cluster->SampleCount,
                                       Config->Confidence);

       // build a prototype of the requested style and test it
       switch (Config->ProtoStyle) {
         case spherical:
           Result = MakeSphericalProto(Clusterer, Cluster, Stats, Histogram);
           break;
         case elliptical:
           Result = MakeEllipticalProto(Clusterer, Cluster, Stats, Histogram);
           break;
         case mixed:
           Result = MakeMixedProto(Clusterer, Cluster, Stats, Histogram,
                                   Config->Confidence);
           break;
         case automatic:
           // try the styles in turn, stopping at the first that succeeds
           Result = MakeSphericalProto(Clusterer, Cluster, Stats, Histogram);
           if (Result == NULL)
             Result = MakeEllipticalProto(Clusterer, Cluster, Stats, Histogram);
           if (Result == NULL)
             Result = MakeMixedProto(Clusterer, Cluster, Stats, Histogram,
                                     Config->Confidence);
           break;
       }
     }
   }
   FreeStatistics(Stats);
   return Result;
 }                                // MakePrototype

 // Returns an insignificant prototype of the requested Style when Cluster has
 // fewer samples than the threshold, or NULL when it has enough samples.
 // The threshold is MinSamples, raised to MINSAMPLESNEEDED if it is smaller.
 PROTOTYPE *MakeDegenerateProto(  //this was MinSample
                                uinT16 N,
                                CLUSTER *Cluster,
                                STATISTICS *Statistics,
                                PROTOSTYLE Style,
                                inT32 MinSamples) {
   // never use a threshold below the global minimum
   if (MinSamples < MINSAMPLESNEEDED)
     MinSamples = MINSAMPLESNEEDED;

   // enough samples: the cluster is not degenerate
   if (!(Cluster->SampleCount < MinSamples))
     return (NULL);

   PROTOTYPE *Degenerate = NULL;
   if (Style == spherical)
     Degenerate = NewSphericalProto (N, Cluster, Statistics);
   else if (Style == elliptical || Style == automatic)
     Degenerate = NewEllipticalProto (N, Cluster, Statistics);
   else if (Style == mixed)
     Degenerate = NewMixedProto (N, Cluster, Statistics);

   // the New*Proto constructors mark protos significant; undo that here
   Degenerate->Significant = FALSE;
   return (Degenerate);
 }                                // MakeDegenerateProto

 // Applies Hotelling's T-squared test to the means of the two sub-clusters of
 // Cluster, using only the essential dimensions, to decide whether the
 // sub-clusters should stay together.
 //   Clusterer   supplies the sample size and the parameter descriptions
 //   Config      supplies MagicSamples (cluster size that earns an FTable boost)
 //   Cluster     cluster whose Left/Right children are compared
 //   Statistics  covariance statistics previously computed for Cluster
 // Returns a new elliptical prototype when the F statistic is below the
 // FTable threshold, otherwise NULL.  NULL is also returned when the cluster
 // has no children, too few samples, or the F test has no degrees of freedom.
 PROTOTYPE *TestEllipticalProto(CLUSTERER *Clusterer,
                                CLUSTERCONFIG *Config,
                                CLUSTER *Cluster,
                                STATISTICS *Statistics) {
   // Fraction of the number of samples used as a range around 1 within
   // which a cluster has the magic size that allows a boost to the
   // FTable by kFTableBoostMargin, thus allowing clusters near the
   // magic size (equal to the number of sample characters) to be more
   // likely to stay together.
   const double kMagicSampleMargin = 0.0625;
   const double kFTableBoostMargin = 2.0;

   int N = Clusterer->SampleSize;
   CLUSTER* Left = Cluster->Left;
   CLUSTER* Right = Cluster->Right;
   if (Left == NULL || Right == NULL)
     return NULL;
   int TotalDims = Left->SampleCount + Right->SampleCount;
   if (TotalDims < N + 1 || TotalDims < 2)
     return NULL;

   // Count the essential dimensions before doing any work.
   int EssentialN = 0;
   for (int dim = 0; dim < N; ++dim) {
     if (!Clusterer->ParamDesc[dim].NonEssential)
       ++EssentialN;
   }
   // FTable is indexed by [DenominatorDims - 1][EssentialN - 1].  If either
   // count is below 1 the index would be -1 (an out-of-bounds read) and the
   // F statistic below would divide by zero, so the test cannot be applied.
   const int DenominatorDims = TotalDims - EssentialN - 1;
   if (EssentialN < 1 || DenominatorDims < 1)
     return NULL;

   const int kMatrixSize = N * N * sizeof(FLOAT32);
   FLOAT32* Covariance = static_cast<FLOAT32 *>(Emalloc(kMatrixSize));
   FLOAT32* Inverse = static_cast<FLOAT32 *>(Emalloc(kMatrixSize));
   FLOAT32* Delta = static_cast<FLOAT32*>(Emalloc(N * sizeof(FLOAT32)));
   // Compute a new covariance matrix that only uses essential features.
   // Rows/columns of non-essential features are replaced by identity entries.
   for (int i = 0; i < N; ++i) {
     int row_offset = i * N;
     if (!Clusterer->ParamDesc[i].NonEssential) {
       for (int j = 0; j < N; ++j) {
         if (!Clusterer->ParamDesc[j].NonEssential)
           Covariance[j + row_offset] = Statistics->CoVariance[j + row_offset];
         else
           Covariance[j + row_offset] = 0.0f;
       }
     } else {
       for (int j = 0; j < N; ++j) {
         if (i == j)
           Covariance[j + row_offset] = 1.0f;
         else
           Covariance[j + row_offset] = 0.0f;
       }
     }
   }
   double err = InvertMatrix(Covariance, N, Inverse);
   if (err > 1) {
     tprintf("Clustering error: Matrix inverse failed with error %g\n", err);
   }
   // Difference of the sub-cluster means; non-essential dims contribute 0.
   for (int dim = 0; dim < N; ++dim) {
     if (!Clusterer->ParamDesc[dim].NonEssential)
       Delta[dim] = Left->Mean[dim] - Right->Mean[dim];
     else
       Delta[dim] = 0.0f;
   }
   // Compute Hotelling's T-squared: Delta^T * Inverse * Delta.
   double Tsq = 0.0;
   for (int x = 0; x < N; ++x) {
     double temp = 0.0;
     for (int y = 0; y < N; ++y) {
       temp += Inverse[y + N*x] * Delta[y];
     }
     Tsq += Delta[x] * temp;
   }
   free(Covariance);
   free(Inverse);
   free(Delta);
   // Changed this function to match the formula in
   // Statistical Methods in Medical Research p 473
   // By Peter Armitage, Geoffrey Berry, J. N. S. Matthews.
   // Tsq *= Left->SampleCount * Right->SampleCount / TotalDims;
   // The guard above ensures TotalDims >= 3, so the divisor is non-zero.
   double F = Tsq * DenominatorDims / ((TotalDims - 2)*EssentialN);
   // Clamp both counts to the table size, then convert to 0-based indices.
   int Fx = EssentialN;
   if (Fx > FTABLE_X)
     Fx = FTABLE_X;
   --Fx;
   int Fy = DenominatorDims;
   if (Fy > FTABLE_Y)
     Fy = FTABLE_Y;
   --Fy;
   double FTarget = FTable[Fy][Fx];
   if (Config->MagicSamples > 0 &&
       TotalDims >= Config->MagicSamples * (1.0 - kMagicSampleMargin) &&
       TotalDims <= Config->MagicSamples * (1.0 + kMagicSampleMargin)) {
     // Give magic-sized clusters a magic FTable boost.
     FTarget += kFTableBoostMargin;
   }
   if (F < FTarget) {
     return NewEllipticalProto (Clusterer->SampleSize, Cluster, Statistics);
   }
   return NULL;
 }

 // Tests every essential dimension of Cluster against a normal distribution
 // whose standard deviation is sqrt(Statistics->AvgVariance).  Returns a new
 // spherical prototype if all dimensions pass, otherwise NULL.
 PROTOTYPE *MakeSphericalProto(CLUSTERER *Clusterer,
                               CLUSTER *Cluster,
                               STATISTICS *Statistics,
                               BUCKETS *Buckets) {
   for (int dim = 0; dim < Clusterer->SampleSize; dim++) {
     // non-essential dimensions are not tested
     if (Clusterer->ParamDesc[dim].NonEssential)
       continue;

     FillBuckets (Buckets, Cluster, dim, &(Clusterer->ParamDesc[dim]),
       Cluster->Mean[dim],
       sqrt ((FLOAT64) (Statistics->AvgVariance)));

     // one failing dimension is enough to reject the cluster
     if (!DistributionOK (Buckets))
       return (NULL);
   }
   // every tested dimension matched a normal distribution
   return (NewSphericalProto (Clusterer->SampleSize, Cluster, Statistics));
 }                                // MakeSphericalProto

 // Tests every essential dimension of Cluster against a normal distribution
 // whose standard deviation is the square root of that dimension's diagonal
 // covariance entry.  Returns a new elliptical prototype if all dimensions
 // pass, otherwise NULL.
 PROTOTYPE *MakeEllipticalProto(CLUSTERER *Clusterer,
                                CLUSTER *Cluster,
                                STATISTICS *Statistics,
                                BUCKETS *Buckets) {
   for (int dim = 0; dim < Clusterer->SampleSize; dim++) {
     // non-essential dimensions are not tested
     if (Clusterer->ParamDesc[dim].NonEssential)
       continue;

     // the diagonal element for dim lives at index dim * (SampleSize + 1)
     FillBuckets (Buckets, Cluster, dim, &(Clusterer->ParamDesc[dim]),
       Cluster->Mean[dim],
       sqrt ((FLOAT64) Statistics->
       CoVariance[dim * (Clusterer->SampleSize + 1)]));

     // one failing dimension is enough to reject the cluster
     if (!DistributionOK (Buckets))
       return (NULL);
   }
   // every tested dimension matched a normal distribution
   return (NewEllipticalProto (Clusterer->SampleSize, Cluster, Statistics));
 }                                // MakeEllipticalProto

 // Builds a mixed prototype for Cluster.  Each essential dimension is tried
 // as normal, then D_random, then uniform, keeping the first distribution
 // that fits.  Returns the prototype, or NULL (after freeing it) as soon as
 // some dimension fits none of the three.
 PROTOTYPE *MakeMixedProto(CLUSTERER *Clusterer,
                           CLUSTER *Cluster,
                           STATISTICS *Statistics,
                           BUCKETS *NormalBuckets,
                           FLOAT64 Confidence) {
   // histograms for the fallback distributions are fetched only on demand
   BUCKETS *RandomHistogram = NULL;
   BUCKETS *UniformHistogram = NULL;

   // start from a mixed proto in which every dimension is assumed normal
   PROTOTYPE *Mixed =
     NewMixedProto (Clusterer->SampleSize, Cluster, Statistics);

   for (int dim = 0; dim < Clusterer->SampleSize; dim++) {
     PARAM_DESC *Desc = &(Clusterer->ParamDesc[dim]);
     if (Desc->NonEssential)
       continue;

     // first choice: normal
     FillBuckets (NormalBuckets, Cluster, dim, Desc,
       Mixed->Mean[dim],
       sqrt ((FLOAT64) Mixed->Variance.Elliptical[dim]));
     if (DistributionOK (NormalBuckets))
       continue;

     // second choice: random
     if (RandomHistogram == NULL)
       RandomHistogram =
         GetBuckets(Clusterer, D_random, Cluster->SampleCount, Confidence);
     MakeDimRandom (dim, Mixed, Desc);
     FillBuckets (RandomHistogram, Cluster, dim, Desc,
       Mixed->Mean[dim], Mixed->Variance.Elliptical[dim]);
     if (DistributionOK (RandomHistogram))
       continue;

     // last choice: uniform
     if (UniformHistogram == NULL)
       UniformHistogram =
         GetBuckets(Clusterer, uniform, Cluster->SampleCount, Confidence);
     MakeDimUniform(dim, Mixed, Statistics);
     FillBuckets (UniformHistogram, Cluster, dim, Desc,
       Mixed->Mean[dim], Mixed->Variance.Elliptical[dim]);
     if (!DistributionOK (UniformHistogram)) {
       // this dimension matches no distribution: discard the proto
       FreePrototype(Mixed);
       return (NULL);
     }
   }
   return (Mixed);
 }                                // MakeMixedProto

 // Switches dimension i of Proto to the D_random distribution: the mean
 // becomes the parameter's MidRange, the variance slot holds its HalfRange,
 // and the magnitudes are updated to use 1 / Range for this dimension.
 void MakeDimRandom(uinT16 i, PROTOTYPE *Proto, PARAM_DESC *ParamDesc) {
   FLOAT32 *DimMagnitude = &(Proto->Magnitude.Elliptical[i]);

   Proto->Distrib[i] = D_random;
   Proto->Mean[i] = ParamDesc->MidRange;
   Proto->Variance.Elliptical[i] = ParamDesc->HalfRange;

   // divide the old magnitude of this dimension out of the total,
   // then multiply the replacement back in
   Proto->TotalMagnitude /= *DimMagnitude;
   *DimMagnitude = 1.0 / ParamDesc->Range;
   Proto->TotalMagnitude *= *DimMagnitude;
   Proto->LogMagnitude = log ((double) Proto->TotalMagnitude);

   // note that the proto Weight is irrelevant for D_random protos
 }                                // MakeDimRandom

 // Switches dimension i of Proto to the uniform distribution.  The mean is
 // moved to the middle of the observed [Min, Max] offsets, the variance slot
 // holds half of that span (at least MINVARIANCE), and the magnitudes are
 // updated to use 1 / (2 * half-span) for this dimension.
 void MakeDimUniform(uinT16 i, PROTOTYPE *Proto, STATISTICS *Statistics) {
   FLOAT32 *DimVariance = &(Proto->Variance.Elliptical[i]);
   FLOAT32 *DimMagnitude = &(Proto->Magnitude.Elliptical[i]);

   Proto->Distrib[i] = uniform;
   Proto->Mean[i] = Proto->Cluster->Mean[i] +
     (Statistics->Min[i] + Statistics->Max[i]) / 2;

   *DimVariance = (Statistics->Max[i] - Statistics->Min[i]) / 2;
   if (*DimVariance < MINVARIANCE)
     *DimVariance = MINVARIANCE;

   // divide the old magnitude of this dimension out of the total,
   // then multiply the replacement back in
   Proto->TotalMagnitude /= *DimMagnitude;
   *DimMagnitude = 1.0 / (2.0 * *DimVariance);
   Proto->TotalMagnitude *= *DimMagnitude;
   Proto->LogMagnitude = log ((double) Proto->TotalMagnitude);

   // note that the proto Weight is irrelevant for uniform protos
 }                                // MakeDimUniform

 // Computes, for the samples in Cluster, the N x N covariance matrix of the
 // sample-to-mean offsets, the smallest and largest offset per dimension, and
 // the geometric mean of the diagonal variances (AvgVariance).  Offsets of
 // circular parameters are wrapped into [-HalfRange, HalfRange].
 // The returned STATISTICS is heap-allocated; release with FreeStatistics.
 STATISTICS *
 ComputeStatistics (inT16 N, PARAM_DESC ParamDesc[], CLUSTER * Cluster) {
   // allocate memory to hold the statistics results
   STATISTICS *Stats = (STATISTICS *) Emalloc (sizeof (STATISTICS));
   Stats->CoVariance = (FLOAT32 *) Emalloc (N * N * sizeof (FLOAT32));
   Stats->Min = (FLOAT32 *) Emalloc (N * sizeof (FLOAT32));
   Stats->Max = (FLOAT32 *) Emalloc (N * sizeof (FLOAT32));

   // scratch buffer holding one sample's offset from the cluster mean
   FLOAT32 *Offset = (FLOAT32 *) Emalloc (N * sizeof (FLOAT32));

   // initialize the statistics
   Stats->AvgVariance = 1.0;
   for (int dim = 0; dim < N; dim++) {
     Stats->Min[dim] = 0.0;
     Stats->Max[dim] = 0.0;
   }
   for (int cell = 0; cell < N * N; cell++)
     Stats->CoVariance[cell] = 0;

   // find each sample in the cluster and merge it into the statistics
   LIST SearchState;
   SAMPLE *Sample;
   InitSampleSearch(SearchState, Cluster);
   while ((Sample = NextSample (&SearchState)) != NULL) {
     for (int dim = 0; dim < N; dim++) {
       Offset[dim] = Sample->Mean[dim] - Cluster->Mean[dim];
       if (ParamDesc[dim].Circular) {
         // wrap the offset around the circular range
         if (Offset[dim] > ParamDesc[dim].HalfRange)
           Offset[dim] -= ParamDesc[dim].Range;
         if (Offset[dim] < -ParamDesc[dim].HalfRange)
           Offset[dim] += ParamDesc[dim].Range;
       }
       if (Offset[dim] < Stats->Min[dim])
         Stats->Min[dim] = Offset[dim];
       if (Offset[dim] > Stats->Max[dim])
         Stats->Max[dim] = Offset[dim];
     }
     // accumulate the outer product of the offset vector (row-major)
     for (int row = 0; row < N; row++)
       for (int col = 0; col < N; col++)
         Stats->CoVariance[row * N + col] += Offset[row] * Offset[col];
   }

   // normalize the variances by the total number of samples
   // use SampleCount-1 instead of SampleCount to get an unbiased estimate
   // ensure that clusters with only 1 sample are handled correctly
   uinT32 Divisor;
   if (Cluster->SampleCount > 1)
     Divisor = Cluster->SampleCount - 1;
   else
     Divisor = 1;
   for (int cell = 0; cell < N * N; cell++)
     Stats->CoVariance[cell] /= Divisor;

   // clamp the diagonal to MINVARIANCE and fold it into the running product
   // from which the geometric mean of the variances is taken
   for (int dim = 0; dim < N; dim++) {
     FLOAT32 *Diagonal = &(Stats->CoVariance[dim * N + dim]);
     if (*Diagonal < MINVARIANCE)
       *Diagonal = MINVARIANCE;
     Stats->AvgVariance *= *Diagonal;
   }
   Stats->AvgVariance = (float)pow((double)Stats->AvgVariance,
                                   1.0 / N);

   // release temporary memory and return
   free(Offset);
   return (Stats);
 }                                // ComputeStatistics

 // Creates a spherical prototype for Cluster.  The single variance is
 // Statistics->AvgVariance (at least MINVARIANCE); magnitude, weight and
 // log-magnitude are derived from it for N dimensions.
 PROTOTYPE *NewSphericalProto(uinT16 N,
                              CLUSTER *Cluster,
                              STATISTICS *Statistics) {
   PROTOTYPE *Sphere = NewSimpleProto (N, Cluster);

   // variance, clamped from below
   Sphere->Variance.Spherical = Statistics->AvgVariance;
   if (Sphere->Variance.Spherical < MINVARIANCE)
     Sphere->Variance.Spherical = MINVARIANCE;
   Sphere->Weight.Spherical = 1.0 / Sphere->Variance.Spherical;

   // per-dimension magnitude, then the product over all N dimensions
   Sphere->Magnitude.Spherical =
     1.0 / sqrt ((double) (2.0 * PI * Sphere->Variance.Spherical));
   Sphere->TotalMagnitude = (float)pow((double)Sphere->Magnitude.Spherical,
                                       (double) N);
   Sphere->LogMagnitude = log ((double) Sphere->TotalMagnitude);

   return (Sphere);
 }                                // NewSphericalProto

 // Creates an elliptical prototype for Cluster.  Each dimension takes its
 // variance from the diagonal of Statistics->CoVariance (at least
 // MINVARIANCE) and gets its own magnitude and weight; TotalMagnitude is the
 // product of the per-dimension magnitudes.
 PROTOTYPE *NewEllipticalProto(inT16 N,
                               CLUSTER *Cluster,
                               STATISTICS *Statistics) {
   PROTOTYPE *Ellipse = NewSimpleProto (N, Cluster);
   Ellipse->Variance.Elliptical = (FLOAT32 *) Emalloc (N * sizeof (FLOAT32));
   Ellipse->Magnitude.Elliptical = (FLOAT32 *) Emalloc (N * sizeof (FLOAT32));
   Ellipse->Weight.Elliptical = (FLOAT32 *) Emalloc (N * sizeof (FLOAT32));

   Ellipse->TotalMagnitude = 1.0;
   for (int dim = 0; dim < N; dim++) {
     // diagonal element of the row-major N x N covariance matrix
     Ellipse->Variance.Elliptical[dim] =
       Statistics->CoVariance[dim * (N + 1)];
     if (Ellipse->Variance.Elliptical[dim] < MINVARIANCE)
       Ellipse->Variance.Elliptical[dim] = MINVARIANCE;

     Ellipse->Weight.Elliptical[dim] = 1.0 / Ellipse->Variance.Elliptical[dim];
     Ellipse->Magnitude.Elliptical[dim] =
       1.0 / sqrt ((double) (2.0 * PI * Ellipse->Variance.Elliptical[dim]));
     Ellipse->TotalMagnitude *= Ellipse->Magnitude.Elliptical[dim];
   }
   Ellipse->LogMagnitude = log ((double) Ellipse->TotalMagnitude);
   Ellipse->Style = elliptical;
   return (Ellipse);
 }                                // NewEllipticalProto

 // Creates a mixed prototype for Cluster: an elliptical prototype with an
 // extra per-dimension distribution array, every entry initially normal.
 PROTOTYPE *NewMixedProto(inT16 N, CLUSTER *Cluster, STATISTICS *Statistics) {
   PROTOTYPE *Mixed = NewEllipticalProto (N, Cluster, Statistics);

   Mixed->Distrib = (DISTRIBUTION *) Emalloc (N * sizeof (DISTRIBUTION));
   for (int dim = 0; dim < N; dim++)
     Mixed->Distrib[dim] = normal;

   Mixed->Style = mixed;
   return (Mixed);
 }                                // NewMixedProto

 // Allocates a prototype for Cluster and fills in the fields common to all
 // styles: a copy of the N-element mean, default flags, spherical style, the
 // sample count, and a back pointer to the cluster.  Also marks the cluster
 // itself as being a prototype.
 PROTOTYPE *NewSimpleProto(inT16 N, CLUSTER *Cluster) {
   PROTOTYPE *Simple = (PROTOTYPE *) Emalloc (sizeof (PROTOTYPE));

   // private copy of the cluster mean
   Simple->Mean = (FLOAT32 *) Emalloc (N * sizeof (FLOAT32));
   for (int dim = 0; dim < N; dim++)
     Simple->Mean[dim] = Cluster->Mean[dim];

   Simple->Distrib = NULL;
   Simple->Style = spherical;
   Simple->Significant = TRUE;
   Simple->Merged = FALSE;
   Simple->NumSamples = Cluster->SampleCount;

   // link proto and cluster to each other
   Simple->Cluster = Cluster;
   Simple->Cluster->Prototype = TRUE;
   return (Simple);
 }                                // NewSimpleProto

 // Checks every pair of essential dimensions in the N x N row-major
 // CoVariance matrix.  For each pair the value
 // sqrt(sqrt(cov^2 / (var_i * var_j))) is computed (taken as 0 when either
 // variance is 0).  Returns FALSE as soon as one such value exceeds
 // Independence, TRUE otherwise.
 BOOL8
 Independent (PARAM_DESC ParamDesc[],
 inT16 N, FLOAT32 * CoVariance, FLOAT32 Independence) {
   for (int row = 0; row < N; row++) {
     if (ParamDesc[row].NonEssential)
       continue;

     // on-diagonal element for this row
     const FLOAT32 *VarRow = &CoVariance[row * (N + 1)];

     // only the upper triangle is inspected
     for (int col = row + 1; col < N; col++) {
       if (ParamDesc[col].NonEssential)
         continue;

       const FLOAT32 *VarCol = &CoVariance[col * (N + 1)];
       const FLOAT32 *Cross = &CoVariance[row * N + col];

       FLOAT32 CorrelationCoeff;
       if ((*VarRow != 0.0) && (*VarCol != 0.0))
         CorrelationCoeff =
           sqrt (sqrt (*Cross * *Cross / (*VarRow * *VarCol)));
       else
         CorrelationCoeff = 0.0;

       if (CorrelationCoeff > Independence)
         return (FALSE);
     }
   }
   return (TRUE);
 }                                // Independent

 // Returns a histogram structure for the given distribution, sample count and
 // confidence.  Structures are cached in clusterer->bucket_cache, keyed by
 // distribution and number of buckets; a cached one is rescaled/reset for
 // the new request, otherwise a new one is made and stored in the cache.
 BUCKETS *GetBuckets(CLUSTERER* clusterer,
                     DISTRIBUTION Distribution,
                     uinT32 SampleCount,
                     FLOAT64 Confidence) {
   const uinT16 NumberOfBuckets = OptimumNumberOfBuckets(SampleCount);
   const int CacheSlot = NumberOfBuckets - MINBUCKETS;
   BUCKETS *Cached = clusterer->bucket_cache[Distribution][CacheSlot];

   // Cache miss: build a fresh structure and remember it.
   if (Cached == NULL) {
     Cached = MakeBuckets(Distribution, SampleCount, Confidence);
     clusterer->bucket_cache[Distribution][CacheSlot] = Cached;
     return Cached;
   }

   // Cache hit: adapt the existing structure to this request.
   if (SampleCount != Cached->SampleCount)
     AdjustBuckets(Cached, SampleCount);
   if (Confidence != Cached->Confidence) {
     Cached->Confidence = Confidence;
     Cached->ChiSquared = ComputeChiSquared(
         DegreesOfFreedom(Distribution, Cached->NumberOfBuckets),
         Confidence);
   }
   InitBuckets(Cached);
   return Cached;
 }                                // GetBuckets

 // Allocates and initializes a histogram (BUCKETS) structure used to test
 // whether SampleCount samples follow Distribution.
 //   Distribution  selects the density function used to size the buckets
 //   SampleCount   number of samples the expected counts are scaled for
 //   Confidence    passed to ComputeChiSquared to obtain the ChiSquared limit
 // The Bucket[] table maps each of the BUCKETTABLESIZE table positions to a
 // histogram bucket, chosen so that all buckets have approximately equal
 // probability; ExpectedCount[] holds the expected number of samples in each
 // bucket.  The structure and its arrays are allocated with Emalloc.
 BUCKETS *MakeBuckets(DISTRIBUTION Distribution,
                      uinT32 SampleCount,
                      FLOAT64 Confidence) {
   // density functions, indexed by the integer value of Distribution
   const DENSITYFUNC DensityFunction[] =
     { NormalDensity, UniformDensity, UniformDensity };
   int i, j;
   BUCKETS *Buckets;
   FLOAT64 BucketProbability;
   FLOAT64 NextBucketBoundary;
   FLOAT64 Probability;
   FLOAT64 ProbabilityDelta;
   FLOAT64 LastProbDensity;
   FLOAT64 ProbDensity;
   uinT16 CurrentBucket;
   BOOL8 Symmetrical;

   // allocate memory needed for data structure
   Buckets = static_cast<BUCKETS*>(Emalloc(sizeof(BUCKETS)));
   Buckets->NumberOfBuckets = OptimumNumberOfBuckets(SampleCount);
   Buckets->SampleCount = SampleCount;
   Buckets->Confidence = Confidence;
   Buckets->Count = static_cast<uinT32*>(
       Emalloc(Buckets->NumberOfBuckets * sizeof(uinT32)));
   Buckets->ExpectedCount = static_cast<FLOAT32*>(
       Emalloc(Buckets->NumberOfBuckets * sizeof(FLOAT32)));

   // initialize simple fields
   Buckets->Distribution = Distribution;
   for (i = 0; i < Buckets->NumberOfBuckets; i++) {
     Buckets->Count[i] = 0;
     Buckets->ExpectedCount[i] = 0.0;
   }

   // all currently defined distributions are symmetrical
   Symmetrical = TRUE;
   Buckets->ChiSquared = ComputeChiSquared(
       DegreesOfFreedom(Distribution, Buckets->NumberOfBuckets), Confidence);

   if (Symmetrical) {
     // allocate buckets so that all have approx. equal probability
     BucketProbability = 1.0 / (FLOAT64) (Buckets->NumberOfBuckets);

     // distribution is symmetric so fill in upper half then copy
     // with an odd bucket count the middle bucket straddles the center, so
     // only half of its probability lies in the upper half
     CurrentBucket = Buckets->NumberOfBuckets / 2;
     if (Odd (Buckets->NumberOfBuckets))
       NextBucketBoundary = BucketProbability / 2;
     else
       NextBucketBoundary = BucketProbability;

     // integrate the density over the upper half of the table one position
     // at a time, moving to the next bucket whenever the accumulated
     // probability passes the current bucket boundary
     Probability = 0.0;
     LastProbDensity =
       (*DensityFunction[(int) Distribution]) (BUCKETTABLESIZE / 2);
     for (i = BUCKETTABLESIZE / 2; i < BUCKETTABLESIZE; i++) {
       ProbDensity = (*DensityFunction[(int) Distribution]) (i + 1);
       ProbabilityDelta = Integral (LastProbDensity, ProbDensity, 1.0);
       Probability += ProbabilityDelta;
       if (Probability > NextBucketBoundary) {
         // never advance past the last bucket
         if (CurrentBucket < Buckets->NumberOfBuckets - 1)
           CurrentBucket++;
         NextBucketBoundary += BucketProbability;
       }
       Buckets->Bucket[i] = CurrentBucket;
       Buckets->ExpectedCount[CurrentBucket] +=
         (FLOAT32) (ProbabilityDelta * SampleCount);
       LastProbDensity = ProbDensity;
     }
     // place any leftover probability into the last bucket
     // (the upper half is taken to hold a total probability of 0.5)
     Buckets->ExpectedCount[CurrentBucket] +=
       (FLOAT32) ((0.5 - Probability) * SampleCount);

     // copy upper half of distribution to lower half
     for (i = 0, j = BUCKETTABLESIZE - 1; i < j; i++, j--)
       Buckets->Bucket[i] =
         Mirror(Buckets->Bucket[j], Buckets->NumberOfBuckets);

     // copy upper half of expected counts to lower half
     // when i == j (odd bucket count) the middle bucket's count is doubled
     for (i = 0, j = Buckets->NumberOfBuckets - 1; i <= j; i++, j--)
       Buckets->ExpectedCount[i] += Buckets->ExpectedCount[j];
   }
   return Buckets;
 }                                // MakeBuckets

 // Returns the number of histogram buckets to use for SampleCount samples,
 // by linear interpolation between the entries of kCountTable and
 // kBucketsTable.  Counts below the first table entry map to the first
 // bucket entry; counts above the last map to the last.
 uinT16 OptimumNumberOfBuckets(uinT32 SampleCount) {
   if (SampleCount < kCountTable[0])
     return kBucketsTable[0];

   uinT8 Lower = 0;
   uinT8 Upper = 1;
   while (Upper < LOOKUPTABLESIZE) {
     if (SampleCount <= kCountTable[Upper]) {
       // interpolate between the two surrounding table entries
       FLOAT32 Slope = (FLOAT32) (kBucketsTable[Upper] - kBucketsTable[Lower]) /
           (FLOAT32) (kCountTable[Upper] - kCountTable[Lower]);
       return ((uinT16) (kBucketsTable[Lower] +
           Slope * (SampleCount - kCountTable[Lower])));
     }
     Lower++;
     Upper++;
   }
   // beyond the end of the table
   return kBucketsTable[Lower];
 }                                // OptimumNumberOfBuckets

 // Returns the chi-squared value for the given degrees of freedom and Alpha.
 // Alpha is clipped to [MINALPHA, 1.0] and an odd DegreesOfFreedom is rounded
 // up to the next even value.  Results are kept in a function-static table of
 // lists, one list per degrees-of-freedom value, so each (DegreesOfFreedom,
 // Alpha) pair is solved only once.
 FLOAT64
 ComputeChiSquared (uinT16 DegreesOfFreedom, FLOAT64 Alpha)
 #define CHIACCURACY     0.01
 #define MINALPHA  (1e-200)
 {
   static LIST ChiWith[MAXDEGREESOFFREEDOM + 1];

   // limit the minimum alpha that can be used - if alpha is too small
   //      it may not be possible to compute chi-squared.
   Alpha = ClipToRange(Alpha, MINALPHA, 1.0);
   if (Odd (DegreesOfFreedom))
     DegreesOfFreedom++;

   // look for a previously computed value with the same alpha
   CHISTRUCT Key;
   Key.Alpha = Alpha;
   CHISTRUCT *Known = (CHISTRUCT *) first_node (search (ChiWith[DegreesOfFreedom],
     &Key, AlphaMatch));
   if (Known != NULL) {
     // further optimization might move Known to front of list
     return (Known->ChiSquared);
   }

   // not computed yet: solve for it and remember the result
   CHISTRUCT *Fresh = NewChiStruct (DegreesOfFreedom, Alpha);
   Fresh->ChiSquared = Solve (ChiArea, Fresh,
     (FLOAT64) DegreesOfFreedom,
     (FLOAT64) CHIACCURACY);
   ChiWith[DegreesOfFreedom] = push (ChiWith[DegreesOfFreedom],
     Fresh);
   return (Fresh->ChiSquared);

 }                                // ComputeChiSquared

 // Evaluates, at x, the normal density described by kNormalMean,
 // kNormalVariance and kNormalMagnitude.
 FLOAT64 NormalDensity(inT32 x) {
   const FLOAT64 FromMean = x - kNormalMean;
   const FLOAT64 Exponent = -0.5 * FromMean * FromMean / kNormalVariance;
   return kNormalMagnitude * exp(Exponent);
 }                                // NormalDensity

 // Evaluates a uniform density at x: 1 / BUCKETTABLESIZE for x within
 // [0, BUCKETTABLESIZE] (both ends included), and 0 everywhere else.
 FLOAT64 UniformDensity(inT32 x) {
   static FLOAT64 UniformDistributionDensity = (FLOAT64) 1.0 / BUCKETTABLESIZE;

   const bool InsideTable = (x >= 0.0) && (x <= BUCKETTABLESIZE);
   return InsideTable ? UniformDistributionDensity : (FLOAT64) 0.0;
 }                                // UniformDensity

 // Trapezoid-rule area between two function values f1 and f2 that are Dx
 // apart: (f1 + f2) * Dx / 2.
 FLOAT64 Integral(FLOAT64 f1, FLOAT64 f2, FLOAT64 Dx) {
   const FLOAT64 SumOfHeights = f1 + f2;
   return SumOfHeights * Dx / 2.0;
 }                                // Integral

 // Clears the histogram in Buckets and then counts every sample of Cluster
 // into it, using the sample's value in dimension Dim.
 //   ParamDesc  description of dimension Dim (passed on to the bucket mappers)
 //   Mean       center used to place the samples
 //   StdDev     spread used to place the samples; 0 selects a pseudo-analysis
 void FillBuckets(BUCKETS *Buckets,
                  CLUSTER *Cluster,
                  uinT16 Dim,
                  PARAM_DESC *ParamDesc,
                  FLOAT32 Mean,
                  FLOAT32 StdDev) {
   LIST SearchState;
   SAMPLE *Sample;

   // initialize the histogram bucket counts to 0
   for (int slot = 0; slot < Buckets->NumberOfBuckets; slot++)
     Buckets->Count[slot] = 0;

   InitSampleSearch(SearchState, Cluster);

   if (StdDev == 0.0) {
     /* if the standard deviation is zero, then we can't statistically
        analyze the cluster.  Use a pseudo-analysis: samples exactly on
        the mean are distributed evenly across all buckets.  Samples greater
        than the mean are placed in the last bucket; samples less than the
        mean are placed in the first bucket. */
     int RoundRobin = 0;
     while ((Sample = NextSample (&SearchState)) != NULL) {
       uinT16 Target;
       if (Sample->Mean[Dim] > Mean)
         Target = Buckets->NumberOfBuckets - 1;
       else if (Sample->Mean[Dim] < Mean)
         Target = 0;
       else
         Target = RoundRobin;
       Buckets->Count[Target] += 1;

       // the round-robin position advances for every sample
       RoundRobin++;
       if (RoundRobin >= Buckets->NumberOfBuckets)
         RoundRobin = 0;
     }
     return;
   }

   // search for all samples in the cluster and add to histogram buckets
   while ((Sample = NextSample (&SearchState)) != NULL) {
     // table position for this sample; 0 for an unknown distribution
     uinT16 TablePos = 0;
     if (Buckets->Distribution == normal) {
       TablePos = NormalBucket (ParamDesc, Sample->Mean[Dim],
         Mean, StdDev);
     }
     else if (Buckets->Distribution == D_random ||
              Buckets->Distribution == uniform) {
       TablePos = UniformBucket (ParamDesc, Sample->Mean[Dim],
         Mean, StdDev);
     }
     // translate the table position into a histogram bucket
     Buckets->Count[Buckets->Bucket[TablePos]] += 1;
   }
 }                                // FillBuckets

 // Maps sample value x to a position in the bucket table for a normal
 // distribution with the given Mean and StdDev.  The normalized offset is
 // rescaled with kNormalStdDev / kNormalMean and the result is limited to
 // [0, BUCKETTABLESIZE - 1].  For circular parameters x is first shifted by
 // one Range if it lies more than HalfRange away from Mean.
 uinT16 NormalBucket(PARAM_DESC *ParamDesc,
                     FLOAT32 x,
                     FLOAT32 Mean,
                     FLOAT32 StdDev) {
   // wraparound circular parameters if necessary
   if (ParamDesc->Circular) {
     if (x - Mean > ParamDesc->HalfRange)
       x -= ParamDesc->Range;
     else if (x - Mean < -ParamDesc->HalfRange)
       x += ParamDesc->Range;
   }

   const FLOAT32 TablePos = ((x - Mean) / StdDev) * kNormalStdDev + kNormalMean;

   uinT16 Slot;
   if (TablePos < 0)
     Slot = 0;
   else if (TablePos > BUCKETTABLESIZE - 1)
     Slot = (uinT16) (BUCKETTABLESIZE - 1);
   else
     Slot = (uinT16) floor((FLOAT64) TablePos);
   return Slot;
 }                                // NormalBucket

 // Maps sample value x to a position in the bucket table for a uniform
 // distribution centered on Mean, where 2 * StdDev spans the whole table.
 // The result is limited to [0, BUCKETTABLESIZE - 1].  For circular
 // parameters x is first shifted by one Range if it lies more than HalfRange
 // away from Mean.
 uinT16 UniformBucket(PARAM_DESC *ParamDesc,
                      FLOAT32 x,
                      FLOAT32 Mean,
                      FLOAT32 StdDev) {
   // wraparound circular parameters if necessary
   if (ParamDesc->Circular) {
     if (x - Mean > ParamDesc->HalfRange)
       x -= ParamDesc->Range;
     else if (x - Mean < -ParamDesc->HalfRange)
       x += ParamDesc->Range;
   }

   const FLOAT32 TablePos =
     ((x - Mean) / (2 * StdDev) * BUCKETTABLESIZE + BUCKETTABLESIZE / 2.0);

   uinT16 Slot;
   if (TablePos < 0)
     Slot = 0;
   else if (TablePos > BUCKETTABLESIZE - 1)
     Slot = (uinT16) (BUCKETTABLESIZE - 1);
   else
     Slot = (uinT16) floor((FLOAT64) TablePos);
   return Slot;
 }                                // UniformBucket

 // Compares the observed bucket counts with the expected counts by summing
 // (observed - expected)^2 / expected over all buckets.  Returns FALSE when
 // that sum exceeds Buckets->ChiSquared, TRUE otherwise.
 BOOL8 DistributionOK(BUCKETS *Buckets) {
   FLOAT32 ChiStatistic = 0.0;

   // compute how well the histogram matches the expected histogram
   for (int slot = 0; slot < Buckets->NumberOfBuckets; slot++) {
     const FLOAT32 Deviation =
       Buckets->Count[slot] - Buckets->ExpectedCount[slot];
     ChiStatistic += (Deviation * Deviation) /
       Buckets->ExpectedCount[slot];
   }

   // test to see if the difference is more than expected
   return (ChiStatistic > Buckets->ChiSquared) ? FALSE : TRUE;
 }                                // DistributionOK

 // Releases a STATISTICS structure: its CoVariance, Min and Max arrays
 // first, then the structure itself.  Statistics must not be NULL.
 void FreeStatistics(STATISTICS *Statistics) {
   FLOAT32 *Matrix = Statistics->CoVariance;
   FLOAT32 *Lows = Statistics->Min;
   FLOAT32 *Highs = Statistics->Max;

   free(Matrix);
   free(Lows);
   free(Highs);
   free(Statistics);
 }                                // FreeStatistics

 // Releases a BUCKETS structure with Efree: the Count and ExpectedCount
 // arrays first, then the structure itself.
 void FreeBuckets(BUCKETS *buckets) {
   uinT32 *counts = buckets->Count;
   FLOAT32 *expected = buckets->ExpectedCount;

   Efree(counts);
   Efree(expected);
   Efree(buckets);
 }                                // FreeBuckets

 // Recursively frees Cluster and every cluster below it (left subtree, right
 // subtree, then the node itself).  A NULL Cluster is ignored.
 void FreeCluster(CLUSTER *Cluster) {
   if (Cluster == NULL)
     return;

   FreeCluster (Cluster->Left);
   FreeCluster (Cluster->Right);
   free(Cluster);
 }                                // FreeCluster

 // Returns the degrees of freedom to use for a histogram of HistogramBuckets
 // buckets: the bucket count minus a per-distribution offset taken from
 // DegreeOffsets, rounded up to the next even number when odd.
 uinT16 DegreesOfFreedom(DISTRIBUTION Distribution, uinT16 HistogramBuckets) {
   static uinT8 DegreeOffsets[] = { 3, 3, 1 };

   uinT16 Degrees = HistogramBuckets - DegreeOffsets[(int) Distribution];
   if (Odd (Degrees))
     Degrees++;                   // make the result even
   return (Degrees);

 }                                // DegreesOfFreedom

 int NumBucketsMatch(void *arg1,    // BUCKETS *Histogram,
                     void *arg2) {  // uinT16 *DesiredNumberOfBuckets)
   BUCKETS *Histogram = (BUCKETS *) arg1;
   uinT16 *DesiredNumberOfBuckets = (uinT16 *) arg2;

   return (*DesiredNumberOfBuckets == Histogram->NumberOfBuckets);

 }                                // NumBucketsMatch

 // List-search predicate: returns 1 when the list node and the key are the
 // very same pointer, 0 otherwise.
 int ListEntryMatch(void *arg1,    //ListNode
                    void *arg2) {  //Key
   if (arg1 == arg2)
     return 1;
   return 0;

 }                                // ListEntryMatch

 // Rescales the expected counts of Buckets from its current SampleCount to
 // NewSampleCount and records the new sample count.
 void AdjustBuckets(BUCKETS *Buckets, uinT32 NewSampleCount) {
   // ratio of new to old sample count
   const FLOAT64 Scale = (((FLOAT64) NewSampleCount) /
     ((FLOAT64) Buckets->SampleCount));

   int slot = 0;
   while (slot < Buckets->NumberOfBuckets) {
     Buckets->ExpectedCount[slot] *= Scale;
     slot++;
   }

   Buckets->SampleCount = NewSampleCount;

 }                                // AdjustBuckets

 // Resets every observed count in Buckets to zero; the expected counts are
 // left untouched.
 void InitBuckets(BUCKETS *Buckets) {
   int slot = 0;

   while (slot < Buckets->NumberOfBuckets) {
     Buckets->Count[slot] = 0;
     slot++;
   }

 }                                // InitBuckets

 int AlphaMatch(void *arg1,    //CHISTRUCT                             *ChiStruct,
                void *arg2) {  //CHISTRUCT                             *SearchKey)
   CHISTRUCT *ChiStruct = (CHISTRUCT *) arg1;
   CHISTRUCT *SearchKey = (CHISTRUCT *) arg2;

   return (ChiStruct->Alpha == SearchKey->Alpha);

 }                                // AlphaMatch

 // Allocates a CHISTRUCT with Emalloc and fills in its two search fields.
 //   DegreesOfFreedom - value stored in the DegreesOfFreedom field
 //   Alpha            - value stored in the Alpha field
 // Returns the newly allocated structure. Any other field of CHISTRUCT is
 // left unset by this routine.
 CHISTRUCT *NewChiStruct(uinT16 DegreesOfFreedom, FLOAT64 Alpha) {
   CHISTRUCT *Fresh = (CHISTRUCT *) Emalloc (sizeof (CHISTRUCT));

   Fresh->DegreesOfFreedom = DegreesOfFreedom;
   Fresh->Alpha = Alpha;

   return Fresh;
 }                                // NewChiStruct

 // Initial spacing used for the finite-difference slope estimate.
 #define INITIALDELTA    0.1
 // Fraction of the last step that the slope spacing is allowed to shrink to.
 #define  DELTARATIO     0.1

 // Iteratively searches for an x at which Function(FunctionParams, x) is zero.
 //   Function       - function whose root is sought; it is invoked with
 //                    FunctionParams cast to CHISTRUCT*
 //   FunctionParams - opaque parameter block forwarded to Function
 //   InitialGuess   - starting value of x
 //   Accuracy       - the loop ends once the most recent x with f < 0 and the
 //                    most recent x with f >= 0 lie within this distance
 // Returns the final estimate of x.
 // Each pass estimates the slope with a forward difference and takes the step
 // x -= f / slope.
 // NOTE(review): there is no iteration cap and no guard against a zero slope.
 FLOAT64
 Solve (SOLVEFUNC Function,
 void *FunctionParams, FLOAT64 InitialGuess, FLOAT64 Accuracy)
 {
   FLOAT64 Guess = InitialGuess;
   FLOAT64 Spacing = INITIALDELTA;

   // Latest guesses that produced a non-negative / negative function value;
   // they start maximally far apart so the loop always runs at least once.
   FLOAT64 NonNegativeX = MAX_FLOAT32;
   FLOAT64 NegativeX = -MAX_FLOAT32;

   FLOAT64 Value = (*Function) ((CHISTRUCT *) FunctionParams, Guess);

   while (Abs (NonNegativeX - NegativeX) > Accuracy) {
     // remember on which side of the root the current guess sits
     if (f_is_negative_placeholder: false) {}
   }
   return (Guess);

 }                                // Solve

 // Evaluates  exp(-x/2) * sum_{i=0..N} x^i / (2^i * i!)  -  Alpha,
 // with N = DegreesOfFreedom / 2 - 1 (integer division).
 //   ChiParams - supplies DegreesOfFreedom and Alpha
 //   x         - point at which the expression is evaluated
 // Returns the series value minus ChiParams->Alpha, so the result is zero
 // where the series equals Alpha. For an even DegreesOfFreedom the series is
 // the closed form of the chi-squared upper-tail probability at x.
 FLOAT64 ChiArea(CHISTRUCT *ChiParams, FLOAT64 x) {
   const int LastTerm = ChiParams->DegreesOfFreedom / 2 - 1;

   FLOAT64 Sum = 1;        // the i = 0 term
   FLOAT64 Divisor = 1;    // running 2^i * i!
   FLOAT64 XPower = 1;     // running x^i

   for (int Term = 1; Term <= LastTerm; ++Term) {
     Divisor *= 2 * Term;
     XPower *= x;
     Sum += XPower / Divisor;
   }

   return ((Sum * exp (-0.5 * x)) - ChiParams->Alpha);
 }                                // ChiArea

 // Flag value marking a character that has already been counted as illegal.
 #define ILLEGAL_CHAR    2

 // Checks whether a cluster contains too many characters that contribute more
 // than one sample.
 //   Clusterer  - supplies NumChar, the number of per-character flags needed
 //   Cluster    - cluster whose samples are walked
 //   MaxIllegal - largest tolerated ratio of repeated characters to the
 //                running count of distinct characters
 // Returns TRUE as soon as the ratio exceeds MaxIllegal, FALSE if the whole
 // cluster is walked without exceeding it.
 // The flag buffer is kept in function-local statics and only ever grows, so
 // it is reused across calls.
 BOOL8
 MultipleCharSamples (CLUSTERER * Clusterer,
 CLUSTER * Cluster, FLOAT32 MaxIllegal)
 {
   static BOOL8 *SeenFlags = NULL;
   static inT32 FlagCapacity = 0;

   // start by assuming every sample belongs to a different character
   inT32 DistinctChars = Cluster->SampleCount;
   inT32 RepeatedChars = 0;

   // enlarge the reusable flag buffer when this clusterer needs more entries
   if (FlagCapacity < Clusterer->NumChar) {
     free(SeenFlags);
     FlagCapacity = Clusterer->NumChar;
     SeenFlags = (BOOL8 *) Emalloc (FlagCapacity * sizeof (BOOL8));
   }

   for (int Slot = 0; Slot < FlagCapacity; ++Slot)
     SeenFlags[Slot] = FALSE;

   // visit every sample in the cluster, tracking which characters repeat
   LIST SearchState;
   SAMPLE *Sample;
   InitSampleSearch(SearchState, Cluster);
   while ((Sample = NextSample (&SearchState)) != NULL) {
     const inT32 CharID = Sample->CharID;

     // first sighting of this character: just remember it
     if (SeenFlags[CharID] == FALSE) {
       SeenFlags[CharID] = TRUE;
       continue;
     }

     // second sighting: the character becomes illegal, counted only once
     if (SeenFlags[CharID] == TRUE) {
       RepeatedChars++;
       SeenFlags[CharID] = ILLEGAL_CHAR;
     }

     // every repeated sample reduces the number of distinct characters
     DistinctChars--;
     const FLOAT32 IllegalRatio = (FLOAT32) RepeatedChars / DistinctChars;
     if (IllegalRatio > MaxIllegal) {
       // abandon the walk early; release what is left of the search list
       destroy(SearchState);
       return (TRUE);
     }
   }
   return (FALSE);

 }                                // MultipleCharSamples

 // Inverts a square matrix by Gaussian elimination with row pivoting.
 //   input - size*size matrix, row-major (element [r][c] at input[r*size + c])
 //   size  - number of rows and columns
 //   inv   - receives the size*size inverse, row-major
 // Returns the sum of the absolute values of the off-diagonal elements of
 // input * inv, i.e. a measure of how far the product is from diagonal.
 // NOTE(review): a zero pivot is not detected; the divisions below are
 // performed unconditionally.
 double InvertMatrix(const float* input, int size, float* inv) {
   // Working storage for the elimination.
   GENERIC_2D_ARRAY<double> U(size, size, 0.0);
   GENERIC_2D_ARRAY<double> U_inv(size, size, 0.0);
   GENERIC_2D_ARRAY<double> L(size, size, 0.0);

   // U starts as a copy of input, L as the identity and U_inv as all zeros.
   for (int r = 0; r < size; ++r) {
     for (int c = 0; c < size; ++c) {
       U[r][c] = input[r*size + c];
       L[r][c] = (r == c) ? 1.0 : 0.0;
       U_inv[r][c] = 0.0;
     }
   }

   // Reduce U to upper-triangular form one column at a time. L receives the
   // same row swaps and row combinations, so it accumulates the transform
   // that maps input onto U.
   for (int pivot = 0; pivot < size; ++pivot) {
     // Choose the row at or below the diagonal with the largest magnitude
     // entry in this column.
     int pivot_row = 0;
     double pivot_size = -1.0;
     for (int r = pivot; r < size; ++r) {
       const double magnitude = Abs(U[r][pivot]);
       if (magnitude > pivot_size) {
         pivot_size = magnitude;
         pivot_row = r;
       }
     }

     // Bring the chosen row onto the diagonal, in both U and L.
     if (pivot_row != pivot) {
       for (int k = 0; k < size; ++k) {
         const double u_hold = U[pivot_row][k];
         U[pivot_row][k] = U[pivot][k];
         U[pivot][k] = u_hold;
         const double l_hold = L[pivot_row][k];
         L[pivot_row][k] = L[pivot][k];
         L[pivot][k] = l_hold;
       }
     }

     // Cancel this column in every row below the diagonal.
     for (int r = pivot + 1; r < size; ++r) {
       const double ratio = -U[r][pivot] / U[pivot][pivot];
       for (int j = pivot; j < size; ++j) {
         U[r][j] += U[pivot][j] * ratio;
       }
       for (int k = 0; k < size; ++k) {
         L[r][k] += L[pivot][k] * ratio;
       }
     }
   }

   // Invert the upper-triangular U column by column, working upwards from
   // the diagonal.
   for (int c = 0; c < size; ++c) {
     U_inv[c][c] = 1.0 / U[c][c];
     for (int r = c - 1; r >= 0; --r) {
       double acc = 0.0;
       for (int k = c; k > r; --k) {
         acc += U[r][k] * U_inv[k][c];
       }
       U_inv[r][c] = -acc / U[r][r];
     }
   }

   // The inverse is U_inv * L. Only k >= r is summed because entries of
   // U_inv below the diagonal were never written and remain zero.
   for (int r = 0; r < size; ++r) {
     for (int c = 0; c < size; ++c) {
       double dot = 0.0;
       for (int k = r; k < size; ++k) {
         dot += U_inv[r][k] * L[k][c];
       }
       inv[r*size + c] = dot;
     }
   }

   // Multiply input by the computed inverse and total the magnitude of
   // everything off the diagonal.
   double off_diagonal_error = 0.0;
   for (int r = 0; r < size; ++r) {
     for (int c = 0; c < size; ++c) {
       double dot = 0.0;
       for (int k = 0; k < size; ++k) {
         dot += input[r*size + k] * inv[k *size + c];
       }
       if (r != c) {
         off_diagonal_error += Abs(dot);
       }
     }
   }
   return off_diagonal_error;
 }
CLUSTERER::ProtoList
LIST ProtoList
Definition: cluster.h:92

kdpair.h

CLUSTERCONFIG::ProtoStyle
PROTOSTYLE ProtoStyle
Definition: cluster.h:49

ClusteringContext::heap
ClusterHeap * heap
Definition: cluster.cpp:196

CLUSTERER::bucket_cache
BUCKETS * bucket_cache[DISTRIBUTION_COUNT][MAXBUCKETS+1 - MINBUCKETS]
Definition: cluster.h:95

Mirror
#define Mirror(N, R)
Definition: cluster.cpp:206

helpers.h

MINSAMPLES
#define MINSAMPLES
Definition: cluster.cpp:150

PROTOTYPE::Distrib
DISTRIBUTION * Distrib
Definition: cluster.h:77

PROTOTYPE::LogMagnitude
FLOAT32 LogMagnitude
Definition: cluster.h:80

spherical
Definition: cluster.h:45

MAXBUCKETS
#define MAXBUCKETS
Definition: cluster.h:27

InitBuckets
void InitBuckets(BUCKETS *Buckets)
Definition: cluster.cpp:2283

sample::Clustered
unsigned Clustered
Definition: cluster.h:33

Odd
#define Odd(N)
Definition: cluster.cpp:205

KDWalk
void KDWalk(KDTREE *Tree, void_proc action, void *context)
Definition: kdtree.cpp:330

MakeDimUniform
void MakeDimUniform(uinT16 i, PROTOTYPE *Proto, STATISTICS *Statistics)
Definition: cluster.cpp:1376

danerror.h

FillBuckets
void FillBuckets(BUCKETS *Buckets, CLUSTER *Cluster, uinT16 Dim, PARAM_DESC *ParamDesc, FLOAT32 Mean, FLOAT32 StdDev)
Definition: cluster.cpp:1984

list_rec
Definition: oldlist.h:127

KDTREE
Definition: kdtree.h:49

KDStore
void KDStore(KDTREE *Tree, FLOAT32 *Key, void *Data)
Definition: kdtree.cpp:218

tesseract::GenericHeap::Push
void Push(Pair *entry)
Definition: genericheap.h:95

STATISTICS::CoVariance
FLOAT32 * CoVariance
Definition: cluster.cpp:172

TRUE
#define TRUE
Definition: capi.h:45

FLOATUNION::Elliptical
FLOAT32 * Elliptical
Definition: cluster.h:64

StandardDeviation
FLOAT32 StandardDeviation(PROTOTYPE *Proto, uinT16 Dimension)
Definition: cluster.cpp:657

MakePotentialClusters
void MakePotentialClusters(ClusteringContext *context, CLUSTER *Cluster, inT32 Level)
Definition: cluster.cpp:765

MINALPHA
#define MINALPHA

inT32
int32_t inT32
Definition: host.h:38

AdjustBuckets
void AdjustBuckets(BUCKETS *Buckets, uinT32 NewSampleCount)
Definition: cluster.cpp:2260

MakeBuckets
BUCKETS * MakeBuckets(DISTRIBUTION Distribution, uinT32 SampleCount, FLOAT64 Confidence)
Definition: cluster.cpp:1735

INITIALDELTA
#define INITIALDELTA

DENSITYFUNC
FLOAT64(* DENSITYFUNC)(inT32)
Definition: cluster.cpp:202

destroy
LIST destroy(LIST list)
Definition: oldlist.cpp:182

Abs
#define Abs(N)
Definition: cluster.cpp:207

MakeEllipticalProto
PROTOTYPE * MakeEllipticalProto(CLUSTERER *Clusterer, CLUSTER *Cluster, STATISTICS *Statistics, BUCKETS *Buckets)
Definition: cluster.cpp:1249

sample::Left
struct sample * Left
Definition: cluster.h:36

LOOKUPTABLESIZE
#define LOOKUPTABLESIZE
Definition: cluster.cpp:227

CHISTRUCT::ChiSquared
FLOAT64 ChiSquared
Definition: cluster.cpp:191

MakeKDTree
KDTREE * MakeKDTree(inT16 KeySize, const PARAM_DESC KeyDesc[])
Definition: kdtree.cpp:182

MINSAMPLESNEEDED
#define MINSAMPLESNEEDED
Definition: cluster.cpp:151

tesseract::KDPair::key
Key key
Definition: kdpair.h:46

ClusteringContext::next
inT32 next
Definition: cluster.cpp:199

PARAM_DESC
Definition: ocrfeatures.h:46

UniformDensity
FLOAT64 UniformDensity(inT32 x)
Definition: cluster.cpp:1939

MakeDegenerateProto
PROTOTYPE * MakeDegenerateProto(uinT16 N, CLUSTER *Cluster, STATISTICS *Statistics, PROTOSTYLE Style, inT32 MinSamples)
Definition: cluster.cpp:1063

size
voidpf void uLong size
Definition: ioapi.h:39

Integral
FLOAT64 Integral(FLOAT64 f1, FLOAT64 f2, FLOAT64 Dx)
Definition: cluster.cpp:1958

sample::Mean
FLOAT32 Mean[1]
Definition: cluster.h:39

PROTOTYPE::Merged
unsigned Merged
Definition: cluster.h:69

PARAM_DESC::MidRange
FLOAT32 MidRange
Definition: ocrfeatures.h:53

Emalloc
void * Emalloc(int Size)
Definition: emalloc.cpp:47

OptimumNumberOfBuckets
uinT16 OptimumNumberOfBuckets(uinT32 SampleCount)
Definition: cluster.cpp:1832

NumBucketsMatch
int NumBucketsMatch(void *arg1, void *arg2)
Definition: cluster.cpp:2227

PARAM_DESC::Range
FLOAT32 Range
Definition: ocrfeatures.h:51

MakeDimRandom
void MakeDimRandom(uinT16 i, PROTOTYPE *Proto, PARAM_DESC *ParamDesc)
Definition: cluster.cpp:1352

MINBUCKETS
#define MINBUCKETS
Definition: cluster.h:26

MakeMixedProto
PROTOTYPE * MakeMixedProto(CLUSTERER *Clusterer, CLUSTER *Cluster, STATISTICS *Statistics, BUCKETS *NormalBuckets, FLOAT64 Confidence)
Definition: cluster.cpp:1291

tprintf
#define tprintf(...)
Definition: tprintf.h:31

DistributionOK
BOOL8 DistributionOK(BUCKETS *Buckets)
Definition: cluster.cpp:2125

ClusterSamples
LIST ClusterSamples(CLUSTERER *Clusterer, CLUSTERCONFIG *Config)
Definition: cluster.cpp:512

CLUSTERER::ParamDesc
PARAM_DESC * ParamDesc
Definition: cluster.h:88

MakeNewCluster
CLUSTER * MakeNewCluster(CLUSTERER *Clusterer, TEMPCLUSTER *TempCluster)
Definition: cluster.cpp:836

MAXDISTANCE
#define MAXDISTANCE

CLUSTERCONFIG::MinSamples
FLOAT32 MinSamples
Definition: cluster.h:50

BUCKETS::Confidence
FLOAT64 Confidence
Definition: cluster.cpp:180

BUCKETS::Count
uinT32 * Count
Definition: cluster.cpp:184

tesseract::KDPair::data
Data data
Definition: kdpair.h:45

CreateClusterTree
void CreateClusterTree(CLUSTERER *Clusterer)
Definition: cluster.cpp:698

automatic
Definition: cluster.h:45

KDNearestNeighborSearch
void KDNearestNeighborSearch(KDTREE *Tree, FLOAT32 Query[], int QuerySize, FLOAT32 MaxDistance, int *NumberOfResults, void **NBuffer, FLOAT32 DBuffer[])
Definition: kdtree.cpp:320

NIL_LIST
#define NIL_LIST
Definition: oldlist.h:126

CLUSTERCONFIG::MaxIllegal
FLOAT32 MaxIllegal
Definition: cluster.h:51

Config
CLUSTERCONFIG Config
Definition: commontraining.cpp:50

BUCKETS::Distribution
DISTRIBUTION Distribution
Definition: cluster.cpp:178

Solve
FLOAT64 Solve(SOLVEFUNC Function, void *FunctionParams, FLOAT64 InitialGuess, FLOAT64 Accuracy)
Definition: cluster.cpp:2352

PROTOTYPE::Weight
FLOATUNION Weight
Definition: cluster.h:83

PARAM_DESC::Circular
inT8 Circular
Definition: ocrfeatures.h:47

elliptical
Definition: cluster.h:45

InitSampleSearch
#define InitSampleSearch(S, C)
Definition: cluster.h:105

pop
LIST pop(LIST list)
Definition: oldlist.cpp:299

PARAM_DESC::Min
FLOAT32 Min
Definition: ocrfeatures.h:49

inT16
int16_t inT16
Definition: host.h:36

BUCKETTABLESIZE
#define BUCKETTABLESIZE
Definition: cluster.cpp:159

CLUSTERCONFIG::Confidence
FLOAT64 Confidence
Definition: cluster.h:54

MakeSphericalProto
PROTOTYPE * MakeSphericalProto(CLUSTERER *Clusterer, CLUSTER *Cluster, STATISTICS *Statistics, BUCKETS *Buckets)
Definition: cluster.cpp:1212

CLUSTERCONFIG::MagicSamples
int MagicSamples
Definition: cluster.h:55

MakePrototype
PROTOTYPE * MakePrototype(CLUSTERER *Clusterer, CLUSTERCONFIG *Config, CLUSTER *Cluster)
Definition: cluster.cpp:969

D_random
Definition: cluster.h:59

NewSimpleProto
PROTOTYPE * NewSimpleProto(inT16 N, CLUSTER *Cluster)
Definition: cluster.cpp:1600

ASSERT_HOST
#define ASSERT_HOST(x)
Definition: errcode.h:84

CHISTRUCT::DegreesOfFreedom
uinT16 DegreesOfFreedom
Definition: cluster.cpp:189

PROTOTYPE::Variance
FLOATUNION Variance
Definition: cluster.h:81

MakeClusterer
CLUSTERER * MakeClusterer(inT16 SampleSize, const PARAM_DESC ParamDesc[])
Definition: cluster.cpp:399

CLUSTERER::NumberOfSamples
inT32 NumberOfSamples
Definition: cluster.h:89

const.h

NewSphericalProto
PROTOTYPE * NewSphericalProto(uinT16 N, CLUSTER *Cluster, STATISTICS *Statistics)
Definition: cluster.cpp:1499

Mean
FLOAT32 Mean(PROTOTYPE *Proto, uinT16 Dimension)
Definition: cluster.cpp:644

MINVARIANCE
#define MINVARIANCE
Definition: cluster.cpp:141

TEMPCLUSTER::Neighbor
CLUSTER * Neighbor
Definition: cluster.cpp:164

CLUSTERER::SampleSize
inT16 SampleSize
Definition: cluster.h:87

ListEntryMatch
int ListEntryMatch(void *arg1, void *arg2)
Definition: cluster.cpp:2244

search
LIST search(LIST list, void *key, int_compare is_equal)
Definition: oldlist.cpp:406

PARAM_DESC::NonEssential
inT8 NonEssential
Definition: ocrfeatures.h:48

TEMPCLUSTER::Cluster
CLUSTER * Cluster
Definition: cluster.cpp:163

DELTARATIO
#define DELTARATIO

ClusteringContext
Definition: cluster.cpp:195

PROTOTYPE::Style
unsigned Style
Definition: cluster.h:74

ClusterPair
tesseract::KDPairInc< float, TEMPCLUSTER * > ClusterPair
Definition: cluster.cpp:167

DISTRIBUTION_COUNT
Definition: cluster.h:59

uinT32
uint32_t uinT32
Definition: host.h:39

PI
#define PI
Definition: const.h:19

GetBuckets
BUCKETS * GetBuckets(CLUSTERER *clusterer, DISTRIBUTION Distribution, uinT32 SampleCount, FLOAT64 Confidence)
Definition: cluster.cpp:1688

genericheap.h

STATISTICS::Min
FLOAT32 * Min
Definition: cluster.cpp:173

BOOL8
unsigned char BOOL8
Definition: host.h:44

sample::CharID
inT32 CharID
Definition: cluster.h:38

FALSE
#define FALSE
Definition: capi.h:46

DISTRIBUTION
DISTRIBUTION
Definition: cluster.h:58

BUCKETS::SampleCount
uinT32 SampleCount
Definition: cluster.cpp:179

ClipToRange
T ClipToRange(const T &x, const T &lower_bound, const T &upper_bound)
Definition: helpers.h:122

CLUSTERER
Definition: cluster.h:86

FreeProtoList
void FreeProtoList(LIST *ProtoList)
Definition: cluster.cpp:573

TestEllipticalProto
PROTOTYPE * TestEllipticalProto(CLUSTERER *Clusterer, CLUSTERCONFIG *Config, CLUSTER *Cluster, STATISTICS *Statistics)
Definition: cluster.cpp:1105

ALREADYCLUSTERED
#define ALREADYCLUSTERED
Definition: cluster.h:133

sample::Right
struct sample * Right
Definition: cluster.h:37

PROTOSTYLE
PROTOSTYLE
Definition: cluster.h:44

MAX_FLOAT32
#define MAX_FLOAT32
Definition: host.h:66

PROTOTYPE::Mean
FLOAT32 * Mean
Definition: cluster.h:78

emalloc.h

GENERIC_2D_ARRAY< double >

SqrtOf2Pi
#define SqrtOf2Pi
Definition: cluster.cpp:217

BUCKETS::ExpectedCount
FLOAT32 * ExpectedCount
Definition: cluster.cpp:185

BUCKETS::NumberOfBuckets
uinT16 NumberOfBuckets
Definition: cluster.cpp:182

STATISTICS::AvgVariance
FLOAT32 AvgVariance
Definition: cluster.cpp:171

DegreesOfFreedom
uinT16 DegreesOfFreedom(DISTRIBUTION Distribution, uinT16 HistogramBuckets)
Definition: cluster.cpp:2205

first_node
#define first_node(l)
Definition: oldlist.h:139

destroy_nodes
void destroy_nodes(LIST list, void_dest destructor)
Definition: oldlist.cpp:199

FreeCluster
void FreeCluster(CLUSTER *Cluster)
Definition: cluster.cpp:2183

STATISTICS
Definition: cluster.cpp:170

NewEllipticalProto
PROTOTYPE * NewEllipticalProto(inT16 N, CLUSTER *Cluster, STATISTICS *Statistics)
Definition: cluster.cpp:1532

CLUSTERCONFIG::Independence
FLOAT32 Independence
Definition: cluster.h:53

CLUSTERER::NumChar
inT32 NumChar
Definition: cluster.h:93

NextSample
CLUSTER * NextSample(LIST *SearchState)
Definition: cluster.cpp:620

FreeClusterer
void FreeClusterer(CLUSTERER *Clusterer)
Definition: cluster.cpp:546

sample::Prototype
unsigned Prototype
Definition: cluster.h:34

FLOAT32
float FLOAT32
Definition: host.h:42

FTABLE_Y
#define FTABLE_Y
Definition: cluster.cpp:31

FreePrototype
void FreePrototype(void *arg)
Definition: cluster.cpp:587

NormalBucket
uinT16 NormalBucket(PARAM_DESC *ParamDesc, FLOAT32 x, FLOAT32 Mean, FLOAT32 StdDev)
Definition: cluster.cpp:2056

ComputeStatistics
STATISTICS * ComputeStatistics(inT16 N, PARAM_DESC ParamDesc[], CLUSTER *Cluster)
Definition: cluster.cpp:1412

tprintf.h

mixed
Definition: cluster.h:45

ChiArea
FLOAT64 ChiArea(CHISTRUCT *ChiParams, FLOAT64 x)
Definition: cluster.cpp:2418

FLOATUNION::Spherical
FLOAT32 Spherical
Definition: cluster.h:63

CLUSTERCONFIG
Definition: cluster.h:48

normal
Definition: cluster.h:59

UniformBucket
uinT16 UniformBucket(PARAM_DESC *ParamDesc, FLOAT32 x, FLOAT32 Mean, FLOAT32 StdDev)
Definition: cluster.cpp:2091

MAXDEGREESOFFREEDOM
#define MAXDEGREESOFFREEDOM
Definition: cluster.cpp:228

DoError
void DoError(int Error, const char *Message)
Definition: danerror.cpp:42

CHIACCURACY
#define CHIACCURACY

int
typedef int(ZCALLBACK *close_file_func) OF((voidpf opaque

CHISTRUCT
Definition: cluster.cpp:188

KDDelete
void KDDelete(KDTREE *Tree, FLOAT32 Key[], void *Data)
Definition: kdtree.cpp:264

PARAM_DESC::Max
FLOAT32 Max
Definition: ocrfeatures.h:50

uinT8
uint8_t uinT8
Definition: host.h:35

FreeKDTree
void FreeKDTree(KDTREE *Tree)
Definition: kdtree.cpp:349

Independent
BOOL8 Independent(PARAM_DESC ParamDesc[], inT16 N, FLOAT32 *CoVariance, FLOAT32 Independence)
Definition: cluster.cpp:1641

ClusteringContext::candidates
TEMPCLUSTER * candidates
Definition: cluster.cpp:197

TEMPCLUSTER
Definition: cluster.cpp:162

PROTOTYPE::NumSamples
unsigned NumSamples
Definition: cluster.h:75

CHISTRUCT::Alpha
FLOAT64 Alpha
Definition: cluster.cpp:190

SOLVEFUNC
FLOAT64(* SOLVEFUNC)(CHISTRUCT *, double)
Definition: cluster.cpp:203

HOTELLING
#define HOTELLING
Definition: cluster.cpp:29

RootOf
#define RootOf(T)
Definition: kdtree.h:58

InvertMatrix
double InvertMatrix(const float *input, int size, float *inv)
Definition: cluster.cpp:2521

NewMixedProto
PROTOTYPE * NewMixedProto(inT16 N, CLUSTER *Cluster, STATISTICS *Statistics)
Definition: cluster.cpp:1576

STATISTICS::Max
FLOAT32 * Max
Definition: cluster.cpp:174

MAXNEIGHBORS
#define MAXNEIGHBORS

PROTOTYPE::Significant
unsigned Significant
Definition: cluster.h:68

FTABLE_X
#define FTABLE_X
Definition: cluster.cpp:30

MergeClusters
inT32 MergeClusters(inT16 N, register PARAM_DESC ParamDesc[], register inT32 n1, register inT32 n2, register FLOAT32 m[], register FLOAT32 m1[], register FLOAT32 m2[])

NormalDensity
FLOAT64 NormalDensity(inT32 x)
Definition: cluster.cpp:1923

NORMALEXTENT
#define NORMALEXTENT
Definition: cluster.cpp:160

PROTOTYPE::Cluster
CLUSTER * Cluster
Definition: cluster.h:76

cluster.h

PROTOTYPE
Definition: cluster.h:67

CLUSTERER::Root
CLUSTER * Root
Definition: cluster.h:91

sample::SampleCount
unsigned SampleCount
Definition: cluster.h:35

Efree
void Efree(void *ptr)
Definition: emalloc.cpp:79

CLUSTERER::KDTree
KDTREE * KDTree
Definition: cluster.h:90

sample
Definition: cluster.h:32

ClusterHeap
tesseract::GenericHeap< ClusterPair > ClusterHeap
Definition: cluster.cpp:168

FTable
const double FTable[FTABLE_Y][FTABLE_X]
Definition: cluster.cpp:34

tesseract::GenericHeap::Pop
bool Pop(Pair *entry)
Definition: genericheap.h:118

uniform
Definition: cluster.h:59

FreeStatistics
void FreeStatistics(STATISTICS *Statistics)
Definition: cluster.cpp:2153

AlphaMatch
int AlphaMatch(void *arg1, void *arg2)
Definition: cluster.cpp:2306

BUCKETS
Definition: cluster.cpp:177

iterate
#define iterate(l)
Definition: oldlist.h:159

ComputePrototypes
void ComputePrototypes(CLUSTERER *Clusterer, CLUSTERCONFIG *Config)
Definition: cluster.cpp:924

tesseract::KDPairInc
Definition: kdpair.h:51

ClusteringContext::tree
KDTREE * tree
Definition: cluster.cpp:198

PROTOTYPE::Magnitude
FLOATUNION Magnitude
Definition: cluster.h:82

push
LIST push(LIST list, void *element)
Definition: oldlist.cpp:317

uinT16
uint16_t uinT16
Definition: host.h:37

PROTOTYPE::TotalMagnitude
FLOAT32 TotalMagnitude
Definition: cluster.h:79

ComputeChiSquared
FLOAT64 ComputeChiSquared(uinT16 DegreesOfFreedom, FLOAT64 Alpha)
Definition: cluster.cpp:1869

BUCKETS::Bucket
uinT16 Bucket[BUCKETTABLESIZE]
Definition: cluster.cpp:183

MultipleCharSamples
BOOL8 MultipleCharSamples(CLUSTERER *Clusterer, CLUSTER *Cluster, FLOAT32 MaxIllegal)
Definition: cluster.cpp:2465

void_proc
void(* void_proc)(...)
Definition: cutil.h:66

ILLEGAL_CHAR
#define ILLEGAL_CHAR

FLOAT64
double FLOAT64
Definition: host.h:43

tesseract::GenericHeap
Definition: genericheap.h:58

matrix.h

BUCKETS::ChiSquared
FLOAT64 ChiSquared
Definition: cluster.cpp:181

PARAM_DESC::HalfRange
FLOAT32 HalfRange
Definition: ocrfeatures.h:52

NewChiStruct
CHISTRUCT * NewChiStruct(uinT16 DegreesOfFreedom, FLOAT64 Alpha)
Definition: cluster.cpp:2326

MakeSample
SAMPLE * MakeSample(CLUSTERER *Clusterer, const FLOAT32 *Feature, inT32 CharID)
Definition: cluster.cpp:455

FreeBuckets
void FreeBuckets(BUCKETS *Buckets)
Definition: cluster.cpp:2165

FindNearestNeighbor
CLUSTER * FindNearestNeighbor(KDTREE *Tree, CLUSTER *Cluster, FLOAT32 *Distance)
Definition: cluster.cpp:798