tesseract  4.00.00dev
tesseract::LMConsistencyInfo Struct Reference

#include <lm_consistency.h>

Public Types

enum  ChartypeEnum { CT_NONE, CT_ALPHA, CT_DIGIT, CT_OTHER }
 

Public Member Functions

 LMConsistencyInfo (const LMConsistencyInfo *parent_info)
 
int NumInconsistentPunc () const
 
int NumInconsistentCase () const
 
int NumInconsistentChartype () const
 
bool Consistent () const
 
int NumInconsistentSpaces () const
 
int InconsistentXHeight () const
 
void ComputeXheightConsistency (const BLOB_CHOICE *b, bool is_punc)
 
float BodyMinXHeight () const
 
float BodyMaxXHeight () const
 

Public Attributes

int num_alphas
 
int num_digits
 
int num_punc
 
int num_other
 
ChartypeEnum chartype
 
EDGE_REF punc_ref
 
bool invalid_punc
 
int num_non_first_upper
 
int num_lower
 
int script_id
 
bool inconsistent_script
 
int num_inconsistent_spaces
 
bool inconsistent_font
 
float xht_lo [kNumPos]
 
float xht_hi [kNumPos]
 
inT16 xht_count [kNumPos]
 
inT16 xht_count_punc [kNumPos]
 
inT16 xht_sp
 
inT16 xpos_entropy
 
XHeightConsistencyEnum xht_decision
 

Static Public Attributes

static const int kShiftThresh = 1
 
static const int kMaxEntropy = 1
 
static const int kSUB = 0
 
static const int kNORM = 1
 
static const int kSUP = 2
 
static const int kNumPos = 3
 

Detailed Description

Definition at line 38 of file lm_consistency.h.

Member Enumeration Documentation

◆ ChartypeEnum

Constructor & Destructor Documentation

◆ LMConsistencyInfo()

tesseract::LMConsistencyInfo::LMConsistencyInfo ( const LMConsistencyInfo *parent_info)
inline explicit

Definition at line 53 of file lm_consistency.h.

53  {
54  if (parent_info == NULL) {
55  // Initialize from scratch.
56  num_alphas = 0;
57  num_digits = 0;
58  num_punc = 0;
59  num_other = 0;
60  chartype = CT_NONE;
61  punc_ref = NO_EDGE;
62  invalid_punc = false;
64  num_lower = 0;
65  script_id = 0;
66  inconsistent_script = false;
68  inconsistent_font = false;
69  // Initialize XHeight stats.
70  for (int i = 0; i < kNumPos; i++) {
71  xht_count[i] = 0;
72  xht_count_punc[i] = 0;
73  xht_lo[i] = 0;
74  xht_hi[i] = 256; // kBlnCellHeight
75  }
76  xht_sp = -1; // This invalid value indicates that there was no parent.
77  xpos_entropy = 0;
79  } else {
80  // Copy parent info
81  *this = *parent_info;
82  }
83  }
XHeightConsistencyEnum xht_decision

Member Function Documentation

◆ BodyMaxXHeight()

float tesseract::LMConsistencyInfo::BodyMaxXHeight ( ) const
inline

Definition at line 111 of file lm_consistency.h.

111  {
112  if (InconsistentXHeight())
113  return static_cast<float>(MAX_INT16);
114  return xht_hi[kNORM];
115  }
#define MAX_INT16
Definition: host.h:61

◆ BodyMinXHeight()

float tesseract::LMConsistencyInfo::BodyMinXHeight ( ) const
inline

Definition at line 106 of file lm_consistency.h.

106  {
107  if (InconsistentXHeight())
108  return 0.0f;
109  return xht_lo[kNORM];
110  }

◆ ComputeXheightConsistency()

void tesseract::LMConsistencyInfo::ComputeXheightConsistency ( const BLOB_CHOICE *b,
bool  is_punc 
)

Definition at line 29 of file lm_consistency.cpp.

30  {
32  return; // It isn't going to get any better.
33 
34  // Compute xheight consistency.
35  bool parent_null = xht_sp < 0;
36  int parent_sp = xht_sp;
37  // Debug strings.
40  } else if (b->yshift() < -LMConsistencyInfo::kShiftThresh) {
42  } else {
44  }
45  xht_count[xht_sp]++;
46  if (is_punc) xht_count_punc[xht_sp]++;
47  if (!parent_null) {
48  xpos_entropy += abs(parent_sp - xht_sp);
49  }
50  // TODO(eger): Figure out a better way to account for small caps.
51  // For the first character not y-shifted, we only care if it is too small.
52  // Too large is common in drop caps and small caps.
53  // inT16 small_xht = b->min_xheight();
54  // if (parent_vse == NULL && sp == LanguageModelConsistencyInfo::kNORM) {
55  // small_xht = 0;
56  // }
58  &(xht_lo[xht_sp]), &(xht_hi[xht_sp]));
59 
60 
61  // Compute xheight inconsistency kinds.
62  if (parent_null) {
63  if (xht_count[kNORM] == 1) {
65  } else {
67  }
68  return;
69  }
70 
71  // When we intersect the ranges of xheights in pixels for all characters in
72  // each position (subscript, normal, superscript),
73  // How much range must be left? 0? [exactly one pixel height for xheight] 1?
74  // TODO(eger): Extend this code to take a prior for the rest of the line.
75  const int kMinIntersectedXHeightRange = 0;
76  for (int i = 0; i < kNumPos; i++) {
77  if (xht_lo[i] > xht_hi[i] - kMinIntersectedXHeightRange) {
79  return;
80  }
81  }
82 
83  // Reject as improbable anything where there's much punctuation in subscript
84  // or superscript regions.
85  if (xht_count_punc[kSUB] > xht_count[kSUB] * 0.4 ||
86  xht_count_punc[kSUP] > xht_count[kSUP] * 0.4) {
88  return;
89  }
90 
91  // Now check that the subscript and superscript aren't too small relative to
92  // the mainline.
93  double mainline_xht = static_cast<double>(xht_lo[kNORM]);
94  double kMinSizeRatio = 0.4;
95  if (mainline_xht > 0.0 &&
96  (static_cast<double>(xht_hi[kSUB]) / mainline_xht < kMinSizeRatio ||
97  static_cast<double>(xht_hi[kSUP]) / mainline_xht < kMinSizeRatio)) {
99  return;
100  }
101  // TODO(eger): Check into inconsistency of super/subscript y offsets.
102  if (xpos_entropy > kMaxEntropy) {
104  return;
105  }
106  if (xht_count[kSUB] == 0 && xht_count[kSUP] == 0) {
108  return;
109  }
111 }
float yshift() const
Definition: ratngs.h:129
static const int kShiftThresh
static const int kMaxEntropy
XHeightConsistencyEnum xht_decision
void IntersectRange(const T &lower1, const T &upper1, T *lower2, T *upper2)
Definition: helpers.h:153
float min_xheight() const
Definition: ratngs.h:123
float max_xheight() const
Definition: ratngs.h:126

◆ Consistent()

bool tesseract::LMConsistencyInfo::Consistent ( ) const
inline

Definition at line 94 of file lm_consistency.h.

◆ InconsistentXHeight()

int tesseract::LMConsistencyInfo::InconsistentXHeight ( ) const
inline

Definition at line 102 of file lm_consistency.h.

102  {
103  return xht_decision == XH_INCONSISTENT;
104  }
XHeightConsistencyEnum xht_decision

◆ NumInconsistentCase()

int tesseract::LMConsistencyInfo::NumInconsistentCase ( ) const
inline

◆ NumInconsistentChartype()

int tesseract::LMConsistencyInfo::NumInconsistentChartype ( ) const
inline

◆ NumInconsistentPunc()

int tesseract::LMConsistencyInfo::NumInconsistentPunc ( ) const
inline

Definition at line 84 of file lm_consistency.h.

◆ NumInconsistentSpaces()

int tesseract::LMConsistencyInfo::NumInconsistentSpaces ( ) const
inline

Definition at line 99 of file lm_consistency.h.

99  {
101  }

Member Data Documentation

◆ chartype

ChartypeEnum tesseract::LMConsistencyInfo::chartype

Definition at line 121 of file lm_consistency.h.

◆ inconsistent_font

bool tesseract::LMConsistencyInfo::inconsistent_font

Definition at line 129 of file lm_consistency.h.

◆ inconsistent_script

bool tesseract::LMConsistencyInfo::inconsistent_script

Definition at line 127 of file lm_consistency.h.

◆ invalid_punc

bool tesseract::LMConsistencyInfo::invalid_punc

Definition at line 123 of file lm_consistency.h.

◆ kMaxEntropy

const int tesseract::LMConsistencyInfo::kMaxEntropy = 1
static

Definition at line 47 of file lm_consistency.h.

◆ kNORM

const int tesseract::LMConsistencyInfo::kNORM = 1
static

Definition at line 50 of file lm_consistency.h.

◆ kNumPos

const int tesseract::LMConsistencyInfo::kNumPos = 3
static

Definition at line 51 of file lm_consistency.h.

◆ kShiftThresh

const int tesseract::LMConsistencyInfo::kShiftThresh = 1
static

Definition at line 43 of file lm_consistency.h.

◆ kSUB

const int tesseract::LMConsistencyInfo::kSUB = 0
static

Definition at line 50 of file lm_consistency.h.

◆ kSUP

const int tesseract::LMConsistencyInfo::kSUP = 2
static

Definition at line 50 of file lm_consistency.h.

◆ num_alphas

int tesseract::LMConsistencyInfo::num_alphas

Definition at line 117 of file lm_consistency.h.

◆ num_digits

int tesseract::LMConsistencyInfo::num_digits

Definition at line 118 of file lm_consistency.h.

◆ num_inconsistent_spaces

int tesseract::LMConsistencyInfo::num_inconsistent_spaces

Definition at line 128 of file lm_consistency.h.

◆ num_lower

int tesseract::LMConsistencyInfo::num_lower

Definition at line 125 of file lm_consistency.h.

◆ num_non_first_upper

int tesseract::LMConsistencyInfo::num_non_first_upper

Definition at line 124 of file lm_consistency.h.

◆ num_other

int tesseract::LMConsistencyInfo::num_other

Definition at line 120 of file lm_consistency.h.

◆ num_punc

int tesseract::LMConsistencyInfo::num_punc

Definition at line 119 of file lm_consistency.h.

◆ punc_ref

EDGE_REF tesseract::LMConsistencyInfo::punc_ref

Definition at line 122 of file lm_consistency.h.

◆ script_id

int tesseract::LMConsistencyInfo::script_id

Definition at line 126 of file lm_consistency.h.

◆ xht_count

inT16 tesseract::LMConsistencyInfo::xht_count[kNumPos]

Definition at line 133 of file lm_consistency.h.

◆ xht_count_punc

inT16 tesseract::LMConsistencyInfo::xht_count_punc[kNumPos]

Definition at line 134 of file lm_consistency.h.

◆ xht_decision

XHeightConsistencyEnum tesseract::LMConsistencyInfo::xht_decision

Definition at line 137 of file lm_consistency.h.

◆ xht_hi

float tesseract::LMConsistencyInfo::xht_hi[kNumPos]

Definition at line 132 of file lm_consistency.h.

◆ xht_lo

float tesseract::LMConsistencyInfo::xht_lo[kNumPos]

Definition at line 131 of file lm_consistency.h.

◆ xht_sp

inT16 tesseract::LMConsistencyInfo::xht_sp

Definition at line 135 of file lm_consistency.h.

◆ xpos_entropy

inT16 tesseract::LMConsistencyInfo::xpos_entropy

Definition at line 136 of file lm_consistency.h.


The documentation for this struct was generated from the following files: