tesseract  4.00.00dev
adaptions.cpp
Go to the documentation of this file.
1 /**********************************************************************
2  * File: adaptions.cpp (Formerly adaptions.c)
3  * Description: Functions used to adapt to blobs already confidently
4  * identified
5  * Author: Chris Newton
6  * Created: Thu Oct 7 10:17:28 BST 1993
7  *
8  * (C) Copyright 1992, Hewlett-Packard Ltd.
9  ** Licensed under the Apache License, Version 2.0 (the "License");
10  ** you may not use this file except in compliance with the License.
11  ** You may obtain a copy of the License at
12  ** http://www.apache.org/licenses/LICENSE-2.0
13  ** Unless required by applicable law or agreed to in writing, software
14  ** distributed under the License is distributed on an "AS IS" BASIS,
15  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16  ** See the License for the specific language governing permissions and
17  ** limitations under the License.
18  *
19  **********************************************************************/
20 
21 #ifdef _MSC_VER
22 #pragma warning(disable:4244) // Conversion warnings
23 #pragma warning(disable:4305) // int/float warnings
24 #endif
25 
26 #ifdef __UNIX__
27 #include <assert.h>
28 #endif
29 #include <ctype.h>
30 #include <string.h>
31 #include "tessbox.h"
32 #include "tessvars.h"
33 #include "memry.h"
34 #include "reject.h"
35 #include "control.h"
36 #include "stopper.h"
37 #include "tesseractclass.h"
38 
39 // Include automatically generated configuration file if running autoconf.
40 #ifdef HAVE_CONFIG_H
41 #include "config_auto.h"
42 #endif
43 
44 namespace tesseract {
45 BOOL8 Tesseract::word_adaptable( //should we adapt?
46  WERD_RES *word,
47  uinT16 mode) {
49  tprintf("Running word_adaptable() for %s rating %.4f certainty %.4f\n",
50  word->best_choice == NULL ? "" :
52  word->best_choice->rating(), word->best_choice->certainty());
53  }
54 
55  BOOL8 status = FALSE;
56  BITS16 flags(mode);
57 
58  enum MODES
59  {
60  ADAPTABLE_WERD,
61  ACCEPTABLE_WERD,
62  CHECK_DAWGS,
63  CHECK_SPACES,
64  CHECK_ONE_ELL_CONFLICT,
65  CHECK_AMBIG_WERD
66  };
67 
68  /*
69  0: NO adaption
70  */
71  if (mode == 0) {
72  if (tessedit_adaption_debug) tprintf("adaption disabled\n");
73  return FALSE;
74  }
75 
76  if (flags.bit (ADAPTABLE_WERD)) {
77  status |= word->tess_would_adapt; // result of Classify::AdaptableWord()
78  if (tessedit_adaption_debug && !status) {
79  tprintf("tess_would_adapt bit is false\n");
80  }
81  }
82 
83  if (flags.bit (ACCEPTABLE_WERD)) {
84  status |= word->tess_accepted;
85  if (tessedit_adaption_debug && !status) {
86  tprintf("tess_accepted bit is false\n");
87  }
88  }
89 
90  if (!status) { // If not set then
91  return FALSE; // ignore other checks
92  }
93 
94  if (flags.bit (CHECK_DAWGS) &&
95  (word->best_choice->permuter () != SYSTEM_DAWG_PERM) &&
96  (word->best_choice->permuter () != FREQ_DAWG_PERM) &&
97  (word->best_choice->permuter () != USER_DAWG_PERM) &&
98  (word->best_choice->permuter () != NUMBER_PERM)) {
99  if (tessedit_adaption_debug) tprintf("word not in dawgs\n");
100  return FALSE;
101  }
102 
103  if (flags.bit (CHECK_ONE_ELL_CONFLICT) && one_ell_conflict (word, FALSE)) {
104  if (tessedit_adaption_debug) tprintf("word has ell conflict\n");
105  return FALSE;
106  }
107 
108  if (flags.bit (CHECK_SPACES) &&
109  (strchr(word->best_choice->unichar_string().string(), ' ') != NULL)) {
110  if (tessedit_adaption_debug) tprintf("word contains spaces\n");
111  return FALSE;
112  }
113 
114  if (flags.bit (CHECK_AMBIG_WERD) &&
116  if (tessedit_adaption_debug) tprintf("word is ambiguous\n");
117  return FALSE;
118  }
119 
121  tprintf("returning status %d\n", status);
122  }
123  return status;
124 }
125 
126 } // namespace tesseract
BOOL8 one_ell_conflict(WERD_RES *word_res, BOOL8 update_map)
Definition: reject.cpp:292
Definition: bits16.h:25
WERD_CHOICE * best_choice
Definition: pageres.h:219
#define tprintf(...)
Definition: tprintf.h:31
const char * string() const
Definition: strngs.cpp:198
uinT8 permuter() const
Definition: ratngs.h:344
unsigned char BOOL8
Definition: host.h:44
#define FALSE
Definition: capi.h:46
const char int mode
Definition: ioapi.h:38
bool dangerous_ambig_found() const
Definition: ratngs.h:361
BOOL8 tess_would_adapt
Definition: pageres.h:281
BOOL8 tess_accepted
Definition: pageres.h:280
float certainty() const
Definition: ratngs.h:328
const STRING & unichar_string() const
Definition: ratngs.h:539
BOOL8 bit(uinT8 bit_num) const
Definition: bits16.h:56
uint16_t uinT16
Definition: host.h:37
BOOL8 word_adaptable(WERD_RES *word, uinT16 mode)
Definition: adaptions.cpp:45
float rating() const
Definition: ratngs.h:325