tesseract  4.00.00dev
devanagari_processing.h
Go to the documentation of this file.
1 // Copyright 2008 Google Inc. All Rights Reserved.
2 // Author: shobhitsaxena@google.com (Shobhit Saxena)
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 // http://www.apache.org/licenses/LICENSE-2.0
7 // Unless required by applicable law or agreed to in writing, software
8 // distributed under the License is distributed on an "AS IS" BASIS,
9 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10 // See the License for the specific language governing permissions and
11 // limitations under the License.
12 
13 #ifndef TESSERACT_TEXTORD_DEVNAGARI_PROCESSING_H_
14 #define TESSERACT_TEXTORD_DEVNAGARI_PROCESSING_H_
15 
16 #include "allheaders.h"
17 #include "debugpixa.h"
18 #include "ocrblock.h"
19 #include "params.h"
20 
21 struct Pix;
22 struct Box;
23 struct Boxa;
24 
25 extern
27  "Debug level for split shiro-rekha process.");
28 
29 extern
31  "Whether to create a debug image for split shiro-rekha process.");
32 
33 class TBOX;
34 
35 namespace tesseract {
36 
38  public:
40  hist_ = NULL;
41  length_ = 0;
42  }
43 
45  Clear();
46  }
47 
48  // Frees the histogram buffer and returns the object to its empty state.
49  void Clear() {
50  delete[] hist_;  // delete[] on a null pointer is a no-op, so no check needed.
51  hist_ = NULL;  // Reset so a repeated Clear() cannot delete the buffer twice.
52  length_ = 0;
53  }
54 
55  int* hist() const { return hist_; }  // Raw histogram buffer (NULL until built); freed by Clear(), so callers must not delete it.
56 
57  int length() const {  // Returns length_, which is 0 after construction or Clear(); presumably the entry count of hist_.
58  return length_;
59  }
60 
61  // Methods to construct histograms from images. These clear any existing data.
62  void ConstructVerticalCountHist(Pix* pix);
63  void ConstructHorizontalCountHist(Pix* pix);
64 
65  // This method returns the global-maxima for the histogram. The frequency of
66  // the global maxima is returned in count, if specified.
67  int GetHistogramMaximum(int* count) const;
68 
69  private:
70  int* hist_;
71  int length_;
72 };
73 
75  public:
77  NO_SPLIT = 0, // No splitting is performed for the phase.
78  MINIMAL_SPLIT, // Blobs are split minimally.
79  MAXIMAL_SPLIT // Blobs are split maximally.
80  };
81 
83  virtual ~ShiroRekhaSplitter();
84 
85  // Top-level method to perform splitting based on current settings.
86  // Returns true if a split was actually performed.
87  // If split_for_pageseg is true, the pageseg_split_strategy_ is used for
88  // splitting. If false, the ocr_split_strategy_ is used.
89  bool Split(bool split_for_pageseg, DebugPixa* pixa_debug);
90 
91  // Clears the memory held by this object.
92  void Clear();
93 
94  // Refreshes the words in the segmentation block list by using blobs in the
95  // input blob list.
96  // The segmentation block list must be set.
97  void RefreshSegmentationWithNewBlobs(C_BLOB_LIST* new_blobs);
98 
99  // Returns true if the split strategies for pageseg and ocr are different.
101  return pageseg_split_strategy_ != ocr_split_strategy_;
102  }
103 
104  // This only keeps a copy of the block list pointer. At split call, the list
105  // object should still be alive. This block list is used as a golden
106  // segmentation when performing splitting.
107  void set_segmentation_block_list(BLOCK_LIST* block_list) {
108  segmentation_block_list_ = block_list;
109  }
110 
111  static const int kUnspecifiedXheight = -1;
112 
113  void set_global_xheight(int xheight) {  // Stores the global xheight estimate that GetXheightForCC can fall back on.
114  global_xheight_ = xheight;
115  }
116 
117  void set_perform_close(bool perform) {
118  perform_close_ = perform;
119  }
120 
121  // Returns the image obtained from shiro-rekha splitting. The returned object
122  // is owned by this class. Callers may want to clone the returned pix to keep
123  // it alive beyond the life of ShiroRekhaSplitter object.
124  Pix* splitted_image() {
125  return splitted_image_;
126  }
127 
128  // On setting the input image, a clone of it is owned by this class.
129  void set_orig_pix(Pix* pix);
130 
131  // Returns the input image provided to the object. This object is owned by
132  // this class. Callers may want to clone the returned pix to work with it.
133  Pix* orig_pix() {
134  return orig_pix_;
135  }
136 
138  return ocr_split_strategy_;
139  }
140 
142  ocr_split_strategy_ = strategy;
143  }
144 
146  return pageseg_split_strategy_;
147  }
148 
150  pageseg_split_strategy_ = strategy;
151  }
152 
153  BLOCK_LIST* segmentation_block_list() {
154  return segmentation_block_list_;
155  }
156 
157  // This method returns the computed mode-height of blobs in the pix.
158  // It also prunes very small blobs from calculation. Could be used to provide
159  // a global xheight estimate for images which have the same point-size text.
160  static int GetModeHeight(Pix* pix);
161 
162  private:
163  // Method to perform a close operation on the input image. The xheight
164  // estimate decides the size of sel used.
165  static void PerformClose(Pix* pix, int xheight_estimate);
166 
167  // This method resolves the cc bbox to a particular row and returns the row's
168  // xheight. This uses segmentation_block_list_ if available, else just returns
169  // the global_xheight_ estimate currently set in the object.
170  int GetXheightForCC(Box* cc_bbox);
171 
172  // Returns a list of regions (boxes) which should be cleared in the original
173  // image so as to perform shiro-rekha splitting. Pix is assumed to carry at
174  // most one word. Xheight measure could be the global estimate, the row
175  // estimate, or unspecified. If unspecified, over splitting may occur, since a
176  // conservative estimate of stroke width along with an associated multiplier
177  // is used in its place. It is advisable to have a specified xheight when
178  // splitting for classification/training.
179  void SplitWordShiroRekha(SplitStrategy split_strategy,
180  Pix* pix,
181  int xheight,
182  int word_left,
183  int word_top,
184  Boxa* regions_to_clear);
185 
186  // Returns a new box object for the corresponding TBOX, based on the original
187  // image's coordinate system.
188  Box* GetBoxForTBOX(const TBOX& tbox) const;
189 
190  // This method returns y-extents of the shiro-rekha computed from the input
191  // word image.
192  static void GetShiroRekhaYExtents(Pix* word_pix,
193  int* shirorekha_top,
194  int* shirorekha_bottom,
195  int* shirorekha_ylevel);
196 
197  Pix* orig_pix_; // Just a clone of the input image passed.
198  Pix* splitted_image_; // Image produced after the last splitting round. The
199  // object is owned by this class.
200  SplitStrategy pageseg_split_strategy_;
201  SplitStrategy ocr_split_strategy_;
202  Pix* debug_image_;
203  // This block list is used as a golden segmentation when performing splitting.
204  BLOCK_LIST* segmentation_block_list_;
205  int global_xheight_;
206  bool perform_close_; // Whether a morphological close operation should be
207  // performed before CCs are run through splitting.
208 };
209 
210 } // namespace tesseract.
211 
212 #endif // TESSERACT_TEXTORD_DEVNAGARI_PROCESSING_H_
void set_segmentation_block_list(BLOCK_LIST *block_list)
void set_ocr_split_strategy(SplitStrategy strategy)
bool devanagari_split_debugimage
void set_pageseg_split_strategy(SplitStrategy strategy)
int devanagari_split_debuglevel
int GetHistogramMaximum(int *count) const
Definition: rect.h:30
SplitStrategy ocr_split_strategy() const
SplitStrategy pageseg_split_strategy() const
#define INT_VAR_H(name, val, comment)
Definition: params.h:264
#define BOOL_VAR_H(name, val, comment)
Definition: params.h:267
int count(LIST var_list)
Definition: oldlist.cpp:103