tesseract  4.00.00dev
UNICHAR::const_iterator Class Reference

#include <unichar.h>

Public Member Functions

const_iterator & operator++ ()
 
int operator* () const
 
int get_utf8 (char *buf) const
 
int utf8_len () const
 
bool is_legal () const
 
const char * utf8_data () const
 

Friends

class UNICHAR
 
bool operator== (const CI &lhs, const CI &rhs)
 
bool operator!= (const CI &lhs, const CI &rhs)
 

Detailed Description

Definition at line 102 of file unichar.h.

Member Function Documentation

◆ get_utf8()

int UNICHAR::const_iterator::get_utf8 ( char *  buf) const

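Definition at line 174 of file unichar.cpp. Note: the definition listed below names the parameter utf8_output, whereas the declaration documented above names it buf; both refer to the same output buffer.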

174  {
175  ASSERT_HOST(it_ != NULL);
176  const int len = utf8_step(it_);
177  if (len == 0) {
178  tprintf("WARNING: Illegal UTF8 encountered\n");
179  utf8_output[0] = ' ';
180  return 1;
181  }
182  strncpy(utf8_output, it_, len);
183  return len;
184 }
#define tprintf(...)
Definition: tprintf.h:31
#define ASSERT_HOST(x)
Definition: errcode.h:84
static int utf8_step(const char *utf8_str)
Definition: unichar.cpp:134

◆ is_legal()

bool UNICHAR::const_iterator::is_legal ( ) const

Definition at line 196 of file unichar.cpp.

196  {
197  return utf8_step(it_) > 0;
198 }
static int utf8_step(const char *utf8_str)
Definition: unichar.cpp:134

◆ operator*()

int UNICHAR::const_iterator::operator* ( ) const

Definition at line 163 of file unichar.cpp.

163  {
164  ASSERT_HOST(it_ != NULL);
165  const int len = utf8_step(it_);
166  if (len == 0) {
167  tprintf("WARNING: Illegal UTF8 encountered\n");
168  return ' ';
169  }
170  UNICHAR uch(it_, len);
171  return uch.first_uni();
172 }
#define tprintf(...)
Definition: tprintf.h:31
#define ASSERT_HOST(x)
Definition: errcode.h:84
static int utf8_step(const char *utf8_str)
Definition: unichar.cpp:134

◆ operator++()

UNICHAR::const_iterator & UNICHAR::const_iterator::operator++ ( )

Definition at line 149 of file unichar.cpp.

149  {
150  ASSERT_HOST(it_ != NULL);
151  int step = utf8_step(it_);
152  if (step == 0) {
153  tprintf("ERROR: Illegal UTF8 encountered.\n");
154  for (int i = 0; i < 5 && it_[i] != '\0'; ++i) {
155  tprintf("Index %d char = 0x%x\n", i, it_[i]);
156  }
157  step = 1;
158  }
159  it_ += step;
160  return *this;
161 }
#define tprintf(...)
Definition: tprintf.h:31
#define ASSERT_HOST(x)
Definition: errcode.h:84
static int utf8_step(const char *utf8_str)
Definition: unichar.cpp:134

◆ utf8_data()

const char* UNICHAR::const_iterator::utf8_data ( ) const
inline

Definition at line 130 of file unichar.h.

130 { return it_; }

◆ utf8_len()

int UNICHAR::const_iterator::utf8_len ( ) const

Definition at line 186 of file unichar.cpp.

186  {
187  ASSERT_HOST(it_ != NULL);
188  const int len = utf8_step(it_);
189  if (len == 0) {
190  tprintf("WARNING: Illegal UTF8 encountered\n");
191  return 1;
192  }
193  return len;
194 }
#define tprintf(...)
Definition: tprintf.h:31
#define ASSERT_HOST(x)
Definition: errcode.h:84
static int utf8_step(const char *utf8_str)
Definition: unichar.cpp:134

Friends And Related Function Documentation

◆ operator!=

bool operator!= ( const CI & lhs,
const CI & rhs 
)
friend

Definition at line 136 of file unichar.h.

136  {
137  return !(lhs == rhs);
138  }

◆ operator==

bool operator== ( const CI & lhs,
const CI & rhs 
)
friend

Definition at line 133 of file unichar.h.

133  {
134  return lhs.it_ == rhs.it_;
135  }

◆ UNICHAR

friend class UNICHAR
friend

Definition at line 141 of file unichar.h.


The documentation for this class was generated from the following files: