22 #ifndef TESSERACT_CCUTIL_UNICHARCOMPRESS_H_ 23 #define TESSERACT_CCUTIL_UNICHARCOMPRESS_H_ 25 #include <unordered_map> 40 memset(code_, 0,
sizeof(code_));
44 void Set(
int index,
int value) {
46 if (length_ <= index) length_ = index + 1;
50 void Set3(
int code0,
int code1,
int code2) {
// Accessor: returns the current value of length_. Const, no side effects.
59 int length()
const {
return length_; }
64 if (fp->
FWrite(&self_normalized_,
sizeof(self_normalized_), 1) != 1)
66 if (fp->
FWrite(&length_,
sizeof(length_), 1) != 1)
return false;
67 if (fp->
FWrite(code_,
sizeof(code_[0]), length_) != length_)
return false;
73 if (fp->
FRead(&self_normalized_,
sizeof(self_normalized_), 1) != 1)
75 if (fp->
FReadEndian(&length_,
sizeof(length_), 1) != 1)
return false;
76 if (fp->
FReadEndian(code_,
sizeof(code_[0]), length_) != length_)
81 if (length_ != other.length_)
return false;
82 for (
int i = 0; i < length_; ++i) {
83 if (code_[i] != other.code_[i])
return false;
// Folds each code value into the hash, shifting position i left by 7 * i bits
// before XOR-ing it into result.
// NOTE(review): code(i) returns int, so the shift is evaluated in int before
// being combined with result. If code.length_ can exceed 5 (i >= 5 gives a
// shift of 35 or more), that is more than a 32-bit int shift permits; consider
// widening code(i) to the result type before shifting. Confirm against
// kMaxCodeLen, whose value is not visible here.
91 for (
int i = 0; i < code.length_; ++i) {
92 result ^= code(i) << (7 * i);
101 inT8 self_normalized_;
// Code point of the first precomposed Hangul syllable (U+AC00).
147 static const int kFirstHangul = 0xac00;
// Number of precomposed Hangul syllables: kLCount * kVCount * kTCount
// = 19 * 21 * 28 = 11172.
149 static const int kNumHangul = 11172;
// Counts of the components a Hangul syllable decomposes into. The names
// follow the Unicode LCount/VCount/TCount convention (leading consonants,
// vowels, trailing consonants); in that scheme TCount includes the "no
// trailing consonant" case -- confirm against DecomposeHangul's definition.
152 static const int kLCount = 19;
153 static const int kVCount = 21;
154 static const int kTCount = 28;
// Declaration only; the definition is not visible in this header.
// Takes a const UNICHARSET reference, an int null_id and a non-const STRING*
// radical_stroke_table, and reports its outcome as a bool.
// NOTE(review): presumably returns true on success and false on failure --
// confirm against the definition.
160 bool ComputeEncoding(
const UNICHARSET& unicharset,
int null_id,
161 STRING* radical_stroke_table);
// Declaration only; the definition is not visible in this header.
// NOTE(review): the name suggests an identity (pass-through) encoding built
// from unicharset -- confirm against the definition.
164 void SetupPassThrough(
const UNICHARSET& unicharset);
// Declaration only; the definition is not visible in this header.
// Const member taking a unichar_id and an output RecodedCharID pointer.
// NOTE(review): presumably fills *code with the encoding of unichar_id and
// returns its length -- confirm against the definition.
176 int EncodeUnichar(
int unichar_id,
RecodedCharID* code)
const;
190 auto it = next_codes_.find(code);
191 return it == next_codes_.end() ? NULL : it->second;
196 auto it = final_codes_.find(code);
197 return it == final_codes_.end() ? NULL : it->second;
218 static bool DecomposeHangul(
int unicode,
int* leading,
int* vowel,
// Declarations only; the definitions are not visible in this header.
// NOTE(review): DefragmentCodeValues presumably renumbers code values to
// remove unused gaps, treating encoded_null specially -- confirm.
223 void DefragmentCodeValues(
int encoded_null);
// NOTE(review): presumably recomputes the range of code values in use after
// the encoding changes -- confirm against the definition.
225 void ComputeCodeRange();
235 std::unordered_map<RecodedCharID, int, RecodedCharID::RecodedCharIDHash>
241 std::unordered_map<RecodedCharID, GenericVectorEqEq<int>*,
246 std::unordered_map<RecodedCharID, GenericVectorEqEq<int>*,
255 #endif // TESSERACT_CCUTIL_UNICHARCOMPRESS_H_
bool IsValidFirstCode(int code) const
bool operator==(const RecodedCharID &other) const
bool self_normalized() const
const GenericVector< int > * GetNextCodes(const RecodedCharID &code) const
static const int kMaxCodeLen
const GenericVector< int > * GetFinalCodes(const RecodedCharID &code) const
void Set3(int code0, int code1, int code2)
int FReadEndian(void *buffer, int size, int count)
bool DeSerialize(TFile *fp)
void set_self_normalized(bool value)
int FWrite(const void *buffer, int size, int count)
void Set(int index, int value)
size_t operator()(const RecodedCharID &code) const
void Truncate(int length)
int operator()(int index) const
bool Serialize(TFile *fp) const
int FRead(void *buffer, int size, int count)