110 if (!DecodeRadicalStrokeTable(radical_stroke_table, &radical_map))
127 int hangul_offset = unicharset.
size();
133 int han_offset = hangul_offset + kTotalJamos;
134 int max_num_strokes = -1;
135 for (
int u = 0;
u <= unicharset.
size(); ++
u) {
136 bool self_normalized =
false;
139 if (
u == unicharset.
size()) {
141 self_normalized =
true;
152 if (
u < unicharset.
size() &&
154 unicodes.
size() == 1) {
156 int unicode = unicodes[0];
157 int leading, vowel, trailing;
158 auto it = radical_map.find(unicode);
159 if (it != radical_map.end()) {
164 int radical = radicals.
unichar_to_id(it->second.radical.string());
165 int num_strokes = it->second.num_strokes;
166 int num_samples = radical_counts[it->second]++;
167 if (num_strokes > max_num_strokes) max_num_strokes = num_strokes;
168 code.Set3(radical + han_offset, num_strokes + han_offset,
169 num_samples + han_offset);
173 code.Set3(leading + hangul_offset, vowel +
kLCount + hangul_offset,
178 if (code.length() == 0) {
188 for (
int i = 0; i < unicodes.
size(); ++i) {
189 int position = code.length();
191 tprintf(
"Unichar %d=%s->%s is too long to encode!!\n",
u,
196 int uni = unicodes[i];
198 char* utf8 = unichar.utf8_str();
203 if (direct_set.
size() > unicharset.
size()) {
205 tprintf(
"Code space expanded from original unicharset!!\n");
211 code.set_self_normalized(self_normalized);
212 encoder_.push_back(code);
219 int num_radicals = radicals.
size();
220 for (
int u = 0;
u < unicharset.
size(); ++
u) {
221 RecodedCharID* code = &encoder_[
u];
222 if ((*code)(0) >= han_offset) {
223 code->Set(1, (*code)(1) + num_radicals);
224 code->Set(2, (*code)(2) + num_radicals + max_num_strokes + 1);
227 DefragmentCodeValues(null_id >= 0 ? 1 : -1);
bool contains_unichar(const char *const unichar_repr) const
std::unordered_map< int, RadicalStroke > RSMap
static const int kMaxCodeLen
const char * id_to_unichar(UNICHAR_ID id) const
bool has_special_codes() const
static bool DecomposeHangul(int unicode, int *leading, int *vowel, int *trailing)
static bool UTF8ToUnicode(const char *utf8_str, GenericVector< int > *unicodes)
std::unordered_map< RadicalStroke, int, RadicalStrokedHash > RSCounts
UNICHAR_ID unichar_to_id(const char *const unichar_repr) const
const char * get_normed_unichar(UNICHAR_ID unichar_id) const
void unichar_insert(const char *const unichar_repr)