#include <tessdatamanager.h>
Definition at line 121 of file tessdatamanager.h.
◆ TessdataManager() [1/2]
tesseract::TessdataManager::TessdataManager |
( |
| ) |
|
|
inline |
Definition at line 123 of file tessdatamanager.h.
123 : reader_(
nullptr), is_loaded_(
false), swap_(
false) {}
◆ TessdataManager() [2/2]
tesseract::TessdataManager::TessdataManager |
( |
FileReader |
reader | ) |
|
|
inline explicit |
Definition at line 124 of file tessdatamanager.h.
125 : reader_(reader), is_loaded_(
false), swap_(
false) {}
◆ ~TessdataManager()
tesseract::TessdataManager::~TessdataManager |
( |
| ) |
|
|
inline |
◆ Clear()
void tesseract::TessdataManager::Clear |
( |
| ) |
|
◆ CombineDataFiles()
bool tesseract::TessdataManager::CombineDataFiles |
( |
const char * |
language_data_path_prefix, |
|
|
const char * |
output_filename |
|
) |
| |
Reads all the standard tesseract config and data files for a language at the given path and bundles them up into one binary data file. Returns true if the combined traineddata file was successfully written.
Definition at line 146 of file tessdatamanager.cpp.
154 filename += kTessdataFileSuffixes[i];
155 FILE *fp = fopen(filename.
string(),
"rb");
169 "Error: traineddata file must contain at least (a unicharset file"
170 "and inttemp) OR an lstm file.\n");
174 return SaveFile(output_filename,
nullptr);
bool IsLSTMAvailable() const
const char * string() const
bool IsBaseAvailable() const
bool SaveFile(const STRING &filename, FileWriter writer) const
bool LoadDataFromFile(const char *filename, GenericVector< char > *data)
static bool TessdataTypeFromFileSuffix(const char *suffix, TessdataType *type)
◆ Directory()
void tesseract::TessdataManager::Directory |
( |
| ) |
const |
Definition at line 125 of file tessdatamanager.cpp.
128 if (!entries_[i].empty()) {
129 tprintf(
"%d:%s:size=%d, offset=%d\n", i, kTessdataFileSuffixes[i],
130 entries_[i].
size(), offset);
131 offset += entries_[i].
size();
◆ ExtractToFile()
bool tesseract::TessdataManager::ExtractToFile |
( |
const char * |
filename | ) |
|
Extracts tessdata component implied by the name of the input file from the combined traineddata loaded into TessdataManager. Writes the extracted component to the file indicated by the file name. E.g. if the filename given is somepath/somelang.unicharset, unicharset will be extracted from the data loaded into the TessdataManager and will be written to somepath/somelang.unicharset.
- Returns
- true if the component was successfully extracted, false if the component was not present in the traineddata loaded into TessdataManager.
Definition at line 196 of file tessdatamanager.cpp.
200 if (entries_[type].empty())
return false;
static bool TessdataTypeFromFileName(const char *filename, TessdataType *type)
bool SaveDataToFile(const GenericVector< char > &data, const STRING &filename)
◆ GetComponent()
Definition at line 138 of file tessdatamanager.cpp.
139 if (!is_loaded_ && !
Init(data_file_name_.
string()))
return false;
140 if (entries_[type].empty())
return false;
141 fp->Open(&entries_[type][0], entries_[type].
size());
const char * string() const
bool Init(const char *data_file_name)
◆ GetDataFileName()
const STRING& tesseract::TessdataManager::GetDataFileName |
( |
| ) |
const |
|
inline |
◆ Init()
bool tesseract::TessdataManager::Init |
( |
const char * |
data_file_name | ) |
|
Opens and reads the given data file right now.
- Returns
- true on success.
Definition at line 36 of file tessdatamanager.cpp.
38 if (reader_ ==
nullptr) {
41 if (!(*reader_)(data_file_name, &data))
return false;
bool LoadMemBuffer(const char *name, const char *data, int size)
bool LoadDataFromFile(const char *filename, GenericVector< char > *data)
◆ is_loaded()
bool tesseract::TessdataManager::is_loaded |
( |
| ) |
const |
|
inline |
◆ IsBaseAvailable()
bool tesseract::TessdataManager::IsBaseAvailable |
( |
| ) |
const |
|
inline |
◆ IsLSTMAvailable()
bool tesseract::TessdataManager::IsLSTMAvailable |
( |
| ) |
const |
|
inline |
◆ LoadMemBuffer()
bool tesseract::TessdataManager::LoadMemBuffer |
( |
const char * |
name, |
|
|
const char * |
data, |
|
|
int |
size |
|
) |
| |
Definition at line 47 of file tessdatamanager.cpp.
49 data_file_name_ = name;
53 if (fp.FRead(&num_entries,
sizeof(num_entries), 1) != 1)
return false;
54 swap_ = num_entries > kMaxNumTessdataEntries || num_entries < 0;
56 if (swap_)
ReverseN(&num_entries,
sizeof(num_entries));
57 if (num_entries > kMaxNumTessdataEntries || num_entries < 0)
return false;
60 if (fp.FReadEndian(&offset_table[0],
sizeof(offset_table[0]), num_entries) !=
64 if (offset_table[i] >= 0) {
65 inT64 entry_size =
size - offset_table[i];
67 while (j < num_entries && offset_table[j] == -1) ++j;
68 if (j < num_entries) entry_size = offset_table[j] - offset_table[i];
70 if (fp.FRead(&entries_[i][0], 1, entry_size) != entry_size)
return false;
void resize_no_init(int size)
void ReverseN(void *ptr, int num_bytes)
◆ OverwriteComponents()
bool tesseract::TessdataManager::OverwriteComponents |
( |
const char * |
new_traineddata_filename, |
|
|
char ** |
component_filenames, |
|
|
int |
num_new_components |
|
) |
| |
Gets the individual components from the data_file_ with which the class was initialized. Overwrites the components specified by component_filenames. Writes the updated traineddata file to new_traineddata_filename.
Definition at line 177 of file tessdatamanager.cpp.
182 for (
int i = 0; i < num_new_components; ++i) {
186 tprintf(
"Failed to read component file:%s\n", component_filenames[i]);
193 return SaveFile(new_traineddata_filename,
nullptr);
static bool TessdataTypeFromFileName(const char *filename, TessdataType *type)
bool SaveFile(const STRING &filename, FileWriter writer) const
bool LoadDataFromFile(const char *filename, GenericVector< char > *data)
◆ SaveFile()
bool tesseract::TessdataManager::SaveFile |
( |
const STRING & |
filename, |
|
|
FileWriter |
writer |
|
) |
| const |
Definition at line 78 of file tessdatamanager.cpp.
83 if (writer ==
nullptr)
bool SaveDataToFile(const GenericVector< char > &data, const STRING &filename)
void Serialize(GenericVector< char > *data) const
◆ Serialize()
void tesseract::TessdataManager::Serialize |
( |
GenericVector< char > * |
data | ) |
const |
Definition at line 90 of file tessdatamanager.cpp.
96 if (entries_[i].empty()) {
100 offset += entries_[i].
size();
107 fp.FWrite(&num_entries,
sizeof(num_entries), 1);
108 fp.FWrite(offset_table,
sizeof(offset_table), 1);
110 if (!entries_[i].empty()) {
111 fp.FWrite(&entries_[i][0], entries_[i].
size(), 1);
void init_to_size(int size, T t)
◆ swap()
bool tesseract::TessdataManager::swap |
( |
| ) |
const |
|
inline |
◆ TessdataTypeFromFileName()
bool tesseract::TessdataManager::TessdataTypeFromFileName |
( |
const char * |
filename, |
|
|
TessdataType * |
type |
|
) |
| |
|
static |
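Tries to determine the tessdata component type from the suffix of filename, i.e. the text following its last '.'. Returns true on success; returns false if filename contains no '.' or nothing follows it.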
Definition at line 217 of file tessdatamanager.cpp.
220 const char *suffix = strrchr(
filename,
'.');
221 if (suffix ==
nullptr || *(++suffix) ==
'\0')
return false;
static bool TessdataTypeFromFileSuffix(const char *suffix, TessdataType *type)
◆ TessdataTypeFromFileSuffix()
bool tesseract::TessdataManager::TessdataTypeFromFileSuffix |
( |
const char * |
suffix, |
|
|
TessdataType * |
type |
|
) |
| |
|
static |
Fills type with the TessdataType of the tessdata component represented by the given file suffix. E.g. for the file tessdata/eng.unicharset, the suffix unicharset -> TESSDATA_UNICHARSET.
- Returns
- true if the tessdata component type could be determined from the given suffix.
Definition at line 204 of file tessdatamanager.cpp.
207 if (strcmp(kTessdataFileSuffixes[i], suffix) == 0) {
212 tprintf(
"TessdataManager can't determine which tessdata"
213 " component is represented by %s\n", suffix);
The documentation for this class was generated from the following files: