tesseract  4.00.00dev
strngs.h
Go to the documentation of this file.
1 /**********************************************************************
2  * File: strngs.h (Formerly strings.h)
3  * Description: STRING class definition.
4  * Author: Ray Smith
5  * Created: Fri Feb 15 09:15:01 GMT 1991
6  *
7  * (C) Copyright 1991, Hewlett-Packard Ltd.
8  ** Licensed under the Apache License, Version 2.0 (the "License");
9  ** you may not use this file except in compliance with the License.
10  ** You may obtain a copy of the License at
11  ** http://www.apache.org/licenses/LICENSE-2.0
12  ** Unless required by applicable law or agreed to in writing, software
13  ** distributed under the License is distributed on an "AS IS" BASIS,
14  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15  ** See the License for the specific language governing permissions and
16  ** limitations under the License.
17  *
18  **********************************************************************/
19 
20 #ifndef STRNGS_H
21 #define STRNGS_H
22 
23 #include <assert.h>
24 #include <stdio.h>
25 #include <string.h>
26 #include "platform.h"
27 #include "memry.h"
28 
29 namespace tesseract {
30 class TFile;
31 } // namespace tesseract.
32 
33 // STRING_IS_PROTECTED means that string[index] = X is invalid
34 // because you have to go through the STRING interface to modify it.
35 // This allows the string to ensure internal integrity and maintain
36 // its own string length. Unfortunately this is not possible because
37 // STRINGS are used as direct-manipulation data buffers for things
38 // like length arrays and many places cast away the const on string()
39 // to mutate the string. Turning this off means that internally we
40 // cannot assume we know the strlen.
41 #define STRING_IS_PROTECTED 0
42 
43 template <typename T> class GenericVector;
44 
46 {
47  public:
48  STRING();
49  STRING(const STRING &string);
50  STRING(const char *string);
51  STRING(const char *data, int length);
52  ~STRING ();
53 
54  // Writes to the given file. Returns false in case of error.
55  bool Serialize(FILE* fp) const;
56  // Reads from the given file. Returns false in case of error.
57  // If swap is true, assumes a big/little-endian swap is needed.
58  bool DeSerialize(bool swap, FILE* fp);
59  // Writes to the given file. Returns false in case of error.
60  bool Serialize(tesseract::TFile* fp) const;
61  // Reads from the given file. Returns false in case of error.
62  // If swap is true, assumes a big/little-endian swap is needed.
63  bool DeSerialize(tesseract::TFile* fp);
64  // As DeSerialize, but only seeks past the data - hence a static method.
65  static bool SkipDeSerialize(tesseract::TFile* fp);
66 
67  BOOL8 contains(const char c) const;
68  inT32 length() const;
69  inT32 size() const { return length(); }
70  // Workaround to avoid g++ -Wsign-compare warnings.
72  const inT32 len = length();
73  assert(0 <= len);
74  return static_cast<uinT32>(len);
75  }
76  const char *string() const;
77  const char *c_str() const;
78 
79  inline char* strdup() const {
80  inT32 len = length() + 1;
81  return strncpy(new char[len], GetCStr(), len);
82  }
83 
84 #if STRING_IS_PROTECTED
85  const char &operator[] (inT32 index) const;
86  // len is number of chars in s to insert starting at index in this string
87  void insert_range(inT32 index, const char*s, int len);
88  void erase_range(inT32 index, int len);
89 #else
90  char &operator[] (inT32 index) const;
91 #endif
92  void split(const char c, GenericVector<STRING> *splited);
93  void truncate_at(inT32 index);
94 
95  BOOL8 operator== (const STRING & string) const;
96  BOOL8 operator!= (const STRING & string) const;
97  BOOL8 operator!= (const char *string) const;
98 
99  STRING & operator= (const char *string);
100  STRING & operator= (const STRING & string);
101 
102  STRING operator+ (const STRING & string) const;
103  STRING operator+ (const char ch) const;
104 
105  STRING & operator+= (const char *string);
106  STRING & operator+= (const STRING & string);
107  STRING & operator+= (const char ch);
108 
109  // Assignment for strings which are not null-terminated.
110  void assign(const char *cstr, int len);
111 
112  // Appends the given string and int (as a %d) to this.
113  // += cannot be used for ints as there as a char += operator that would
114  // be ambiguous, and ints usually need a string before or between them
115  // anyway.
116  void add_str_int(const char* str, int number);
117  // Appends the given string and double (as a %.8g) to this.
118  void add_str_double(const char* str, double number);
119 
120  // ensure capacity but keep pointer encapsulated
121  inline void ensure(inT32 min_capacity) { ensure_cstr(min_capacity); }
122 
123  private:
124  typedef struct STRING_HEADER {
125  // How much space was allocated in the string buffer for char data.
126  int capacity_;
127 
128  // used_ is how much of the capacity is currently being used,
129  // including a '\0' terminator.
130  //
131  // If used_ is 0 then string is NULL (not even the '\0')
132  // else if used_ > 0 then it is strlen() + 1 (because it includes '\0')
133  // else strlen is >= 0 (not NULL) but needs to be computed.
134  // this condition is set when encapsulation is violated because
135  // an API returned a mutable string.
136  //
137  // capacity_ - used_ = excess capacity that the string can grow
138  // without reallocating
139  mutable int used_;
140  } STRING_HEADER;
141 
142  // To preserve the behavior of the old serialization, we only have space
143  // for one pointer in this structure. So we are embedding a data structure
144  // at the start of the storage that will hold additional state variables,
145  // then storing the actual string contents immediately after.
146  STRING_HEADER* data_;
147 
148  // returns the header part of the storage
149  inline STRING_HEADER* GetHeader() {
150  return data_;
151  }
152  inline const STRING_HEADER* GetHeader() const {
153  return data_;
154  }
155 
156  // returns the string data part of storage
157  inline char* GetCStr() { return ((char*)data_) + sizeof(STRING_HEADER); }
158 
159  inline const char* GetCStr() const {
160  return ((const char *)data_) + sizeof(STRING_HEADER);
161  }
162  inline bool InvariantOk() const {
163 #if STRING_IS_PROTECTED
164  return (GetHeader()->used_ == 0) ?
165  (string() == NULL) : (GetHeader()->used_ == (strlen(string()) + 1));
166 #else
167  return true;
168 #endif
169  }
170 
171  // Ensure string has requested capacity as optimization
172  // to avoid unnecessary reallocations.
173  // The return value is a cstr buffer with at least requested capacity
174  char* ensure_cstr(inT32 min_capacity);
175 
176  void FixHeader() const; // make used_ non-negative, even if const
177 
178  char* AllocData(int used, int capacity);
179  void DiscardData();
180 };
181 #endif
ICOORD operator+(const ICOORD &op1, const ICOORD &op2)
Definition: ipoints.h:68
int32_t inT32
Definition: host.h:38
#define TESS_API
Definition: platform.h:81
void ensure(inT32 min_capacity)
Definition: strngs.h:121
inT32 size() const
Definition: strngs.h:69
uint32_t uinT32
Definition: host.h:39
unsigned char BOOL8
Definition: host.h:44
Definition: strngs.h:45
char * strdup() const
Definition: strngs.h:79
ICOORD & operator+=(ICOORD &op1, const ICOORD &op2)
Definition: ipoints.h:86
uinT32 unsigned_size() const
Definition: strngs.h:71