tesseract  4.00.00dev
genericvector.h
Go to the documentation of this file.
1 // File: genericvector.h
3 // Description: Generic vector class
4 // Author: Daria Antonova
5 // Created: Mon Jun 23 11:26:43 PDT 2008
6 //
7 // (C) Copyright 2007, Google Inc.
8 // Licensed under the Apache License, Version 2.0 (the "License");
9 // you may not use this file except in compliance with the License.
10 // You may obtain a copy of the License at
11 // http://www.apache.org/licenses/LICENSE-2.0
12 // Unless required by applicable law or agreed to in writing, software
13 // distributed under the License is distributed on an "AS IS" BASIS,
14 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 // See the License for the specific language governing permissions and
16 // limitations under the License.
17 //
19 //
20 #ifndef TESSERACT_CCUTIL_GENERICVECTOR_H_
21 #define TESSERACT_CCUTIL_GENERICVECTOR_H_
22 
23 #include <assert.h>
24 #include <stdio.h>
25 #include <stdlib.h>
26 
27 #include "tesscallback.h"
28 #include "errcode.h"
29 #include "helpers.h"
30 #include "ndminx.h"
31 #include "serialis.h"
32 #include "strngs.h"
33 
34 // Use PointerVector<T> below in preference to GenericVector<T*>, as that
35 // provides automatic deletion of pointers, [De]Serialize that works, and
36 // sort that works.
37 template <typename T>
38 class GenericVector {
39  public:
41  clear_cb_(NULL), compare_cb_(NULL) {}
42 
43  GenericVector(int size, T init_val) {
44  init(size);
45  init_to_size(size, init_val);
46  }
47 
48  // Copy
49  GenericVector(const GenericVector& other) {
50  this->init(other.size());
51  this->operator+=(other);
52  }
55 
57 
58  // Reserve some memory.
59  void reserve(int size);
60  // Double the size of the internal array.
61  void double_the_size();
62 
63  // Resizes to size and sets all values to t.
64  void init_to_size(int size, T t);
65  // Resizes to size without any initialization.
66  void resize_no_init(int size) {
67  reserve(size);
68  size_used_ = size;
69  }
70 
71  // Return the size used.
72  int size() const {
73  return size_used_;
74  }
75  // Workaround to avoid g++ -Wsign-compare warnings.
76  unsigned int unsigned_size() const {
77  static_assert(sizeof(size_used_) <= sizeof(unsigned int), "");
78  assert(0 <= size_used_);
79  return static_cast<unsigned int>(size_used_);
80  }
81  int size_reserved() const {
82  return size_reserved_;
83  }
84 
85  int length() const {
86  return size_used_;
87  }
88 
89  // Return true if empty.
90  bool empty() const {
91  return size_used_ == 0;
92  }
93 
94  // Return the object from an index.
95  T &get(int index) const;
96  T &back() const;
97  T &operator[](int index) const;
98  // Returns the last object and removes it.
99  T pop_back();
100 
101  // Return the index of the T object.
102  // This method NEEDS a compare_callback to be passed to
103  // set_compare_callback.
104  int get_index(T object) const;
105 
106  // Return true if T is in the array
107  bool contains(T object) const;
108 
109  // Return true if the index is valid
110  T contains_index(int index) const;
111 
112  // Push an element in the end of the array
113  int push_back(T object);
114  void operator+=(T t);
115 
116  // Push an element in the end of the array if the same
117  // element is not already contained in the array.
118  int push_back_new(T object);
119 
120  // Push an element in the front of the array
121  // Note: This function is O(n)
122  int push_front(T object);
123 
124  // Set the value at the given index
125  void set(T t, int index);
126 
127  // Insert t at the given index, push other elements to the right.
128  void insert(T t, int index);
129 
130  // Removes an element at the given index and
131  // shifts the remaining elements to the left.
132  void remove(int index);
133 
134  // Truncates the array to the given size by removing the end.
135  // If the current size is less, the array is not expanded.
136  void truncate(int size) {
137  if (size < size_used_)
138  size_used_ = size;
139  }
140 
141  // Add a callback to be called to delete the elements when the array took
142  // their ownership.
144 
145  // Add a callback to be called to compare the elements when needed (contains,
146  // get_id, ...)
148 
149  // Clear the array, calling the clear callback function if any.
150  // All the owned callbacks are also deleted.
151  // If you don't want the callbacks to be deleted, before calling clear, set
152  // the callback to NULL.
153  void clear();
154 
155  // Delete objects pointed to by data_[i]
156  void delete_data_pointers();
157 
158  // This method clears the current object, then, does a shallow copy of
159  // its argument, and finally invalidates its argument.
160  // Callbacks are moved to the current object;
161  void move(GenericVector<T>* from);
162 
163  // Read/Write the array to a file. This does _NOT_ read/write the callbacks.
164  // The callback given must be permanent since they will be called more than
165  // once. The given callback will be deleted at the end.
166  // If the callbacks are NULL, then the data is simply read/written using
167  // fread (and swapping)/fwrite.
168  // Returns false on error or if the callback returns false.
169  // DEPRECATED. Use [De]Serialize[Classes] instead.
170  bool write(FILE* f, TessResultCallback2<bool, FILE*, T const &>* cb) const;
171  bool read(tesseract::TFile* f,
173  // Writes a vector of simple types to the given file. Assumes that bitwise
174  // read/write of T will work. Returns false in case of error.
175  // TODO(rays) Change all callers to use TFile and remove deprecated methods.
176  bool Serialize(FILE* fp) const;
177  bool Serialize(tesseract::TFile* fp) const;
178  // Reads a vector of simple types from the given file. Assumes that bitwise
179  // read/write will work with ReverseN according to sizeof(T).
180  // Returns false in case of error.
181  // If swap is true, assumes a big/little-endian swap is needed.
182  // TFile is assumed to know about swapping.
183  bool DeSerialize(bool swap, FILE* fp);
184  bool DeSerialize(tesseract::TFile* fp);
185  // Skips the deserialization of the vector.
186  static bool SkipDeSerialize(tesseract::TFile* fp);
187  // Writes a vector of classes to the given file. Assumes the existence of
188  // bool T::Serialize(FILE* fp) const that returns false in case of error.
189  // Returns false in case of error.
190  bool SerializeClasses(FILE* fp) const;
191  bool SerializeClasses(tesseract::TFile* fp) const;
192  // Reads a vector of classes from the given file. Assumes the existence of
193  // bool T::Deserialize(bool swap, FILE* fp) that returns false in case of
194  // error. Also needs T::T() and T::T(constT&), as init_to_size is used in
195  // this function. Returns false in case of error.
196  // If swap is true, assumes a big/little-endian swap is needed.
197  bool DeSerializeClasses(bool swap, FILE* fp);
199  // Calls SkipDeSerialize on the elements of the vector.
200  static bool SkipDeSerializeClasses(tesseract::TFile* fp);
201 
202  // Allocates a new array of double the current_size, copies over the
203  // information from data to the new location, deletes data and returns
204  // the pointed to the new larger array.
205  // This function uses memcpy to copy the data, instead of invoking
206  // operator=() for each element like double_the_size() does.
207  static T *double_the_size_memcpy(int current_size, T *data) {
208  T *data_new = new T[current_size * 2];
209  memcpy(data_new, data, sizeof(T) * current_size);
210  delete[] data;
211  return data_new;
212  }
213 
214  // Reverses the elements of the vector.
215  void reverse() {
216  for (int i = 0; i < size_used_ / 2; ++i)
217  Swap(&data_[i], &data_[size_used_ - 1 - i]);
218  }
219 
220  // Sorts the members of this vector using the less than comparator (cmp_lt),
221  // which compares the values. Useful for GenericVectors to primitive types.
222  // Will not work so great for pointers (unless you just want to sort some
223  // pointers). You need to provide a specialization to sort_cmp to use
224  // your type.
225  void sort();
226 
227  // Sort the array into the order defined by the qsort function comparator.
228  // The comparator function is as defined by qsort, ie. it receives pointers
229  // to two Ts and returns negative if the first element is to appear earlier
230  // in the result and positive if it is to appear later, with 0 for equal.
231  void sort(int (*comparator)(const void*, const void*)) {
232  qsort(data_, size_used_, sizeof(*data_), comparator);
233  }
234 
235  // Searches the array (assuming sorted in ascending order, using sort()) for
236  // an element equal to target and returns true if it is present.
237  // Use binary_search to get the index of target, or its nearest candidate.
238  bool bool_binary_search(const T& target) const {
239  int index = binary_search(target);
240  if (index >= size_used_)
241  return false;
242  return data_[index] == target;
243  }
244  // Searches the array (assuming sorted in ascending order, using sort()) for
245  // an element equal to target and returns the index of the best candidate.
246  // The return value is conceptually the largest index i such that
247  // data_[i] <= target or 0 if target < the whole vector.
248  // NOTE that this function uses operator> so really the return value is
249  // the largest index i such that data_[i] > target is false.
250  int binary_search(const T& target) const {
251  int bottom = 0;
252  int top = size_used_;
253  while (top - bottom > 1) {
254  int middle = (bottom + top) / 2;
255  if (data_[middle] > target)
256  top = middle;
257  else
258  bottom = middle;
259  }
260  return bottom;
261  }
262 
263  // Compact the vector by deleting elements using operator!= on basic types.
264  // The vector must be sorted.
265  void compact_sorted() {
266  if (size_used_ == 0)
267  return;
268 
269  // First element is in no matter what, hence the i = 1.
270  int last_write = 0;
271  for (int i = 1; i < size_used_; ++i) {
272  // Finds next unique item and writes it.
273  if (data_[last_write] != data_[i])
274  data_[++last_write] = data_[i];
275  }
276  // last_write is the index of a valid data cell, so add 1.
277  size_used_ = last_write + 1;
278  }
279 
280  // Compact the vector by deleting elements for which delete_cb returns
281  // true. delete_cb is a permanent callback and will be deleted.
283  int new_size = 0;
284  int old_index = 0;
285  // Until the callback returns true, the elements stay the same.
286  while (old_index < size_used_ && !delete_cb->Run(old_index++))
287  ++new_size;
288  // Now just copy anything else that gets false from delete_cb.
289  for (; old_index < size_used_; ++old_index) {
290  if (!delete_cb->Run(old_index)) {
291  data_[new_size++] = data_[old_index];
292  }
293  }
294  size_used_ = new_size;
295  delete delete_cb;
296  }
297 
298  T dot_product(const GenericVector<T>& other) const {
299  T result = static_cast<T>(0);
300  for (int i = MIN(size_used_, other.size_used_) - 1; i >= 0; --i)
301  result += data_[i] * other.data_[i];
302  return result;
303  }
304 
305  // Returns the index of what would be the target_index_th item in the array
306  // if the members were sorted, without actually sorting. Members are
307  // shuffled around, but it takes O(n) time.
308  // NOTE: uses operator< and operator== on the members.
309  int choose_nth_item(int target_index) {
310  // Make sure target_index is legal.
311  if (target_index < 0)
312  target_index = 0; // ensure legal
313  else if (target_index >= size_used_)
314  target_index = size_used_ - 1;
315  unsigned int seed = 1;
316  return choose_nth_item(target_index, 0, size_used_, &seed);
317  }
318 
319  // Swaps the elements with the given indices.
320  void swap(int index1, int index2) {
321  if (index1 != index2) {
322  T tmp = data_[index1];
323  data_[index1] = data_[index2];
324  data_[index2] = tmp;
325  }
326  }
327  // Returns true if all elements of *this are within the given range.
328  // Only uses operator<
329  bool WithinBounds(const T& rangemin, const T& rangemax) const {
330  for (int i = 0; i < size_used_; ++i) {
331  if (data_[i] < rangemin || rangemax < data_[i])
332  return false;
333  }
334  return true;
335  }
336 
337  protected:
338  // Internal recursive version of choose_nth_item.
339  int choose_nth_item(int target_index, int start, int end, unsigned int* seed);
340 
341  // Init the object, allocating size memory.
342  void init(int size);
343 
344  // We are assuming that the object generally placed in thie
345  // vector are small enough that for efficiency it makes sense
346  // to start with a larger initial size.
347  static const int kDefaultVectorSize = 4;
350  T* data_;
352  // Mutable because Run method is not const
354 };
355 
356 namespace tesseract {
357 
358 // Function to read a GenericVector<char> from a whole file.
359 // Returns false on failure.
360 typedef bool (*FileReader)(const STRING& filename, GenericVector<char>* data);
361 // Function to write a GenericVector<char> to a whole file.
362 // Returns false on failure.
363 typedef bool (*FileWriter)(const GenericVector<char>& data,
364  const STRING& filename);
365 // The default FileReader loads the whole file into the vector of char,
366 // returning false on error.
367 inline bool LoadDataFromFile(const char *filename,
368  GenericVector<char>* data) {
369  bool result = false;
370  FILE* fp = fopen(filename, "rb");
371  if (fp != NULL) {
372  fseek(fp, 0, SEEK_END);
373  size_t size = ftell(fp);
374  fseek(fp, 0, SEEK_SET);
375  if (size > 0) {
376  data->resize_no_init(size);
377  result = fread(&(*data)[0], 1, size, fp) == size;
378  }
379  fclose(fp);
380  }
381  return result;
382 }
383 
384 inline bool LoadDataFromFile(const STRING& filename,
385  GenericVector<char>* data) {
386  return LoadDataFromFile(filename.string(), data);
387 }
388 
389 // The default FileWriter writes the vector of char to the filename file,
390 // returning false on error.
391 inline bool SaveDataToFile(const GenericVector<char>& data,
392  const STRING& filename) {
393  FILE* fp = fopen(filename.string(), "wb");
394  if (fp == NULL) return false;
395  bool result =
396  static_cast<int>(fwrite(&data[0], 1, data.size(), fp)) == data.size();
397  fclose(fp);
398  return result;
399 }
400 // Reads a file as a vector of STRING.
401 inline bool LoadFileLinesToStrings(const STRING& filename,
402  GenericVector<STRING>* lines) {
403  GenericVector<char> data;
404  if (!LoadDataFromFile(filename.string(), &data)) {
405  return false;
406  }
407  STRING lines_str(&data[0], data.size());
408  lines_str.split('\n', lines);
409  return true;
410 }
411 
412 template <typename T>
413 bool cmp_eq(T const & t1, T const & t2) {
414  return t1 == t2;
415 }
416 
417 // Used by sort()
418 // return < 0 if t1 < t2
419 // return 0 if t1 == t2
420 // return > 0 if t1 > t2
421 template <typename T>
422 int sort_cmp(const void* t1, const void* t2) {
423  const T* a = static_cast<const T *> (t1);
424  const T* b = static_cast<const T *> (t2);
425  if (*a < *b) {
426  return -1;
427  } else if (*b < *a) {
428  return 1;
429  } else {
430  return 0;
431  }
432 }
433 
434 // Used by PointerVector::sort()
435 // return < 0 if t1 < t2
436 // return 0 if t1 == t2
437 // return > 0 if t1 > t2
438 template <typename T>
439 int sort_ptr_cmp(const void* t1, const void* t2) {
440  const T* a = *static_cast<T * const *>(t1);
441  const T* b = *static_cast<T * const *>(t2);
442  if (*a < *b) {
443  return -1;
444  } else if (*b < *a) {
445  return 1;
446  } else {
447  return 0;
448  }
449 }
450 
451 // Subclass for a vector of pointers. Use in preference to GenericVector<T*>
452 // as it provides automatic deletion and correct serialization, with the
453 // corollary that all copy operations are deep copies of the pointed-to objects.
454 template<typename T>
455 class PointerVector : public GenericVector<T*> {
456  public:
458  explicit PointerVector(int size) : GenericVector<T*>(size) { }
460  // Clear must be called here, even though it is called again by the base,
461  // as the base will call the wrong clear.
462  clear();
463  }
464  // Copy must be deep, as the pointers will be automatically deleted on
465  // destruction.
466  PointerVector(const PointerVector& other) : GenericVector<T*>(other) {
467  this->init(other.size());
468  this->operator+=(other);
469  }
471  this->reserve(this->size_used_ + other.size_used_);
472  for (int i = 0; i < other.size(); ++i) {
473  this->push_back(new T(*other.data_[i]));
474  }
475  return *this;
476  }
477 
479  if (&other != this) {
480  this->truncate(0);
481  this->operator+=(other);
482  }
483  return *this;
484  }
485 
486  // Removes an element at the given index and
487  // shifts the remaining elements to the left.
488  void remove(int index) {
489  delete GenericVector<T*>::data_[index];
491  }
492 
493  // Truncates the array to the given size by removing the end.
494  // If the current size is less, the array is not expanded.
495  void truncate(int size) {
496  for (int i = size; i < GenericVector<T*>::size_used_; ++i)
497  delete GenericVector<T*>::data_[i];
499  }
500 
501  // Compact the vector by deleting elements for which delete_cb returns
502  // true. delete_cb is a permanent callback and will be deleted.
504  int new_size = 0;
505  int old_index = 0;
506  // Until the callback returns true, the elements stay the same.
507  while (old_index < GenericVector<T*>::size_used_ &&
508  !delete_cb->Run(GenericVector<T*>::data_[old_index++]))
509  ++new_size;
510  // Now just copy anything else that gets false from delete_cb.
511  for (; old_index < GenericVector<T*>::size_used_; ++old_index) {
512  if (!delete_cb->Run(GenericVector<T*>::data_[old_index])) {
513  GenericVector<T*>::data_[new_size++] =
514  GenericVector<T*>::data_[old_index];
515  } else {
516  delete GenericVector<T*>::data_[old_index];
517  }
518  }
520  delete delete_cb;
521  }
522 
523  // Clear the array, calling the clear callback function if any.
524  // All the owned callbacks are also deleted.
525  // If you don't want the callbacks to be deleted, before calling clear, set
526  // the callback to NULL.
527  void clear() {
530  }
531 
532  // Writes a vector of (pointers to) classes to the given file. Assumes the
533  // existence of bool T::Serialize(FILE*) const that returns false in case of
534  // error. There is no Serialize for simple types, as you would have a
535  // normal GenericVector of those.
536  // Returns false in case of error.
537  bool Serialize(FILE* fp) const {
539  if (fwrite(&used, sizeof(used), 1, fp) != 1) return false;
540  for (int i = 0; i < used; ++i) {
541  inT8 non_null = GenericVector<T*>::data_[i] != NULL;
542  if (fwrite(&non_null, sizeof(non_null), 1, fp) != 1) return false;
543  if (non_null && !GenericVector<T*>::data_[i]->Serialize(fp)) return false;
544  }
545  return true;
546  }
547  bool Serialize(TFile* fp) const {
549  if (fp->FWrite(&used, sizeof(used), 1) != 1) return false;
550  for (int i = 0; i < used; ++i) {
551  inT8 non_null = GenericVector<T*>::data_[i] != NULL;
552  if (fp->FWrite(&non_null, sizeof(non_null), 1) != 1) return false;
553  if (non_null && !GenericVector<T*>::data_[i]->Serialize(fp)) return false;
554  }
555  return true;
556  }
557  // Reads a vector of (pointers to) classes to the given file. Assumes the
558  // existence of bool T::DeSerialize(bool, Tfile*) const that returns false in
559  // case of error. There is no Serialize for simple types, as you would have a
560  // normal GenericVector of those.
561  // If swap is true, assumes a big/little-endian swap is needed.
562  // Also needs T::T(), as new T is used in this function.
563  // Returns false in case of error.
564  bool DeSerialize(bool swap, FILE* fp) {
565  inT32 reserved;
566  if (fread(&reserved, sizeof(reserved), 1, fp) != 1) return false;
567  if (swap) Reverse32(&reserved);
568  GenericVector<T*>::reserve(reserved);
569  truncate(0);
570  for (int i = 0; i < reserved; ++i) {
571  inT8 non_null;
572  if (fread(&non_null, sizeof(non_null), 1, fp) != 1) return false;
573  T* item = NULL;
574  if (non_null) {
575  item = new T;
576  if (!item->DeSerialize(swap, fp)) {
577  delete item;
578  return false;
579  }
580  this->push_back(item);
581  } else {
582  // Null elements should keep their place in the vector.
583  this->push_back(NULL);
584  }
585  }
586  return true;
587  }
588  bool DeSerialize(TFile* fp) {
589  inT32 reserved;
590  if (!DeSerializeSize(fp, &reserved)) return false;
591  GenericVector<T*>::reserve(reserved);
592  truncate(0);
593  for (int i = 0; i < reserved; ++i) {
594  if (!DeSerializeElement(fp)) return false;
595  }
596  return true;
597  }
598  // Enables deserialization of a selection of elements. Note that in order to
599  // retain the integrity of the stream, the caller must call some combination
600  // of DeSerializeElement and DeSerializeSkip of the exact number returned in
601  // *size, assuming a true return.
602  static bool DeSerializeSize(TFile* fp, inT32* size) {
603  return fp->FReadEndian(size, sizeof(*size), 1) == 1;
604  }
605  // Reads and appends to the vector the next element of the serialization.
607  inT8 non_null;
608  if (fp->FRead(&non_null, sizeof(non_null), 1) != 1) return false;
609  T* item = NULL;
610  if (non_null) {
611  item = new T;
612  if (!item->DeSerialize(fp)) {
613  delete item;
614  return false;
615  }
616  this->push_back(item);
617  } else {
618  // Null elements should keep their place in the vector.
619  this->push_back(NULL);
620  }
621  return true;
622  }
623  // Skips the next element of the serialization.
624  static bool DeSerializeSkip(TFile* fp) {
625  inT8 non_null;
626  if (fp->FRead(&non_null, sizeof(non_null), 1) != 1) return false;
627  if (non_null) {
628  if (!T::SkipDeSerialize(fp)) return false;
629  }
630  return true;
631  }
632 
633  // Sorts the items pointed to by the members of this vector using
634  // t::operator<().
635  void sort() { this->GenericVector<T*>::sort(&sort_ptr_cmp<T>); }
636 };
637 
638 } // namespace tesseract
639 
640 // A useful vector that uses operator== to do comparisons.
641 template <typename T>
642 class GenericVectorEqEq : public GenericVector<T> {
643  public:
646  NewPermanentTessCallback(tesseract::cmp_eq<T>));
647  }
648  GenericVectorEqEq(int size) : GenericVector<T>(size) {
650  NewPermanentTessCallback(tesseract::cmp_eq<T>));
651  }
652 };
653 
654 template <typename T>
655 void GenericVector<T>::init(int size) {
656  size_used_ = 0;
657  size_reserved_ = 0;
658  data_ = 0;
659  clear_cb_ = 0;
660  compare_cb_ = 0;
661  reserve(size);
662 }
663 
664 template <typename T>
666  clear();
667 }
668 
669 // Reserve some memory. If the internal array contains elements, they are
670 // copied.
671 template <typename T>
673  if (size_reserved_ >= size || size <= 0)
674  return;
675  if (size < kDefaultVectorSize) size = kDefaultVectorSize;
676  T* new_array = new T[size];
677  for (int i = 0; i < size_used_; ++i)
678  new_array[i] = data_[i];
679  delete[] data_;
680  data_ = new_array;
682 }
683 
684 template <typename T>
686  if (size_reserved_ == 0) {
688  }
689  else {
690  reserve(2 * size_reserved_);
691  }
692 }
693 
694 // Resizes to size and sets all values to t.
695 template <typename T>
696 void GenericVector<T>::init_to_size(int size, T t) {
697  reserve(size);
698  size_used_ = size;
699  for (int i = 0; i < size; ++i)
700  data_[i] = t;
701 }
702 
703 
704 // Return the object from an index.
705 template <typename T>
706 T &GenericVector<T>::get(int index) const {
707  ASSERT_HOST(index >= 0 && index < size_used_);
708  return data_[index];
709 }
710 
711 template <typename T>
712 T &GenericVector<T>::operator[](int index) const {
713  assert(index >= 0 && index < size_used_);
714  return data_[index];
715 }
716 
717 template <typename T>
719  ASSERT_HOST(size_used_ > 0);
720  return data_[size_used_ - 1];
721 }
722 // Returns the last object and removes it.
723 template <typename T>
725  ASSERT_HOST(size_used_ > 0);
726  return data_[--size_used_];
727 }
728 
729 // Return the object from an index.
730 template <typename T>
731 void GenericVector<T>::set(T t, int index) {
732  ASSERT_HOST(index >= 0 && index < size_used_);
733  data_[index] = t;
734 }
735 
736 // Shifts the rest of the elements to the right to make
737 // space for the new elements and inserts the given element
738 // at the specified index.
739 template <typename T>
740 void GenericVector<T>::insert(T t, int index) {
741  ASSERT_HOST(index >= 0 && index <= size_used_);
742  if (size_reserved_ == size_used_)
743  double_the_size();
744  for (int i = size_used_; i > index; --i) {
745  data_[i] = data_[i-1];
746  }
747  data_[index] = t;
748  size_used_++;
749 }
750 
751 // Removes an element at the given index and
752 // shifts the remaining elements to the left.
753 template <typename T>
754 void GenericVector<T>::remove(int index) {
755  ASSERT_HOST(index >= 0 && index < size_used_);
756  for (int i = index; i < size_used_ - 1; ++i) {
757  data_[i] = data_[i+1];
758  }
759  size_used_--;
760 }
761 
762 // Return true if the index is valindex
763 template <typename T>
765  return index >= 0 && index < size_used_;
766 }
767 
768 // Return the index of the T object.
769 template <typename T>
770 int GenericVector<T>::get_index(T object) const {
771  for (int i = 0; i < size_used_; ++i) {
772  ASSERT_HOST(compare_cb_ != NULL);
773  if (compare_cb_->Run(object, data_[i]))
774  return i;
775  }
776  return -1;
777 }
778 
779 // Return true if T is in the array
780 template <typename T>
781 bool GenericVector<T>::contains(T object) const {
782  return get_index(object) != -1;
783 }
784 
785 // Add an element in the array
786 template <typename T>
788  int index = 0;
789  if (size_used_ == size_reserved_)
790  double_the_size();
791  index = size_used_++;
792  data_[index] = object;
793  return index;
794 }
795 
796 template <typename T>
798  int index = get_index(object);
799  if (index >= 0)
800  return index;
801  return push_back(object);
802 }
803 
804 // Add an element in the array (front)
805 template <typename T>
807  if (size_used_ == size_reserved_)
808  double_the_size();
809  for (int i = size_used_; i > 0; --i)
810  data_[i] = data_[i-1];
811  data_[0] = object;
812  ++size_used_;
813  return 0;
814 }
815 
816 template <typename T>
818  push_back(t);
819 }
820 
821 template <typename T>
823  this->reserve(size_used_ + other.size_used_);
824  for (int i = 0; i < other.size(); ++i) {
825  this->operator+=(other.data_[i]);
826  }
827  return *this;
828 }
829 
830 template <typename T>
832  if (&other != this) {
833  this->truncate(0);
834  this->operator+=(other);
835  }
836  return *this;
837 }
838 
839 // Add a callback to be called to delete the elements when the array took
840 // their ownership.
841 template <typename T>
843  clear_cb_ = cb;
844 }
845 
846 // Add a callback to be called to delete the elements when the array took
847 // their ownership.
848 template <typename T>
851  compare_cb_ = cb;
852 }
853 
854 // Clear the array, calling the callback function if any.
855 template <typename T>
857  if (size_reserved_ > 0) {
858  if (clear_cb_ != NULL)
859  for (int i = 0; i < size_used_; ++i)
860  clear_cb_->Run(data_[i]);
861  delete[] data_;
862  data_ = NULL;
863  size_used_ = 0;
864  size_reserved_ = 0;
865  }
866  if (clear_cb_ != NULL) {
867  delete clear_cb_;
868  clear_cb_ = NULL;
869  }
870  if (compare_cb_ != NULL) {
871  delete compare_cb_;
872  compare_cb_ = NULL;
873  }
874 }
875 
876 template <typename T>
878  for (int i = 0; i < size_used_; ++i)
879  if (data_[i]) {
880  delete data_[i];
881  }
882 }
883 
884 
885 template <typename T>
887  FILE* f, TessResultCallback2<bool, FILE*, T const &>* cb) const {
888  if (fwrite(&size_reserved_, sizeof(size_reserved_), 1, f) != 1) return false;
889  if (fwrite(&size_used_, sizeof(size_used_), 1, f) != 1) return false;
890  if (cb != NULL) {
891  for (int i = 0; i < size_used_; ++i) {
892  if (!cb->Run(f, data_[i])) {
893  delete cb;
894  return false;
895  }
896  }
897  delete cb;
898  } else {
899  if (fwrite(data_, sizeof(T), size_used_, f) != unsigned_size()) return false;
900  }
901  return true;
902 }
903 
904 template <typename T>
907  inT32 reserved;
908  if (f->FReadEndian(&reserved, sizeof(reserved), 1) != 1) return false;
909  reserve(reserved);
910  if (f->FReadEndian(&size_used_, sizeof(size_used_), 1) != 1) return false;
911  if (cb != NULL) {
912  for (int i = 0; i < size_used_; ++i) {
913  if (!cb->Run(f, data_ + i)) {
914  delete cb;
915  return false;
916  }
917  }
918  delete cb;
919  } else {
920  if (f->FReadEndian(data_, sizeof(T), size_used_) != size_used_)
921  return false;
922  }
923  return true;
924 }
925 
926 // Writes a vector of simple types to the given file. Assumes that bitwise
927 // read/write of T will work. Returns false in case of error.
928 template <typename T>
929 bool GenericVector<T>::Serialize(FILE* fp) const {
930  if (fwrite(&size_used_, sizeof(size_used_), 1, fp) != 1) return false;
931  if (fwrite(data_, sizeof(*data_), size_used_, fp) != unsigned_size()) return false;
932  return true;
933 }
934 template <typename T>
936  if (fp->FWrite(&size_used_, sizeof(size_used_), 1) != 1) return false;
937  if (fp->FWrite(data_, sizeof(*data_), size_used_) != size_used_) return false;
938  return true;
939 }
940 
941 // Reads a vector of simple types from the given file. Assumes that bitwise
942 // read/write will work with ReverseN according to sizeof(T).
943 // Returns false in case of error.
944 // If swap is true, assumes a big/little-endian swap is needed.
945 template <typename T>
946 bool GenericVector<T>::DeSerialize(bool swap, FILE* fp) {
947  inT32 reserved;
948  if (fread(&reserved, sizeof(reserved), 1, fp) != 1) return false;
949  if (swap) Reverse32(&reserved);
950  reserve(reserved);
951  size_used_ = reserved;
952  if (fread(data_, sizeof(T), size_used_, fp) != unsigned_size()) return false;
953  if (swap) {
954  for (int i = 0; i < size_used_; ++i)
955  ReverseN(&data_[i], sizeof(data_[i]));
956  }
957  return true;
958 }
959 template <typename T>
961  inT32 reserved;
962  if (fp->FReadEndian(&reserved, sizeof(reserved), 1) != 1) return false;
963  reserve(reserved);
964  size_used_ = reserved;
965  return fp->FReadEndian(data_, sizeof(T), size_used_) == size_used_;
966 }
967 template <typename T>
969  inT32 reserved;
970  if (fp->FReadEndian(&reserved, sizeof(reserved), 1) != 1) return false;
971  return fp->FRead(NULL, sizeof(T), reserved) == reserved;
972 }
973 
974 // Writes a vector of classes to the given file. Assumes the existence of
975 // bool T::Serialize(FILE* fp) const that returns false in case of error.
976 // Returns false in case of error.
977 template <typename T>
979  if (fwrite(&size_used_, sizeof(size_used_), 1, fp) != 1) return false;
980  for (int i = 0; i < size_used_; ++i) {
981  if (!data_[i].Serialize(fp)) return false;
982  }
983  return true;
984 }
985 template <typename T>
987  if (fp->FWrite(&size_used_, sizeof(size_used_), 1) != 1) return false;
988  for (int i = 0; i < size_used_; ++i) {
989  if (!data_[i].Serialize(fp)) return false;
990  }
991  return true;
992 }
993 
994 // Reads a vector of classes from the given file. Assumes the existence of
995 // bool T::Deserialize(bool swap, FILE* fp) that returns false in case of
996 // error. Also needs T::T() and T::T(constT&), as init_to_size is used in
997 // this function. Returns false in case of error.
998 // If swap is true, assumes a big/little-endian swap is needed.
999 template <typename T>
1000 bool GenericVector<T>::DeSerializeClasses(bool swap, FILE* fp) {
1001  inT32 reserved;
1002  if (fread(&reserved, sizeof(reserved), 1, fp) != 1) return false;
1003  if (swap) Reverse32(&reserved);
1004  T empty;
1005  init_to_size(reserved, empty);
1006  for (int i = 0; i < reserved; ++i) {
1007  if (!data_[i].DeSerialize(swap, fp)) return false;
1008  }
1009  return true;
1010 }
1011 template <typename T>
1013  inT32 reserved;
1014  if (fp->FReadEndian(&reserved, sizeof(reserved), 1) != 1) return false;
1015  T empty;
1016  init_to_size(reserved, empty);
1017  for (int i = 0; i < reserved; ++i) {
1018  if (!data_[i].DeSerialize(fp)) return false;
1019  }
1020  return true;
1021 }
1022 template <typename T>
1024  inT32 reserved;
1025  if (fp->FReadEndian(&reserved, sizeof(reserved), 1) != 1) return false;
1026  for (int i = 0; i < reserved; ++i) {
1027  if (!T::SkipDeSerialize(fp)) return false;
1028  }
1029  return true;
1030 }
1031 
1032 // This method clear the current object, then, does a shallow copy of
1033 // its argument, and finally invalidates its argument.
1034 template <typename T>
1036  this->clear();
1037  this->data_ = from->data_;
1038  this->size_reserved_ = from->size_reserved_;
1039  this->size_used_ = from->size_used_;
1040  this->compare_cb_ = from->compare_cb_;
1041  this->clear_cb_ = from->clear_cb_;
1042  from->data_ = NULL;
1043  from->clear_cb_ = NULL;
1044  from->compare_cb_ = NULL;
1045  from->size_used_ = 0;
1046  from->size_reserved_ = 0;
1047 }
1048 
1049 template <typename T>
1051  sort(&tesseract::sort_cmp<T>);
1052 }
1053 
1054 // Internal recursive version of choose_nth_item.
1055 // The algorithm used comes from "Algorithms" by Sedgewick:
1056 // http://books.google.com/books/about/Algorithms.html?id=idUdqdDXqnAC
1057 // The principle is to choose a random pivot, and move everything less than
1058 // the pivot to its left, and everything greater than the pivot to the end
1059 // of the array, then recurse on the part that contains the desired index, or
1060 // just return the answer if it is in the equal section in the middle.
1061 // The random pivot guarantees average linear time for the same reason that
1062 // n times vector::push_back takes linear time on average.
1063 // target_index, start and and end are all indices into the full array.
1064 // Seed is a seed for rand_r for thread safety purposes. Its value is
1065 // unimportant as the random numbers do not affect the result except
1066 // between equal answers.
1067 template <typename T>
1068 int GenericVector<T>::choose_nth_item(int target_index, int start, int end,
1069  unsigned int* seed) {
1070  // Number of elements to process.
1071  int num_elements = end - start;
1072  // Trivial cases.
1073  if (num_elements <= 1)
1074  return start;
1075  if (num_elements == 2) {
1076  if (data_[start] < data_[start + 1]) {
1077  return target_index > start ? start + 1 : start;
1078  } else {
1079  return target_index > start ? start : start + 1;
1080  }
1081  }
1082  // Place the pivot at start.
1083  #ifndef rand_r // _MSC_VER, ANDROID
1084  srand(*seed);
1085  #define rand_r(seed) rand()
1086  #endif // _MSC_VER
1087  int pivot = rand_r(seed) % num_elements + start;
1088  swap(pivot, start);
1089  // The invariant condition here is that items [start, next_lesser) are less
1090  // than the pivot (which is at index next_lesser) and items
1091  // [prev_greater, end) are greater than the pivot, with items
1092  // [next_lesser, prev_greater) being equal to the pivot.
1093  int next_lesser = start;
1094  int prev_greater = end;
1095  for (int next_sample = start + 1; next_sample < prev_greater;) {
1096  if (data_[next_sample] < data_[next_lesser]) {
1097  swap(next_lesser++, next_sample++);
1098  } else if (data_[next_sample] == data_[next_lesser]) {
1099  ++next_sample;
1100  } else {
1101  swap(--prev_greater, next_sample);
1102  }
1103  }
1104  // Now the invariant is set up, we recurse on just the section that contains
1105  // the desired index.
1106  if (target_index < next_lesser)
1107  return choose_nth_item(target_index, start, next_lesser, seed);
1108  else if (target_index < prev_greater)
1109  return next_lesser; // In equal bracket.
1110  else
1111  return choose_nth_item(target_index, prev_greater, end, seed);
1112 }
1113 
1114 
1115 #endif // TESSERACT_CCUTIL_GENERICVECTOR_H_
int sort_cmp(const void *t1, const void *t2)
bool DeSerialize(bool swap, FILE *fp)
void set(T t, int index)
void Swap(T *p1, T *p2)
Definition: helpers.h:97
int get_index(T object) const
unsigned int unsigned_size() const
Definition: genericvector.h:76
bool DeSerializeElement(TFile *fp)
int push_front(T object)
bool DeSerialize(TFile *fp)
int32_t inT32
Definition: host.h:38
T contains_index(int index) const
_ConstTessMemberResultCallback_0_0< false, R, T1 >::base * NewPermanentTessCallback(const T1 *obj, R(T2::*member)() const)
Definition: tesscallback.h:116
bool SaveDataToFile(const GenericVector< char > &data, const STRING &filename)
void init_to_size(int size, T t)
GenericVector(int size, T init_val)
Definition: genericvector.h:43
void reserve(int size)
GenericVector< T > & operator+=(const GenericVector &other)
GenericVector(const GenericVector &other)
Definition: genericvector.h:49
bool(* FileReader)(const STRING &filename, GenericVector< char > *data)
voidpf void uLong size
Definition: ioapi.h:39
void sort(int(*comparator)(const void *, const void *))
void swap(int index1, int index2)
virtual R Run(A1)=0
void remove(int index)
virtual R Run(A1, A2)=0
T & operator[](int index) const
bool cmp_eq(T const &t1, T const &t2)
int push_back(T object)
bool bool_binary_search(const T &target) const
virtual void Run(A1)=0
const char * string() const
Definition: strngs.cpp:198
void resize_no_init(int size)
Definition: genericvector.h:66
void move(GenericVector< T > *from)
int FReadEndian(void *buffer, int size, int count)
Definition: serialis.cpp:97
static bool SkipDeSerializeClasses(tesseract::TFile *fp)
bool empty() const
Definition: genericvector.h:90
void truncate(int size)
#define SEEK_SET
Definition: ioapi.c:29
GenericVectorEqEq(int size)
int size() const
Definition: genericvector.h:72
static bool DeSerializeSize(TFile *fp, inT32 *size)
static T * double_the_size_memcpy(int current_size, T *data)
#define ASSERT_HOST(x)
Definition: errcode.h:84
static bool DeSerializeSkip(TFile *fp)
PointerVector< T > & operator=(const PointerVector &other)
T dot_product(const GenericVector< T > &other) const
void insert(T t, int index)
void compact(TessResultCallback1< bool, const T *> *delete_cb)
bool WithinBounds(const T &rangemin, const T &rangemax) const
Definition: strngs.h:45
int sort_ptr_cmp(const void *t1, const void *t2)
bool contains(T object) const
void double_the_size()
bool LoadFileLinesToStrings(const STRING &filename, GenericVector< STRING > *lines)
PointerVector< T > & operator+=(const PointerVector &other)
bool write(FILE *f, TessResultCallback2< bool, FILE *, T const &> *cb) const
T & get(int index) const
int binary_search(const T &target) const
int FWrite(const void *buffer, int size, int count)
Definition: serialis.cpp:148
int length() const
Definition: genericvector.h:85
static bool SkipDeSerialize(tesseract::TFile *fp)
PointerVector(const PointerVector &other)
bool LoadDataFromFile(const STRING &filename, GenericVector< char > *data)
void compact_sorted()
int8_t inT8
Definition: host.h:34
T & back() const
#define MIN(x, y)
Definition: ndminx.h:28
bool(* FileWriter)(const GenericVector< char > &data, const STRING &filename)
void compact(TessResultCallback1< bool, int > *delete_cb)
bool DeSerialize(bool swap, FILE *fp)
#define rand_r(seed)
const char * filename
Definition: ioapi.h:38
bool Serialize(FILE *fp) const
GenericVector< T > & operator=(const GenericVector &other)
int size_reserved() const
Definition: genericvector.h:81
bool read(tesseract::TFile *f, TessResultCallback2< bool, tesseract::TFile *, T *> *cb)
inT32 size_reserved_
bool Serialize(FILE *fp) const
#define SEEK_END
Definition: ioapi.c:25
void Reverse32(void *ptr)
Definition: helpers.h:200
TessCallback1< T > * clear_cb_
void delete_data_pointers()
int choose_nth_item(int target_index)
TessResultCallback2< bool, T const &, T const & > * compare_cb_
int push_back_new(T object)
bool DeSerializeClasses(bool swap, FILE *fp)
bool SerializeClasses(FILE *fp) const
static const int kDefaultVectorSize
void split(const char c, GenericVector< STRING > *splited)
Definition: strngs.cpp:286
bool Serialize(TFile *fp) const
void set_clear_callback(TessCallback1< T > *cb)
void ReverseN(void *ptr, int num_bytes)
Definition: helpers.h:184
void set_compare_callback(TessResultCallback2< bool, T const &, T const &> *cb)
void init(int size)
int FRead(void *buffer, int size, int count)
Definition: serialis.cpp:108