tesseract  4.00.00dev
openclwrapper.h
Go to the documentation of this file.
1 // Licensed under the Apache License, Version 2.0 (the "License");
2 // you may not use this file except in compliance with the License.
3 // You may obtain a copy of the License at
4 // http://www.apache.org/licenses/LICENSE-2.0
5 // Unless required by applicable law or agreed to in writing, software
6 // distributed under the License is distributed on an "AS IS" BASIS,
7 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
8 // See the License for the specific language governing permissions and
9 // limitations under the License.
10 
11 #ifndef TESSERACT_OPENCL_OPENCLWRAPPER_H_
12 #define TESSERACT_OPENCL_OPENCLWRAPPER_H_
13 
14 #include <stdio.h>
15 #include "allheaders.h"
16 #include "pix.h"
17 #include "tprintf.h"
18 
19 // including CL/cl.h doesn't occur until USE_OPENCL defined below
20 
21 // Platform detection.
// Exactly one of ON_WINDOWS / ON_LINUX / ON_APPLE / ON_OTHER is defined as 1
// and the other three as 0, so all four can be tested with plain #if.
// IF_WINDOWS(X) / IF_LINUX(X) / IF_APPLE(X) / IF_OTHER(X) expand to X on the
// matching platform and to nothing everywhere else.
// NOT_WINDOWS(X) expands to X on every platform except Windows.
22 #if defined(WIN32) || defined(__WIN32__) || defined(_WIN32) || \
23  defined(__CYGWIN__) || defined(__MINGW32__)
// Windows; Cygwin and MinGW builds are treated as Windows as well.
24 #define ON_WINDOWS 1
25 #define ON_LINUX 0
26 #define ON_APPLE 0
27 #define ON_OTHER 0
28 #define IF_WINDOWS(X) X
29 #define IF_LINUX(X)
30 #define IF_APPLE(X)
31 #define IF_OTHER(X)
32 #define NOT_WINDOWS(X)
33 #elif defined( __linux__ )
// Linux.
34 #define ON_WINDOWS 0
35 #define ON_LINUX 1
36 #define ON_APPLE 0
37 #define ON_OTHER 0
38 #define IF_WINDOWS(X)
39 #define IF_LINUX(X) X
40 #define IF_APPLE(X)
41 #define IF_OTHER(X)
42 #define NOT_WINDOWS(X) X
43 #elif defined( __APPLE__ )
// Apple platforms.
44 #define ON_WINDOWS 0
45 #define ON_LINUX 0
46 #define ON_APPLE 1
47 #define ON_OTHER 0
48 #define IF_WINDOWS(X)
49 #define IF_LINUX(X)
50 #define IF_APPLE(X) X
51 #define IF_OTHER(X)
52 #define NOT_WINDOWS(X) X
53 #else
// Anything that matched none of the tests above.
54 #define ON_WINDOWS 0
55 #define ON_LINUX 0
56 #define ON_APPLE 0
57 #define ON_OTHER 1
58 #define IF_WINDOWS(X)
59 #define IF_LINUX(X)
60 #define IF_APPLE(X)
61 #define IF_OTHER(X) X
62 #define NOT_WINDOWS(X) X
63 #endif
64 
65 #if ON_LINUX
66 #include <time.h>
67 #endif
68 
/************************************************************************************
 * Performance reporting.
 *
 * PERF_COUNT_VERBOSE selects how much timing output the PERF_COUNT_* macros
 * print to stdout (one line per measurement, formatted with
 * PERF_COUNT_REPORT_STR: function name, label, elapsed seconds):
 *   0, 1 - no reporting; all three macros expand to nothing
 *   2    - PERF_COUNT_START / PERF_COUNT_END report the total time spent
 *          between the two macros under the label "total"
 *   3    - additionally, PERF_COUNT_SUB(label) reports the time elapsed since
 *          PERF_COUNT_START or the previous PERF_COUNT_SUB (kernel launch,
 *          kernel time, data copies, ...)
 *
 * Usage notes:
 *  - PERF_COUNT_START declares locals (funct_name, elapsed_time_sec and the
 *    time stamps) in the current scope, so use it at most once per scope and
 *    before any PERF_COUNT_SUB / PERF_COUNT_END of that scope.
 *  - Every macro expands to complete statements; no trailing semicolon is
 *    required at the call site.
 *  - funct_name is a pointer to const because FUNCT_NAME is normally a string
 *    literal, which must not be bound to a plain char * in C++11 and later.
 *  - NOTE(review): the Windows variant relies on LARGE_INTEGER and
 *    QueryPerformanceCounter, the other variant on clock_gettime; this header
 *    only includes <time.h> when ON_LINUX is set, so other platforms must
 *    provide the declarations themselves before enabling level 2 or 3.
 ************************************************************************************/
#define PERF_COUNT_VERBOSE 1
#define PERF_COUNT_REPORT_STR "[%36s], %24s, %11.6f\n"


#if ON_WINDOWS

#if PERF_COUNT_VERBOSE >= 2
/* Record the start time of the enclosing function (Windows high-resolution counter). */
#define PERF_COUNT_START(FUNCT_NAME) \
  const char *funct_name = FUNCT_NAME; \
  double elapsed_time_sec; \
  LARGE_INTEGER freq, time_funct_start, time_funct_end, time_sub_start, time_sub_end; \
  QueryPerformanceFrequency(&freq); \
  QueryPerformanceCounter(&time_funct_start); \
  time_sub_start = time_funct_start; \
  time_sub_end = time_funct_start;

/* Print the time elapsed since PERF_COUNT_START under the label "total". */
#define PERF_COUNT_END \
  QueryPerformanceCounter(&time_funct_end); \
  elapsed_time_sec = (time_funct_end.QuadPart - time_funct_start.QuadPart) / \
                     (double)(freq.QuadPart); \
  printf(PERF_COUNT_REPORT_STR, funct_name, "total", elapsed_time_sec);
#else
#define PERF_COUNT_START(FUNCT_NAME)
#define PERF_COUNT_END
#endif

#if PERF_COUNT_VERBOSE >= 3
/* Print the time elapsed since the previous checkpoint and start a new one. */
#define PERF_COUNT_SUB(SUB) \
  QueryPerformanceCounter(&time_sub_end); \
  elapsed_time_sec = (time_sub_end.QuadPart - time_sub_start.QuadPart) / \
                     (double)(freq.QuadPart); \
  printf(PERF_COUNT_REPORT_STR, funct_name, SUB, elapsed_time_sec); \
  time_sub_start = time_sub_end;
#else
#define PERF_COUNT_SUB(SUB)
#endif


// not on windows
#else

#if PERF_COUNT_VERBOSE >= 2
/* Record the start time of the enclosing function (monotonic clock). */
#define PERF_COUNT_START(FUNCT_NAME) \
  const char *funct_name = FUNCT_NAME; \
  double elapsed_time_sec; \
  struct timespec time_funct_start, time_funct_end, time_sub_start, time_sub_end; \
  clock_gettime( CLOCK_MONOTONIC, &time_funct_start ); \
  time_sub_start = time_funct_start; \
  time_sub_end = time_funct_start;

/* Print the time elapsed since PERF_COUNT_START under the label "total". */
#define PERF_COUNT_END \
  clock_gettime(CLOCK_MONOTONIC, &time_funct_end); \
  elapsed_time_sec = \
      (time_funct_end.tv_sec - time_funct_start.tv_sec) * 1.0 + \
      (time_funct_end.tv_nsec - time_funct_start.tv_nsec) / 1000000000.0; \
  printf(PERF_COUNT_REPORT_STR, funct_name, "total", elapsed_time_sec);
#else
#define PERF_COUNT_START(FUNCT_NAME)
#define PERF_COUNT_END
#endif

#if PERF_COUNT_VERBOSE >= 3
/* Print the time elapsed since the previous checkpoint and start a new one. */
#define PERF_COUNT_SUB(SUB) \
  clock_gettime(CLOCK_MONOTONIC, &time_sub_end); \
  elapsed_time_sec = \
      (time_sub_end.tv_sec - time_sub_start.tv_sec) * 1.0 + \
      (time_sub_end.tv_nsec - time_sub_start.tv_nsec) / 1000000000.0; \
  printf(PERF_COUNT_REPORT_STR, funct_name, SUB, elapsed_time_sec); \
  time_sub_start = time_sub_end;
#else
#define PERF_COUNT_SUB(SUB)
#endif

#endif
151 /**************************************************************************
152  * enable/disable use of OpenCL
153  **************************************************************************/
154 
155 #ifdef USE_OPENCL
156 #include "opencl_device_selection.h"
157 
// Fallback used when no macro named strcasecmp exists.
// NOTE(review): on POSIX systems strcasecmp is declared as a function, not a
// macro, so this #ifndef is true there as well and every use of strcasecmp
// after this point is compiled as the case-sensitive strcmp. Confirm that
// this is intended.
158 #ifndef strcasecmp
159 #define strcasecmp strcmp
160 #endif
161 
// Sizing limits for the GPUEnv tables declared in this header.
// MAX_KERNEL_STRING_LEN: kernel-name length in GPUEnv::mArrykernelNames
// (each row is MAX_KERNEL_STRING_LEN + 1 chars).
162 #define MAX_KERNEL_STRING_LEN 64
// MAX_CLFILE_NUM: number of entries in the per-file arrays of GPUEnv
// (mpArryKernels, mpArryPrograms, mArryKnelSrcFile).
163 #define MAX_CLFILE_NUM 50
// MAX_CLKERNEL_NUM: number of entries in the per-kernel arrays of GPUEnv
// (mArrykernelNames, mpArryKnelFuncs).
164 #define MAX_CLKERNEL_NUM 200
// Not referenced elsewhere in this header.
165 #define MAX_KERNEL_NAME_LEN 64
// NOTE(review): presumably an AMD-specific command-queue info query value;
// not referenced elsewhere in this header - confirm against the vendor docs.
166 #define CL_QUEUE_THREAD_HANDLE_AMD 0x403E
// NOTE(review): presumably work-group dimensions used when launching kernels
// (general 16x16, horizontal morphology 256x1) - confirm against the .cpp.
167 #define GROUPSIZE_X 16
168 #define GROUPSIZE_Y 16
169 #define GROUPSIZE_HMORX 256
170 #define GROUPSIZE_HMORY 1
171 
172 typedef struct _KernelEnv
173 {
174  cl_context mpkContext;
175  cl_command_queue mpkCmdQueue;
176  cl_program mpkProgram;
177  cl_kernel mpkKernel;
178  char mckKernelName[150];
179 } KernelEnv;
180 
181 typedef struct _OpenCLEnv
182 {
183  cl_platform_id mpOclPlatformID;
184  cl_context mpOclContext;
185  cl_device_id mpOclDevsID;
186  cl_command_queue mpOclCmdQueue;
187 } OpenCLEnv;
188 typedef int ( *cl_kernel_function )( void **userdata, KernelEnv *kenv );
189 
/**
 * Report a failed OpenCL call on stdout.
 *
 * If `status` differs from CL_SUCCESS, prints the numeric status together
 * with `name`, a C string describing the operation that was attempted.
 * The macro only reports: it does not return, abort or otherwise alter
 * control flow, so the caller still has to handle the failure.
 *
 * Both arguments are parenthesized in the expansion so that an expression
 * argument (for example an assignment) is compared as a whole instead of
 * being re-associated with the `!=` operator.
 *
 * Caveats:
 *  - `status` is evaluated a second time when the check fails.
 *  - The expansion is a bare `if` block; do not use it as the unbraced body
 *    of an `if` that is followed by an `else`.
 */
#define CHECK_OPENCL(status,name) \
if( (status) != CL_SUCCESS ) \
{ \
    printf ("OpenCL error code is %d at when %s .\n", (status), (name)); \
}
195 
196 
197 typedef struct _GPUEnv
198 {
199  //share vb in all modules in hb library
200  cl_platform_id mpPlatformID;
201  cl_device_type mDevType;
202  cl_context mpContext;
203  cl_device_id *mpArryDevsID;
204  cl_device_id mpDevID;
205  cl_command_queue mpCmdQueue;
206  cl_kernel mpArryKernels[MAX_CLFILE_NUM];
207  cl_program mpArryPrograms[MAX_CLFILE_NUM]; //one program object maps one kernel source file
208  char mArryKnelSrcFile[MAX_CLFILE_NUM][256], //the max len of kernel file name is 256
209  mArrykernelNames[MAX_CLKERNEL_NUM][MAX_KERNEL_STRING_LEN + 1];
210  cl_kernel_function mpArryKnelFuncs[MAX_CLKERNEL_NUM];
211  int mnKernelCount, mnFileCount, // only one kernel file
212  mnIsUserCreated; // 1: created , 0:no create and needed to create by opencl wrapper
213  int mnKhrFp64Flag;
214  int mnAmdFp64Flag;
215 
216 } GPUEnv;
217 
218 
// Wrapper around the OpenCL runtime state used by this library.
// Apart from the constructor and destructor every member is static, so the
// state (gpuEnv, isInited, selectedDevice, deviceIsSelected) is shared by the
// whole process and the functions are called as OpenclDevice::Name(...).
// Only declarations live here; the definitions are not part of this header.
219 class OpenclDevice
220 {
221 
222 public:
// Shared OpenCL environment (handles, kernel tables, flags).
223  static GPUEnv gpuEnv;
// NOTE(review): presumably non-zero once the environment has been initialised.
224  static int isInited;
225  OpenclDevice();
226  ~OpenclDevice();
// --- Environment set-up and tear-down ---
227  static int InitEnv(); // load dll, call InitOpenclRunEnv(0)
228  static int InitOpenclRunEnv( int argc ); // RegistOpenclKernel, double flags, compile kernels
229  static int InitOpenclRunEnv_DeviceSelection( int argc ); // RegistOpenclKernel, double flags, compile kernels
230  static int RegistOpenclKernel();
231  static int ReleaseOpenclRunEnv();
232  static int ReleaseOpenclEnv( GPUEnv *gpuInfo );
// --- Kernel compilation and binary caching ---
233  static int CompileKernelFile( GPUEnv *gpuInfo, const char *buildOption );
234  static int CachedOfKernerPrg( const GPUEnv *gpuEnvCached, const char * clFileName );
235  static int GeneratBinFromKernelSource( cl_program program, const char * clFileName );
236  static int WriteBinaryToFile( const char* fileName, const char* birary, size_t numBytes );
237  static int BinaryGenerated( const char * clFileName, FILE ** fhandle );
238  //static int CompileKernelFile( const char *filename, GPUEnv *gpuInfo, const char *buildOption );
// --- TIFF pixel conversion ---
239  static l_uint32* pixReadFromTiffKernel(l_uint32 *tiffdata,l_int32 w,l_int32 h,l_int32 wpl, l_uint32 *line);
240  static int composeRGBPixelCl(int *tiffdata,int *line,int h,int w);
241 
242 /* OpenCL implementations of morphological operations */
243 
244  // Initialization of the OpenCL buffers used in the morphological operations
245  static int initMorphCLAllocations(l_int32 wpl, l_int32 h, Pix* pixs);
246  static void releaseMorphCLBuffers();
247 
248  static void pixGetLinesCL(Pix *pixd, Pix *pixs, Pix **pix_vline,
249  Pix **pix_hline, Pix **pixClosed,
250  bool getpixClosed, l_int32 close_hsize,
251  l_int32 close_vsize, l_int32 open_hsize,
252  l_int32 open_vsize, l_int32 line_hsize,
253  l_int32 line_vsize);
254 
// The commented-out declarations below are disabled parts of the interface.
255  //int InitOpenclAttr( OpenCLEnv * env );
256  //int ReleaseKernel( KernelEnv * env );
257  static int SetKernelEnv( KernelEnv *envInfo );
258  //int CreateKernel( char * kernelname, KernelEnv * env );
259  //int RunKernel( const char *kernelName, void **userdata );
260  //int ConvertToString( const char *filename, char **source );
261  //int CheckKernelName( KernelEnv *envInfo, const char *kernelName );
262  //int RegisterKernelWrapper( const char *kernelName, cl_kernel_function function );
263  //int RunKernelWrapper( cl_kernel_function function, const char * kernelName, void **usrdata );
264  //int GetKernelEnvAndFunc( const char *kernelName, KernelEnv *env, cl_kernel_function *function );
265 
266  static int LoadOpencl();
// NOTE(review): this tests WIN32 directly, whereas the ON_WINDOWS macro
// defined at the top of this header also accepts _WIN32, __WIN32__, Cygwin
// and MinGW - confirm the narrower test is intended.
267 #ifdef WIN32
268  //static int OpenclInite();
269  static void FreeOpenclDll();
270 #endif
271 
// NOTE(review): declared inline, but no definition is visible in this header.
272  inline static int AddKernelConfig( int kCount, const char *kName );
273 
274  /* for binarization */
275  static int HistogramRectOCL(unsigned char *imagedata, int bytes_per_pixel,
276  int bytes_per_line, int left, int top,
277  int width, int height, int kHistogramSize,
278  int *histogramAllChannels);
279 
280  static int ThresholdRectToPixOCL(unsigned char *imagedata,
281  int bytes_per_pixel, int bytes_per_line,
282  int *thresholds, int *hi_values, Pix **pix,
283  int rect_height, int rect_width,
284  int rect_top, int rect_left);
285 
// --- Device selection ---
286  static ds_device getDeviceSelection();
287  static ds_device selectedDevice;
288  static bool deviceIsSelected;
289  static bool selectedDeviceIsOpenCL();
290 };
291 
292 #endif // USE_OPENCL
293 #endif // TESSERACT_OPENCL_OPENCLWRAPPER_H_
const int kHistogramSize
Definition: otsuthr.h:27
typedef int(ZCALLBACK *close_file_func) OF((voidpf opaque