13 #include <sys/types.h> 26 #include <mach/mach_time.h> 30 #define CALLOC LEPT_CALLOC 31 #define FREE LEPT_FREE 36 GPUEnv OpenclDevice::gpuEnv;
38 bool OpenclDevice::deviceIsSelected =
false;
39 ds_device OpenclDevice::selectedDevice;
41 int OpenclDevice::isInited = 0;
43 static l_int32 MORPH_BC = ASYMMETRIC_MORPH_BC;
45 static const l_uint32 lmask32[] = {
46 0x80000000, 0xc0000000, 0xe0000000, 0xf0000000, 0xf8000000, 0xfc000000,
47 0xfe000000, 0xff000000, 0xff800000, 0xffc00000, 0xffe00000, 0xfff00000,
48 0xfff80000, 0xfffc0000, 0xfffe0000, 0xffff0000, 0xffff8000, 0xffffc000,
49 0xffffe000, 0xfffff000, 0xfffff800, 0xfffffc00, 0xfffffe00, 0xffffff00,
50 0xffffff80, 0xffffffc0, 0xffffffe0, 0xfffffff0, 0xfffffff8, 0xfffffffc,
51 0xfffffffe, 0xffffffff};
53 static const l_uint32 rmask32[] = {
54 0x00000001, 0x00000003, 0x00000007, 0x0000000f, 0x0000001f, 0x0000003f,
55 0x0000007f, 0x000000ff, 0x000001ff, 0x000003ff, 0x000007ff, 0x00000fff,
56 0x00001fff, 0x00003fff, 0x00007fff, 0x0000ffff, 0x0001ffff, 0x0003ffff,
57 0x0007ffff, 0x000fffff, 0x001fffff, 0x003fffff, 0x007fffff, 0x00ffffff,
58 0x01ffffff, 0x03ffffff, 0x07ffffff, 0x0fffffff, 0x1fffffff, 0x3fffffff,
59 0x7fffffff, 0xffffffff};
61 struct tiff_transform {
69 static struct tiff_transform tiff_orientation_transforms[] = {
80 static const l_int32 MAX_PAGES_IN_TIFF_FILE = 3000;
82 static cl_mem pixsCLBuffer, pixdCLBuffer, pixdCLIntermediate;
83 static cl_mem pixThBuffer;
84 static cl_int clStatus;
85 static KernelEnv rEnv;
87 #define DS_TAG_VERSION "<version>" 88 #define DS_TAG_VERSION_END "</version>" 89 #define DS_TAG_DEVICE "<device>" 90 #define DS_TAG_DEVICE_END "</device>" 91 #define DS_TAG_SCORE "<score>" 92 #define DS_TAG_SCORE_END "</score>" 93 #define DS_TAG_DEVICE_TYPE "<type>" 94 #define DS_TAG_DEVICE_TYPE_END "</type>" 95 #define DS_TAG_DEVICE_NAME "<name>" 96 #define DS_TAG_DEVICE_NAME_END "</name>" 97 #define DS_TAG_DEVICE_DRIVER_VERSION "<driver>" 98 #define DS_TAG_DEVICE_DRIVER_VERSION_END "</driver>" 100 #define DS_DEVICE_NATIVE_CPU_STRING "native_cpu" 102 #define DS_DEVICE_NAME_LENGTH 256 104 typedef enum { DS_EVALUATE_ALL, DS_EVALUATE_NEW_ONLY } ds_evaluation_type;
107 unsigned int numDevices;
114 DS_INVALID_PROFILE = 1000,
116 DS_INVALID_PERF_EVALUATOR_TYPE,
117 DS_INVALID_PERF_EVALUATOR,
118 DS_PERF_EVALUATOR_ERROR,
120 DS_UNKNOWN_DEVICE_TYPE,
121 DS_PROFILE_FILE_ERROR,
122 DS_SCORE_SERIALIZER_ERROR,
123 DS_SCORE_DESERIALIZER_ERROR
130 typedef ds_status (*ds_perf_evaluator)(ds_device *device,
void *data);
133 typedef ds_status (*ds_score_release)(
void *score);
134 static ds_status releaseDSProfile(ds_profile *profile, ds_score_release sr) {
135 ds_status status = DS_SUCCESS;
136 if (profile !=
nullptr) {
137 if (profile->devices !=
nullptr && sr !=
nullptr) {
139 for (i = 0; i < profile->numDevices; i++) {
140 free(profile->devices[i].oclDeviceName);
141 free(profile->devices[i].oclDriverVersion);
142 status = sr(profile->devices[i].score);
143 if (status != DS_SUCCESS)
break;
145 free(profile->devices);
152 static ds_status initDSProfile(ds_profile **p,
const char *version) {
154 cl_uint numPlatforms;
155 cl_platform_id *platforms =
nullptr;
156 cl_device_id *devices =
nullptr;
157 ds_status status = DS_SUCCESS;
161 if (p ==
nullptr)
return DS_INVALID_PROFILE;
163 ds_profile *profile = (ds_profile *)malloc(
sizeof(ds_profile));
164 if (profile ==
nullptr)
return DS_MEMORY_ERROR;
166 memset(profile, 0,
sizeof(ds_profile));
168 clGetPlatformIDs(0,
nullptr, &numPlatforms);
170 if (numPlatforms > 0) {
171 platforms = (cl_platform_id *)malloc(numPlatforms *
sizeof(cl_platform_id));
172 if (platforms ==
nullptr) {
173 status = DS_MEMORY_ERROR;
176 clGetPlatformIDs(numPlatforms, platforms,
nullptr);
180 for (i = 0; i < (
unsigned int)numPlatforms; i++) {
182 clGetDeviceIDs(platforms[i], CL_DEVICE_TYPE_ALL, 0,
nullptr, &num);
186 if (numDevices > 0) {
187 devices = (cl_device_id *)malloc(numDevices *
sizeof(cl_device_id));
188 if (devices ==
nullptr) {
189 status = DS_MEMORY_ERROR;
194 profile->numDevices =
197 (ds_device *)malloc(profile->numDevices *
sizeof(ds_device));
198 if (profile->devices ==
nullptr) {
199 profile->numDevices = 0;
200 status = DS_MEMORY_ERROR;
203 memset(profile->devices, 0, profile->numDevices *
sizeof(ds_device));
206 for (i = 0; i < (
unsigned int)numPlatforms; i++) {
209 clGetDeviceIDs(platforms[i], CL_DEVICE_TYPE_ALL, numDevices, devices, &num);
210 for (j = 0; j < num; j++, next++) {
211 char buffer[DS_DEVICE_NAME_LENGTH];
214 profile->devices[next].type = DS_DEVICE_OPENCL_DEVICE;
215 profile->devices[next].oclDeviceID = devices[j];
217 clGetDeviceInfo(profile->devices[next].oclDeviceID, CL_DEVICE_NAME,
218 DS_DEVICE_NAME_LENGTH, &buffer,
nullptr);
219 length = strlen(buffer);
220 profile->devices[next].oclDeviceName = (
char *)malloc(length + 1);
221 memcpy(profile->devices[next].oclDeviceName, buffer, length + 1);
223 clGetDeviceInfo(profile->devices[next].oclDeviceID, CL_DRIVER_VERSION,
224 DS_DEVICE_NAME_LENGTH, &buffer,
nullptr);
225 length = strlen(buffer);
226 profile->devices[next].oclDriverVersion = (
char *)malloc(length + 1);
227 memcpy(profile->devices[next].oclDriverVersion, buffer, length + 1);
230 profile->devices[next].type = DS_DEVICE_NATIVE_CPU;
231 profile->version = version;
236 if (status == DS_SUCCESS) {
240 free(profile->devices);
247 static ds_status profileDevices(ds_profile *profile,
248 const ds_evaluation_type type,
249 ds_perf_evaluator evaluator,
250 void *evaluatorData,
unsigned int *numUpdates) {
251 ds_status status = DS_SUCCESS;
253 unsigned int updates = 0;
255 if (profile ==
nullptr) {
256 return DS_INVALID_PROFILE;
258 if (evaluator ==
nullptr) {
259 return DS_INVALID_PERF_EVALUATOR;
262 for (i = 0; i < profile->numDevices; i++) {
263 ds_status evaluatorStatus;
266 case DS_EVALUATE_NEW_ONLY:
267 if (profile->devices[i].score !=
nullptr)
break;
269 case DS_EVALUATE_ALL:
270 evaluatorStatus = evaluator(profile->devices + i, evaluatorData);
271 if (evaluatorStatus != DS_SUCCESS) {
272 status = evaluatorStatus;
278 return DS_INVALID_PERF_EVALUATOR_TYPE;
282 if (numUpdates) *numUpdates = updates;
286 static const char *findString(
const char *contentStart,
const char *contentEnd,
287 const char *
string) {
289 const char *currentPosition;
290 const char *found =
nullptr;
291 stringLength = strlen(
string);
292 currentPosition = contentStart;
293 for (currentPosition = contentStart; currentPosition < contentEnd;
295 if (*currentPosition ==
string[0]) {
296 if (currentPosition + stringLength < contentEnd) {
297 if (strncmp(currentPosition,
string, stringLength) == 0) {
298 found = currentPosition;
307 static ds_status readProFile(
const char *fileName,
char **content,
308 size_t *contentSize) {
314 FILE *input = fopen(fileName,
"rb");
315 if (input ==
nullptr) {
316 return DS_FILE_ERROR;
322 char *binary = (
char *)malloc(size);
323 if (binary ==
nullptr) {
325 return DS_FILE_ERROR;
327 fread(binary,
sizeof(
char), size, input);
335 typedef ds_status (*ds_score_deserializer)(ds_device *device,
336 const unsigned char *serializedScore,
337 unsigned int serializedScoreSize);
339 static ds_status readProfileFromFile(ds_profile *profile,
340 ds_score_deserializer deserializer,
342 ds_status status = DS_SUCCESS;
343 char *contentStart =
nullptr;
344 const char *contentEnd =
nullptr;
347 if (profile ==
nullptr)
return DS_INVALID_PROFILE;
349 status = readProFile(file, &contentStart, &contentSize);
350 if (status == DS_SUCCESS) {
351 const char *currentPosition;
352 const char *dataStart;
354 size_t versionStringLength;
356 contentEnd = contentStart + contentSize;
357 currentPosition = contentStart;
360 dataStart = findString(currentPosition, contentEnd, DS_TAG_VERSION);
361 if (dataStart ==
nullptr) {
362 status = DS_PROFILE_FILE_ERROR;
365 dataStart += strlen(DS_TAG_VERSION);
367 dataEnd = findString(dataStart, contentEnd, DS_TAG_VERSION_END);
368 if (dataEnd ==
nullptr) {
369 status = DS_PROFILE_FILE_ERROR;
373 versionStringLength = strlen(profile->version);
374 if (versionStringLength != (dataEnd - dataStart) ||
375 strncmp(profile->version, dataStart, versionStringLength) != 0) {
377 status = DS_PROFILE_FILE_ERROR;
380 currentPosition = dataEnd + strlen(DS_TAG_VERSION_END);
386 const char *deviceTypeStart;
387 const char *deviceTypeEnd;
388 ds_device_type deviceType;
390 const char *deviceNameStart;
391 const char *deviceNameEnd;
393 const char *deviceScoreStart;
394 const char *deviceScoreEnd;
396 const char *deviceDriverStart;
397 const char *deviceDriverEnd;
399 dataStart = findString(currentPosition, contentEnd, DS_TAG_DEVICE);
400 if (dataStart ==
nullptr) {
404 dataStart += strlen(DS_TAG_DEVICE);
405 dataEnd = findString(dataStart, contentEnd, DS_TAG_DEVICE_END);
406 if (dataEnd ==
nullptr) {
407 status = DS_PROFILE_FILE_ERROR;
412 deviceTypeStart = findString(dataStart, contentEnd, DS_TAG_DEVICE_TYPE);
413 if (deviceTypeStart ==
nullptr) {
414 status = DS_PROFILE_FILE_ERROR;
417 deviceTypeStart += strlen(DS_TAG_DEVICE_TYPE);
419 findString(deviceTypeStart, contentEnd, DS_TAG_DEVICE_TYPE_END);
420 if (deviceTypeEnd ==
nullptr) {
421 status = DS_PROFILE_FILE_ERROR;
424 memcpy(&deviceType, deviceTypeStart,
sizeof(ds_device_type));
427 if (deviceType == DS_DEVICE_OPENCL_DEVICE) {
428 deviceNameStart = findString(dataStart, contentEnd, DS_TAG_DEVICE_NAME);
429 if (deviceNameStart ==
nullptr) {
430 status = DS_PROFILE_FILE_ERROR;
433 deviceNameStart += strlen(DS_TAG_DEVICE_NAME);
435 findString(deviceNameStart, contentEnd, DS_TAG_DEVICE_NAME_END);
436 if (deviceNameEnd ==
nullptr) {
437 status = DS_PROFILE_FILE_ERROR;
442 findString(dataStart, contentEnd, DS_TAG_DEVICE_DRIVER_VERSION);
443 if (deviceDriverStart ==
nullptr) {
444 status = DS_PROFILE_FILE_ERROR;
447 deviceDriverStart += strlen(DS_TAG_DEVICE_DRIVER_VERSION);
448 deviceDriverEnd = findString(deviceDriverStart, contentEnd,
449 DS_TAG_DEVICE_DRIVER_VERSION_END);
450 if (deviceDriverEnd ==
nullptr) {
451 status = DS_PROFILE_FILE_ERROR;
456 for (i = 0; i < profile->numDevices; i++) {
457 if (profile->devices[i].type == DS_DEVICE_OPENCL_DEVICE) {
458 size_t actualDeviceNameLength;
459 size_t driverVersionLength;
461 actualDeviceNameLength = strlen(profile->devices[i].oclDeviceName);
462 driverVersionLength = strlen(profile->devices[i].oclDriverVersion);
463 if (actualDeviceNameLength == (deviceNameEnd - deviceNameStart) &&
464 driverVersionLength == (deviceDriverEnd - deviceDriverStart) &&
465 strncmp(profile->devices[i].oclDeviceName, deviceNameStart,
466 actualDeviceNameLength) == 0 &&
467 strncmp(profile->devices[i].oclDriverVersion, deviceDriverStart,
468 driverVersionLength) == 0) {
470 findString(dataStart, contentEnd, DS_TAG_SCORE);
471 if (deviceNameStart ==
nullptr) {
472 status = DS_PROFILE_FILE_ERROR;
475 deviceScoreStart += strlen(DS_TAG_SCORE);
477 findString(deviceScoreStart, contentEnd, DS_TAG_SCORE_END);
478 status = deserializer(profile->devices + i,
479 (
const unsigned char *)deviceScoreStart,
480 deviceScoreEnd - deviceScoreStart);
481 if (status != DS_SUCCESS) {
487 }
else if (deviceType == DS_DEVICE_NATIVE_CPU) {
488 for (i = 0; i < profile->numDevices; i++) {
489 if (profile->devices[i].type == DS_DEVICE_NATIVE_CPU) {
490 deviceScoreStart = findString(dataStart, contentEnd, DS_TAG_SCORE);
491 if (deviceScoreStart ==
nullptr) {
492 status = DS_PROFILE_FILE_ERROR;
495 deviceScoreStart += strlen(DS_TAG_SCORE);
497 findString(deviceScoreStart, contentEnd, DS_TAG_SCORE_END);
498 status = deserializer(profile->devices + i,
499 (
const unsigned char *)deviceScoreStart,
500 deviceScoreEnd - deviceScoreStart);
501 if (status != DS_SUCCESS) {
509 currentPosition = dataEnd + strlen(DS_TAG_DEVICE_END);
517 typedef ds_status (*ds_score_serializer)(ds_device *device,
518 void **serializedScore,
519 unsigned int *serializedScoreSize);
520 static ds_status writeProfileToFile(ds_profile *profile,
521 ds_score_serializer serializer,
523 ds_status status = DS_SUCCESS;
525 if (profile ==
nullptr)
return DS_INVALID_PROFILE;
527 FILE *profileFile = fopen(file,
"wb");
528 if (profileFile ==
nullptr) {
529 status = DS_FILE_ERROR;
534 fwrite(DS_TAG_VERSION,
sizeof(
char), strlen(DS_TAG_VERSION), profileFile);
535 fwrite(profile->version,
sizeof(
char), strlen(profile->version),
537 fwrite(DS_TAG_VERSION_END,
sizeof(
char), strlen(DS_TAG_VERSION_END),
539 fwrite(
"\n",
sizeof(
char), 1, profileFile);
541 for (i = 0; i < profile->numDevices && status == DS_SUCCESS; i++) {
542 void *serializedScore;
543 unsigned int serializedScoreSize;
545 fwrite(DS_TAG_DEVICE,
sizeof(
char), strlen(DS_TAG_DEVICE), profileFile);
547 fwrite(DS_TAG_DEVICE_TYPE,
sizeof(
char), strlen(DS_TAG_DEVICE_TYPE),
549 fwrite(&profile->devices[i].type,
sizeof(ds_device_type), 1, profileFile);
550 fwrite(DS_TAG_DEVICE_TYPE_END,
sizeof(
char),
551 strlen(DS_TAG_DEVICE_TYPE_END), profileFile);
553 switch (profile->devices[i].type) {
554 case DS_DEVICE_NATIVE_CPU: {
565 case DS_DEVICE_OPENCL_DEVICE: {
566 fwrite(DS_TAG_DEVICE_NAME,
sizeof(
char), strlen(DS_TAG_DEVICE_NAME),
568 fwrite(profile->devices[i].oclDeviceName,
sizeof(
char),
569 strlen(profile->devices[i].oclDeviceName), profileFile);
570 fwrite(DS_TAG_DEVICE_NAME_END,
sizeof(
char),
571 strlen(DS_TAG_DEVICE_NAME_END), profileFile);
573 fwrite(DS_TAG_DEVICE_DRIVER_VERSION,
sizeof(
char),
574 strlen(DS_TAG_DEVICE_DRIVER_VERSION), profileFile);
575 fwrite(profile->devices[i].oclDriverVersion,
sizeof(
char),
576 strlen(profile->devices[i].oclDriverVersion), profileFile);
577 fwrite(DS_TAG_DEVICE_DRIVER_VERSION_END,
sizeof(
char),
578 strlen(DS_TAG_DEVICE_DRIVER_VERSION_END), profileFile);
581 status = DS_UNKNOWN_DEVICE_TYPE;
585 fwrite(DS_TAG_SCORE,
sizeof(
char), strlen(DS_TAG_SCORE), profileFile);
586 status = serializer(profile->devices + i, &serializedScore,
587 &serializedScoreSize);
588 if (status == DS_SUCCESS && serializedScore !=
nullptr &&
589 serializedScoreSize > 0) {
590 fwrite(serializedScore,
sizeof(
char), serializedScoreSize, profileFile);
591 free(serializedScore);
593 fwrite(DS_TAG_SCORE_END,
sizeof(
char), strlen(DS_TAG_SCORE_END),
595 fwrite(DS_TAG_DEVICE_END,
sizeof(
char), strlen(DS_TAG_DEVICE_END),
597 fwrite(
"\n",
sizeof(
char), 1, profileFile);
605 static void legalizeFileName(
char *fileName) {
607 const char *invalidChars =
610 for (
int i = 0; i < strlen(invalidChars); i++) {
612 invalidStr[0] = invalidChars[i];
613 invalidStr[1] =
'\0';
619 for (
char *pos = strstr(fileName, invalidStr); pos !=
nullptr;
620 pos = strstr(pos + 1, invalidStr)) {
628 static void populateGPUEnvFromDevice( GPUEnv *gpuInfo, cl_device_id device ) {
631 gpuInfo->mnIsUserCreated = 1;
633 gpuInfo->mpDevID = device;
634 gpuInfo->mpArryDevsID =
new cl_device_id[1];
635 gpuInfo->mpArryDevsID[0] = gpuInfo->mpDevID;
637 clGetDeviceInfo(gpuInfo->mpDevID, CL_DEVICE_TYPE,
638 sizeof(cl_device_type), &gpuInfo->mDevType, &size);
639 CHECK_OPENCL( clStatus,
"populateGPUEnv::getDeviceInfo(TYPE)");
642 clGetDeviceInfo(gpuInfo->mpDevID, CL_DEVICE_PLATFORM,
643 sizeof(cl_platform_id), &gpuInfo->mpPlatformID, &size);
644 CHECK_OPENCL( clStatus,
"populateGPUEnv::getDeviceInfo(PLATFORM)");
646 cl_context_properties props[3];
647 props[0] = CL_CONTEXT_PLATFORM;
648 props[1] = (cl_context_properties) gpuInfo->mpPlatformID;
650 gpuInfo->mpContext = clCreateContext(props, 1, &gpuInfo->mpDevID,
nullptr,
652 CHECK_OPENCL( clStatus,
"populateGPUEnv::createContext");
654 cl_command_queue_properties queueProperties = 0;
655 gpuInfo->mpCmdQueue = clCreateCommandQueue( gpuInfo->mpContext, gpuInfo->mpDevID, queueProperties, &clStatus );
656 CHECK_OPENCL( clStatus,
"populateGPUEnv::createCommandQueue");
659 int OpenclDevice::LoadOpencl()
662 HINSTANCE HOpenclDll =
nullptr;
663 void *OpenclDll =
nullptr;
665 OpenclDll =
static_cast<HINSTANCE
>(HOpenclDll);
666 OpenclDll = LoadLibrary(
"openCL.dll");
667 if (!static_cast<HINSTANCE>(OpenclDll)) {
668 fprintf(stderr,
"[OD] Load opencl.dll failed!\n");
669 FreeLibrary(static_cast<HINSTANCE>(OpenclDll));
672 fprintf(stderr,
"[OD] Load opencl.dll successful!\n");
676 int OpenclDevice::SetKernelEnv( KernelEnv *envInfo )
678 envInfo->mpkContext = gpuEnv.mpContext;
679 envInfo->mpkCmdQueue = gpuEnv.mpCmdQueue;
680 envInfo->mpkProgram = gpuEnv.mpArryPrograms[0];
685 static cl_mem allocateZeroCopyBuffer(KernelEnv rEnv, l_uint32 *hostbuffer,
686 size_t nElements, cl_mem_flags flags,
689 cl_mem membuffer = clCreateBuffer( rEnv.mpkContext, (cl_mem_flags) (flags),
690 nElements *
sizeof(l_uint32), hostbuffer, pStatus);
696 Pix *mapOutputCLBuffer(KernelEnv rEnv, cl_mem clbuffer, Pix *pixd, Pix *pixs,
697 int elements, cl_mem_flags flags,
bool memcopy =
false,
699 PROCNAME(
"mapOutputCLBuffer");
702 if ((pixd = pixCreateTemplate(pixs)) ==
nullptr)
703 (Pix *)ERROR_PTR(
"pixd not made", procName,
nullptr);
705 if ((pixd = pixCreateHeader(pixGetWidth(pixs), pixGetHeight(pixs),
706 pixGetDepth(pixs))) ==
nullptr)
707 (Pix *)ERROR_PTR(
"pixd not made", procName,
nullptr);
710 l_uint32 *pValues = (l_uint32 *)clEnqueueMapBuffer(
711 rEnv.mpkCmdQueue, clbuffer, CL_TRUE, flags, 0,
712 elements *
sizeof(l_uint32), 0,
nullptr,
nullptr,
nullptr);
715 memcpy(pixGetData(pixd), pValues, elements *
sizeof(l_uint32));
717 pixSetData(pixd, pValues);
720 clEnqueueUnmapMemObject(rEnv.mpkCmdQueue, clbuffer, pValues, 0,
nullptr,
724 clFinish(rEnv.mpkCmdQueue);
730 static cl_mem allocateIntBuffer(KernelEnv rEnv,
const l_uint32 *_pValues,
731 size_t nElements, cl_int *pStatus,
735 clCreateBuffer(rEnv.mpkContext, (cl_mem_flags)(CL_MEM_READ_WRITE),
736 nElements *
sizeof(l_int32),
nullptr, pStatus);
738 if (_pValues !=
nullptr) {
739 l_int32 *pValues = (l_int32 *)clEnqueueMapBuffer(
740 rEnv.mpkCmdQueue, xValues, CL_TRUE, CL_MAP_WRITE, 0,
741 nElements *
sizeof(l_int32), 0,
nullptr,
nullptr,
nullptr);
743 memcpy(pValues, _pValues, nElements *
sizeof(l_int32));
745 clEnqueueUnmapMemObject(rEnv.mpkCmdQueue, xValues, pValues, 0,
nullptr,
748 if (sync) clFinish(rEnv.mpkCmdQueue);
755 void OpenclDevice::releaseMorphCLBuffers()
757 if (pixdCLIntermediate !=
nullptr) clReleaseMemObject(pixdCLIntermediate);
758 if (pixsCLBuffer !=
nullptr) clReleaseMemObject(pixsCLBuffer);
759 if (pixdCLBuffer !=
nullptr) clReleaseMemObject(pixdCLBuffer);
760 if (pixThBuffer !=
nullptr) clReleaseMemObject(pixThBuffer);
761 pixdCLIntermediate = pixsCLBuffer = pixdCLBuffer = pixThBuffer =
nullptr;
764 int OpenclDevice::initMorphCLAllocations(l_int32 wpl, l_int32 h, Pix* pixs)
766 SetKernelEnv( &rEnv );
768 if (pixThBuffer !=
nullptr) {
769 pixsCLBuffer = allocateZeroCopyBuffer(rEnv,
nullptr, wpl * h,
770 CL_MEM_ALLOC_HOST_PTR, &clStatus);
774 clEnqueueCopyBuffer(rEnv.mpkCmdQueue, pixThBuffer, pixsCLBuffer, 0, 0,
775 sizeof(l_uint32) * wpl * h, 0,
nullptr,
nullptr);
780 l_uint32* srcdata = (l_uint32*) malloc(wpl*h*
sizeof(l_uint32));
781 memcpy(srcdata, pixGetData(pixs), wpl*h*
sizeof(l_uint32));
783 pixsCLBuffer = allocateZeroCopyBuffer(rEnv, srcdata, wpl*h, CL_MEM_USE_HOST_PTR, &clStatus);
786 pixdCLBuffer = allocateZeroCopyBuffer(rEnv,
nullptr, wpl * h,
787 CL_MEM_ALLOC_HOST_PTR, &clStatus);
789 pixdCLIntermediate = allocateZeroCopyBuffer(
790 rEnv,
nullptr, wpl * h, CL_MEM_ALLOC_HOST_PTR, &clStatus);
792 return (
int)clStatus;
795 int OpenclDevice::InitEnv()
802 if( 1 == LoadOpencl() )
809 InitOpenclRunEnv_DeviceSelection( 0 );
815 int OpenclDevice::ReleaseOpenclRunEnv()
817 ReleaseOpenclEnv( &gpuEnv );
823 inline int OpenclDevice::AddKernelConfig(
int kCount,
const char *kName )
826 fprintf(stderr,
"Error: ( KCount < 1 ) AddKernelConfig\n" );
827 strcpy( gpuEnv.mArrykernelNames[kCount-1], kName );
828 gpuEnv.mnKernelCount++;
831 int OpenclDevice::RegistOpenclKernel()
833 if ( !gpuEnv.mnIsUserCreated )
834 memset( &gpuEnv, 0,
sizeof(gpuEnv) );
836 gpuEnv.mnFileCount = 0;
837 gpuEnv.mnKernelCount = 0UL;
839 AddKernelConfig( 1, (
const char*)
"oclAverageSub1" );
843 int OpenclDevice::InitOpenclRunEnv_DeviceSelection(
int argc ) {
847 ds_device bestDevice_DS = getDeviceSelection( );
849 cl_device_id bestDevice = bestDevice_DS.oclDeviceID;
851 if (selectedDeviceIsOpenCL() ) {
853 populateGPUEnvFromDevice( &gpuEnv, bestDevice );
854 gpuEnv.mnFileCount = 0;
855 gpuEnv.mnKernelCount = 0UL;
857 CompileKernelFile(&gpuEnv,
"");
869 OpenclDevice::OpenclDevice()
874 OpenclDevice::~OpenclDevice()
879 int OpenclDevice::ReleaseOpenclEnv( GPUEnv *gpuInfo )
889 for ( i = 0; i < gpuEnv.mnFileCount; i++ )
891 if ( gpuEnv.mpArryPrograms[i] )
893 clStatus = clReleaseProgram( gpuEnv.mpArryPrograms[i] );
894 CHECK_OPENCL( clStatus,
"clReleaseProgram" );
895 gpuEnv.mpArryPrograms[i] =
nullptr;
898 if ( gpuEnv.mpCmdQueue )
900 clReleaseCommandQueue( gpuEnv.mpCmdQueue );
901 gpuEnv.mpCmdQueue =
nullptr;
903 if ( gpuEnv.mpContext )
905 clReleaseContext( gpuEnv.mpContext );
906 gpuEnv.mpContext =
nullptr;
909 gpuInfo->mnIsUserCreated = 0;
910 delete[] gpuInfo->mpArryDevsID;
913 int OpenclDevice::BinaryGenerated(
const char * clFileName, FILE ** fhandle )
920 char fileName[256] = {0}, cl_name[128] = {0};
921 char deviceName[1024];
922 clStatus = clGetDeviceInfo(gpuEnv.mpArryDevsID[i], CL_DEVICE_NAME,
923 sizeof(deviceName), deviceName,
nullptr);
924 CHECK_OPENCL(clStatus,
"clGetDeviceInfo");
925 str = (
char *)strstr(clFileName, (
char *)
".cl");
926 memcpy(cl_name, clFileName, str - clFileName);
927 cl_name[str - clFileName] =
'\0';
928 sprintf(fileName,
"%s-%s.bin", cl_name, deviceName);
929 legalizeFileName(fileName);
930 fd = fopen(fileName,
"rb");
931 status = (fd !=
nullptr) ? 1 : 0;
938 int OpenclDevice::CachedOfKernerPrg(
const GPUEnv *gpuEnvCached,
const char * clFileName )
941 for ( i = 0; i < gpuEnvCached->mnFileCount; i++ )
943 if ( strcasecmp( gpuEnvCached->mArryKnelSrcFile[i], clFileName ) == 0 )
945 if (gpuEnvCached->mpArryPrograms[i] !=
nullptr) {
953 int OpenclDevice::WriteBinaryToFile(
const char* fileName,
const char* birary,
size_t numBytes )
955 FILE *output =
nullptr;
956 output = fopen(fileName,
"wb");
957 if (output ==
nullptr) {
961 fwrite( birary,
sizeof(
char), numBytes, output );
967 int OpenclDevice::GeneratBinFromKernelSource( cl_program program,
const char * clFileName )
973 cl_device_id *mpArryDevsID;
974 char **binaries, *str =
nullptr;
976 clStatus = clGetProgramInfo(program, CL_PROGRAM_NUM_DEVICES,
977 sizeof(numDevices), &numDevices,
nullptr);
978 CHECK_OPENCL( clStatus,
"clGetProgramInfo" );
980 mpArryDevsID = (cl_device_id*) malloc(
sizeof(cl_device_id) * numDevices );
981 if (mpArryDevsID ==
nullptr) {
985 clStatus = clGetProgramInfo(program, CL_PROGRAM_DEVICES,
986 sizeof(cl_device_id) * numDevices, mpArryDevsID,
988 CHECK_OPENCL( clStatus,
"clGetProgramInfo" );
991 binarySizes = (
size_t*) malloc(
sizeof(
size_t) * numDevices );
994 clGetProgramInfo(program, CL_PROGRAM_BINARY_SIZES,
995 sizeof(
size_t) * numDevices, binarySizes,
nullptr);
996 CHECK_OPENCL( clStatus,
"clGetProgramInfo" );
999 binaries = (
char**) malloc(
sizeof(
char *) * numDevices );
1000 if (binaries ==
nullptr) {
1004 for ( i = 0; i < numDevices; i++ )
1006 if ( binarySizes[i] != 0 )
1008 binaries[i] = (
char*) malloc(
sizeof(
char) * binarySizes[i] );
1009 if (binaries[i] ==
nullptr) {
1015 binaries[i] =
nullptr;
1019 clStatus = clGetProgramInfo(program, CL_PROGRAM_BINARIES,
1020 sizeof(
char *) * numDevices, binaries,
nullptr);
1021 CHECK_OPENCL(clStatus,
"clGetProgramInfo");
1024 for ( i = 0; i < numDevices; i++ )
1026 char fileName[256] = { 0 }, cl_name[128] = { 0 };
1028 if ( binarySizes[i] != 0 )
1030 char deviceName[1024];
1031 clStatus = clGetDeviceInfo(mpArryDevsID[i], CL_DEVICE_NAME,
1032 sizeof(deviceName), deviceName,
nullptr);
1033 CHECK_OPENCL( clStatus,
"clGetDeviceInfo" );
1035 str = (
char*) strstr( clFileName, (
char*)
".cl" );
1036 memcpy( cl_name, clFileName, str - clFileName );
1037 cl_name[str - clFileName] =
'\0';
1038 sprintf( fileName,
"%s-%s.bin", cl_name, deviceName );
1039 legalizeFileName(fileName);
1040 if ( !WriteBinaryToFile( fileName, binaries[i], binarySizes[i] ) )
1042 printf(
"[OD] write binary[%s] failed\n", fileName);
1045 printf(
"[OD] write binary[%s] successfully\n", fileName);
1050 for ( i = 0; i < numDevices; i++ )
1053 binaries[i] =
nullptr;
1060 binarySizes =
nullptr;
1063 mpArryDevsID =
nullptr;
1068 int OpenclDevice::CompileKernelFile( GPUEnv *gpuInfo,
const char *buildOption )
1071 cl_int clStatus = 0;
1073 char *buildLog =
nullptr, *binary;
1075 size_t source_size[1];
1076 int b_error, binary_status, binaryExisted, idx;
1078 cl_device_id *mpArryDevsID;
1080 const char*
filename =
"kernel.cl";
1082 if ( CachedOfKernerPrg(gpuInfo, filename) == 1 )
1087 idx = gpuInfo->mnFileCount;
1091 source_size[0] = strlen( source );
1093 binaryExisted = BinaryGenerated( filename, &fd );
1095 if ( binaryExisted == 1 )
1097 clStatus = clGetContextInfo(gpuInfo->mpContext, CL_CONTEXT_NUM_DEVICES,
1098 sizeof(numDevices), &numDevices,
nullptr);
1099 CHECK_OPENCL(clStatus,
"clGetContextInfo");
1101 mpArryDevsID = (cl_device_id *)malloc(
sizeof(cl_device_id) * numDevices);
1102 if (mpArryDevsID ==
nullptr) {
1108 b_error |= fseek( fd, 0,
SEEK_END ) < 0;
1109 b_error |= ( length = ftell(fd) ) <= 0;
1110 b_error |= fseek( fd, 0,
SEEK_SET ) < 0;
1116 binary = (
char*) malloc( length + 2 );
1122 memset( binary, 0, length + 2 );
1123 b_error |= fread( binary, 1, length, fd ) != length;
1130 clStatus = clGetContextInfo(gpuInfo->mpContext, CL_CONTEXT_DEVICES,
1131 sizeof(cl_device_id) * numDevices,
1132 mpArryDevsID,
nullptr);
1133 CHECK_OPENCL( clStatus,
"clGetContextInfo" );
1136 gpuInfo->mpArryPrograms[idx] = clCreateProgramWithBinary( gpuInfo->mpContext,numDevices,
1137 mpArryDevsID, &length, (
const unsigned char**) &binary,
1138 &binary_status, &clStatus );
1139 CHECK_OPENCL( clStatus,
"clCreateProgramWithBinary" );
1142 free( mpArryDevsID );
1143 mpArryDevsID =
nullptr;
1150 gpuInfo->mpArryPrograms[idx] = clCreateProgramWithSource( gpuInfo->mpContext, 1, &source,
1151 source_size, &clStatus);
1152 CHECK_OPENCL( clStatus,
"clCreateProgramWithSource" );
1156 if (gpuInfo->mpArryPrograms[idx] == (cl_program)
nullptr) {
1164 if (!gpuInfo->mnIsUserCreated)
1167 clBuildProgram(gpuInfo->mpArryPrograms[idx], 1, gpuInfo->mpArryDevsID,
1168 buildOption,
nullptr,
nullptr);
1174 clBuildProgram(gpuInfo->mpArryPrograms[idx], 1, &(gpuInfo->mpDevID),
1175 buildOption,
nullptr,
nullptr);
1179 if ( clStatus != CL_SUCCESS )
1181 printf (
"BuildProgram error!\n");
1182 if ( !gpuInfo->mnIsUserCreated )
1184 clStatus = clGetProgramBuildInfo(
1185 gpuInfo->mpArryPrograms[idx], gpuInfo->mpArryDevsID[0],
1186 CL_PROGRAM_BUILD_LOG, 0,
nullptr, &length);
1190 clStatus = clGetProgramBuildInfo(
1191 gpuInfo->mpArryPrograms[idx], gpuInfo->mpDevID,
1192 CL_PROGRAM_BUILD_LOG, 0,
nullptr, &length);
1194 if ( clStatus != CL_SUCCESS )
1196 printf(
"opencl create build log fail\n");
1199 buildLog = (
char*) malloc( length );
1200 if (buildLog == (
char *)
nullptr) {
1203 if ( !gpuInfo->mnIsUserCreated )
1205 clStatus = clGetProgramBuildInfo( gpuInfo->mpArryPrograms[idx], gpuInfo->mpArryDevsID[0],
1206 CL_PROGRAM_BUILD_LOG, length, buildLog, &length );
1210 clStatus = clGetProgramBuildInfo( gpuInfo->mpArryPrograms[idx], gpuInfo->mpDevID,
1211 CL_PROGRAM_BUILD_LOG, length, buildLog, &length );
1213 if ( clStatus != CL_SUCCESS )
1215 printf(
"opencl program build info fail\n");
1219 fd1 = fopen(
"kernel-build.log",
"w+" );
1220 if (fd1 !=
nullptr) {
1221 fwrite(buildLog,
sizeof(
char), length, fd1);
1230 strcpy( gpuInfo->mArryKnelSrcFile[idx], filename );
1232 if ( binaryExisted == 0 ) {
1233 GeneratBinFromKernelSource( gpuInfo->mpArryPrograms[idx], filename );
1237 gpuInfo->mnFileCount += 1;
1242 l_uint32* OpenclDevice::pixReadFromTiffKernel(l_uint32 *tiffdata,l_int32 w,l_int32 h,l_int32 wpl,l_uint32 *line)
1247 size_t globalThreads[2];
1248 size_t localThreads[2];
1254 gsize = (w + GROUPSIZE_X - 1)/ GROUPSIZE_X * GROUPSIZE_X;
1255 globalThreads[0] = gsize;
1256 gsize = (h + GROUPSIZE_Y - 1)/ GROUPSIZE_Y * GROUPSIZE_Y;
1257 globalThreads[1] = gsize;
1258 localThreads[0] = GROUPSIZE_X;
1259 localThreads[1] = GROUPSIZE_Y;
1261 SetKernelEnv( &rEnv );
1263 l_uint32 *pResult = (l_uint32 *)malloc(w*h *
sizeof(l_uint32));
1264 rEnv.mpkKernel = clCreateKernel( rEnv.mpkProgram,
"composeRGBPixel", &clStatus );
1265 CHECK_OPENCL(clStatus,
"clCreateKernel composeRGBPixel");
1268 valuesCl = allocateZeroCopyBuffer(rEnv, tiffdata, w*h, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR, &clStatus);
1269 outputCl = allocateZeroCopyBuffer(rEnv, pResult, w*h, CL_MEM_WRITE_ONLY | CL_MEM_USE_HOST_PTR, &clStatus);
1272 clStatus = clSetKernelArg(rEnv.mpkKernel, 0,
sizeof(cl_mem), &valuesCl);
1273 CHECK_OPENCL( clStatus,
"clSetKernelArg");
1274 clStatus = clSetKernelArg(rEnv.mpkKernel, 1,
sizeof(w), &w);
1275 CHECK_OPENCL( clStatus,
"clSetKernelArg" );
1276 clStatus = clSetKernelArg(rEnv.mpkKernel, 2,
sizeof(h), &h);
1277 CHECK_OPENCL( clStatus,
"clSetKernelArg" );
1278 clStatus = clSetKernelArg(rEnv.mpkKernel, 3,
sizeof(wpl), &wpl);
1279 CHECK_OPENCL( clStatus,
"clSetKernelArg" );
1280 clStatus = clSetKernelArg(rEnv.mpkKernel, 4,
sizeof(cl_mem), &outputCl);
1281 CHECK_OPENCL( clStatus,
"clSetKernelArg");
1286 clEnqueueNDRangeKernel(rEnv.mpkCmdQueue, rEnv.mpkKernel, 2,
nullptr,
1287 globalThreads, localThreads, 0,
nullptr,
nullptr);
1288 CHECK_OPENCL(clStatus,
"clEnqueueNDRangeKernel");
1291 void *ptr = clEnqueueMapBuffer(rEnv.mpkCmdQueue, outputCl, CL_TRUE, CL_MAP_READ,
1292 0, w * h *
sizeof(l_uint32), 0,
nullptr,
nullptr,
1294 CHECK_OPENCL(clStatus,
"clEnqueueMapBuffer outputCl");
1295 clEnqueueUnmapMemObject(rEnv.mpkCmdQueue, outputCl, ptr, 0,
nullptr,
nullptr);
1298 clFinish(rEnv.mpkCmdQueue);
1305 static cl_int pixDilateCL_55(l_int32 wpl, l_int32 h)
1307 size_t globalThreads[2];
1311 size_t localThreads[2];
1314 gsize = (wpl*h + GROUPSIZE_HMORX - 1)/ GROUPSIZE_HMORX * GROUPSIZE_HMORX;
1315 globalThreads[0] = gsize;
1316 globalThreads[1] = GROUPSIZE_HMORY;
1317 localThreads[0] = GROUPSIZE_HMORX;
1318 localThreads[1] = GROUPSIZE_HMORY;
1320 rEnv.mpkKernel = clCreateKernel( rEnv.mpkProgram,
"morphoDilateHor_5x5", &status );
1321 CHECK_OPENCL(status,
"clCreateKernel morphoDilateHor_5x5");
1323 status = clSetKernelArg(rEnv.mpkKernel,
1327 status = clSetKernelArg(rEnv.mpkKernel,
1331 status = clSetKernelArg(rEnv.mpkKernel, 2,
sizeof(wpl), &wpl);
1332 status = clSetKernelArg(rEnv.mpkKernel, 3,
sizeof(h), &h);
1334 status = clEnqueueNDRangeKernel(rEnv.mpkCmdQueue, rEnv.mpkKernel, 2,
1335 nullptr, globalThreads, localThreads, 0,
1339 pixtemp = pixsCLBuffer;
1340 pixsCLBuffer = pixdCLBuffer;
1341 pixdCLBuffer = pixtemp;
1344 gsize = (wpl + GROUPSIZE_X - 1)/ GROUPSIZE_X * GROUPSIZE_X;
1345 globalThreads[0] = gsize;
1346 gsize = (h + GROUPSIZE_Y - 1)/ GROUPSIZE_Y * GROUPSIZE_Y;
1347 globalThreads[1] = gsize;
1348 localThreads[0] = GROUPSIZE_X;
1349 localThreads[1] = GROUPSIZE_Y;
1351 rEnv.mpkKernel = clCreateKernel( rEnv.mpkProgram,
"morphoDilateVer_5x5", &status );
1352 CHECK_OPENCL(status,
"clCreateKernel morphoDilateVer_5x5");
1354 status = clSetKernelArg(rEnv.mpkKernel,
1358 status = clSetKernelArg(rEnv.mpkKernel,
1362 status = clSetKernelArg(rEnv.mpkKernel, 2,
sizeof(wpl), &wpl);
1363 status = clSetKernelArg(rEnv.mpkKernel, 3,
sizeof(h), &h);
1364 status = clEnqueueNDRangeKernel(rEnv.mpkCmdQueue, rEnv.mpkKernel, 2,
1365 nullptr, globalThreads, localThreads, 0,
1372 static cl_int pixErodeCL_55(l_int32 wpl, l_int32 h)
1374 size_t globalThreads[2];
1378 l_uint32 fwmask, lwmask;
1379 size_t localThreads[2];
1381 lwmask = lmask32[31 - 2];
1382 fwmask = rmask32[31 - 2];
1385 gsize = (wpl*h + GROUPSIZE_HMORX - 1)/ GROUPSIZE_HMORX * GROUPSIZE_HMORX;
1386 globalThreads[0] = gsize;
1387 globalThreads[1] = GROUPSIZE_HMORY;
1388 localThreads[0] = GROUPSIZE_HMORX;
1389 localThreads[1] = GROUPSIZE_HMORY;
1391 rEnv.mpkKernel = clCreateKernel( rEnv.mpkProgram,
"morphoErodeHor_5x5", &status );
1392 CHECK_OPENCL(status,
"clCreateKernel morphoErodeHor_5x5");
1394 status = clSetKernelArg(rEnv.mpkKernel,
1398 status = clSetKernelArg(rEnv.mpkKernel,
1402 status = clSetKernelArg(rEnv.mpkKernel, 2,
sizeof(wpl), &wpl);
1403 status = clSetKernelArg(rEnv.mpkKernel, 3,
sizeof(h), &h);
1405 status = clEnqueueNDRangeKernel(rEnv.mpkCmdQueue, rEnv.mpkKernel, 2,
1406 nullptr, globalThreads, localThreads, 0,
1410 pixtemp = pixsCLBuffer;
1411 pixsCLBuffer = pixdCLBuffer;
1412 pixdCLBuffer = pixtemp;
1415 gsize = (wpl + GROUPSIZE_X - 1)/ GROUPSIZE_X * GROUPSIZE_X;
1416 globalThreads[0] = gsize;
1417 gsize = (h + GROUPSIZE_Y - 1)/ GROUPSIZE_Y * GROUPSIZE_Y;
1418 globalThreads[1] = gsize;
1419 localThreads[0] = GROUPSIZE_X;
1420 localThreads[1] = GROUPSIZE_Y;
1422 rEnv.mpkKernel = clCreateKernel( rEnv.mpkProgram,
"morphoErodeVer_5x5", &status );
1423 CHECK_OPENCL(status,
"clCreateKernel morphoErodeVer_5x5");
1425 status = clSetKernelArg(rEnv.mpkKernel,
1429 status = clSetKernelArg(rEnv.mpkKernel,
1433 status = clSetKernelArg(rEnv.mpkKernel, 2,
sizeof(wpl), &wpl);
1434 status = clSetKernelArg(rEnv.mpkKernel, 3,
sizeof(h), &h);
1435 status = clSetKernelArg(rEnv.mpkKernel, 4,
sizeof(fwmask), &fwmask);
1436 status = clSetKernelArg(rEnv.mpkKernel, 5,
sizeof(lwmask), &lwmask);
1437 status = clEnqueueNDRangeKernel(rEnv.mpkCmdQueue, rEnv.mpkKernel, 2,
1438 nullptr, globalThreads, localThreads, 0,
1446 pixDilateCL(l_int32 hsize, l_int32 vsize, l_int32 wpl, l_int32 h)
1448 l_int32 xp, yp, xn, yn;
1450 size_t globalThreads[2];
1454 size_t localThreads[2];
1457 OpenclDevice::SetKernelEnv( &rEnv );
1459 if (hsize == 5 && vsize == 5)
1462 status = pixDilateCL_55(wpl, h);
1466 sel = selCreateBrick(vsize, hsize, vsize / 2, hsize / 2, SEL_HIT);
1468 selFindMaxTranslations(sel, &xp, &yp, &xn, &yn);
1471 gsize = (wpl + GROUPSIZE_X - 1)/ GROUPSIZE_X * GROUPSIZE_X;
1472 globalThreads[0] = gsize;
1473 gsize = (h + GROUPSIZE_Y - 1)/ GROUPSIZE_Y * GROUPSIZE_Y;
1474 globalThreads[1] = gsize;
1475 localThreads[0] = GROUPSIZE_X;
1476 localThreads[1] = GROUPSIZE_Y;
1478 if (xp > 31 || xn > 31)
1482 clCreateKernel(rEnv.mpkProgram,
"morphoDilateHor", &status);
1483 CHECK_OPENCL(status,
"clCreateKernel morphoDilateHor");
1485 status = clSetKernelArg(rEnv.mpkKernel, 0,
sizeof(cl_mem), &pixsCLBuffer);
1486 status = clSetKernelArg(rEnv.mpkKernel, 1,
sizeof(cl_mem), &pixdCLBuffer);
1487 status = clSetKernelArg(rEnv.mpkKernel, 2,
sizeof(xp), &xp);
1488 status = clSetKernelArg(rEnv.mpkKernel, 3,
sizeof(xn), &xn);
1489 status = clSetKernelArg(rEnv.mpkKernel, 4,
sizeof(wpl), &wpl);
1490 status = clSetKernelArg(rEnv.mpkKernel, 5,
sizeof(h), &h);
1491 status = clEnqueueNDRangeKernel(rEnv.mpkCmdQueue, rEnv.mpkKernel, 2,
1492 nullptr, globalThreads, localThreads, 0,
1495 if (yp > 0 || yn > 0) {
1496 pixtemp = pixsCLBuffer;
1497 pixsCLBuffer = pixdCLBuffer;
1498 pixdCLBuffer = pixtemp;
1501 else if (xp > 0 || xn > 0 )
1505 clCreateKernel(rEnv.mpkProgram,
"morphoDilateHor_32word", &status);
1506 CHECK_OPENCL(status,
"clCreateKernel morphoDilateHor_32word");
1507 isEven = (xp != xn);
1509 status = clSetKernelArg(rEnv.mpkKernel, 0,
sizeof(cl_mem), &pixsCLBuffer);
1510 status = clSetKernelArg(rEnv.mpkKernel, 1,
sizeof(cl_mem), &pixdCLBuffer);
1511 status = clSetKernelArg(rEnv.mpkKernel, 2,
sizeof(xp), &xp);
1512 status = clSetKernelArg(rEnv.mpkKernel, 3,
sizeof(wpl), &wpl);
1513 status = clSetKernelArg(rEnv.mpkKernel, 4,
sizeof(h), &h);
1514 status = clSetKernelArg(rEnv.mpkKernel, 5,
sizeof(isEven), &isEven);
1515 status = clEnqueueNDRangeKernel(rEnv.mpkCmdQueue, rEnv.mpkKernel, 2,
1516 nullptr, globalThreads, localThreads, 0,
1519 if (yp > 0 || yn > 0) {
1520 pixtemp = pixsCLBuffer;
1521 pixsCLBuffer = pixdCLBuffer;
1522 pixdCLBuffer = pixtemp;
1526 if (yp > 0 || yn > 0)
1528 rEnv.mpkKernel = clCreateKernel( rEnv.mpkProgram,
"morphoDilateVer", &status );
1529 CHECK_OPENCL(status,
"clCreateKernel morphoDilateVer");
1531 status = clSetKernelArg(rEnv.mpkKernel,
1535 status = clSetKernelArg(rEnv.mpkKernel,
1539 status = clSetKernelArg(rEnv.mpkKernel, 2,
sizeof(yp), &yp);
1540 status = clSetKernelArg(rEnv.mpkKernel, 3,
sizeof(wpl), &wpl);
1541 status = clSetKernelArg(rEnv.mpkKernel, 4,
sizeof(h), &h);
1542 status = clSetKernelArg(rEnv.mpkKernel, 5,
sizeof(yn), &yn);
1543 status = clEnqueueNDRangeKernel(rEnv.mpkCmdQueue, rEnv.mpkKernel, 2,
1544 nullptr, globalThreads, localThreads, 0,
1552 static cl_int pixErodeCL(l_int32 hsize, l_int32 vsize, l_uint32 wpl, l_uint32 h) {
1553 l_int32 xp, yp, xn, yn;
1555 size_t globalThreads[2];
1556 size_t localThreads[2];
1560 char isAsymmetric = (MORPH_BC == ASYMMETRIC_MORPH_BC);
1561 l_uint32 rwmask, lwmask;
1564 sel = selCreateBrick(vsize, hsize, vsize / 2, hsize / 2, SEL_HIT);
1566 selFindMaxTranslations(sel, &xp, &yp, &xn, &yn);
1568 OpenclDevice::SetKernelEnv(&rEnv);
1570 if (hsize == 5 && vsize == 5 && isAsymmetric) {
1572 status = pixErodeCL_55(wpl, h);
1576 lwmask = lmask32[31 - (xn & 31)];
1577 rwmask = rmask32[31 - (xp & 31)];
1580 gsize = (wpl + GROUPSIZE_X - 1) / GROUPSIZE_X * GROUPSIZE_X;
1581 globalThreads[0] = gsize;
1582 gsize = (h + GROUPSIZE_Y - 1) / GROUPSIZE_Y * GROUPSIZE_Y;
1583 globalThreads[1] = gsize;
1584 localThreads[0] = GROUPSIZE_X;
1585 localThreads[1] = GROUPSIZE_Y;
1588 if (xp > 31 || xn > 31) {
1590 rEnv.mpkKernel = clCreateKernel(rEnv.mpkProgram,
"morphoErodeHor", &status);
1592 status = clSetKernelArg(rEnv.mpkKernel, 0,
sizeof(cl_mem), &pixsCLBuffer);
1593 status = clSetKernelArg(rEnv.mpkKernel, 1,
sizeof(cl_mem), &pixdCLBuffer);
1594 status = clSetKernelArg(rEnv.mpkKernel, 2,
sizeof(xp), &xp);
1595 status = clSetKernelArg(rEnv.mpkKernel, 3,
sizeof(xn), &xn);
1596 status = clSetKernelArg(rEnv.mpkKernel, 4,
sizeof(wpl), &wpl);
1597 status = clSetKernelArg(rEnv.mpkKernel, 5,
sizeof(h), &h);
1599 clSetKernelArg(rEnv.mpkKernel, 6,
sizeof(isAsymmetric), &isAsymmetric);
1600 status = clSetKernelArg(rEnv.mpkKernel, 7,
sizeof(rwmask), &rwmask);
1601 status = clSetKernelArg(rEnv.mpkKernel, 8,
sizeof(lwmask), &lwmask);
1602 status = clEnqueueNDRangeKernel(rEnv.mpkCmdQueue, rEnv.mpkKernel, 2,
1603 nullptr, globalThreads, localThreads, 0,
1606 if (yp > 0 || yn > 0) {
1607 pixtemp = pixsCLBuffer;
1608 pixsCLBuffer = pixdCLBuffer;
1609 pixdCLBuffer = pixtemp;
1611 }
else if (xp > 0 || xn > 0) {
1613 clCreateKernel(rEnv.mpkProgram,
"morphoErodeHor_32word", &status);
1614 isEven = (xp != xn);
1616 status = clSetKernelArg(rEnv.mpkKernel, 0,
sizeof(cl_mem), &pixsCLBuffer);
1617 status = clSetKernelArg(rEnv.mpkKernel, 1,
sizeof(cl_mem), &pixdCLBuffer);
1618 status = clSetKernelArg(rEnv.mpkKernel, 2,
sizeof(xp), &xp);
1619 status = clSetKernelArg(rEnv.mpkKernel, 3,
sizeof(wpl), &wpl);
1620 status = clSetKernelArg(rEnv.mpkKernel, 4,
sizeof(h), &h);
1622 clSetKernelArg(rEnv.mpkKernel, 5,
sizeof(isAsymmetric), &isAsymmetric);
1623 status = clSetKernelArg(rEnv.mpkKernel, 6,
sizeof(rwmask), &rwmask);
1624 status = clSetKernelArg(rEnv.mpkKernel, 7,
sizeof(lwmask), &lwmask);
1625 status = clSetKernelArg(rEnv.mpkKernel, 8,
sizeof(isEven), &isEven);
1626 status = clEnqueueNDRangeKernel(rEnv.mpkCmdQueue, rEnv.mpkKernel, 2,
1627 nullptr, globalThreads, localThreads, 0,
1630 if (yp > 0 || yn > 0) {
1631 pixtemp = pixsCLBuffer;
1632 pixsCLBuffer = pixdCLBuffer;
1633 pixdCLBuffer = pixtemp;
1638 if (yp > 0 || yn > 0) {
1639 rEnv.mpkKernel = clCreateKernel(rEnv.mpkProgram,
"morphoErodeVer", &status);
1640 CHECK_OPENCL(status,
"clCreateKernel morphoErodeVer");
1642 status = clSetKernelArg(rEnv.mpkKernel, 0,
sizeof(cl_mem), &pixsCLBuffer);
1643 status = clSetKernelArg(rEnv.mpkKernel, 1,
sizeof(cl_mem), &pixdCLBuffer);
1644 status = clSetKernelArg(rEnv.mpkKernel, 2,
sizeof(yp), &yp);
1645 status = clSetKernelArg(rEnv.mpkKernel, 3,
sizeof(wpl), &wpl);
1646 status = clSetKernelArg(rEnv.mpkKernel, 4,
sizeof(h), &h);
1648 clSetKernelArg(rEnv.mpkKernel, 5,
sizeof(isAsymmetric), &isAsymmetric);
1649 status = clSetKernelArg(rEnv.mpkKernel, 6,
sizeof(yn), &yn);
1650 status = clEnqueueNDRangeKernel(rEnv.mpkCmdQueue, rEnv.mpkKernel, 2,
1651 nullptr, globalThreads, localThreads, 0,
1659 static cl_int pixOpenCL(l_int32 hsize, l_int32 vsize, l_int32 wpl, l_int32 h)
1665 status = pixErodeCL(hsize, vsize, wpl, h);
1667 pixtemp = pixsCLBuffer;
1668 pixsCLBuffer = pixdCLBuffer;
1669 pixdCLBuffer = pixtemp;
1671 status = pixDilateCL(hsize, vsize, wpl, h);
1677 static cl_int pixCloseCL(l_int32 hsize, l_int32 vsize, l_int32 wpl, l_int32 h)
1683 status = pixDilateCL(hsize, vsize, wpl, h);
1685 pixtemp = pixsCLBuffer;
1686 pixsCLBuffer = pixdCLBuffer;
1687 pixdCLBuffer = pixtemp;
1689 status = pixErodeCL(hsize, vsize, wpl, h);
1696 cl_int pixSubtractCL_work(l_uint32 wpl, l_uint32 h, cl_mem buffer1,
1697 cl_mem buffer2, cl_mem outBuffer =
nullptr) {
1699 size_t globalThreads[2];
1701 size_t localThreads[] = {GROUPSIZE_X, GROUPSIZE_Y};
1703 gsize = (wpl + GROUPSIZE_X - 1) / GROUPSIZE_X * GROUPSIZE_X;
1704 globalThreads[0] = gsize;
1705 gsize = (h + GROUPSIZE_Y - 1) / GROUPSIZE_Y * GROUPSIZE_Y;
1706 globalThreads[1] = gsize;
1708 if (outBuffer !=
nullptr) {
1709 rEnv.mpkKernel = clCreateKernel(rEnv.mpkProgram,
"pixSubtract", &status);
1710 CHECK_OPENCL(status,
"clCreateKernel pixSubtract");
1713 clCreateKernel(rEnv.mpkProgram,
"pixSubtract_inplace", &status);
1714 CHECK_OPENCL(status,
"clCreateKernel pixSubtract_inplace");
1718 status = clSetKernelArg(rEnv.mpkKernel, 0,
sizeof(cl_mem), &buffer1);
1719 status = clSetKernelArg(rEnv.mpkKernel, 1,
sizeof(cl_mem), &buffer2);
1720 status = clSetKernelArg(rEnv.mpkKernel, 2,
sizeof(wpl), &wpl);
1721 status = clSetKernelArg(rEnv.mpkKernel, 3,
sizeof(h), &h);
1722 if (outBuffer !=
nullptr) {
1723 status = clSetKernelArg(rEnv.mpkKernel, 4,
sizeof(cl_mem), &outBuffer);
1726 clEnqueueNDRangeKernel(rEnv.mpkCmdQueue, rEnv.mpkKernel, 2,
nullptr,
1727 globalThreads, localThreads, 0,
nullptr,
nullptr);
1734 void OpenclDevice::pixGetLinesCL(Pix *pixd, Pix *pixs, Pix **pix_vline,
1735 Pix **pix_hline, Pix **pixClosed,
1736 bool getpixClosed, l_int32 close_hsize,
1737 l_int32 close_vsize, l_int32 open_hsize,
1738 l_int32 open_vsize, l_int32 line_hsize,
1739 l_int32 line_vsize) {
1743 wpl = pixGetWpl(pixs);
1744 h = pixGetHeight(pixs);
1747 clStatus = pixCloseCL(close_hsize, close_vsize, wpl, h);
1751 *pixClosed = mapOutputCLBuffer(rEnv, pixdCLBuffer, *pixClosed, pixs,
1752 wpl * h, CL_MAP_READ,
true,
false);
1758 clEnqueueCopyBuffer(rEnv.mpkCmdQueue, pixdCLBuffer, pixdCLIntermediate, 0,
1759 0,
sizeof(
int) * wpl * h, 0,
nullptr,
nullptr);
1762 pixtemp = pixsCLBuffer;
1763 pixsCLBuffer = pixdCLBuffer;
1764 pixdCLBuffer = pixtemp;
1766 clStatus = pixOpenCL(open_hsize, open_vsize, wpl, h);
1769 pixtemp = pixsCLBuffer;
1770 pixsCLBuffer = pixdCLBuffer;
1771 pixdCLBuffer = pixdCLIntermediate;
1772 pixdCLIntermediate = pixtemp;
1774 clStatus = pixSubtractCL_work(wpl, h, pixdCLBuffer, pixsCLBuffer);
1779 clEnqueueCopyBuffer(rEnv.mpkCmdQueue, pixdCLBuffer, pixdCLIntermediate, 0,
1780 0,
sizeof(
int) * wpl * h, 0,
nullptr,
nullptr);
1782 pixtemp = pixsCLBuffer;
1783 pixsCLBuffer = pixdCLBuffer;
1784 pixdCLBuffer = pixtemp;
1788 clStatus = pixOpenCL(1, line_vsize, wpl, h);
1791 *pix_vline = mapOutputCLBuffer(rEnv, pixdCLBuffer, *pix_vline, pixs, wpl * h,
1792 CL_MAP_READ,
true,
false);
1794 pixtemp = pixsCLBuffer;
1795 pixsCLBuffer = pixdCLIntermediate;
1796 pixdCLIntermediate = pixtemp;
1800 clStatus = pixOpenCL(line_hsize, 1, wpl, h);
1803 *pix_hline = mapOutputCLBuffer(rEnv, pixdCLBuffer, *pix_hline, pixs, wpl * h,
1804 CL_MAP_READ,
true,
true);
1815 int OpenclDevice::HistogramRectOCL(
unsigned char *imageData,
1816 int bytes_per_pixel,
int bytes_per_line,
1820 int *histogramAllChannels) {
1825 SetKernelEnv(&histKern);
1826 KernelEnv histRedKern;
1827 SetKernelEnv(&histRedKern);
1833 cl_mem imageBuffer = clCreateBuffer(
1834 histKern.mpkContext, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR,
1835 width * height * bytes_per_pixel *
sizeof(
char), imageData, &clStatus);
1836 CHECK_OPENCL(clStatus,
"clCreateBuffer imageBuffer");
1839 int block_size = 256;
1841 clStatus = clGetDeviceInfo(gpuEnv.mpDevID, CL_DEVICE_MAX_COMPUTE_UNITS,
1842 sizeof(numCUs), &numCUs,
nullptr);
1843 CHECK_OPENCL(clStatus,
"clCreateBuffer imageBuffer");
1845 int requestedOccupancy = 10;
1846 int numWorkGroups = numCUs * requestedOccupancy;
1847 int numThreads = block_size * numWorkGroups;
1848 size_t local_work_size[] = {
static_cast<size_t>(block_size)};
1849 size_t global_work_size[] = {
static_cast<size_t>(numThreads)};
1850 size_t red_global_work_size[] = {
1851 static_cast<size_t>(block_size *
kHistogramSize * bytes_per_pixel)};
1856 cl_mem histogramBuffer = clCreateBuffer(
1857 histKern.mpkContext, CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR,
1858 kHistogramSize * bytes_per_pixel *
sizeof(
int), histogramAllChannels,
1860 CHECK_OPENCL(clStatus,
"clCreateBuffer histogramBuffer");
1864 int tmpHistogramBins =
kHistogramSize * bytes_per_pixel * histRed;
1866 cl_mem tmpHistogramBuffer =
1867 clCreateBuffer(histKern.mpkContext, CL_MEM_READ_WRITE,
1868 tmpHistogramBins *
sizeof(cl_uint),
nullptr, &clStatus);
1869 CHECK_OPENCL(clStatus,
"clCreateBuffer tmpHistogramBuffer");
1872 int *zeroBuffer =
new int[1];
1874 cl_mem atomicSyncBuffer = clCreateBuffer(
1875 histKern.mpkContext, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
1876 sizeof(cl_int), zeroBuffer, &clStatus);
1877 CHECK_OPENCL(clStatus,
"clCreateBuffer atomicSyncBuffer");
1878 delete[] zeroBuffer;
1880 if (bytes_per_pixel == 1) {
1881 histKern.mpkKernel = clCreateKernel(
1882 histKern.mpkProgram,
"kernel_HistogramRectOneChannel", &clStatus);
1883 CHECK_OPENCL(clStatus,
"clCreateKernel kernel_HistogramRectOneChannel");
1885 histRedKern.mpkKernel =
1886 clCreateKernel(histRedKern.mpkProgram,
1887 "kernel_HistogramRectOneChannelReduction", &clStatus);
1888 CHECK_OPENCL(clStatus,
1889 "clCreateKernel kernel_HistogramRectOneChannelReduction");
1891 histKern.mpkKernel = clCreateKernel( histKern.mpkProgram,
"kernel_HistogramRectAllChannels", &clStatus );
1892 CHECK_OPENCL( clStatus,
"clCreateKernel kernel_HistogramRectAllChannels");
1894 histRedKern.mpkKernel = clCreateKernel( histRedKern.mpkProgram,
"kernel_HistogramRectAllChannelsReduction", &clStatus );
1895 CHECK_OPENCL( clStatus,
"clCreateKernel kernel_HistogramRectAllChannelsReduction");
1901 ptr = clEnqueueMapBuffer(
1902 histKern.mpkCmdQueue, tmpHistogramBuffer, CL_TRUE, CL_MAP_WRITE, 0,
1903 tmpHistogramBins *
sizeof(cl_uint), 0,
nullptr,
nullptr, &clStatus);
1904 CHECK_OPENCL( clStatus,
"clEnqueueMapBuffer tmpHistogramBuffer");
1906 memset(ptr, 0, tmpHistogramBins*
sizeof(cl_uint));
1907 clEnqueueUnmapMemObject(histKern.mpkCmdQueue, tmpHistogramBuffer, ptr, 0,
1912 clSetKernelArg(histKern.mpkKernel, 0,
sizeof(cl_mem), &imageBuffer);
1913 CHECK_OPENCL( clStatus,
"clSetKernelArg imageBuffer");
1914 cl_uint numPixels = width*height;
1916 clSetKernelArg(histKern.mpkKernel, 1,
sizeof(cl_uint), &numPixels);
1917 CHECK_OPENCL( clStatus,
"clSetKernelArg numPixels" );
1918 clStatus = clSetKernelArg(histKern.mpkKernel, 2,
sizeof(cl_mem),
1919 &tmpHistogramBuffer);
1920 CHECK_OPENCL( clStatus,
"clSetKernelArg tmpHistogramBuffer");
1923 int n = numThreads/bytes_per_pixel;
1924 clStatus = clSetKernelArg(histRedKern.mpkKernel, 0,
sizeof(cl_int), &n);
1925 CHECK_OPENCL( clStatus,
"clSetKernelArg imageBuffer");
1926 clStatus = clSetKernelArg(histRedKern.mpkKernel, 1,
sizeof(cl_mem),
1927 &tmpHistogramBuffer);
1928 CHECK_OPENCL( clStatus,
"clSetKernelArg tmpHistogramBuffer");
1929 clStatus = clSetKernelArg(histRedKern.mpkKernel, 2,
sizeof(cl_mem),
1931 CHECK_OPENCL( clStatus,
"clSetKernelArg histogramBuffer");
1935 clStatus = clEnqueueNDRangeKernel(histKern.mpkCmdQueue, histKern.mpkKernel, 1,
1936 nullptr, global_work_size, local_work_size, 0,
1938 CHECK_OPENCL(clStatus,
1939 "clEnqueueNDRangeKernel kernel_HistogramRectAllChannels");
1940 clFinish(histKern.mpkCmdQueue);
1941 if (clStatus != 0) {
1945 clStatus = clEnqueueNDRangeKernel(
1946 histRedKern.mpkCmdQueue, histRedKern.mpkKernel, 1,
nullptr,
1947 red_global_work_size, local_work_size, 0,
nullptr,
nullptr);
1948 CHECK_OPENCL( clStatus,
"clEnqueueNDRangeKernel kernel_HistogramRectAllChannelsReduction" );
1949 clFinish( histRedKern.mpkCmdQueue );
1950 if (clStatus != 0) {
1956 ptr = clEnqueueMapBuffer(histRedKern.mpkCmdQueue, histogramBuffer, CL_TRUE,
1959 nullptr,
nullptr, &clStatus);
1960 CHECK_OPENCL( clStatus,
"clEnqueueMapBuffer histogramBuffer");
1961 if (clStatus != 0) {
1964 clEnqueueUnmapMemObject(histRedKern.mpkCmdQueue, histogramBuffer, ptr, 0,
1967 clReleaseMemObject(histogramBuffer);
1968 clReleaseMemObject(imageBuffer);
1979 int OpenclDevice::ThresholdRectToPixOCL(
unsigned char *imageData,
1980 int bytes_per_pixel,
int bytes_per_line,
1981 int *thresholds,
int *hi_values,
1982 Pix **pix,
int height,
int width,
1983 int top,
int left) {
1987 *pix = pixCreate(width, height, 1);
1988 uint32_t *pixData = pixGetData(*pix);
1989 int wpl = pixGetWpl(*pix);
1990 int pixSize = wpl * height *
sizeof(uint32_t);
1994 SetKernelEnv(&rEnv);
1997 int block_size = 256;
1999 clStatus = clGetDeviceInfo(gpuEnv.mpDevID, CL_DEVICE_MAX_COMPUTE_UNITS,
2000 sizeof(numCUs), &numCUs,
nullptr);
2001 CHECK_OPENCL(clStatus,
"clCreateBuffer imageBuffer");
2003 int requestedOccupancy = 10;
2004 int numWorkGroups = numCUs * requestedOccupancy;
2005 int numThreads = block_size * numWorkGroups;
2006 size_t local_work_size[] = {(size_t)block_size};
2007 size_t global_work_size[] = {(size_t)numThreads};
2014 cl_mem imageBuffer = clCreateBuffer(
2015 rEnv.mpkContext, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR,
2016 width * height * bytes_per_pixel *
sizeof(
char), imageData, &clStatus);
2017 CHECK_OPENCL(clStatus,
"clCreateBuffer imageBuffer");
2021 clCreateBuffer(rEnv.mpkContext, CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR,
2022 pixSize, pixData, &clStatus);
2023 CHECK_OPENCL(clStatus,
"clCreateBuffer pix");
2026 cl_mem thresholdsBuffer =
2027 clCreateBuffer(rEnv.mpkContext, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR,
2028 bytes_per_pixel *
sizeof(
int), thresholds, &clStatus);
2029 CHECK_OPENCL(clStatus,
"clCreateBuffer thresholdBuffer");
2030 cl_mem hiValuesBuffer =
2031 clCreateBuffer(rEnv.mpkContext, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR,
2032 bytes_per_pixel *
sizeof(
int), hi_values, &clStatus);
2033 CHECK_OPENCL(clStatus,
"clCreateBuffer hiValuesBuffer");
2036 if (bytes_per_pixel == 4) {
2038 clCreateKernel(rEnv.mpkProgram,
"kernel_ThresholdRectToPix", &clStatus);
2039 CHECK_OPENCL(clStatus,
"clCreateKernel kernel_ThresholdRectToPix");
2041 rEnv.mpkKernel = clCreateKernel(
2042 rEnv.mpkProgram,
"kernel_ThresholdRectToPix_OneChan", &clStatus);
2043 CHECK_OPENCL(clStatus,
"clCreateKernel kernel_ThresholdRectToPix_OneChan");
2047 clStatus = clSetKernelArg(rEnv.mpkKernel, 0,
sizeof(cl_mem), &imageBuffer);
2048 CHECK_OPENCL(clStatus,
"clSetKernelArg imageBuffer");
2049 cl_uint numPixels = width * height;
2050 clStatus = clSetKernelArg(rEnv.mpkKernel, 1,
sizeof(
int), &height);
2051 CHECK_OPENCL(clStatus,
"clSetKernelArg height");
2052 clStatus = clSetKernelArg(rEnv.mpkKernel, 2,
sizeof(
int), &width);
2053 CHECK_OPENCL(clStatus,
"clSetKernelArg width");
2054 clStatus = clSetKernelArg(rEnv.mpkKernel, 3,
sizeof(
int), &wpl);
2055 CHECK_OPENCL(clStatus,
"clSetKernelArg wpl");
2057 clSetKernelArg(rEnv.mpkKernel, 4,
sizeof(cl_mem), &thresholdsBuffer);
2058 CHECK_OPENCL(clStatus,
"clSetKernelArg thresholdsBuffer");
2059 clStatus = clSetKernelArg(rEnv.mpkKernel, 5,
sizeof(cl_mem), &hiValuesBuffer);
2060 CHECK_OPENCL(clStatus,
"clSetKernelArg hiValuesBuffer");
2061 clStatus = clSetKernelArg(rEnv.mpkKernel, 6,
sizeof(cl_mem), &pixThBuffer);
2062 CHECK_OPENCL(clStatus,
"clSetKernelArg pixThBuffer");
2066 clStatus = clEnqueueNDRangeKernel(rEnv.mpkCmdQueue, rEnv.mpkKernel, 1,
2067 nullptr, global_work_size, local_work_size,
2068 0,
nullptr,
nullptr);
2069 CHECK_OPENCL(clStatus,
"clEnqueueNDRangeKernel kernel_ThresholdRectToPix");
2070 clFinish(rEnv.mpkCmdQueue);
2072 if (clStatus != 0) {
2073 printf(
"Setting return value to -1\n");
2078 clEnqueueMapBuffer(rEnv.mpkCmdQueue, pixThBuffer, CL_TRUE, CL_MAP_READ, 0,
2079 pixSize, 0,
nullptr,
nullptr, &clStatus);
2080 CHECK_OPENCL(clStatus,
"clEnqueueMapBuffer histogramBuffer");
2081 clEnqueueUnmapMemObject(rEnv.mpkCmdQueue, pixThBuffer, ptr, 0,
nullptr,
2084 clReleaseMemObject(imageBuffer);
2085 clReleaseMemObject(thresholdsBuffer);
2086 clReleaseMemObject(hiValuesBuffer);
2099 typedef struct _TessScoreEvaluationInputData {
2103 unsigned char *imageData;
2105 } TessScoreEvaluationInputData;
2107 static void populateTessScoreEvaluationInputData(TessScoreEvaluationInputData *input) {
2112 int numChannels = 4;
2113 input->height = height;
2114 input->width = width;
2115 input->numChannels = numChannels;
2116 unsigned char (*imageData4)[4] = (
unsigned char (*)[4]) malloc(height*width*numChannels*
sizeof(
unsigned char));
2117 input->imageData = (
unsigned char *) &imageData4[0];
2120 unsigned char pixelWhite[4] = { 0, 0, 0, 255};
2121 unsigned char pixelBlack[4] = {255, 255, 255, 255};
2122 for (
int p = 0; p < height*width; p++) {
2124 imageData4[p][0] = pixelWhite[0];
2125 imageData4[p][1] = pixelWhite[1];
2126 imageData4[p][2] = pixelWhite[2];
2127 imageData4[p][3] = pixelWhite[3];
2130 int maxLineWidth = 64;
2133 for (
int i = 0; i < numLines; i++) {
2134 int lineWidth = rand()%maxLineWidth;
2135 int vertLinePos = lineWidth + rand()%(width-2*lineWidth);
2137 for (
int row = vertLinePos-lineWidth/2; row < vertLinePos+lineWidth/2; row++) {
2138 for (
int col = 0; col < height; col++) {
2140 imageData4[row*width+col][0] = pixelBlack[0];
2141 imageData4[row*width+col][1] = pixelBlack[1];
2142 imageData4[row*width+col][2] = pixelBlack[2];
2143 imageData4[row*width+col][3] = pixelBlack[3];
2148 for (
int i = 0; i < numLines; i++) {
2149 int lineWidth = rand()%maxLineWidth;
2150 int horLinePos = lineWidth + rand()%(height-2*lineWidth);
2152 for (
int row = 0; row < width; row++) {
2153 for (
int col = horLinePos-lineWidth/2; col < horLinePos+lineWidth/2; col++) {
2156 imageData4[row*width+col][0] = pixelBlack[0];
2157 imageData4[row*width+col][1] = pixelBlack[1];
2158 imageData4[row*width+col][2] = pixelBlack[2];
2159 imageData4[row*width+col][3] = pixelBlack[3];
2164 float fractionBlack = 0.1;
2165 int numSpots = (height*width)*fractionBlack/(maxLineWidth*maxLineWidth/2/2);
2166 for (
int i = 0; i < numSpots; i++) {
2167 int lineWidth = rand()%maxLineWidth;
2168 int col = lineWidth + rand()%(width-2*lineWidth);
2169 int row = lineWidth + rand()%(height-2*lineWidth);
2171 for (
int r = row-lineWidth/2; r < row+lineWidth/2; r++) {
2172 for (
int c = col-lineWidth/2; c < col+lineWidth/2; c++) {
2175 imageData4[r*width+c][0] = pixelBlack[0];
2176 imageData4[r*width+c][1] = pixelBlack[1];
2177 imageData4[r*width+c][2] = pixelBlack[2];
2178 imageData4[r*width+c][3] = pixelBlack[3];
2183 input->pix = pixCreate(input->width, input->height, 1);
2186 typedef struct _TessDeviceScore {
2196 static double composeRGBPixelMicroBench(GPUEnv *env, TessScoreEvaluationInputData input, ds_device_type type) {
2199 LARGE_INTEGER freq, time_funct_start, time_funct_end;
2200 QueryPerformanceFrequency(&freq);
2202 mach_timebase_info_data_t info = {0, 0};
2203 mach_timebase_info(&info);
2204 long long start, stop;
2206 timespec time_funct_start, time_funct_end;
2209 l_uint32 *tiffdata = (l_uint32 *)input.imageData;
2212 if (type == DS_DEVICE_OPENCL_DEVICE) {
2214 QueryPerformanceCounter(&time_funct_start);
2216 start = mach_absolute_time();
2218 clock_gettime( CLOCK_MONOTONIC, &time_funct_start );
2221 OpenclDevice::gpuEnv = *env;
2222 int wpl = pixGetWpl(input.pix);
2223 OpenclDevice::pixReadFromTiffKernel(tiffdata, input.width, input.height,
2226 QueryPerformanceCounter(&time_funct_end);
2227 time = (time_funct_end.QuadPart-time_funct_start.QuadPart)/(
double)(freq.QuadPart);
2229 stop = mach_absolute_time();
2230 time = ((stop - start) * (
double)info.numer / info.denom) / 1.0E9;
2232 clock_gettime( CLOCK_MONOTONIC, &time_funct_end );
2233 time = (time_funct_end.tv_sec - time_funct_start.tv_sec)*1.0 + (time_funct_end.tv_nsec - time_funct_start.tv_nsec)/1000000000.0;
2238 QueryPerformanceCounter(&time_funct_start);
2240 start = mach_absolute_time();
2242 clock_gettime( CLOCK_MONOTONIC, &time_funct_start );
2244 Pix *pix = pixCreate(input.width, input.height, 32);
2245 l_uint32 *pixData = pixGetData(pix);
2246 int wpl = pixGetWpl(pix);
2251 for (i = 0; i < input.height ; i++) {
2252 for (j = 0; j < input.width; j++) {
2253 l_uint32 tiffword = tiffdata[i * input.width + j];
2254 l_int32 rval = ((tiffword) & 0xff);
2255 l_int32 gval = (((tiffword) >> 8) & 0xff);
2256 l_int32 bval = (((tiffword) >> 16) & 0xff);
2257 l_uint32 value = (rval << 24) | (gval << 16) | (bval << 8);
2258 pixData[idx] = value;
2263 QueryPerformanceCounter(&time_funct_end);
2264 time = (time_funct_end.QuadPart-time_funct_start.QuadPart)/(
double)(freq.QuadPart);
2266 stop = mach_absolute_time();
2267 time = ((stop - start) * (
double)info.numer / info.denom) / 1.0E9;
2269 clock_gettime( CLOCK_MONOTONIC, &time_funct_end );
2270 time = (time_funct_end.tv_sec - time_funct_start.tv_sec)*1.0 + (time_funct_end.tv_nsec - time_funct_start.tv_nsec)/1000000000.0;
2281 static double histogramRectMicroBench( GPUEnv *env, TessScoreEvaluationInputData input, ds_device_type type ) {
2284 LARGE_INTEGER freq, time_funct_start, time_funct_end;
2285 QueryPerformanceFrequency(&freq);
2287 mach_timebase_info_data_t info = {0, 0};
2288 mach_timebase_info(&info);
2289 long long start, stop;
2291 timespec time_funct_start, time_funct_end;
2294 unsigned char pixelHi = (
unsigned char)255;
2299 int bytes_per_line = input.width*input.numChannels;
2300 int *histogramAllChannels =
new int[kHistogramSize*input.numChannels];
2303 if (type == DS_DEVICE_OPENCL_DEVICE) {
2305 QueryPerformanceCounter(&time_funct_start);
2307 start = mach_absolute_time();
2309 clock_gettime( CLOCK_MONOTONIC, &time_funct_start );
2312 OpenclDevice::gpuEnv = *env;
2313 int wpl = pixGetWpl(input.pix);
2314 retVal = OpenclDevice::HistogramRectOCL(
2315 input.imageData, input.numChannels, bytes_per_line, top, left,
2316 input.width, input.height, kHistogramSize, histogramAllChannels);
2319 QueryPerformanceCounter(&time_funct_end);
2320 time = (time_funct_end.QuadPart-time_funct_start.QuadPart)/(
double)(freq.QuadPart);
2322 stop = mach_absolute_time();
2324 time = ((stop - start) * (
double)info.numer / info.denom) / 1.0E9;
2329 clock_gettime( CLOCK_MONOTONIC, &time_funct_end );
2330 time = (time_funct_end.tv_sec - time_funct_start.tv_sec)*1.0 + (time_funct_end.tv_nsec - time_funct_start.tv_nsec)/1000000000.0;
2335 QueryPerformanceCounter(&time_funct_start);
2337 start = mach_absolute_time();
2339 clock_gettime( CLOCK_MONOTONIC, &time_funct_start );
2341 for (
int ch = 0; ch < input.numChannels; ++ch) {
2343 input.width, input.height, histogram);
2346 QueryPerformanceCounter(&time_funct_end);
2347 time = (time_funct_end.QuadPart-time_funct_start.QuadPart)/(
double)(freq.QuadPart);
2349 stop = mach_absolute_time();
2350 time = ((stop - start) * (
double)info.numer / info.denom) / 1.0E9;
2352 clock_gettime( CLOCK_MONOTONIC, &time_funct_end );
2353 time = (time_funct_end.tv_sec - time_funct_start.tv_sec)*1.0 + (time_funct_end.tv_nsec - time_funct_start.tv_nsec)/1000000000.0;
2359 delete[] histogramAllChannels;
2364 static void ThresholdRectToPix_Native(
const unsigned char* imagedata,
2365 int bytes_per_pixel,
2367 const int* thresholds,
2368 const int* hi_values,
2372 int width = pixGetWidth(*pix);
2373 int height = pixGetHeight(*pix);
2375 *pix = pixCreate(width, height, 1);
2376 uint32_t *pixdata = pixGetData(*pix);
2377 int wpl = pixGetWpl(*pix);
2378 const unsigned char* srcdata = imagedata + top * bytes_per_line +
2379 left * bytes_per_pixel;
2380 for (
int y = 0; y < height; ++y) {
2381 const uint8_t *linedata = srcdata;
2382 uint32_t *pixline = pixdata + y * wpl;
2383 for (
int x = 0; x < width; ++x, linedata += bytes_per_pixel) {
2384 bool white_result =
true;
2385 for (
int ch = 0; ch < bytes_per_pixel; ++ch) {
2386 if (hi_values[ch] >= 0 &&
2387 (linedata[ch] > thresholds[ch]) == (hi_values[ch] == 0)) {
2388 white_result =
false;
2393 CLEAR_DATA_BIT(pixline, x);
2395 SET_DATA_BIT(pixline, x);
2397 srcdata += bytes_per_line;
2401 static double thresholdRectToPixMicroBench(GPUEnv *env, TessScoreEvaluationInputData input, ds_device_type type) {
2405 LARGE_INTEGER freq, time_funct_start, time_funct_end;
2406 QueryPerformanceFrequency(&freq);
2408 mach_timebase_info_data_t info = {0, 0};
2409 mach_timebase_info(&info);
2410 long long start, stop;
2412 timespec time_funct_start, time_funct_end;
2416 unsigned char pixelHi = (
unsigned char)255;
2417 int* thresholds =
new int[4];
2418 thresholds[0] = pixelHi/2;
2419 thresholds[1] = pixelHi/2;
2420 thresholds[2] = pixelHi/2;
2421 thresholds[3] = pixelHi/2;
2422 int *hi_values =
new int[4];
2423 thresholds[0] = pixelHi;
2424 thresholds[1] = pixelHi;
2425 thresholds[2] = pixelHi;
2426 thresholds[3] = pixelHi;
2430 int bytes_per_line = input.width*input.numChannels;
2433 if (type == DS_DEVICE_OPENCL_DEVICE) {
2435 QueryPerformanceCounter(&time_funct_start);
2437 start = mach_absolute_time();
2439 clock_gettime( CLOCK_MONOTONIC, &time_funct_start );
2442 OpenclDevice::gpuEnv = *env;
2443 int wpl = pixGetWpl(input.pix);
2444 retVal = OpenclDevice::ThresholdRectToPixOCL(
2445 input.imageData, input.numChannels, bytes_per_line, thresholds,
2446 hi_values, &input.pix, input.height, input.width, top, left);
2449 QueryPerformanceCounter(&time_funct_end);
2450 time = (time_funct_end.QuadPart-time_funct_start.QuadPart)/(
double)(freq.QuadPart);
2452 stop = mach_absolute_time();
2454 time = ((stop - start) * (
double)info.numer / info.denom) / 1.0E9;
2461 clock_gettime( CLOCK_MONOTONIC, &time_funct_end );
2462 time = (time_funct_end.tv_sec - time_funct_start.tv_sec)*1.0 + (time_funct_end.tv_nsec - time_funct_start.tv_nsec)/1000000000.0;
2470 QueryPerformanceCounter(&time_funct_start);
2472 start = mach_absolute_time();
2474 clock_gettime( CLOCK_MONOTONIC, &time_funct_start );
2476 ThresholdRectToPix_Native( input.imageData, input.numChannels, bytes_per_line,
2477 thresholds, hi_values, &input.pix );
2480 QueryPerformanceCounter(&time_funct_end);
2481 time = (time_funct_end.QuadPart-time_funct_start.QuadPart)/(
double)(freq.QuadPart);
2483 stop = mach_absolute_time();
2484 time = ((stop - start) * (
double)info.numer / info.denom) / 1.0E9;
2486 clock_gettime( CLOCK_MONOTONIC, &time_funct_end );
2487 time = (time_funct_end.tv_sec - time_funct_start.tv_sec)*1.0 + (time_funct_end.tv_nsec - time_funct_start.tv_nsec)/1000000000.0;
2492 delete[] thresholds;
2497 static double getLineMasksMorphMicroBench(GPUEnv *env, TessScoreEvaluationInputData input, ds_device_type type) {
2501 LARGE_INTEGER freq, time_funct_start, time_funct_end;
2502 QueryPerformanceFrequency(&freq);
2504 mach_timebase_info_data_t info = {0, 0};
2505 mach_timebase_info(&info);
2506 long long start, stop;
2508 timespec time_funct_start, time_funct_end;
2512 int resolution = 300;
2513 int wpl = pixGetWpl(input.pix);
2518 int closing_brick = max_line_width / 3;
2521 if (type == DS_DEVICE_OPENCL_DEVICE) {
2523 QueryPerformanceCounter(&time_funct_start);
2525 start = mach_absolute_time();
2527 clock_gettime( CLOCK_MONOTONIC, &time_funct_start );
2529 Pix *src_pix = input.pix;
2530 OpenclDevice::gpuEnv = *env;
2531 OpenclDevice::initMorphCLAllocations(wpl, input.height, input.pix);
2532 Pix *pix_vline =
nullptr, *pix_hline =
nullptr, *pix_closed =
nullptr;
2533 OpenclDevice::pixGetLinesCL(
2534 nullptr, input.pix, &pix_vline, &pix_hline, &pix_closed,
true,
2535 closing_brick, closing_brick, max_line_width, max_line_width,
2536 min_line_length, min_line_length);
2538 OpenclDevice::releaseMorphCLBuffers();
2541 QueryPerformanceCounter(&time_funct_end);
2542 time = (time_funct_end.QuadPart-time_funct_start.QuadPart)/(
double)(freq.QuadPart);
2544 stop = mach_absolute_time();
2545 time = ((stop - start) * (
double)info.numer / info.denom) / 1.0E9;
2547 clock_gettime( CLOCK_MONOTONIC, &time_funct_end );
2548 time = (time_funct_end.tv_sec - time_funct_start.tv_sec)*1.0 + (time_funct_end.tv_nsec - time_funct_start.tv_nsec)/1000000000.0;
2552 QueryPerformanceCounter(&time_funct_start);
2554 start = mach_absolute_time();
2556 clock_gettime( CLOCK_MONOTONIC, &time_funct_start );
2560 Pix *src_pix = input.pix;
2562 pixCloseBrick(
nullptr, src_pix, closing_brick, closing_brick);
2564 pixOpenBrick(
nullptr, pix_closed, max_line_width, max_line_width);
2565 Pix *pix_hollow = pixSubtract(
nullptr, pix_closed, pix_solid);
2566 pixDestroy(&pix_solid);
2567 Pix *pix_vline = pixOpenBrick(
nullptr, pix_hollow, 1, min_line_length);
2568 Pix *pix_hline = pixOpenBrick(
nullptr, pix_hollow, min_line_length, 1);
2569 pixDestroy(&pix_hollow);
2572 QueryPerformanceCounter(&time_funct_end);
2573 time = (time_funct_end.QuadPart-time_funct_start.QuadPart)/(
double)(freq.QuadPart);
2575 stop = mach_absolute_time();
2576 time = ((stop - start) * (
double)info.numer / info.denom) / 1.0E9;
2578 clock_gettime( CLOCK_MONOTONIC, &time_funct_end );
2579 time = (time_funct_end.tv_sec - time_funct_start.tv_sec)*1.0 + (time_funct_end.tv_nsec - time_funct_start.tv_nsec)/1000000000.0;
2595 static ds_status serializeScore( ds_device* device,
void **serializedScore,
unsigned int* serializedScoreSize ) {
2596 *serializedScoreSize =
sizeof(TessDeviceScore);
2597 *serializedScore =
new unsigned char[*serializedScoreSize];
2598 memcpy(*serializedScore, device->score, *serializedScoreSize);
2603 static ds_status deserializeScore( ds_device* device,
const unsigned char* serializedScore,
unsigned int serializedScoreSize ) {
2605 device->score =
new TessDeviceScore;
2606 memcpy(device->score, serializedScore, serializedScoreSize);
2610 static ds_status releaseScore(
void *score) {
2611 delete (TessDeviceScore *)score;
2616 static ds_status evaluateScoreForDevice( ds_device *device,
void *inputData) {
2619 printf(
"\n[DS] Device: \"%s\" (%s) evaluation...\n", device->oclDeviceName, device->type==DS_DEVICE_OPENCL_DEVICE ?
"OpenCL" :
"Native" );
2620 GPUEnv *env =
nullptr;
2621 if (device->type == DS_DEVICE_OPENCL_DEVICE) {
2624 populateGPUEnvFromDevice( env, device->oclDeviceID);
2625 env->mnFileCount = 0;
2626 env->mnKernelCount = 0UL;
2628 OpenclDevice::gpuEnv = *env;
2629 OpenclDevice::CompileKernelFile(env,
"");
2632 TessScoreEvaluationInputData *input = (TessScoreEvaluationInputData *)inputData;
2635 double composeRGBPixelTime = composeRGBPixelMicroBench( env, *input, device->type );
2638 double histogramRectTime = histogramRectMicroBench( env, *input, device->type );
2641 double thresholdRectToPixTime = thresholdRectToPixMicroBench( env, *input, device->type );
2644 double getLineMasksMorphTime = getLineMasksMorphMicroBench( env, *input, device->type );
2649 float composeRGBPixelWeight = 1.2f;
2650 float histogramRectWeight = 2.4f;
2651 float thresholdRectToPixWeight = 4.5f;
2652 float getLineMasksMorphWeight = 5.0f;
2654 float weightedTime = composeRGBPixelWeight * composeRGBPixelTime +
2655 histogramRectWeight * histogramRectTime +
2656 thresholdRectToPixWeight * thresholdRectToPixTime +
2657 getLineMasksMorphWeight * getLineMasksMorphTime;
2658 device->score =
new TessDeviceScore;
2659 ((TessDeviceScore *)device->score)->time = weightedTime;
2661 printf(
"[DS] Device: \"%s\" (%s) evaluated\n", device->oclDeviceName, device->type==DS_DEVICE_OPENCL_DEVICE ?
"OpenCL" :
"Native" );
2662 printf(
"[DS]%25s: %f (w=%.1f)\n",
"composeRGBPixel", composeRGBPixelTime, composeRGBPixelWeight );
2663 printf(
"[DS]%25s: %f (w=%.1f)\n",
"HistogramRect", histogramRectTime, histogramRectWeight );
2664 printf(
"[DS]%25s: %f (w=%.1f)\n",
"ThresholdRectToPix", thresholdRectToPixTime, thresholdRectToPixWeight );
2665 printf(
"[DS]%25s: %f (w=%.1f)\n",
"getLineMasksMorph", getLineMasksMorphTime, getLineMasksMorphWeight );
2666 printf(
"[DS]%25s: %f\n",
"Score", ((TessDeviceScore *)device->score)->time );
2671 ds_device OpenclDevice::getDeviceSelection( ) {
2672 if (!deviceIsSelected) {
2675 if (1 == LoadOpencl()) {
2680 ds_profile *profile;
2681 status = initDSProfile(&profile,
"v0.1");
2684 const char *fileName =
"tesseract_opencl_profile_devices.dat";
2685 status = readProfileFromFile(profile, deserializeScore, fileName);
2686 if (status != DS_SUCCESS) {
2688 printf(
"[DS] Profile file not available (%s); performing profiling.\n",
2692 TessScoreEvaluationInputData input;
2693 populateTessScoreEvaluationInputData(&input);
2696 unsigned int numUpdates;
2697 status = profileDevices(profile, DS_EVALUATE_ALL,
2698 evaluateScoreForDevice, &input, &numUpdates);
2701 if (status == DS_SUCCESS) {
2702 status = writeProfileToFile(profile, serializeScore, fileName);
2704 if (status == DS_SUCCESS) {
2705 printf(
"[DS] Scores written to file (%s).\n", fileName);
2708 "[DS] Error saving scores to file (%s); scores not written to " 2714 "[DS] Unable to evaluate performance; scores not written to " 2719 printf(
"[DS] Profile read from file (%s).\n", fileName);
2724 float bestTime = FLT_MAX;
2725 int bestDeviceIdx = -1;
2726 for (
int d = 0; d < profile->numDevices; d++) {
2727 ds_device device = profile->devices[d];
2728 TessDeviceScore score = *(TessDeviceScore *)device.score;
2730 float time = score.time;
2731 printf(
"[DS] Device[%i] %i:%s score is %f\n", d + 1, device.type,
2732 device.oclDeviceName, time);
2733 if (time < bestTime) {
2738 printf(
"[DS] Selected Device[%i]: \"%s\" (%s)\n", bestDeviceIdx + 1,
2739 profile->devices[bestDeviceIdx].oclDeviceName,
2740 profile->devices[bestDeviceIdx].type == DS_DEVICE_OPENCL_DEVICE
2746 bool overridden =
false;
2747 char *overrideDeviceStr = getenv(
"TESSERACT_OPENCL_DEVICE");
2748 if (overrideDeviceStr !=
nullptr) {
2749 int overrideDeviceIdx = atoi(overrideDeviceStr);
2750 if (overrideDeviceIdx > 0 && overrideDeviceIdx <= profile->numDevices) {
2752 "[DS] Overriding Device Selection (TESSERACT_OPENCL_DEVICE=%s, " 2754 overrideDeviceStr, overrideDeviceIdx);
2755 bestDeviceIdx = overrideDeviceIdx - 1;
2759 "[DS] Ignoring invalid TESSERACT_OPENCL_DEVICE=%s ([1,%i] are " 2760 "valid devices).\n",
2761 overrideDeviceStr, profile->numDevices);
2766 printf(
"[DS] Overridden Device[%i]: \"%s\" (%s)\n", bestDeviceIdx + 1,
2767 profile->devices[bestDeviceIdx].oclDeviceName,
2768 profile->devices[bestDeviceIdx].type == DS_DEVICE_OPENCL_DEVICE
2772 selectedDevice = profile->devices[bestDeviceIdx];
2774 releaseDSProfile(profile, releaseScore);
2777 printf(
"[DS] OpenCL runtime not available.\n");
2778 selectedDevice.type = DS_DEVICE_NATIVE_CPU;
2779 selectedDevice.oclDeviceName =
"(null)";
2780 selectedDevice.score =
nullptr;
2781 selectedDevice.oclDeviceID =
nullptr;
2782 selectedDevice.oclDriverVersion =
nullptr;
2784 deviceIsSelected =
true;
2789 return selectedDevice;
2793 bool OpenclDevice::selectedDeviceIsOpenCL() {
2794 ds_device device = getDeviceSelection();
2795 return (device.type == DS_DEVICE_OPENCL_DEVICE);
#define PERF_COUNT_START(FUNCT_NAME)
const int kMinLineLengthFraction
Denominator of resolution makes min pixels to demand line lengths to be.
void SetImage(const unsigned char *imagedata, int width, int height, int bytes_per_pixel, int bytes_per_line)
void HistogramRect(Pix *src_pix, int channel, int left, int top, int width, int height, int *histogram)
typedef int(ZCALLBACK *close_file_func) OF((voidpf opaque
#define PERF_COUNT_SUB(SUB)
const int kThinLineFraction
Denominator of resolution makes max pixel width to allow thin lines.