33 #include <libFreeWRL.h>
35 #include "../scenegraph/Viewer.h"
36 #include "../scenegraph/RenderFuncs.h"
37 #include "../vrml_parser/Structs.h"
39 #include "../main/headers.h"
40 #include "../vrml_parser/CRoutes.h"
41 #include "../scenegraph/Collision.h"
43 #include "OpenCL_Utils.h"
48 static bool initialize_OpenCL();
49 static void createGPUInterpolators();
50 static const char* coordinateInterpolator_kernel;
51 static const char* interpolator_headers;
54 static void *OpenCL_Utils_constructor(){
55 void *v = malloc(
sizeof(
struct pOpenCL_Utils));
56 memset(v,0,
sizeof(
struct pOpenCL_Utils));
61 void OpenCL_Utils_init(
struct tOpenCL_Utils *t)
65 t->prv = OpenCL_Utils_constructor();
70 t->OpenCL_Initialized = FALSE;
76 void fwl_OpenCL_startup(
struct tOpenCL_Utils *t) {
78 if (t->OpenCL_Initialized) printf (
".... fwl_opencl already done?\n");
else printf (
"..... OpenCL init currently false\n");
83 createGPUCollisionProgram();
85 createGPUInterpolators();
87 t->OpenCL_Initialized = TRUE;
92 static char *getCLErrorString(cl_int err) {
95 case CL_COMPILE_PROGRAM_FAILURE:
return "CL_COMPILE_PROGRAM_FAILURE";
break;
96 case CL_LINKER_NOT_AVAILABLE:
return "CL_LINKER_NOT_AVAILABLE";
break;
97 case CL_LINK_PROGRAM_FAILURE:
return "CL_LINK_PROGRAM_FAILURE";
break;
98 case CL_DEVICE_PARTITION_FAILED:
return "CL_DEVICE_PARTITION_FAILED";
break;
99 case CL_KERNEL_ARG_INFO_NOT_AVAILABLE:
return "CL_KERNEL_ARG_INFO_NOT_AVAILABLE";
break;
100 case CL_INVALID_IMAGE_DESCRIPTOR:
return "CL_INVALID_IMAGE_DESCRIPTOR";
break;
101 case CL_INVALID_COMPILER_OPTIONS:
return "CL_INVALID_COMPILER_OPTIONS";
break;
102 case CL_INVALID_LINKER_OPTIONS:
return "CL_INVALID_LINKER_OPTIONS";
break;
103 case CL_INVALID_DEVICE_PARTITION_COUNT:
return "CL_INVALID_DEVICE_PARTITION_COUNT";
break;
105 #endif //CL_VERSION_1_2
107 case CL_DEVICE_NOT_FOUND:
return "CL_DEVICE_NOT_FOUND";
break;
108 case CL_DEVICE_NOT_AVAILABLE:
return "CL_DEVICE_NOT_AVAILABLE";
break;
109 case CL_COMPILER_NOT_AVAILABLE:
return "CL_COMPILER_NOT_AVAILABLE";
break;
110 case CL_MEM_OBJECT_ALLOCATION_FAILURE:
return "CL_MEM_OBJECT_ALLOCATION_FAILURE";
break;
111 case CL_OUT_OF_RESOURCES:
return "CL_OUT_OF_RESOURCES";
break;
112 case CL_OUT_OF_HOST_MEMORY:
return "CL_OUT_OF_HOST_MEMORY";
break;
113 case CL_PROFILING_INFO_NOT_AVAILABLE:
return "CL_PROFILING_INFO_NOT_AVAILABLE";
break;
114 case CL_MEM_COPY_OVERLAP:
return "CL_MEM_COPY_OVERLAP";
break;
115 case CL_IMAGE_FORMAT_MISMATCH:
return "CL_IMAGE_FORMAT_MISMATCH";
break;
116 case CL_IMAGE_FORMAT_NOT_SUPPORTED:
return "CL_IMAGE_FORMAT_NOT_SUPPORTED";
break;
117 case CL_BUILD_PROGRAM_FAILURE:
return "CL_BUILD_PROGRAM_FAILURE";
break;
118 case CL_MAP_FAILURE:
return "CL_MAP_FAILURE";
break;
119 case CL_MISALIGNED_SUB_BUFFER_OFFSET:
return "CL_MISALIGNED_SUB_BUFFER_OFFSET";
break;
120 case CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST:
return "CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST";
break;
121 case CL_INVALID_VALUE:
return "CL_INVALID_VALUE";
break;
122 case CL_INVALID_DEVICE_TYPE:
return "CL_INVALID_DEVICE_TYPE";
break;
123 case CL_INVALID_PLATFORM:
return "CL_INVALID_PLATFORM";
break;
124 case CL_INVALID_DEVICE:
return "CL_INVALID_DEVICE";
break;
125 case CL_INVALID_CONTEXT:
return "CL_INVALID_CONTEXT";
break;
126 case CL_INVALID_QUEUE_PROPERTIES:
return "CL_INVALID_QUEUE_PROPERTIES";
break;
127 case CL_INVALID_COMMAND_QUEUE:
return "CL_INVALID_COMMAND_QUEUE";
break;
128 case CL_INVALID_HOST_PTR:
return "CL_INVALID_HOST_PTR";
break;
129 case CL_INVALID_MEM_OBJECT:
return "CL_INVALID_MEM_OBJECT";
break;
130 case CL_INVALID_IMAGE_FORMAT_DESCRIPTOR:
return "CL_INVALID_IMAGE_FORMAT_DESCRIPTOR";
break;
131 case CL_INVALID_IMAGE_SIZE:
return "CL_INVALID_IMAGE_SIZE";
break;
132 case CL_INVALID_SAMPLER:
return "CL_INVALID_SAMPLER";
break;
133 case CL_INVALID_BINARY:
return "CL_INVALID_BINARY";
break;
134 case CL_INVALID_BUILD_OPTIONS:
return "CL_INVALID_BUILD_OPTIONS";
break;
135 case CL_INVALID_PROGRAM:
return "CL_INVALID_PROGRAM";
break;
136 case CL_INVALID_PROGRAM_EXECUTABLE:
return "CL_INVALID_PROGRAM_EXECUTABLE";
break;
137 case CL_INVALID_KERNEL_NAME:
return "CL_INVALID_KERNEL_NAME";
break;
138 case CL_INVALID_KERNEL_DEFINITION:
return "CL_INVALID_KERNEL_DEFINITION";
break;
139 case CL_INVALID_KERNEL:
return "CL_INVALID_KERNEL";
break;
140 case CL_INVALID_ARG_INDEX:
return "CL_INVALID_ARG_INDEX";
break;
141 case CL_INVALID_ARG_VALUE:
return "CL_INVALID_ARG_VALUE";
break;
142 case CL_INVALID_ARG_SIZE:
return "CL_INVALID_ARG_SIZE";
break;
143 case CL_INVALID_KERNEL_ARGS:
return "CL_INVALID_KERNEL_ARGS";
break;
144 case CL_INVALID_WORK_DIMENSION:
return "CL_INVALID_WORK_DIMENSION";
break;
145 case CL_INVALID_WORK_GROUP_SIZE:
return "CL_INVALID_WORK_GROUP_SIZE";
break;
146 case CL_INVALID_WORK_ITEM_SIZE:
return "CL_INVALID_WORK_ITEM_SIZE";
break;
147 case CL_INVALID_GLOBAL_OFFSET:
return "CL_INVALID_GLOBAL_OFFSET";
break;
148 case CL_INVALID_EVENT_WAIT_LIST:
return "CL_INVALID_EVENT_WAIT_LIST";
break;
149 case CL_INVALID_EVENT:
return "CL_INVALID_EVENT";
break;
150 case CL_INVALID_OPERATION:
return "CL_INVALID_OPERATION";
break;
151 case CL_INVALID_GL_OBJECT:
return "CL_INVALID_GL_OBJECT";
break;
152 case CL_INVALID_BUFFER_SIZE:
return "CL_INVALID_BUFFER_SIZE";
break;
153 case CL_INVALID_MIP_LEVEL:
return "CL_INVALID_MIP_LEVEL";
break;
154 case CL_INVALID_GLOBAL_WORK_SIZE:
return "CL_INVALID_GLOBAL_WORK_SIZE";
break;
155 case CL_INVALID_PROPERTY:
return "CL_INVALID_PROPERTY";
break;
156 case CL_INVALID_GL_SHAREGROUP_REFERENCE_KHR:
return "CL_INVALID_GL_SHAREGROUP_REFERENCE_KHR";
break;
157 default :{
return "hmmm - error message makes no sense";}
161 void printCLError(
const char *where, cl_int err) {
162 printf (
"OpenCL fn %s, error %s (%d)\n",where,getCLErrorString(err),err);
170 static bool initialize_OpenCL() {
174 p = (ppOpenCL_Utils)tg->OpenCL_Utils.prv;
200 #
if defined (_MSC_VER)
203 err = extraInitFromNvidiaSamples(p);
207 cl_platform_id cpPlatform = NULL;
212 cl_context_properties properties[] = {
213 CL_GL_CONTEXT_KHR, (cl_context_properties)wglGetCurrentContext(),
214 CL_WGL_HDC_KHR, (cl_context_properties)wglGetCurrentDC(),
215 CL_CONTEXT_PLATFORM, (cl_context_properties)cpPlatform,
218 p->context = clCreateContext(properties, 1, &p->device_id, NULL, NULL, &err);
224 #if defined (__linux__)
226 #define MAX_OPENCL_PLATFORMS 10
227 #define MAX_OPENCL_DEVICES 32
228 cl_platform_id platforms[MAX_OPENCL_PLATFORMS];
229 cl_device_id devices[MAX_OPENCL_DEVICES];
234 int selectedPlatform = -1;
235 int selectedDevice = -1;
240 err = clGetPlatformIDs(10,platforms,&numPlats);
241 TEST_ERR(
"clGetPlatformIDs",err);
245 printf (
"OpenCL init - numPlats is %d, OpenCL device not found\n",numPlats);
250 if (numPlats >= MAX_OPENCL_PLATFORMS) {
251 printf (
"OpenCL init - numPlats is %d, setting to %d\n",numPlats,MAX_OPENCL_PLATFORMS);
252 numPlats = MAX_OPENCL_PLATFORMS;
277 printf (
"printing out the device names for each platform found\n");
278 for (i=0; i<numPlats; i++) {
279 err = clGetDeviceIDs(platforms[i], CL_DEVICE_TYPE_GPU, MAX_OPENCL_DEVICES, devices, &numDevs);
280 printf (
"done the clGetDeviceIDS call for platform %d - devices %d\n",i,numDevs);
283 for (j=0; j<numDevs; j++) {
296 err = clGetDeviceInfo(devices[j],CL_DEVICE_EXTENSIONS,1000,crv,&crvs);
297 if (err != CL_SUCCESS) {
298 printCLError(
"clGetDeviceIDs",err);
303 if (strstr(crv,
"cl_khr_gl_sharing") != NULL) {
304 printf (
"**** Found cl_khr_gl_sharing ****\n");
305 selectedPlatform = i;
307 p->CL_device_id = devices[j];
314 if ((selectedPlatform <0) || (selectedDevice<0)) {
315 printCLError(
"No good OpenCL device or platform found, error ",err);
320 if ((selectedPlatform != 0) && (selectedDevice != 0)) {
322 err = clGetDeviceIDs(platforms[selectedPlatform], CL_DEVICE_TYPE_GPU, MAX_OPENCL_DEVICES, devices, &numDevs);
326 p->CL_device_id = devices[selectedDevice];
331 cl_context_properties properties[] = {
332 CL_GL_CONTEXT_KHR, (cl_context_properties)glXGetCurrentContext(),
333 CL_GLX_DISPLAY_KHR, (cl_context_properties)glXGetCurrentDisplay(),
334 CL_CONTEXT_PLATFORM, (cl_context_properties)platforms[selectedPlatform],
339 typedef CL_API_ENTRY cl_int
340 (CL_API_CALL *clGetGLContextInfoKHR_fn)(
341 const cl_context_properties * ,
347 clGetGLContextInfoKHR_fn clGetGLContextInfoKHR = NULL;
349 #ifdef CL_VERSION_1_2
350 clGetGLContextInfoKHR = (clGetGLContextInfoKHR_fn)clGetExtensionFunctionAddressForPlatform(platforms[selectedPlatform],
"clGetGLContextInfoKHR");
352 #ifdef CL_VERSION_1_1
353 clGetGLContextInfoKHR = (clGetGLContextInfoKHR_fn)clGetExtensionFunctionAddress(
"clGetGLContextInfoKHR");
358 err = clGetGLContextInfoKHR(properties, CL_DEVICES_FOR_GL_CONTEXT_KHR,
359 MAX_OPENCL_DEVICES*
sizeof(cl_device_id), devices, &size);
361 TEST_ERR(
"clGetGLContextInfoKHR",err);
363 printf (
"clGetGLContextInfoKHR returns size of %d\n",size);
369 p->CL_context=clCreateContextFromType(properties, CL_DEVICE_TYPE_GPU, NULL, NULL, &err);
373 TEST_ERR(
"clCreateContextFromType",err);
378 #ifdef GL_ES_VERSION_2_0
380 cl_platform_id platforms[10];
383 err = getFunctionHandles();
387 if (err != CL_SUCCESS) {
388 printCLError(
"clCreateContext",err);
393 err = clGetPlatformIDs(10,platforms,&numPlats);
394 TEST_ERR(
"clGetPlatformIDs",err);
395 printf (
"looking for up to 10 platforms, got %d",numPlats);
397 cl_platform_id platform;
398 err = clGetPlatformIDs(1,&platform,NULL);
399 TEST_ERR(
"clGetPlatformIDs",err);
401 err = clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 1, &p->device_id, NULL);
403 if (err != CL_SUCCESS) {
404 printCLError(
"clGetDeviceIDs",err);
418 p->CL_context=clCreateContextFromType(NULL, CL_DEVICE_TYPE_ANY, NULL, NULL, &err);
419 TEST_ERR(
"clCreateContextFromType",err);
421 printf (
"remember - building currently without the CL_KHR_gl_sharing enabled - the clCreateFromGLBuffer will error out, so return code removed.");
423 #endif //GL_ES_VERSION_2_0
428 p->CL_queue = clCreateCommandQueue(p->CL_context, p->CL_device_id, 0, &err);
432 if (!p->CL_queue || (err != CL_SUCCESS)) {
433 printCLError(
"clCreateCommandQueue",err);
437 rv = clGetDeviceInfo (p->CL_device_id, CL_DEVICE_MAX_WORK_GROUP_SIZE,
sizeof(
size_t), &(p->CL_default_workgroup_size), &rvlen);
438 if ((rv != CL_SUCCESS) || (err != CL_SUCCESS)) {
439 printCLError(
"clGetDeviceInfo",err);
446 rv = clGetDeviceInfo (p->CL_device_id, CL_DEVICE_MAX_WORK_GROUP_SIZE,
sizeof(
size_t), &wg_size, &rvlen);
447 TEST_ERR(
"clGetDeviceInfo",rv);
448 printf (
"CL_DEVICE_MAX_WORK_GROUP_SIZE %d\n",wg_size);
451 rv = clGetPlatformInfo(platforms[0],CL_PLATFORM_PROFILE,1000,rvstring,&rvlen);
452 printf (
"CL_PLATFORM_PROFILE :%s:\n",rvstring);
453 rv = clGetPlatformInfo(platforms[0],CL_PLATFORM_VERSION,1000,rvstring,&rvlen);
454 printf (
"CL_PLATFORM_VERSION :%s:\n",rvstring);
455 rv = clGetPlatformInfo(platforms[0],CL_PLATFORM_NAME,1000,rvstring,&rvlen);
456 printf (
"CL_PLATFORM_NAME :%s:\n",rvstring);
457 rv = clGetPlatformInfo(platforms[0],CL_PLATFORM_VENDOR,1000,rvstring,&rvlen);
458 printf (
"CL_PLATFORM_VENDOR :%s:\n",rvstring);
459 rv = clGetPlatformInfo(platforms[0],CL_PLATFORM_EXTENSIONS,1000,rvstring,&rvlen);
460 printf (
"CL_PLATFORM_EXTENSIONS :%s:\n",rvstring);
462 rv = clGetDeviceInfo (p->CL_device_id, CL_DEVICE_MAX_COMPUTE_UNITS,
sizeof(
size_t), &xyz, &rvlen);
463 printf (
"CL_DEVICE_MAX_COMPUTE_UNITS %d\n",xyz);
464 rv = clGetDeviceInfo (p->CL_device_id, CL_DEVICE_GLOBAL_MEM_CACHE_SIZE,
sizeof(cl_ulong), &longish, &rvlen);
465 printf (
"CL_DEVICE_GLOBAL_MEM_CACHE_SIZE %lu\n",longish);
466 rv = clGetDeviceInfo (p->CL_device_id, CL_DEVICE_GLOBAL_MEM_SIZE,
sizeof(cl_ulong), &longish, &rvlen);
467 printf (
"CL_DEVICE_GLOBAL_MEM_SIZE %lu\n",longish);
468 rv = clGetDeviceInfo (p->CL_device_id, CL_DEVICE_LOCAL_MEM_SIZE,
sizeof(cl_ulong), &longish, &rvlen);
469 printf (
"CL_DEVICE_LOCAL_MEM_SIZE %lu\n",longish);
470 rv= clGetDeviceInfo (p->CL_device_id,CL_DEVICE_EXTENSIONS, 1000, rvstring,&rvlen);
471 printf (
"CL_DEVICE_EXTENSIONS :%s:\n",rvstring);
472 rv= clGetDeviceInfo (p->CL_device_id,CL_DEVICE_PROFILE, 1000, rvstring,&rvlen);
473 printf (
"CL_DEVICE_PROFILE :%s:\n",rvstring);
474 rv= clGetDeviceInfo (p->CL_device_id,CL_DEVICE_NAME, 1000, rvstring,&rvlen);
475 printf (
"CL_DEVICE_NAME :%s:\n",rvstring);
476 rv= clGetDeviceInfo (p->CL_device_id,CL_DEVICE_VENDOR, 1000, rvstring,&rvlen);
477 printf (
"CL_DEVICE_VENDOR :%s:\n",rvstring);
478 rv= clGetDeviceInfo (p->CL_device_id,CL_DEVICE_VERSION, 1000, rvstring,&rvlen);
479 printf (
"CL_DEVICE_VERSION :%s:\n",rvstring);
497 #ifdef GL_ES_VERSION_2_0
501 #define clGetPlatformIDs(aa,bb,cc) rclGetPlatformIDs(aa,bb,cc)
502 #define clGetPlatformInfo(aa,bb,cc,dd,ee) rclGetPlatformInfo(aa,bb,cc,dd,ee)
503 #define clGetDeviceIDs(aa,bb,cc,dd,ee) rclGetDeviceIDs(aa,bb,cc,dd,ee)
504 #define clGetDeviceInfo(aa,bb,cc,dd,ee) rclGetDeviceInfo(aa,bb,cc,dd,ee)
505 #define clCreateKernel(aa,bb,cc) rclCreateKernel(aa,bb,cc)
506 #define clBuildProgram(aa,bb,cc,dd,ee,ff) rclBuildProgram(aa,bb,cc,dd,ee,ff)
507 #define clCreateBuffer(aa,bb,cc,dd,ee) rclCreateBuffer(aa,bb,cc,dd,ee)
508 #define clCreateCommandQueue(aa,bb,cc,dd) rclCreateCommandQueue(aa,bb,cc,dd)
509 #define clCreateContextFromType(aa,bb,cc,dd,ee) rclCreateContextFromType(aa,bb,cc,dd,ee)
510 #define clCreateFromGLBuffer(aa,bb,cc,dd) rclCreateFromGLBuffer(aa,bb,cc,dd)
511 #define clCreateProgramWithSource(aa,bb,cc,dd,ee) rclCreateProgramWithSource(aa,bb,cc,dd,ee)
512 #define clEnqueueNDRangeKernel(aa,bb,cc,dd,ee,ff,gg,hh,ii) rclEnqueueNDRangeKernel(aa,bb,cc,dd,ee,ff,gg,hh,ii)
513 #define clEnqueueReadBuffer(aa,bb,cc,dd,ee,ff,gg,hh,ii) rclEnqueueReadBuffer(aa,bb,cc,dd,ee,ff,gg,hh,ii)
514 #define clEnqueueWriteBuffer(aa,bb,cc,dd,ee,ff,gg,hh,ii) rclEnqueueWriteBuffer(aa,bb,cc,dd,ee,ff,gg,hh,ii)
515 #define clGetKernelWorkGroupInfo(aa,bb,cc,dd,ee,ff) rclGetKernelWorkGroupInfo(aa,bb,cc,dd,ee,ff)
516 #define clGetProgramBuildInfo(aa,bb,cc,dd,ee,ff) rclGetProgramBuildInfo(aa,bb,cc,dd,ee,ff)
517 #define clReleaseMemObject(aa) rclReleaseMemObject(aa)
518 #define clSetKernelArg(aa,bb,cc,dd) rclSetKernelArg(aa,bb,cc,dd)
520 static void *getCLHandle(){
524 res = dlopen(
"/system/lib/libOpenCL.so",RTLD_LAZY);
526 res = dlopen(
"/system/vendor/lib/egl/libGLES_mali.so",RTLD_LAZY);
530 res = dlopen(
"/system/lib/libllvm-a3xx.so",RTLD_LAZY);
534 ConsoleMessage(
"Could not open library :(\n");
539 ConsoleMessage (
"OpenCL lib - libOpenCL.so");
540 }
else if (which == 1) {
541 ConsoleMessage (
"OpenCL lib libGLES_mali.so");
542 }
else if (which == 2) {
543 ConsoleMessage (
"OpenCL Lib - liblvm-a3xx.so");
548 cl_int (*rclGetPlatformIDs)(cl_uint ,
553 cl_int (*rclGetPlatformInfo)(cl_platform_id ,
559 cl_int (*rclGetDeviceIDs)(cl_platform_id ,
566 cl_int (*rclGetDeviceInfo)(cl_device_id ,
572 cl_kernel (*rclCreateKernel)(cl_program ,
576 cl_int (*rclBuildProgram)(cl_program ,
578 const cl_device_id * ,
580 void (CL_CALLBACK * )(cl_program ,
void * ),
583 cl_mem (*rclCreateBuffer)(cl_context ,
589 cl_command_queue (*rclCreateCommandQueue)(cl_context ,
591 cl_command_queue_properties ,
595 cl_context (*rclCreateContextFromType)(
const cl_context_properties * ,
597 void (CL_CALLBACK * )(
const char *,
const void *, size_t,
void *),
601 cl_program (*rclCreateProgramWithSource)(cl_context ,
608 cl_int (*rclEnqueueNDRangeKernel)(cl_command_queue ,
618 cl_int (*rclEnqueueReadBuffer)(cl_command_queue ,
629 cl_int (*rclEnqueueWriteBuffer)(cl_command_queue ,
640 cl_int (*rclGetKernelWorkGroupInfo)(cl_kernel ,
642 cl_kernel_work_group_info ,
648 cl_int (*rclReleaseMemObject)(cl_mem );
650 cl_int (*rclSetKernelArg)(cl_kernel ,
655 cl_mem (*rclCreateFromGLBuffer)(cl_context, cl_mem_flags, GLuint, cl_int *);
657 cl_int (*rclGetProgramBuildInfo)(cl_program, cl_device_id, cl_program_build_info, size_t,
void *,
size_t *);
659 static int getFunctionHandles(){
660 static void* getCLHandle();
662 void *handle = getCLHandle();
663 if(handle==NULL)
return CL_DEVICE_NOT_AVAILABLE;
664 rclGetPlatformIDs = (cl_int (*)(cl_uint,cl_platform_id *,cl_uint*))dlsym(handle,
"clGetPlatformIDs");
665 rclGetPlatformInfo = (cl_int (*)(cl_platform_id, cl_platform_info, size_t,
void *,
size_t*))dlsym(handle,
"clGetPlatformInfo");
666 rclGetDeviceIDs = (cl_int (*)(cl_platform_id, cl_device_type, cl_uint, cl_device_id *, cl_uint*))dlsym(handle,
"clGetDeviceIDs");
667 rclGetDeviceInfo = (cl_int (*)(cl_device_id, cl_device_info, size_t,
void *,
size_t*))dlsym(handle,
"clGetDeviceInfo");
668 rclBuildProgram = (cl_int (*)(cl_program,cl_uint,
const cl_device_id *,
const char *, void (CL_CALLBACK*)(cl_program,
void*),
void *))dlsym(handle,
"clBuildProgram");
669 rclCreateBuffer = (cl_mem (*)(cl_context, cl_mem_flags, size_t,
void *, cl_int *))dlsym(handle,
"clCreateBuffer");
670 rclCreateKernel = (cl_kernel (*)(cl_program,
const char*,cl_int*))dlsym(handle,
"clCreateKernel");
671 rclCreateCommandQueue = (cl_command_queue (*) (cl_context,cl_device_id,cl_command_queue_properties,cl_int*))dlsym(handle,
"clCreateCommandQueue");
672 rclCreateContextFromType = (cl_context (*)(
const cl_context_properties*,cl_device_type,void(CL_CALLBACK*)(
const char*,
const void*,size_t,
void*),
void*,cl_int*))dlsym(handle,
"clCreateContextFromType");
673 rclCreateProgramWithSource = (cl_program (*) (cl_context,cl_uint,
const char**,
const size_t*,cl_int*))dlsym(handle,
"clCreateProgramWithSource");
674 rclEnqueueNDRangeKernel=(cl_int(*)(cl_command_queue,cl_kernel,cl_uint,
const size_t*,
const size_t*,
const size_t*,cl_uint,
const cl_event*,cl_event*))dlsym(handle,
"clEnqueueNDRangeKernel");
675 rclEnqueueReadBuffer = (cl_int (*)(cl_command_queue,cl_mem,cl_bool,size_t,size_t,
void*,cl_uint,
const cl_event*,cl_event*))dlsym(handle,
"clEnqueueReadBuffer");
676 rclEnqueueWriteBuffer = (cl_int (*)(cl_command_queue,cl_mem,cl_bool,size_t,size_t,
const void*,cl_uint,
const cl_event*,cl_event*))dlsym(handle,
"clEnqueueWriteBuffer");
677 rclGetKernelWorkGroupInfo = (cl_int (*)(cl_kernel,cl_device_id,cl_kernel_work_group_info,size_t,
void*,
size_t *))dlsym(handle,
"clGetKernelWorkGroupInfo");
678 rclReleaseMemObject = (cl_int (*)(cl_mem))dlsym(handle,
"clReleaseMemObject");
679 rclSetKernelArg = (cl_int (*)(cl_kernel,cl_uint,size_t,
const void *))dlsym(handle,
"clSetKernelArg");
680 rclGetProgramBuildInfo = (cl_int (*)(cl_program, cl_device_id, cl_program_build_info, size_t,
void *,
size_t *))dlsym(handle,
"clGetProgramBuildInfo");
681 rclCreateFromGLBuffer = (cl_mem (*)(cl_context, cl_mem_flags, GLuint, cl_int *))dlsym(handle,
"clCreateFromGLBuffer");
685 if (!(rclGetPlatformIDs) || !(rclGetPlatformInfo) || !(rclGetDeviceIDs) || !(rclGetDeviceInfo) ||
686 !(rclBuildProgram) || !(rclCreateBuffer) || !(rclCreateKernel) || !(rclCreateCommandQueue) ||
687 !(rclCreateContextFromType) || !(rclCreateProgramWithSource) || !(rclEnqueueNDRangeKernel) || !(rclEnqueueReadBuffer) ||
688 !(rclEnqueueWriteBuffer) || !(rclGetKernelWorkGroupInfo) || !(rclReleaseMemObject) || !(rclSetKernelArg) ||
689 !(rclGetProgramBuildInfo) || !(rclCreateFromGLBuffer)) {
690 ConsoleMessage (
"did not find one of the functions in this OpenCL Library");
691 if (!rclGetPlatformIDs) ConsoleMessage (
"did not find rclGetPlatformIDs");
692 if (!rclGetPlatformInfo) ConsoleMessage (
"did not find rclGetPlatformInfo");
693 if (!rclGetDeviceIDs) ConsoleMessage (
"did not find rclGetDeviceIDs");
694 if (!rclGetDeviceInfo) ConsoleMessage (
"did not find rclGetDeviceInfo");
695 if (!rclBuildProgram) ConsoleMessage (
"did not find rclBuildProgram");
696 if (!rclCreateBuffer) ConsoleMessage (
"did not find rclCreateBuffer");
697 if (!rclCreateKernel) ConsoleMessage (
"did not find rclCreateKernel");
698 if (!rclCreateCommandQueue) ConsoleMessage (
"did not find rclCreateCommandQueue");
699 if (!rclCreateContextFromType) ConsoleMessage (
"did not find rclCreateContextFromType");
700 if (!rclCreateProgramWithSource) ConsoleMessage (
"did not find rclCreateProgramWithSource");
701 if (!rclEnqueueNDRangeKernel) ConsoleMessage (
"did not find rclEnqueueNDRangeKernel");
702 if (!rclEnqueueReadBuffer) ConsoleMessage (
"did not find rclEnqueueReadBuffer");
703 if (!rclEnqueueWriteBuffer) ConsoleMessage (
"did not find rclEnqueueWriteBuffer");
704 if (!rclGetKernelWorkGroupInfo) ConsoleMessage (
"did not find rclGetKernelWorkGroupInfo");
705 if (!rclReleaseMemObject) ConsoleMessage (
"did not find rclReleaseMemObject");
706 if (!rclSetKernelArg) ConsoleMessage (
"did not find rclSetKernelArg");
707 if (!rclGetProgramBuildInfo) ConsoleMessage (
"did not find rclGetProgramBuildInfo");
708 if (!rclCreateFromGLBuffer) ConsoleMessage (
"did not find rclCreateFromGLBuffer");
719 static void createGPUInterpolators() {
722 p = (ppOpenCL_Utils)tg->OpenCL_Utils.prv;
727 size_t kernel_wg_size;
734 kp[0] = (
char *)interpolator_headers;
735 kp[1] = (
char *)coordinateInterpolator_kernel;
737 p->coordinateInterpolatorProgram = clCreateProgramWithSource(p->CL_context, 2, (
const char **) kp, NULL, &err);
740 if (!p->coordinateInterpolatorProgram || (err != CL_SUCCESS)) {
741 printCLError(
"clCreateProgramWithSource",err);
753 err = clBuildProgram(p->coordinateInterpolatorProgram, 1, &(p->CL_device_id), opts, NULL, NULL);
757 if (err != CL_SUCCESS) {
761 ConsoleMessage(
"Error: Failed to build program executable\n");
762 printCLError(
"clBuildProgram",err);
763 err = clGetProgramBuildInfo(p->coordinateInterpolatorProgram, p->CL_device_id, CL_PROGRAM_BUILD_LOG,
764 sizeof(buffer), buffer, &len);
765 TEST_ERR(
"clGetProgramBuildInfo",err);
766 ConsoleMessage (
"error string len %d\n",(
int)len);
767 ConsoleMessage(
"%s\n", buffer);
772 p->coordinateInterpolatorKernel = clCreateKernel(p->coordinateInterpolatorProgram,
"compute_collide", &err);
775 if (!p->coordinateInterpolatorKernel || (err != CL_SUCCESS)) {
776 printCLError(
"clCreateKernel",err);
783 err = clGetKernelWorkGroupInfo (p->coordinateInterpolatorKernel, p->CL_device_id,
784 CL_KERNEL_WORK_GROUP_SIZE,
sizeof(
size_t), &kernel_wg_size, &rvlen);
786 if (err!=CL_SUCCESS) {
787 printCLError(
"clGetKernelWorkGroupInfo",err);
792 if (kernel_wg_size < p->CL_default_workgroup_size) p->coordinateInterpolator_workgroup_size = kernel_wg_size;
793 else p->coordinateInterpolator_workgroup_size = p->CL_default_workgroup_size;
796 ConsoleMessage (
"MAX_WORK_GROUP_SIZE %d\n",kernel_wg_size);
797 ConsoleMessage (
"We are going to set our workgroup size to %d\n",wg_size);
809 static int printOnce = FALSE;
818 cl_mem myVert = NULL;
822 static void runItOnce(cl_kernel myKernel, GLuint keyVBO, GLuint keyValueVBO, GLuint destVBO,
int keysIn,
int keyValuesIn,
float frac) {
824 size_t global_work_size;
825 size_t local_work_size;
833 cl_mem output_buffer;
837 printf (
"calling glFinish()\n");
840 printf (
"runItOnce, frac %f keysIn %d keyValuesIn %d\n",frac,keysIn,keyValuesIn);
846 cl_mem myK, myKV,myVert;
851 p = (ppOpenCL_Utils)tg->OpenCL_Utils.prv;
854 if (myK==NULL) myK = clCreateFromGLBuffer(p->CL_context, CL_MEM_READ_ONLY, keyVBO, &err);
855 TEST_ERR(
"clCreateFromGLBuffer 1",err);
857 if (myKV == NULL) myKV = clCreateFromGLBuffer(p->CL_context, CL_MEM_READ_ONLY, keyValueVBO, &err);
858 TEST_ERR(
"clCreateFromGLBuffer 2",err);
861 if (myVert==NULL) myVert = clCreateFromGLBuffer(p->CL_context, CL_MEM_WRITE_ONLY, destVBO, &err);
862 TEST_ERR(
"clCreateFromGLBuffer 3",err);
863 clFinish(p->CL_queue);
866 myK = clCreateFromGLBuffer(p->CL_context, CL_MEM_READ_ONLY, keyVBO, &err);
867 TEST_ERR(
"clCreateFromGLBuffer 1",err);
869 myKV = clCreateFromGLBuffer(p->CL_context, CL_MEM_READ_ONLY, keyValueVBO, &err);
870 TEST_ERR(
"clCreateFromGLBuffer 2",err);
873 myVert = clCreateFromGLBuffer(p->CL_context, CL_MEM_WRITE_ONLY, destVBO, &err);
874 TEST_ERR(
"clCreateFromGLBuffer 3",err);
875 clFinish(p->CL_queue);
882 err = clEnqueueAcquireGLObjects(p->CL_queue, 1, &myVert, 0, NULL, NULL);
883 TEST_ERR(
"clEnqueueAcquire",err);
889 err = clSetKernelArg(myKernel, 0,
sizeof(cl_mem), &myK);
890 TEST_ERR(
"clSetKernelArg",err);
891 err = clSetKernelArg(myKernel, 1,
sizeof(cl_mem), &myKV);
892 TEST_ERR(
"clSetKernelArg",err);
893 err = clSetKernelArg(myKernel, 2,
sizeof(cl_mem), &myVert);
894 TEST_ERR(
"clSetKernelArg",err);
895 err =clSetKernelArg(myKernel, 3,
sizeof(
int), &keysIn);
896 TEST_ERR(
"clSetKernelArg",err);
897 err =clSetKernelArg(myKernel, 4,
sizeof(
int), &keyValuesIn);
898 TEST_ERR(
"clSetKernelArg",err);
899 err =clSetKernelArg(myKernel, 5,
sizeof(
float), &frac);
900 TEST_ERR(
"clSetKernelArg",err);
905 output_buffer = clCreateBuffer(p->CL_context, CL_MEM_WRITE_ONLY,
sizeof(
float) * keyValuesIn/keysIn, NULL, NULL);
906 err = clSetKernelArg(myKernel, 6,
sizeof(cl_mem), &output_buffer);
907 TEST_ERR(
"clSetKernelArg",err);
915 #define MYWG (p->CL_default_workgroup_size)
918 global_work_size = (size_t) (keysIn) / MYWG;
919 else global_work_size = 0;
922 global_work_size += 1;
925 global_work_size *= MYWG;
929 local_work_size = MYWG;
934 err = clEnqueueNDRangeKernel(p->CL_queue, myKernel, 1, NULL, &global_work_size, &local_work_size, 0, NULL, NULL);
935 TEST_ERR(
"clEnqueueNDRangeKernel", err);
940 clFinish(p->CL_queue);
942 printf (
"past clFinish\n");
955 err = clEnqueueReleaseGLObjects(p->CL_queue, 1, &myKV, 0, NULL, NULL);
956 TEST_ERR(
"clEnqueueRelease",err);
958 err = clReleaseMemObject(myK) || clReleaseMemObject(myKV) || clReleaseMemObject(myVert);
959 TEST_ERR(
"clReleaseMemObject",err);
961 #endif // TEST_GLOBAL
965 err = clEnqueueReadBuffer (p->CL_queue, output_buffer,
966 CL_TRUE, 0,
sizeof(
float) * 6 ,
969 if (err != CL_SUCCESS) {
970 printCLError(
"clEnqueueReadBuffer",err);
975 for (i=0; i < (keyValuesIn/keysIn); i++) {
976 printf (
"rv %d is %f\n", i, rvs[i]);
978 clReleaseMemObject(output_buffer);
988 void runOpenCLInterpolator(
struct CRStruct *route,
struct X3D_Node * toNode,
int toOffset) {
990 GLuint keyValueVBO = 0;
999 printf (
"RUNNING OPENCL INTERPOLATOR PROGRAM %p\n",route->CL_Interpolator);
1000 printf (
"it is coming from a %s\n",stringNodeType(route->routeFromNode->_nodeType));
1001 printf (
"and, it is going to a %s\n",stringNodeType(toNode->_nodeType));
1002 printf (
"with a length of %d\n",route->len);
1005 if (route->CL_Interpolator == NULL) {
1006 printf (
"runCLInterpolator - interpolator is NULL??\n");
1010 if ((toNode == NULL) || (route->routeFromNode == NULL)) {
1011 printf (
"runCLInterpolator - error - destination or source NULL\n");
1016 switch (route->routeFromNode->_nodeType) {
1017 case NODE_CoordinateInterpolator: {
1019 keyVBO = px->_keyVBO;
1020 keyValueVBO = px->_keyValueVBO;
1022 keyValuesIn = px->keyValue.n;
1023 frac = px->set_fraction;
1026 default: ConsoleMessage (
"do not route from a node of %s on the GPU - help!\n",
1027 stringNodeType(route->routeFromNode->_nodeType));
1030 if ((keyVBO == 0) || (keyValueVBO == 0)) {
1031 printf (
"runCLInterpolator - error - source VBOS are %d %d, should not be zero\n",
1032 keyVBO, keyValueVBO);
1036 switch (toNode->_nodeType) {
1037 case NODE_Coordinate: {
1042 for (i=0; i<vectorSize(px->_parentVector); i++) {
1043 struct X3D_Node * me = vector_get(
struct X3D_Node *, px->_parentVector, i);
1047 destVBO = pr.VBO_buffers[VERTEX_VBO];
1050 runItOnce(route->CL_Interpolator, keyVBO, keyValueVBO, destVBO, keysIn, keyValuesIn, frac);
1056 default: ConsoleMessage (
"do not route from a node of %s on the GPU - help!\n",
1057 stringNodeType(route->routeFromNode->_nodeType));
1061 printf (
"so, if we were to run the interp, keyVBO %d keyValueVBO %d destVBO %d keysIn %d keyValuesIn %d frac %f\n",
1062 keyVBO, keyValueVBO, destVBO, keysIn, keyValuesIn, frac);
1067 #ifdef GL_ES_VERSION_2_0
1068 static const char* interpolator_headers =
" \
1069 //#pragma OPENCL EXTENSION cl_khr_fp64 : enable \n\
1070 #pragma OPENCL EXTENSION cl_khr_byte_addressable_store : enable \n\
1071 //#pragma OPENCL EXTENSION CL_APPLE_gl_sharing : enable \n\
1072 //#pragma OPENCL EXTENSION CL_KHR_gl_sharing : enable \n\
1073 //#pragma OPENCL EXTENSION cl_khr_select_fprounding_mode : enable \n\
1078 static const char* interpolator_headers =
" \
1079 #pragma OPENCL EXTENSION cl_khr_byte_addressable_store : enable \n\
1080 //#pragma OPENCL EXTENSION cl_khr_gl_sharing : enable \n\
1085 static const char* coordinateInterpolator_kernel =
" \
1086 /* Function prototypes */ \n \
1087 int find_key (int kin, float frac, __global float *keys); \n \
1089 int find_key (int kin, float frac, __global float *keys) { \n \
1092 for (counter=1; counter <= kin; counter++) { \n \
1093 if (frac <keys[counter]) { \n \
1094 return counter; \n \
1097 return kin; /* huh? not found! */ \n \
1100 /********************************************************************************/ \n\
1102 __kernel void compute_collide ( \n\
1103 __global float *keys, /* 0 */ \n\
1104 __global float *keyValues, /* 1 */ \n \
1105 __global float *destVertices, /* 2 */ \n \
1106 const int kin, /* 3 */ \n\
1107 const int kvin, /* 4 */ \n \
1108 const float frac /* 5 */ \n \
1109 /* , __global float *output */ \n \
1112 int i_am_canadian = get_global_id(0); \n\
1114 /* get keysPerKeyValue */ \n \
1115 int kpkv = kvin/kin; \n\
1116 if (i_am_canadian > kpkv) return; /* this invocation is above our bounds */ \n\
1118 //output[i_am_canadian] = -999.9f; /* convert_float(get_global_id(0)); */ /* keys[kin-1]; */ \n \
1121 //output[i_am_canadian] = destVertices[i_am_canadian]; \n \
1123 /* set fraction less than or greater than keys */ \n\
1124 if (frac <= keys[0]) { \n\
1125 //output[i_am_canadian] = -100.0f; \n \
1126 destVertices[i_am_canadian*3+0] = keyValues[i_am_canadian*3+0]; \n \
1127 destVertices[i_am_canadian*3+1] = keyValues[i_am_canadian*3+1]; \n \
1128 destVertices[i_am_canadian*3+2] = keyValues[i_am_canadian*3+2]; \n \
1129 } else if (frac >=keys[kin-1]) { \n \
1130 //output[i_am_canadian] = 100.0f; \n\
1131 destVertices[i_am_canadian*3+0] = keyValues[(kvin - kpkv + i_am_canadian)*3+0]; \n \
1132 destVertices[i_am_canadian*3+1] = keyValues[(kvin - kpkv + i_am_canadian)*3+1]; \n \
1133 destVertices[i_am_canadian*3+2] = keyValues[(kvin - kpkv + i_am_canadian)*3+2]; \n \
1135 int myKey = find_key(kin,frac,keys); \n \
1136 float interval = (frac - keys[myKey-1]) / (keys[myKey] - keys[myKey-1]); \n \
1137 //output[i_am_canadian] = convert_float(myKey*100+kpkv); \n \
1139 int thisone = myKey*kpkv*3; \n \
1140 int prevone = (myKey-1) * kpkv *3; \n \
1141 //output[i_am_canadian] = convert_float(thisone * 100 + prevone)+interval; \n \
1142 prevone = prevone + i_am_canadian*3; \n \
1143 thisone = thisone + i_am_canadian*3; \n \
1144 destVertices[i_am_canadian*3+0] = keyValues[prevone+0] + interval*(keyValues[thisone+0]-keyValues[prevone+0]); \n \
1145 destVertices[i_am_canadian*3+1] = keyValues[prevone+1] + interval*(keyValues[thisone+1]-keyValues[prevone+1]); \n \
1146 destVertices[i_am_canadian*3+2] = keyValues[prevone+2] + interval*(keyValues[thisone+2]-keyValues[prevone+2]); \n \
1148 //output[i_am_canadian] = destVertices[i_am_canadian*3]; \n \
1154 #endif //HAVE_OPENCL