FreeWRL/FreeX3D  3.0.0
OpenCL_Utils.c
1 /*
2 
3 
4 Render the children of nodes.
5 
6 */
7 
8 /****************************************************************************
9  This file is part of the FreeWRL/FreeX3D Distribution.
10 
11  Copyright 2009 CRC Canada. (http://www.crc.gc.ca)
12 
13  FreeWRL/FreeX3D is free software: you can redistribute it and/or modify
14  it under the terms of the GNU Lesser Public License as published by
15  the Free Software Foundation, either version 3 of the License, or
16  (at your option) any later version.
17 
18  FreeWRL/FreeX3D is distributed in the hope that it will be useful,
19  but WITHOUT ANY WARRANTY; without even the implied warranty of
20  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21  GNU General Public License for more details.
22 
23  You should have received a copy of the GNU General Public License
24  along with FreeWRL/FreeX3D. If not, see <http://www.gnu.org/licenses/>.
25 ****************************************************************************/
26 
27 
28 #include <config.h>
29 #include <system.h>
30 #include <display.h>
31 #include <internal.h>
32 
33 #include <libFreeWRL.h>
34 
35 #include "../scenegraph/Viewer.h"
36 #include "../scenegraph/RenderFuncs.h"
37 #include "../vrml_parser/Structs.h"
38 
39 #include "../main/headers.h"
40 #include "../vrml_parser/CRoutes.h"
41 #include "../scenegraph/Collision.h"
42 
43 #include "OpenCL_Utils.h"
44 
45 #ifdef HAVE_OPENCL
46 
47 
48 static bool initialize_OpenCL();
49 static void createGPUInterpolators();
50 static const char* coordinateInterpolator_kernel;
51 static const char* interpolator_headers;
52 
53 
54 static void *OpenCL_Utils_constructor(){
55  void *v = malloc(sizeof(struct pOpenCL_Utils));
56  memset(v,0,sizeof(struct pOpenCL_Utils));
57  return v;
58 }
59 
60 
61 void OpenCL_Utils_init(struct tOpenCL_Utils *t)
62 {
63  //printf ("start calling OpenCL_Utils_init - t is %p\n",t);
64  //private
65  t->prv = OpenCL_Utils_constructor();
66  {
67  //ppOpenCL_Utils p = (ppOpenCL_Utils)t->prv;
68  }
69 
70  t->OpenCL_Initialized = FALSE;
71  t->OpenCL_OK = FALSE;
72  //printf ("done calling OpenCL_Utils_init\n");
73 
74 }
75 
76 void fwl_OpenCL_startup(struct tOpenCL_Utils *t) {
77  //printf ("called fwl_OpenCL_startup...\n");
78 if (t->OpenCL_Initialized) printf (".... fwl_opencl already done?\n"); else printf ("..... OpenCL init currently false\n");
79 
80  initialize_OpenCL();
81 
82  //printf ("past the initialize_OpenCL call\n");
83  createGPUCollisionProgram();
84 
85  createGPUInterpolators();
86  //printf ("finished called fwl_OpenCL_startup...\n");
87  t->OpenCL_Initialized = TRUE;
88 
89 }
90 
91 
92 static char *getCLErrorString(cl_int err) {
93  switch (err) {
94 #ifdef CL_VERSION_1_2
95  case CL_COMPILE_PROGRAM_FAILURE: return "CL_COMPILE_PROGRAM_FAILURE"; break;
96  case CL_LINKER_NOT_AVAILABLE: return "CL_LINKER_NOT_AVAILABLE"; break;
97  case CL_LINK_PROGRAM_FAILURE: return "CL_LINK_PROGRAM_FAILURE"; break;
98  case CL_DEVICE_PARTITION_FAILED: return "CL_DEVICE_PARTITION_FAILED"; break;
99  case CL_KERNEL_ARG_INFO_NOT_AVAILABLE: return "CL_KERNEL_ARG_INFO_NOT_AVAILABLE"; break;
100  case CL_INVALID_IMAGE_DESCRIPTOR: return "CL_INVALID_IMAGE_DESCRIPTOR"; break;
101  case CL_INVALID_COMPILER_OPTIONS: return "CL_INVALID_COMPILER_OPTIONS"; break;
102  case CL_INVALID_LINKER_OPTIONS: return "CL_INVALID_LINKER_OPTIONS"; break;
103  case CL_INVALID_DEVICE_PARTITION_COUNT: return "CL_INVALID_DEVICE_PARTITION_COUNT"; break;
104 
105 #endif //CL_VERSION_1_2
106 
107  case CL_DEVICE_NOT_FOUND: return "CL_DEVICE_NOT_FOUND"; break;
108  case CL_DEVICE_NOT_AVAILABLE: return "CL_DEVICE_NOT_AVAILABLE"; break;
109  case CL_COMPILER_NOT_AVAILABLE: return "CL_COMPILER_NOT_AVAILABLE"; break;
110  case CL_MEM_OBJECT_ALLOCATION_FAILURE: return "CL_MEM_OBJECT_ALLOCATION_FAILURE"; break;
111  case CL_OUT_OF_RESOURCES: return "CL_OUT_OF_RESOURCES"; break;
112  case CL_OUT_OF_HOST_MEMORY: return "CL_OUT_OF_HOST_MEMORY"; break;
113  case CL_PROFILING_INFO_NOT_AVAILABLE: return "CL_PROFILING_INFO_NOT_AVAILABLE"; break;
114  case CL_MEM_COPY_OVERLAP: return "CL_MEM_COPY_OVERLAP"; break;
115  case CL_IMAGE_FORMAT_MISMATCH: return "CL_IMAGE_FORMAT_MISMATCH"; break;
116  case CL_IMAGE_FORMAT_NOT_SUPPORTED: return "CL_IMAGE_FORMAT_NOT_SUPPORTED"; break;
117  case CL_BUILD_PROGRAM_FAILURE: return "CL_BUILD_PROGRAM_FAILURE"; break;
118  case CL_MAP_FAILURE: return "CL_MAP_FAILURE"; break;
119  case CL_MISALIGNED_SUB_BUFFER_OFFSET: return "CL_MISALIGNED_SUB_BUFFER_OFFSET"; break;
120  case CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST: return "CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST"; break;
121  case CL_INVALID_VALUE: return "CL_INVALID_VALUE"; break;
122  case CL_INVALID_DEVICE_TYPE: return "CL_INVALID_DEVICE_TYPE"; break;
123  case CL_INVALID_PLATFORM: return "CL_INVALID_PLATFORM"; break;
124  case CL_INVALID_DEVICE: return "CL_INVALID_DEVICE"; break;
125  case CL_INVALID_CONTEXT: return "CL_INVALID_CONTEXT"; break;
126  case CL_INVALID_QUEUE_PROPERTIES: return "CL_INVALID_QUEUE_PROPERTIES"; break;
127  case CL_INVALID_COMMAND_QUEUE: return "CL_INVALID_COMMAND_QUEUE"; break;
128  case CL_INVALID_HOST_PTR: return "CL_INVALID_HOST_PTR"; break;
129  case CL_INVALID_MEM_OBJECT: return "CL_INVALID_MEM_OBJECT"; break;
130  case CL_INVALID_IMAGE_FORMAT_DESCRIPTOR: return "CL_INVALID_IMAGE_FORMAT_DESCRIPTOR"; break;
131  case CL_INVALID_IMAGE_SIZE: return "CL_INVALID_IMAGE_SIZE"; break;
132  case CL_INVALID_SAMPLER: return "CL_INVALID_SAMPLER"; break;
133  case CL_INVALID_BINARY: return "CL_INVALID_BINARY"; break;
134  case CL_INVALID_BUILD_OPTIONS: return "CL_INVALID_BUILD_OPTIONS"; break;
135  case CL_INVALID_PROGRAM: return "CL_INVALID_PROGRAM"; break;
136  case CL_INVALID_PROGRAM_EXECUTABLE: return "CL_INVALID_PROGRAM_EXECUTABLE"; break;
137  case CL_INVALID_KERNEL_NAME: return "CL_INVALID_KERNEL_NAME"; break;
138  case CL_INVALID_KERNEL_DEFINITION: return "CL_INVALID_KERNEL_DEFINITION"; break;
139  case CL_INVALID_KERNEL: return "CL_INVALID_KERNEL"; break;
140  case CL_INVALID_ARG_INDEX: return "CL_INVALID_ARG_INDEX"; break;
141  case CL_INVALID_ARG_VALUE: return "CL_INVALID_ARG_VALUE"; break;
142  case CL_INVALID_ARG_SIZE: return "CL_INVALID_ARG_SIZE"; break;
143  case CL_INVALID_KERNEL_ARGS: return "CL_INVALID_KERNEL_ARGS"; break;
144  case CL_INVALID_WORK_DIMENSION: return "CL_INVALID_WORK_DIMENSION"; break;
145  case CL_INVALID_WORK_GROUP_SIZE: return "CL_INVALID_WORK_GROUP_SIZE"; break;
146  case CL_INVALID_WORK_ITEM_SIZE: return "CL_INVALID_WORK_ITEM_SIZE"; break;
147  case CL_INVALID_GLOBAL_OFFSET: return "CL_INVALID_GLOBAL_OFFSET"; break;
148  case CL_INVALID_EVENT_WAIT_LIST: return "CL_INVALID_EVENT_WAIT_LIST"; break;
149  case CL_INVALID_EVENT: return "CL_INVALID_EVENT"; break;
150  case CL_INVALID_OPERATION: return "CL_INVALID_OPERATION"; break;
151  case CL_INVALID_GL_OBJECT: return "CL_INVALID_GL_OBJECT"; break;
152  case CL_INVALID_BUFFER_SIZE: return "CL_INVALID_BUFFER_SIZE"; break;
153  case CL_INVALID_MIP_LEVEL: return "CL_INVALID_MIP_LEVEL"; break;
154  case CL_INVALID_GLOBAL_WORK_SIZE: return "CL_INVALID_GLOBAL_WORK_SIZE"; break;
155  case CL_INVALID_PROPERTY: return "CL_INVALID_PROPERTY"; break;
156  case CL_INVALID_GL_SHAREGROUP_REFERENCE_KHR: return "CL_INVALID_GL_SHAREGROUP_REFERENCE_KHR"; break;
157  default :{return "hmmm - error message makes no sense";}
158  }
159 
160 }
161 void printCLError(const char *where, cl_int err) {
162  printf ("OpenCL fn %s, error %s (%d)\n",where,getCLErrorString(err),err);
163 }
164 
165 /********************************************************************************/
166 /* */
167 /* */
168 /********************************************************************************/
169 
170 static bool initialize_OpenCL() {
171 
172  ppOpenCL_Utils p;
173  ttglobal tg = gglobal();
174  p = (ppOpenCL_Utils)tg->OpenCL_Utils.prv;
175 
176  cl_int err;
177 
178  // debugging information
179  cl_int rv;
180  size_t rvlen;
181 
182  #ifdef GPU_DEBUG
183  size_t wg_size;
184  cl_ulong longish;
185  size_t xyz;
186  char rvstring[1000];
187  int gpu;
188  #endif // GPU_DEBUG
189 
190 // get the current context.
191 // windows - IntPtr curDC = wglGetCurrentDC();
192 // then in the new compute context, we pass in the context
193 
194  /* initialized yet? */
195  //if (p->kernel != NULL) return false;
196 
197 
198  // get the device id
199 
200 #if defined (_MSC_VER)
201 
202  if(1)
203  err = extraInitFromNvidiaSamples(p);
204  else
205  {
206  cl_int ciErrNum;
207  cl_platform_id cpPlatform = NULL; // OpenCL platform
208  // Get the NVIDIA platform
209  //ciErrNum = oclGetPlatformID(&cpPlatform);
210  {
211  /* from OpenCL Programming Guide, pg 338 */
212  cl_context_properties properties[] = {
213  CL_GL_CONTEXT_KHR, (cl_context_properties)wglGetCurrentContext(),
214  CL_WGL_HDC_KHR, (cl_context_properties)wglGetCurrentDC(),
215  CL_CONTEXT_PLATFORM, (cl_context_properties)cpPlatform,
216  0};
217 
218  p->context = clCreateContext(properties, 1, &p->device_id, NULL, NULL, &err);
219  }
220  }
221 #endif // _MSC_VER
222 
223 /* is this Linux? */
224 #if defined (__linux__)
225 
226  #define MAX_OPENCL_PLATFORMS 10
227  #define MAX_OPENCL_DEVICES 32
228  cl_platform_id platforms[MAX_OPENCL_PLATFORMS];
229  cl_device_id devices[MAX_OPENCL_DEVICES];
230  cl_uint numPlats;
231  cl_uint numDevs;
232 
233  // we may have OpenCL, but maybe we dont have cl_khr_gl_sharing, so look for it
234  int selectedPlatform = -1;
235  int selectedDevice = -1;
236 
237  // printf ("have linux uere\n");
238  // printf ("OpenCL - before clGetPlatformIDs\n");
239 
240  err = clGetPlatformIDs(10,platforms,&numPlats);
241  TEST_ERR("clGetPlatformIDs",err);
242 
243  //printf ("looking for up to 10 platforms, got %d\n",numPlats);
244  if (numPlats <1) {
245  printf ("OpenCL init - numPlats is %d, OpenCL device not found\n",numPlats);
246  return FALSE;
247  }
248 
249  // bounds check
250  if (numPlats >= MAX_OPENCL_PLATFORMS) {
251  printf ("OpenCL init - numPlats is %d, setting to %d\n",numPlats,MAX_OPENCL_PLATFORMS);
252  numPlats = MAX_OPENCL_PLATFORMS;
253  }
254 
255  /* not sure what platform to choose, if more than 1...
256  {
257  int i;
258 
259  //printf ("printing out the platform names:\n");
260  for (i=0; i<numPlats; i++) {
261  char platname[500];
262  cl_int err = clGetPlatformInfo(platforms[i],CL_PLATFORM_NAME,sizeof(platname),platname,NULL);
263  TEST_ERR("clGetPlatformInfo",err);
264  printf ("GetPlatfromInfo for %d is :%s:\n",i,platname);
265  }
266 
267  }
268  */
269 
270  //printf ("now, trying to get the device IDS\n");
271 
272 
273 
274 
275  {
276  int i,j;
277  printf ("printing out the device names for each platform found\n");
278  for (i=0; i<numPlats; i++) {
279  err = clGetDeviceIDs(platforms[i], CL_DEVICE_TYPE_GPU, MAX_OPENCL_DEVICES, devices, &numDevs);
280  printf ("done the clGetDeviceIDS call for platform %d - devices %d\n",i,numDevs);
281 
282 /* XXX */
283  for (j=0; j<numDevs; j++) {
284  char crv[1000];
285  size_t crvs;
286 
287  /*
288  err = clGetDeviceInfo(devices[j],CL_DEVICE_NAME,1000,crv,&crvs);
289  printf ("NAME for %d is %s\n",j,crv);
290  err = clGetDeviceInfo(devices[j],CL_DEVICE_VENDOR,1000,crv,&crvs);
291  printf ("VENDOR for %d is %s\n",j,crv);
292  err = clGetDeviceInfo(devices[j],CL_DEVICE_PROFILE,1000,crv,&crvs);
293  printf ("PROFILE for %d is %s\n",j,crv);
294  */
295 
296  err = clGetDeviceInfo(devices[j],CL_DEVICE_EXTENSIONS,1000,crv,&crvs);
297  if (err != CL_SUCCESS) {
298  printCLError("clGetDeviceIDs",err);
299  return FALSE;
300  }
301  // printf ("EXTENSIONS for %d is %s\n",j,crv);
302 
303  if (strstr(crv,"cl_khr_gl_sharing") != NULL) {
304  printf ("**** Found cl_khr_gl_sharing ****\n");
305  selectedPlatform = i;
306  selectedDevice = j;
307  p->CL_device_id = devices[j];
308  }
309  }
310  }
311  }
312 
313  //printf ("Linux, have device id...\n");
314  if ((selectedPlatform <0) || (selectedDevice<0)) {
315  printCLError("No good OpenCL device or platform found, error ",err);
316  return FALSE;
317  }
318 
319  // redo the calls, now that we have (the best?) match
320  if ((selectedPlatform != 0) && (selectedDevice != 0)) {
321  //printf ("regetting platform %d and device %d\n",selectedPlatform, selectedDevice);
322  err = clGetDeviceIDs(platforms[selectedPlatform], CL_DEVICE_TYPE_GPU, MAX_OPENCL_DEVICES, devices, &numDevs);
323  }
324 
325  // now save the device id
326  p->CL_device_id = devices[selectedDevice];
327 
328 
329  // printf ("\n.....now doing the context sharing getting.....\n\n");
330 
331  cl_context_properties properties[] = {
332  CL_GL_CONTEXT_KHR, (cl_context_properties)glXGetCurrentContext(),
333  CL_GLX_DISPLAY_KHR, (cl_context_properties)glXGetCurrentDisplay(),
334  CL_CONTEXT_PLATFORM, (cl_context_properties)platforms[selectedPlatform],
335  0 };
336 
337 // function pointer typedefs must use the
338 // following naming convention
339 typedef CL_API_ENTRY cl_int
340  (CL_API_CALL *clGetGLContextInfoKHR_fn)(
341  const cl_context_properties * /* properties */,
342  cl_gl_context_info /* param_name */,
343  size_t /* param_value_size */,
344  void * /* param_value */,
345  size_t * /*param_value_size_ret*/);
346 
347 clGetGLContextInfoKHR_fn clGetGLContextInfoKHR = NULL;
348 
349 #ifdef CL_VERSION_1_2
350 clGetGLContextInfoKHR = (clGetGLContextInfoKHR_fn)clGetExtensionFunctionAddressForPlatform(platforms[selectedPlatform],"clGetGLContextInfoKHR");
351 #else
352 #ifdef CL_VERSION_1_1
353 clGetGLContextInfoKHR = (clGetGLContextInfoKHR_fn)clGetExtensionFunctionAddress("clGetGLContextInfoKHR");
354 #endif
355 #endif
356  // find CL capable devices in the current GL context
357  size_t size;
358  err = clGetGLContextInfoKHR(properties, CL_DEVICES_FOR_GL_CONTEXT_KHR,
359  MAX_OPENCL_DEVICES*sizeof(cl_device_id), devices, &size);
360 
361  TEST_ERR("clGetGLContextInfoKHR",err);
362 
363  printf ("clGetGLContextInfoKHR returns size of %d\n",size);
364 
365  //printf ("going to clCreateContextFromType:\n");
366  //printf ("just so we know, p is %p\n",p);
367  //printf ("just so we know, p->CL_context is %p\n",p->CL_context);
368 
369  p->CL_context=clCreateContextFromType(properties, CL_DEVICE_TYPE_GPU, NULL, NULL, &err);
370 
371  //printf ("done the createContextFromType, it is %p\n",p->CL_context);
372 
373  TEST_ERR("clCreateContextFromType",err);
374 
375 #endif // linux
376 
377 /* how about Android (and maybe IPHONE) using OpenCL-ES 2.0? */
378 #ifdef GL_ES_VERSION_2_0
379 
380  cl_platform_id platforms[10];
381  cl_uint numPlats;
382 
383  err = getFunctionHandles();
384 
385 
386 
387  if (err != CL_SUCCESS) {
388  printCLError("clCreateContext",err);
389  return FALSE;
390  }
391 
392 
393  err = clGetPlatformIDs(10,platforms,&numPlats);
394  TEST_ERR("clGetPlatformIDs",err);
395  printf ("looking for up to 10 platforms, got %d",numPlats);
396 
397  cl_platform_id platform;
398  err = clGetPlatformIDs(1,&platform,NULL);
399  TEST_ERR("clGetPlatformIDs",err);
400 
401  err = clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 1, &p->device_id, NULL);
402 
403  if (err != CL_SUCCESS) {
404  printCLError("clGetDeviceIDs",err);
405  return FALSE;
406  }
407 
408 /*
409  cl_context_properties properties[] = {
410  CL_GL_CONTEXT_KHR, (cl_context_properties)glXGetCurrentContext(),
411  CL_GLX_DISPLAY_KHR, (cl_context_properties)glXGetCurrentDisplay(),
412  CL_CONTEXT_PLATFORM, (cl_context_properties)platform,
413  0 };
414  p->context=clCreateContextFromType(properties, CL_DEVICE_TYPE_GPU, NULL, NULL, &err);
415  TEST_ERR("clCreateContextFromType",err);
416 */
417 
418  p->CL_context=clCreateContextFromType(NULL, CL_DEVICE_TYPE_ANY, NULL, NULL, &err);
419  TEST_ERR("clCreateContextFromType",err);
420 
421  printf ("remember - building currently without the CL_KHR_gl_sharing enabled - the clCreateFromGLBuffer will error out, so return code removed.");
422 
423 #endif //GL_ES_VERSION_2_0
424 
425 
426  // create a command queue
427 
428  p->CL_queue = clCreateCommandQueue(p->CL_context, p->CL_device_id, 0, &err);
429  //printf ("CL_queue is %p for context %p, device %d\n",p->CL_queue, p->CL_context, p->CL_device_id);
430 
431 
432  if (!p->CL_queue || (err != CL_SUCCESS)) {
433  printCLError("clCreateCommandQueue",err);
434  return FALSE;
435  }
436 
437  rv = clGetDeviceInfo (p->CL_device_id, CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof(size_t), &(p->CL_default_workgroup_size), &rvlen);
438  if ((rv != CL_SUCCESS) || (err != CL_SUCCESS)) {
439  printCLError("clGetDeviceInfo",err);
440  return FALSE;
441  }
442 
443 
444  #ifdef GPU_DEBUG
445  // Find the work group size
446  rv = clGetDeviceInfo (p->CL_device_id, CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof(size_t), &wg_size, &rvlen);
447  TEST_ERR("clGetDeviceInfo",rv);
448  printf ("CL_DEVICE_MAX_WORK_GROUP_SIZE %d\n",wg_size);
449 
450  // debugging information
451  rv = clGetPlatformInfo(platforms[0],CL_PLATFORM_PROFILE,1000,rvstring,&rvlen);
452  printf ("CL_PLATFORM_PROFILE :%s:\n",rvstring);
453  rv = clGetPlatformInfo(platforms[0],CL_PLATFORM_VERSION,1000,rvstring,&rvlen);
454  printf ("CL_PLATFORM_VERSION :%s:\n",rvstring);
455  rv = clGetPlatformInfo(platforms[0],CL_PLATFORM_NAME,1000,rvstring,&rvlen);
456  printf ("CL_PLATFORM_NAME :%s:\n",rvstring);
457  rv = clGetPlatformInfo(platforms[0],CL_PLATFORM_VENDOR,1000,rvstring,&rvlen);
458  printf ("CL_PLATFORM_VENDOR :%s:\n",rvstring);
459  rv = clGetPlatformInfo(platforms[0],CL_PLATFORM_EXTENSIONS,1000,rvstring,&rvlen);
460  printf ("CL_PLATFORM_EXTENSIONS :%s:\n",rvstring);
461 
462  rv = clGetDeviceInfo (p->CL_device_id, CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(size_t), &xyz, &rvlen);
463  printf ("CL_DEVICE_MAX_COMPUTE_UNITS %d\n",xyz);
464  rv = clGetDeviceInfo (p->CL_device_id, CL_DEVICE_GLOBAL_MEM_CACHE_SIZE, sizeof(cl_ulong), &longish, &rvlen);
465  printf ("CL_DEVICE_GLOBAL_MEM_CACHE_SIZE %lu\n",longish);
466  rv = clGetDeviceInfo (p->CL_device_id, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof(cl_ulong), &longish, &rvlen);
467  printf ("CL_DEVICE_GLOBAL_MEM_SIZE %lu\n",longish);
468  rv = clGetDeviceInfo (p->CL_device_id, CL_DEVICE_LOCAL_MEM_SIZE, sizeof(cl_ulong), &longish, &rvlen);
469  printf ("CL_DEVICE_LOCAL_MEM_SIZE %lu\n",longish);
470  rv= clGetDeviceInfo (p->CL_device_id,CL_DEVICE_EXTENSIONS, 1000, rvstring,&rvlen);
471  printf ("CL_DEVICE_EXTENSIONS :%s:\n",rvstring);
472  rv= clGetDeviceInfo (p->CL_device_id,CL_DEVICE_PROFILE, 1000, rvstring,&rvlen);
473  printf ("CL_DEVICE_PROFILE :%s:\n",rvstring);
474  rv= clGetDeviceInfo (p->CL_device_id,CL_DEVICE_NAME, 1000, rvstring,&rvlen);
475  printf ("CL_DEVICE_NAME :%s:\n",rvstring);
476  rv= clGetDeviceInfo (p->CL_device_id,CL_DEVICE_VENDOR, 1000, rvstring,&rvlen);
477  printf ("CL_DEVICE_VENDOR :%s:\n",rvstring);
478  rv= clGetDeviceInfo (p->CL_device_id,CL_DEVICE_VERSION, 1000, rvstring,&rvlen);
479  printf ("CL_DEVICE_VERSION :%s:\n",rvstring);
480 
481 
482  #endif //GPU_DEBUG
483 #undef GPU_DEBUG
484 
485  // do this when we need collision - collision_initGPUCollide(p);
486 
487  return TRUE;
488 }
489 
490 
491 /********************************************************************************/
492 /* */
493 /* Android, (code might work on IPHONE) OpenGL ES 2.0, CL integration */
494 /* */
495 /********************************************************************************/
496 
497 #ifdef GL_ES_VERSION_2_0
498 
499 #include <dlfcn.h> // possibly Android only
500 
501 #define clGetPlatformIDs(aa,bb,cc) rclGetPlatformIDs(aa,bb,cc)
502 #define clGetPlatformInfo(aa,bb,cc,dd,ee) rclGetPlatformInfo(aa,bb,cc,dd,ee)
503 #define clGetDeviceIDs(aa,bb,cc,dd,ee) rclGetDeviceIDs(aa,bb,cc,dd,ee)
504 #define clGetDeviceInfo(aa,bb,cc,dd,ee) rclGetDeviceInfo(aa,bb,cc,dd,ee)
505 #define clCreateKernel(aa,bb,cc) rclCreateKernel(aa,bb,cc)
506 #define clBuildProgram(aa,bb,cc,dd,ee,ff) rclBuildProgram(aa,bb,cc,dd,ee,ff)
507 #define clCreateBuffer(aa,bb,cc,dd,ee) rclCreateBuffer(aa,bb,cc,dd,ee)
508 #define clCreateCommandQueue(aa,bb,cc,dd) rclCreateCommandQueue(aa,bb,cc,dd)
509 #define clCreateContextFromType(aa,bb,cc,dd,ee) rclCreateContextFromType(aa,bb,cc,dd,ee)
510 #define clCreateFromGLBuffer(aa,bb,cc,dd) rclCreateFromGLBuffer(aa,bb,cc,dd)
511 #define clCreateProgramWithSource(aa,bb,cc,dd,ee) rclCreateProgramWithSource(aa,bb,cc,dd,ee)
512 #define clEnqueueNDRangeKernel(aa,bb,cc,dd,ee,ff,gg,hh,ii) rclEnqueueNDRangeKernel(aa,bb,cc,dd,ee,ff,gg,hh,ii)
513 #define clEnqueueReadBuffer(aa,bb,cc,dd,ee,ff,gg,hh,ii) rclEnqueueReadBuffer(aa,bb,cc,dd,ee,ff,gg,hh,ii)
514 #define clEnqueueWriteBuffer(aa,bb,cc,dd,ee,ff,gg,hh,ii) rclEnqueueWriteBuffer(aa,bb,cc,dd,ee,ff,gg,hh,ii)
515 #define clGetKernelWorkGroupInfo(aa,bb,cc,dd,ee,ff) rclGetKernelWorkGroupInfo(aa,bb,cc,dd,ee,ff)
516 #define clGetProgramBuildInfo(aa,bb,cc,dd,ee,ff) rclGetProgramBuildInfo(aa,bb,cc,dd,ee,ff)
517 #define clReleaseMemObject(aa) rclReleaseMemObject(aa)
518 #define clSetKernelArg(aa,bb,cc,dd) rclSetKernelArg(aa,bb,cc,dd)
519 
520 static void *getCLHandle(){
521  void *res = NULL;
522  int which=0;
523 
524  res = dlopen("/system/lib/libOpenCL.so",RTLD_LAZY);
525  if(res==NULL){
526  res = dlopen("/system/vendor/lib/egl/libGLES_mali.so",RTLD_LAZY);
527  which = 1;
528  }
529  if(res==NULL){
530  res = dlopen("/system/lib/libllvm-a3xx.so",RTLD_LAZY);
531  which = 2;
532  }
533  if(res==NULL) {
534  ConsoleMessage("Could not open library :(\n");
535  return NULL;
536  }
537 
538  if (which==0) {
539  ConsoleMessage ("OpenCL lib - libOpenCL.so");
540  } else if (which == 1) {
541  ConsoleMessage ("OpenCL lib libGLES_mali.so");
542  } else if (which == 2) {
543  ConsoleMessage ("OpenCL Lib - liblvm-a3xx.so");
544  }
545  return res;
546 }
547 
548 cl_int (*rclGetPlatformIDs)(cl_uint /* num_entries */,
549  cl_platform_id * /* platforms */,
550  cl_uint * /* num_platforms */);
551 
552 
553 cl_int (*rclGetPlatformInfo)(cl_platform_id /* platform */,
554  cl_platform_info /* param_name */,
555  size_t /* param_value_size */,
556  void * /* param_value */,
557  size_t * /* param_value_size_ret */);
558 
559 cl_int (*rclGetDeviceIDs)(cl_platform_id /* platform */,
560  cl_device_type /* device_type */,
561  cl_uint /* num_entries */,
562  cl_device_id * /* devices */,
563  cl_uint * /* num_devices */);
564 
565 
566 cl_int (*rclGetDeviceInfo)(cl_device_id /* device */,
567  cl_device_info /* param_name */,
568  size_t /* param_value_size */,
569  void * /* param_value */,
570  size_t * /* param_value_size_ret */);
571 
572 cl_kernel (*rclCreateKernel)(cl_program /*program */,
573  const char * /* kernel name */,
574  cl_int * /* errorcode_ret */);
575 
576 cl_int (*rclBuildProgram)(cl_program /* program */,
577  cl_uint /* num_devices */,
578  const cl_device_id * /* device_list */,
579  const char * /* options */,
580  void (CL_CALLBACK * /* pfn_notify */)(cl_program /* program */, void * /* user_data */),
581  void * /* user_data */);
582 
583 cl_mem (*rclCreateBuffer)(cl_context /* context */,
584  cl_mem_flags /* flags */,
585  size_t /* size */,
586  void * /* host_ptr */,
587  cl_int * /* errcode_ret */);
588 
589 cl_command_queue (*rclCreateCommandQueue)(cl_context /* context */,
590  cl_device_id /* device */,
591  cl_command_queue_properties /* properties */,
592  cl_int * /* errcode_ret */);
593 
594 
595 cl_context (*rclCreateContextFromType)(const cl_context_properties * /* properties */,
596  cl_device_type /* device_type */,
597  void (CL_CALLBACK * /* pfn_notify*/ )(const char *, const void *, size_t, void *),
598  void * /* user_data */,
599  cl_int * /* errcode_ret */);
600 
601 cl_program (*rclCreateProgramWithSource)(cl_context /* context */,
602  cl_uint /* count */,
603  const char ** /* strings */,
604  const size_t * /* lengths */,
605  cl_int * /* errcode_ret */);
606 
607 
608 cl_int (*rclEnqueueNDRangeKernel)(cl_command_queue /* command_queue */,
609  cl_kernel /* kernel */,
610  cl_uint /* work_dim */,
611  const size_t * /* global_work_offset */,
612  const size_t * /* global_work_size */,
613  const size_t * /* local_work_size */,
614  cl_uint /* num_events_in_wait_list */,
615  const cl_event * /* event_wait_list */,
616  cl_event * /* event */);
617 
618 cl_int (*rclEnqueueReadBuffer)(cl_command_queue /* command_queue */,
619  cl_mem /* buffer */,
620  cl_bool /* blocking_read */,
621  size_t /* offset */,
622  size_t /* size */,
623  void * /* ptr */,
624  cl_uint /* num_events_in_wait_list */,
625  const cl_event * /* event_wait_list */,
626  cl_event * /* event */);
627 
628 
629 cl_int (*rclEnqueueWriteBuffer)(cl_command_queue /* command_queue */,
630  cl_mem /* buffer */,
631  cl_bool /* blocking_write */,
632  size_t /* offset */,
633  size_t /* size */,
634  const void * /* ptr */,
635  cl_uint /* num_events_in_wait_list */,
636  const cl_event * /* event_wait_list */,
637  cl_event * /* event */);
638 
639 
640 cl_int (*rclGetKernelWorkGroupInfo)(cl_kernel /* kernel */,
641  cl_device_id /* device */,
642  cl_kernel_work_group_info /* param_name */,
643  size_t /* param_value_size */,
644  void * /* param_value */,
645  size_t * /* param_value_size_ret */);
646 
647 
648 cl_int (*rclReleaseMemObject)(cl_mem /* memobj */);
649 
650 cl_int (*rclSetKernelArg)(cl_kernel /* kernel */,
651  cl_uint /* arg_index */,
652  size_t /* arg_size */,
653  const void * /* arg_value */);
654 
655 cl_mem (*rclCreateFromGLBuffer)(cl_context, cl_mem_flags, GLuint, cl_int *);
656 
657 cl_int (*rclGetProgramBuildInfo)(cl_program, cl_device_id, cl_program_build_info, size_t, void *, size_t *);
658 
659 static int getFunctionHandles(){
660  static void* getCLHandle();
661 
662  void *handle = getCLHandle();
663  if(handle==NULL) return CL_DEVICE_NOT_AVAILABLE;
664  rclGetPlatformIDs = (cl_int (*)(cl_uint,cl_platform_id *,cl_uint*))dlsym(handle,"clGetPlatformIDs");
665  rclGetPlatformInfo = (cl_int (*)(cl_platform_id, cl_platform_info, size_t, void *, size_t*))dlsym(handle,"clGetPlatformInfo");
666  rclGetDeviceIDs = (cl_int (*)(cl_platform_id, cl_device_type, cl_uint, cl_device_id *, cl_uint*))dlsym(handle,"clGetDeviceIDs");
667  rclGetDeviceInfo = (cl_int (*)(cl_device_id, cl_device_info, size_t, void *, size_t*))dlsym(handle,"clGetDeviceInfo");
668  rclBuildProgram = (cl_int (*)(cl_program,cl_uint,const cl_device_id *, const char *, void (CL_CALLBACK*)(cl_program,void*), void *))dlsym(handle,"clBuildProgram");
669  rclCreateBuffer = (cl_mem (*)(cl_context, cl_mem_flags, size_t, void *, cl_int *))dlsym(handle,"clCreateBuffer");
670  rclCreateKernel = (cl_kernel (*)(cl_program,const char*,cl_int*))dlsym(handle,"clCreateKernel");
671  rclCreateCommandQueue = (cl_command_queue (*) (cl_context,cl_device_id,cl_command_queue_properties,cl_int*))dlsym(handle,"clCreateCommandQueue");
672  rclCreateContextFromType = (cl_context (*)(const cl_context_properties*,cl_device_type,void(CL_CALLBACK*)(const char*,const void*,size_t,void*),void*,cl_int*))dlsym(handle,"clCreateContextFromType");
673  rclCreateProgramWithSource = (cl_program (*) (cl_context,cl_uint,const char**,const size_t*,cl_int*))dlsym(handle,"clCreateProgramWithSource");
674  rclEnqueueNDRangeKernel=(cl_int(*)(cl_command_queue,cl_kernel,cl_uint,const size_t*,const size_t*,const size_t*,cl_uint,const cl_event*,cl_event*))dlsym(handle,"clEnqueueNDRangeKernel");
675  rclEnqueueReadBuffer = (cl_int (*)(cl_command_queue,cl_mem,cl_bool,size_t,size_t,void*,cl_uint,const cl_event*,cl_event*))dlsym(handle,"clEnqueueReadBuffer");
676  rclEnqueueWriteBuffer = (cl_int (*)(cl_command_queue,cl_mem,cl_bool,size_t,size_t,const void*,cl_uint,const cl_event*,cl_event*))dlsym(handle,"clEnqueueWriteBuffer");
677  rclGetKernelWorkGroupInfo = (cl_int (*)(cl_kernel,cl_device_id,cl_kernel_work_group_info,size_t, void*, size_t *))dlsym(handle,"clGetKernelWorkGroupInfo");
678  rclReleaseMemObject = (cl_int (*)(cl_mem))dlsym(handle,"clReleaseMemObject");
679  rclSetKernelArg = (cl_int (*)(cl_kernel,cl_uint,size_t,const void *))dlsym(handle,"clSetKernelArg");
680  rclGetProgramBuildInfo = (cl_int (*)(cl_program, cl_device_id, cl_program_build_info, size_t, void *, size_t *))dlsym(handle,"clGetProgramBuildInfo");
681  rclCreateFromGLBuffer = (cl_mem (*)(cl_context, cl_mem_flags, GLuint, cl_int *))dlsym(handle,"clCreateFromGLBuffer");
682 
683 
684 
685  if (!(rclGetPlatformIDs) || !(rclGetPlatformInfo) || !(rclGetDeviceIDs) || !(rclGetDeviceInfo) ||
686  !(rclBuildProgram) || !(rclCreateBuffer) || !(rclCreateKernel) || !(rclCreateCommandQueue) ||
687  !(rclCreateContextFromType) || !(rclCreateProgramWithSource) || !(rclEnqueueNDRangeKernel) || !(rclEnqueueReadBuffer) ||
688  !(rclEnqueueWriteBuffer) || !(rclGetKernelWorkGroupInfo) || !(rclReleaseMemObject) || !(rclSetKernelArg) ||
689  !(rclGetProgramBuildInfo) || !(rclCreateFromGLBuffer)) {
690  ConsoleMessage ("did not find one of the functions in this OpenCL Library");
691  if (!rclGetPlatformIDs) ConsoleMessage ("did not find rclGetPlatformIDs");
692  if (!rclGetPlatformInfo) ConsoleMessage ("did not find rclGetPlatformInfo");
693  if (!rclGetDeviceIDs) ConsoleMessage ("did not find rclGetDeviceIDs");
694  if (!rclGetDeviceInfo) ConsoleMessage ("did not find rclGetDeviceInfo");
695  if (!rclBuildProgram) ConsoleMessage ("did not find rclBuildProgram");
696  if (!rclCreateBuffer) ConsoleMessage ("did not find rclCreateBuffer");
697  if (!rclCreateKernel) ConsoleMessage ("did not find rclCreateKernel");
698  if (!rclCreateCommandQueue) ConsoleMessage ("did not find rclCreateCommandQueue");
699  if (!rclCreateContextFromType) ConsoleMessage ("did not find rclCreateContextFromType");
700  if (!rclCreateProgramWithSource) ConsoleMessage ("did not find rclCreateProgramWithSource");
701  if (!rclEnqueueNDRangeKernel) ConsoleMessage ("did not find rclEnqueueNDRangeKernel");
702  if (!rclEnqueueReadBuffer) ConsoleMessage ("did not find rclEnqueueReadBuffer");
703  if (!rclEnqueueWriteBuffer) ConsoleMessage ("did not find rclEnqueueWriteBuffer");
704  if (!rclGetKernelWorkGroupInfo) ConsoleMessage ("did not find rclGetKernelWorkGroupInfo");
705  if (!rclReleaseMemObject) ConsoleMessage ("did not find rclReleaseMemObject");
706  if (!rclSetKernelArg) ConsoleMessage ("did not find rclSetKernelArg");
707  if (!rclGetProgramBuildInfo) ConsoleMessage ("did not find rclGetProgramBuildInfo");
708  if (!rclCreateFromGLBuffer) ConsoleMessage ("did not find rclCreateFromGLBuffer");
709 
710  // JAS return !CL_SUCCESS;
711  }
712  return CL_SUCCESS;
713 }
714 
715 
716 #endif
717 
718 /* create the Interpolators for the GPU */
719 static void createGPUInterpolators() {
720  ppOpenCL_Utils p;
721  ttglobal tg = gglobal();
722  p = (ppOpenCL_Utils)tg->OpenCL_Utils.prv;
723 
724  //bool collision_initGPUCollide (struct sCollisionGPU* initme) {
725  char *kp[2];
726  cl_int err;
727  size_t kernel_wg_size;
728  size_t rvlen;
729 
730  //printf ("called initGPUCollide, p is %p\n",p);
731  //printf ("called initGPUCollide, context is %p\n",p->CL_context);
732  //printf ("called initGPUCollide, initme is %p\n",initme);
733 
734  kp[0] = (char *)interpolator_headers;
735  kp[1] = (char *)coordinateInterpolator_kernel;
736 
737  p->coordinateInterpolatorProgram = clCreateProgramWithSource(p->CL_context, 2, (const char **) kp, NULL, &err);
738  //printf ("past the clCreateProgramWithSource call\n");
739 
740  if (!p->coordinateInterpolatorProgram || (err != CL_SUCCESS)) {
741  printCLError("clCreateProgramWithSource",err);
742  return;
743  }
744 
745  // build the compute program executable
746  //char *opts = "-Werror -cl-single-precision-constant -cl-nv-verbose -g -cl-opt-disable -cl-strict-aliasing";
747  //char *opts = "-Werror -cl-single-precision-constant -cl-opt-disable -cl-strict-aliasing";
748  //err = clBuildProgram(p->program, 0, NULL, opts, NULL, NULL);
749  //ConsoleMessage ("calling clBuildProgram with program %p\n",p->program);
750 
751  // build the program, hard code in devices to 1 device, with the device list, no options
752  char *opts = NULL;
753  err = clBuildProgram(p->coordinateInterpolatorProgram, 1, &(p->CL_device_id), opts, NULL, NULL);
754  //printf ("past the clBuildProgram call\n");
755 
756  //ConsoleMessage ("called clBuildProgram error %d\n",err);
757  if (err != CL_SUCCESS) {
758  size_t len;
759  char buffer[16384];
760 
761  ConsoleMessage("Error: Failed to build program executable\n");
762  printCLError("clBuildProgram",err);
763  err = clGetProgramBuildInfo(p->coordinateInterpolatorProgram, p->CL_device_id, CL_PROGRAM_BUILD_LOG,
764  sizeof(buffer), buffer, &len);
765  TEST_ERR("clGetProgramBuildInfo",err);
766  ConsoleMessage ("error string len %d\n",(int)len);
767  ConsoleMessage("%s\n", buffer);
768  return;
769  }
770 
771  // create the compute kernel
772  p->coordinateInterpolatorKernel = clCreateKernel(p->coordinateInterpolatorProgram, "compute_collide", &err);
773  //printf ("kernel is %p %p\n",p, p->coordinateInterpolatorKernel);
774 
775  if (!p->coordinateInterpolatorKernel || (err != CL_SUCCESS)) {
776  printCLError("clCreateKernel",err);
777  return;
778  }
779 
780 
781  // Kernel Workgroup size
782  // rv = clGetDeviceInfo (p->device_id, CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof(size_t), &wg_size, &rvlen);
783  err = clGetKernelWorkGroupInfo (p->coordinateInterpolatorKernel, p->CL_device_id,
784  CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), &kernel_wg_size, &rvlen);
785 
786  if (err!=CL_SUCCESS) {
787  printCLError( "clGetKernelWorkGroupInfo",err);
788  return;
789  }
790 
791  // try the smaller of the two
792  if (kernel_wg_size < p->CL_default_workgroup_size) p->coordinateInterpolator_workgroup_size = kernel_wg_size;
793  else p->coordinateInterpolator_workgroup_size = p->CL_default_workgroup_size;
794 
795 #ifdef GPU_DEBUG
796  ConsoleMessage ("MAX_WORK_GROUP_SIZE %d\n",kernel_wg_size);
797  ConsoleMessage ("We are going to set our workgroup size to %d\n",wg_size);
798 
799 
800  /*
801  1. Get workGroupSize from clGetDeviceInfo with CL_DEVICE_mum of two values and use that value as your optimal workGroupSize
802  2. Get KernelWorkGroupSize from from clGetKernelWorkGroupInfo with CL_KERNEL_WORK_GPOUP_SIZE
803  3. Get minimum of two values and use that value as your optimal workGroupSize
804  */
805 
806 #endif // GPU_DEBUG
807 }
808 
809 static int printOnce = FALSE;
810 
811 /* actually do the interpolator for each and every parent - eg, a Coordinate destination may have multiple
812  parents, of course, (DEF, multi USE) */
813 
814 #define TEST_GLOBAL
815 #ifdef TEST_GLOBAL
816 cl_mem myK = NULL;
817 cl_mem myKV = NULL;
818 cl_mem myVert = NULL;
819 #endif
820 
821 
822 static void runItOnce(cl_kernel myKernel, GLuint keyVBO, GLuint keyValueVBO, GLuint destVBO, int keysIn, int keyValuesIn, float frac) {
823  cl_int err;
824  size_t global_work_size;
825  size_t local_work_size;
826 
827  //printf ("runItOnce...\n");
828 
829 #ifdef TESTING
830  /* TESTING */
831  float rvs[2000];
832  int i;
833  cl_mem output_buffer;
834 #endif //TESTING
835 
836 #ifdef TESTING
837  printf ("calling glFinish()\n");
838  glFinish();
839 
840  printf ("runItOnce, frac %f keysIn %d keyValuesIn %d\n",frac,keysIn,keyValuesIn);
841 
842 #endif //TESTING
843 
844 #ifndef TEST_GLOBAL
845  // set up pointers to buffers
846  cl_mem myK, myKV,myVert;
847 #endif
848 
849  ppOpenCL_Utils p;
850  ttglobal tg = gglobal();
851  p = (ppOpenCL_Utils)tg->OpenCL_Utils.prv;
852 
853 #ifdef TEST_GLOBAL
854  if (myK==NULL) myK = clCreateFromGLBuffer(p->CL_context, CL_MEM_READ_ONLY, keyVBO, &err);
855  TEST_ERR("clCreateFromGLBuffer 1",err);
856 
857  if (myKV == NULL) myKV = clCreateFromGLBuffer(p->CL_context, CL_MEM_READ_ONLY, keyValueVBO, &err);
858  TEST_ERR("clCreateFromGLBuffer 2",err);
859 
860 
861  if (myVert==NULL) myVert = clCreateFromGLBuffer(p->CL_context, CL_MEM_WRITE_ONLY, destVBO, &err);
862  TEST_ERR("clCreateFromGLBuffer 3",err);
863  clFinish(p->CL_queue);
864 
865 #else
866  myK = clCreateFromGLBuffer(p->CL_context, CL_MEM_READ_ONLY, keyVBO, &err);
867  TEST_ERR("clCreateFromGLBuffer 1",err);
868 
869  myKV = clCreateFromGLBuffer(p->CL_context, CL_MEM_READ_ONLY, keyValueVBO, &err);
870  TEST_ERR("clCreateFromGLBuffer 2",err);
871 
872 
873  myVert = clCreateFromGLBuffer(p->CL_context, CL_MEM_WRITE_ONLY, destVBO, &err);
874  TEST_ERR("clCreateFromGLBuffer 3",err);
875  clFinish(p->CL_queue);
876 #endif //TEST_GLOBAL
877 
878 
879 
880  /* TESTING */
881  //printf ("acquiring objects\n");
882  err = clEnqueueAcquireGLObjects(p->CL_queue, 1, &myVert, 0, NULL, NULL);
883  TEST_ERR("clEnqueueAcquire",err);
884 
885 
886 
887 
888  //send along the values as arguments to the CL kernel
889  err = clSetKernelArg(myKernel, 0, sizeof(cl_mem), &myK);
890  TEST_ERR("clSetKernelArg",err);
891  err = clSetKernelArg(myKernel, 1, sizeof(cl_mem), &myKV);
892  TEST_ERR("clSetKernelArg",err);
893  err = clSetKernelArg(myKernel, 2, sizeof(cl_mem), &myVert);
894  TEST_ERR("clSetKernelArg",err);
895  err =clSetKernelArg(myKernel, 3, sizeof(int), &keysIn);
896  TEST_ERR("clSetKernelArg",err);
897  err =clSetKernelArg(myKernel, 4, sizeof(int), &keyValuesIn);
898  TEST_ERR("clSetKernelArg",err);
899  err =clSetKernelArg(myKernel, 5, sizeof(float), &frac);
900  TEST_ERR("clSetKernelArg",err);
901 
902 
903  /* testing */
904 #ifdef TESTING
905  output_buffer = clCreateBuffer(p->CL_context, CL_MEM_WRITE_ONLY, sizeof(float) * keyValuesIn/keysIn, NULL, NULL);
906  err = clSetKernelArg(myKernel, 6, sizeof(cl_mem), &output_buffer);
907  TEST_ERR("clSetKernelArg",err);
908 #endif //TESTING
909 
910 
911 
912 
913 
914  // global work group size
915 #define MYWG (p->CL_default_workgroup_size)
916  // find out how many "blocks" we can have
917  if (MYWG > 0)
918  global_work_size = (size_t) (keysIn) / MYWG;
919  else global_work_size = 0;
920 
921  // add 1 to it, because we have to round up
922  global_work_size += 1;
923 
924  // now, global_work_size will be an exact multiple of local_work_size
925  global_work_size *= MYWG;
926 
927  //ConsoleMessage ("global_work_size is %d %x right now...\n",global_work_size, global_work_size);
928 
929  local_work_size = MYWG;
930  //ConsoleMessage ("local_work_size %d\n",local_work_size);
931  //ConsoleMessage ("ntri %d, global_work_size %d, local_work_size %d\n",ntri,global_work_size,local_work_size);
932 
933  //printf ("calling kernel local_work_size %d, global_Work_size %d\n",local_work_size, global_work_size);
934  err = clEnqueueNDRangeKernel(p->CL_queue, myKernel, 1, NULL, &global_work_size, &local_work_size, 0, NULL, NULL);
935  TEST_ERR("clEnqueueNDRangeKernel", err);
936 
937  //printf ("called kernel\n");
938 
939 #ifdef TESTING
940  clFinish(p->CL_queue);
941 
942  printf ("past clFinish\n");
943 #endif //TESTING
944 
945 
946  /* TESTING */
947  /* glBindBuffer(GL_ARRAY_BUFFER,destVBO);
948  glGetBufferSubData(GL_ARRAY_BUFFER, 0, sizeof (float) * 24, rvs);
949  for (i=0; i<8; i++) {
950  printf ("glBufferData is %d %f\n",i,rvs[i]);
951  }
952  */
953 
954 #ifndef TEST_GLOBAL
955  err = clEnqueueReleaseGLObjects(p->CL_queue, 1, &myKV, 0, NULL, NULL);
956  TEST_ERR("clEnqueueRelease",err);
957 
958  err = clReleaseMemObject(myK) || clReleaseMemObject(myKV) || clReleaseMemObject(myVert);
959  TEST_ERR("clReleaseMemObject",err);
960 
961 #endif // TEST_GLOBAL
962 
963 #ifdef TESTING
964 
965  err = clEnqueueReadBuffer (p->CL_queue, output_buffer,
966  CL_TRUE, 0, sizeof(float) * 6 /* keyValuesIn/keysIn */,
967  rvs, 0, NULL, NULL);
968 
969  if (err != CL_SUCCESS) {
970  printCLError("clEnqueueReadBuffer",err);
971  return;
972  }
973 
974 
975  for (i=0; i < (keyValuesIn/keysIn); i++) {
976  printf ("rv %d is %f\n", i, rvs[i]);
977  }
978  clReleaseMemObject(output_buffer);
979 
980 #endif //TESTING
981 
982 
983 
984 
985 }
986 
987 /* do an interpolator on the GPU - the destination will be on the GPU, too */
988 void runOpenCLInterpolator(struct CRStruct *route, struct X3D_Node * toNode, int toOffset) {
989  GLuint keyVBO = 0;
990  GLuint keyValueVBO = 0;
991  GLuint destVBO = 0;
992  int keysIn = 0;
993  int keyValuesIn = 0;
994  float frac = 0.0;
995 
996 
997 
998  if (!printOnce) {
999  printf ("RUNNING OPENCL INTERPOLATOR PROGRAM %p\n",route->CL_Interpolator);
1000  printf ("it is coming from a %s\n",stringNodeType(route->routeFromNode->_nodeType));
1001  printf ("and, it is going to a %s\n",stringNodeType(toNode->_nodeType));
1002  printf ("with a length of %d\n",route->len);
1003  }
1004 
1005  if (route->CL_Interpolator == NULL) {
1006  printf ("runCLInterpolator - interpolator is NULL??\n");
1007  return;
1008  }
1009 
1010  if ((toNode == NULL) || (route->routeFromNode == NULL)) {
1011  printf ("runCLInterpolator - error - destination or source NULL\n");
1012  return;
1013  }
1014 
1015  // gather info here
1016  switch (route->routeFromNode->_nodeType) {
1017  case NODE_CoordinateInterpolator: {
1018  struct X3D_CoordinateInterpolator *px = (struct X3D_CoordinateInterpolator *) route->routeFromNode;
1019  keyVBO = px->_keyVBO;
1020  keyValueVBO = px->_keyValueVBO;
1021  keysIn = px->key.n;
1022  keyValuesIn = px->keyValue.n;
1023  frac = px->set_fraction;
1024  break;
1025  }
1026  default: ConsoleMessage ("do not route from a node of %s on the GPU - help!\n",
1027  stringNodeType(route->routeFromNode->_nodeType));
1028  }
1029 
1030  if ((keyVBO == 0) || (keyValueVBO == 0)) {
1031  printf ("runCLInterpolator - error - source VBOS are %d %d, should not be zero\n",
1032  keyVBO, keyValueVBO);
1033  return;
1034  }
1035 
1036  switch (toNode->_nodeType) {
1037  case NODE_Coordinate: {
1038  struct X3D_Coordinate *px = X3D_COORDINATE(toNode);
1039  int i;
1040  //printf ("haveCoordinateHere...\n");
1041 
1042  for (i=0; i<vectorSize(px->_parentVector); i++) {
1043  struct X3D_Node * me = vector_get(struct X3D_Node *, px->_parentVector, i);
1044  //printf ("parent %d of %d is %s\n",i,vectorSize(px->_parentVector), stringNodeType(me->_nodeType));
1045  struct X3D_PolyRep pr = *(me->_intern);
1046  //printf ("polyrep buffer is %d\n",pr.VBO_buffers[VERTEX_VBO]);
1047  destVBO = pr.VBO_buffers[VERTEX_VBO];
1048 
1049  if (destVBO != 0) {
1050  runItOnce(route->CL_Interpolator, keyVBO, keyValueVBO, destVBO, keysIn, keyValuesIn, frac);
1051  }
1052  }
1053  //destVBO = px->
1054  break;
1055  }
1056  default: ConsoleMessage ("do not route from a node of %s on the GPU - help!\n",
1057  stringNodeType(route->routeFromNode->_nodeType));
1058  }
1059 
1060  if (!printOnce) {
1061  printf ("so, if we were to run the interp, keyVBO %d keyValueVBO %d destVBO %d keysIn %d keyValuesIn %d frac %f\n",
1062  keyVBO, keyValueVBO, destVBO, keysIn, keyValuesIn, frac);
1063  printOnce = TRUE;
1064  }
1065 }
1066 
1067 #ifdef GL_ES_VERSION_2_0
1068 static const char* interpolator_headers = " \
1069 //#pragma OPENCL EXTENSION cl_khr_fp64 : enable \n\
1070 #pragma OPENCL EXTENSION cl_khr_byte_addressable_store : enable \n\
1071 //#pragma OPENCL EXTENSION CL_APPLE_gl_sharing : enable \n\
1072 //#pragma OPENCL EXTENSION CL_KHR_gl_sharing : enable \n\
1073 //#pragma OPENCL EXTENSION cl_khr_select_fprounding_mode : enable \n\
1074 ";
1075 #else
1076 
1077 // this seems to be ok on AMD drivers under Linux
1078 static const char* interpolator_headers = " \
1079 #pragma OPENCL EXTENSION cl_khr_byte_addressable_store : enable \n\
1080 //#pragma OPENCL EXTENSION cl_khr_gl_sharing : enable \n\
1081 ";
1082 
1083 #endif
1084 
1085 static const char* coordinateInterpolator_kernel = " \
1086 /* Function prototypes */ \n \
1087 int find_key (int kin, float frac, __global float *keys); \n \
1088 \n \
1089 int find_key (int kin, float frac, __global float *keys) { \n \
1090 int counter; \n \
1091 \
1092 for (counter=1; counter <= kin; counter++) { \n \
1093  if (frac <keys[counter]) { \n \
1094  return counter; \n \
1095  } \n \
1096 } \n \
1097 return kin; /* huh? not found! */ \n \
1098 } \
1099 \
1100 /********************************************************************************/ \n\
1101 \n\
1102 __kernel void compute_collide ( \n\
1103 __global float *keys, /* 0 */ \n\
1104 __global float *keyValues, /* 1 */ \n \
1105 __global float *destVertices, /* 2 */ \n \
1106 const int kin, /* 3 */ \n\
1107 const int kvin, /* 4 */ \n \
1108 const float frac /* 5 */ \n \
1109 /* , __global float *output */ \n \
1110 ) { \n\
1111 \n\
1112  int i_am_canadian = get_global_id(0); \n\
1113 \
1114  /* get keysPerKeyValue */ \n \
1115  int kpkv = kvin/kin; \n\
1116 if (i_am_canadian > kpkv) return; /* this invocation is above our bounds */ \n\
1117 \
1118 //output[i_am_canadian] = -999.9f; /* convert_float(get_global_id(0)); */ /* keys[kin-1]; */ \n \
1119 \
1120 \
1121 //output[i_am_canadian] = destVertices[i_am_canadian]; \n \
1122  \
1123  /* set fraction less than or greater than keys */ \n\
1124  if (frac <= keys[0]) { \n\
1125 //output[i_am_canadian] = -100.0f; \n \
1126  destVertices[i_am_canadian*3+0] = keyValues[i_am_canadian*3+0]; \n \
1127  destVertices[i_am_canadian*3+1] = keyValues[i_am_canadian*3+1]; \n \
1128  destVertices[i_am_canadian*3+2] = keyValues[i_am_canadian*3+2]; \n \
1129  } else if (frac >=keys[kin-1]) { \n \
1130 //output[i_am_canadian] = 100.0f; \n\
1131  destVertices[i_am_canadian*3+0] = keyValues[(kvin - kpkv + i_am_canadian)*3+0]; \n \
1132  destVertices[i_am_canadian*3+1] = keyValues[(kvin - kpkv + i_am_canadian)*3+1]; \n \
1133  destVertices[i_am_canadian*3+2] = keyValues[(kvin - kpkv + i_am_canadian)*3+2]; \n \
1134  } else { \n \
1135  int myKey = find_key(kin,frac,keys); \n \
1136  float interval = (frac - keys[myKey-1]) / (keys[myKey] - keys[myKey-1]); \n \
1137 //output[i_am_canadian] = convert_float(myKey*100+kpkv); \n \
1138 \
1139  int thisone = myKey*kpkv*3; \n \
1140  int prevone = (myKey-1) * kpkv *3; \n \
1141 //output[i_am_canadian] = convert_float(thisone * 100 + prevone)+interval; \n \
1142  prevone = prevone + i_am_canadian*3; \n \
1143  thisone = thisone + i_am_canadian*3; \n \
1144  destVertices[i_am_canadian*3+0] = keyValues[prevone+0] + interval*(keyValues[thisone+0]-keyValues[prevone+0]); \n \
1145  destVertices[i_am_canadian*3+1] = keyValues[prevone+1] + interval*(keyValues[thisone+1]-keyValues[prevone+1]); \n \
1146  destVertices[i_am_canadian*3+2] = keyValues[prevone+2] + interval*(keyValues[thisone+2]-keyValues[prevone+2]); \n \
1147  } \n \
1148  //output[i_am_canadian] = destVertices[i_am_canadian*3]; \n \
1149 \n \
1150 }";
1151 
1152 
1153 
1154 #endif //HAVE_OPENCL