OpenCL clEnqueueNDRangeKernel Exception

-1

When I run my OpenCL program, the call to clEnqueueNDRangeKernel always throws an exception, even though every other required call returns CL_SUCCESS.

Unhandled exception at 0x55A7BAB2 (nvopencl.dll) in ******.exe: 0xC000041D: An unhandled exception was encountered during a user callback.

Here is the link to my code on github: Source Code

The exception occurs in the file "DrawingProcess.cpp" at line 955. My kernels are located in "Renderer.h"; see "Hierarchization".

Kernel

// OpenCL C source of the kernel "Hierarchization", stored as host-side string fragments.
//
// Each work-item handles one pixel (GID1, GID2) of a 2D range and uses the flat
// index GID1+SCOUNTER1*GID2. It loops over TCOUNTER triangles; for every triangle i
// with Polar[i] set it:
//   1. builds the two screen-space edge vectors v and u from ScrPos/DltIdx,
//   2. solves  Px=Vx*a+Ux*b, Py=Vy*a+Uy*b  for (a,b) with P=(GID1,GID2),
//   3. if a>=0, b>=0 and a+b<=1, intersects the ray CamPos + t*PixVect[idx] with the
//      triangle's plane and measures the distance from CamPos to the hit point,
//   4. writes i into TopDltIdx[idx] when the depth test passes.
//
// Parameters:
//   ScrPos    - per-vertex screen positions (ushort2), indexed through DltIdx.
//   CamPos    - camera position; used as the ray origin p1.
//   CamNorV1  - not referenced in the kernel body.
//   CamNorV3  - not referenced in the kernel body.
//   AllVert   - per-vertex float4 positions, indexed through DltIdx.
//   DltIdx    - per-triangle vertex indices in .x/.y/.z.
//   PixVect   - per-pixel ray direction, indexed by the flat pixel index.
//   Polar     - per-triangle flag; only triangles with a true flag are processed.
//   TopDltIdx - output: per-pixel index of the selected triangle.
//
// NOTE(review): SCOUNTER1, SCOUNTER2 and TCOUNTER are not defined in this fragment;
// presumably they are supplied as macros when the program is built - confirm.
// NOTE(review): every scratch array below is declared inside the kernel body with
// SCOUNTER1*SCOUNTER2 elements, yet each work-item only touches the single element at
// its own flat index. Scalars would likely suffice - confirm before changing.
"__kernel void Hierarchization(\n",
"__global ushort2 *ScrPos,\n",
"const float4 CamPos,\n",
"const float4 CamNorV1,\n",                 // W holds the horizontal resolution.
"const float4 CamNorV3,\n",                 // W holds the diagonal resolution.
"__global float4 *AllVert,\n",
"__global ushort4 *DltIdx,\n",
"__global float4 *PixVect,\n",
// NOTE(review): a pointer-to-bool kernel argument may not be portable between host
// and device (size of bool) - confirm against the OpenCL C argument restrictions.
"__global bool *Polar,\n",
"__global int *TopDltIdx)\n",
"{\n",
"   float Deep[SCOUNTER1*SCOUNTER2],Distance[SCOUNTER1*SCOUNTER2];\n",
"   int GID1=(int)get_global_id(0);\n",
"   int GID2=(int)get_global_id(1);\n",
"   int2 v,u;\n",
// a and b are int, so the divisions that fill them below are integer divisions.
"   int a[SCOUNTER1*SCOUNTER2],b[SCOUNTER1*SCOUNTER2];\n",
"   float4 m,n,l;\n",
"   float4 p1[SCOUNTER1*SCOUNTER2],p2[SCOUNTER1*SCOUNTER2];\n",
// The next two fragments have no trailing "\n"; the statements still end with ';'.
"   float t[SCOUNTER1*SCOUNTER2];",
"   float4 focus[SCOUNTER1*SCOUNTER2];",
// Negative depth marks "nothing selected yet" for this pixel.
// NOTE(review): Deep is never assigned again in this kernel, so the "Deep<0" test
// further down is always true and the last matching triangle wins - confirm intent.
"   Deep[GID1+SCOUNTER1*GID2]=-1;\n",
"   for(uint i=0;i<TCOUNTER;i++){\n",
"       if(Polar[i]==true){\n",
// Screen-space edge vectors of triangle i, relative to its first vertex.
// NOTE(review): a scalar (int) cast is applied to a ushort2 expression; OpenCL C
// normally expects convert_int2(...) for vector conversions - confirm this builds
// and behaves as intended.
"           v.xy=(int)(ScrPos[DltIdx[i].y].xy-ScrPos[DltIdx[i].x].xy);\n",
"           u.xy=(int)(ScrPos[DltIdx[i].z].xy-ScrPos[DltIdx[i].x].xy);\n",
//          Solve for (a,b):    Px=Vx*a+Ux*b,
//                              Py=Vy*a+Uy*b.
// NOTE(review): P is taken as (GID1,GID2) without subtracting the first vertex's
// screen position, and a zero denominator (u.y*v.x-u.x*v.y, or v.x) is not guarded.
"           b[GID1+SCOUNTER1*GID2]=(GID2*v.x-GID1*v.y)/(u.y*v.x-u.x*v.y);\n",
"           a[GID1+SCOUNTER1*GID2]=(GID1-u.x*b[GID1+SCOUNTER1*GID2])/v.x;\nmem_fence(CLK_GLOBAL_MEM_FENCE);\n",
// Inside-triangle test on the (integer) coefficients a and b.
"           if(a[GID1+SCOUNTER1*GID2]>=0&&b[GID1+SCOUNTER1*GID2]>=0&&a[GID1+SCOUNTER1*GID2]+b[GID1+SCOUNTER1*GID2]<=1){\n",
//              Plane equation: l.xyz is the normal (cross product of two edges), l.w the offset.
// NOTE(review): l.w is built from the edge vector m rather than a vertex position;
// since l = cross(m,n) is perpendicular to m this evaluates to ~0 - confirm.
"               m=AllVert[DltIdx[i].y]-AllVert[DltIdx[i].x];\n",
"               n=AllVert[DltIdx[i].z]-AllVert[DltIdx[i].x];\n",
"               l=cross(m,n);\n",
"               l.w=l.x*(-m.x)+l.y*(-m.y)+l.z*(-m.z);\n",
//              Two points on the view ray: p1 = camera position, p2 = p1 + pixel direction.
"               p1[GID1+SCOUNTER1*GID2]=CamPos;\n",
"               p2[GID1+SCOUNTER1*GID2]=p1[GID1+SCOUNTER1*GID2]+PixVect[GID1+SCOUNTER1*GID2];",
//              Line:  (x-x1)/(x2-x1)=(y-y1)/(y2-y1)=(z-z1)/(z2-z1)=t
//              Plane: ax+by+cz+d=0
//              t is the line parameter at which the ray meets the plane.
"               t[GID1+SCOUNTER1*GID2]=(l.x*p1[GID1+SCOUNTER1*GID2].x+l.y*p1[GID1+SCOUNTER1*GID2].y+l.z*p1[GID1+SCOUNTER1*GID2].z+l.w)/(l.x*(p1[GID1+SCOUNTER1*GID2].x-p2[GID1+SCOUNTER1*GID2].x)+l.y*(p1[GID1+SCOUNTER1*GID2].y-p2[GID1+SCOUNTER1*GID2].y)+l.z*(p1[GID1+SCOUNTER1*GID2].z-p2[GID1+SCOUNTER1*GID2].z));\n",
// Intersection point on the ray; the w components are zeroed so they do not
// contribute to the distance computed below.
"               focus[GID1+SCOUNTER1*GID2]=(p2[GID1+SCOUNTER1*GID2]-p1[GID1+SCOUNTER1*GID2])*t[GID1+SCOUNTER1*GID2]+p1[GID1+SCOUNTER1*GID2];\n",
"               p1[GID1+SCOUNTER1*GID2].w=0;\n",
"               focus[GID1+SCOUNTER1*GID2].w=0;\n",
"               Distance[GID1+SCOUNTER1*GID2]=fast_distance(focus[GID1+SCOUNTER1*GID2],p1[GID1+SCOUNTER1*GID2]);\nmem_fence(CLK_GLOBAL_MEM_FENCE);\n",
// Depth test: record triangle i if nothing is recorded yet or this hit is closer.
// The literal below has no trailing comma, so the compiler concatenates it with the
// next literal; the resulting kernel source is unaffected.
"               if(Deep[GID1+SCOUNTER1*GID2]<0||Deep[GID1+SCOUNTER1*GID2]>Distance[GID1+SCOUNTER1*GID2]){\n"
"                   TopDltIdx[GID1+SCOUNTER1*GID2]=i;\nmem_fence(CLK_GLOBAL_MEM_FENCE);\n",
"               }\nmem_fence(CLK_GLOBAL_MEM_FENCE);\n",
"           }\nmem_fence(CLK_GLOBAL_MEM_FENCE);\n",
"       }\nmem_fence(CLK_GLOBAL_MEM_FENCE);\n",
"   }\nmem_fence(CLK_GLOBAL_MEM_FENCE);\n",
"}\n",

Disassembly; the crash occurs at:

00DE467C  mov         dword ptr ds:[016EBC68h],eax

The kernel called Hierarchization does not work. The exception occurs on status=clEnqueueNDRangeKernel(CommandQueue,Hierarchization,2,NULL,GlobalThread3,LocalThread3,0,NULL,NULL); All 9 related clSetKernelArg calls return CL_SUCCESS. I really cannot find the problem. The program runs 5 kernels, and this is the 3rd one. None of the other kernels throw exceptions, and the first two produce the output I expect.

GPU: GT750M. I use GDK v7.5. Please help. Thank you! Happy new year!

c++
opencl
gpu
asked on Stack Overflow Jan 2, 2016 by Soban • edited Jan 6, 2016 by ARK

1 Answer

0

I solved it!

In the file "DrawingProcess.cpp", at line 20, change "cl_mem BufIdx[12];" to "cl_mem BufIdx[14];".

answered on Stack Overflow Jan 6, 2016 by Soban

User contributions licensed under CC BY-SA 3.0