Surface Blur algorithm in C++ with OpenCL: "Process returned -1073741819 (0xC0000005)"

0

While implementing a Surface Blur algorithm with OpenCL, I ran into the following problem.

I don't think I am indexing past the end of the buffers (my picture is 512*512).

I tested the BlurredProcessDown function on its own and it works, but the program fails once I accumulate its result into BD, GD and RD.

It returns "Process returned -1073741819 (0xC0000005)".

I don't know why.

Here's the kernel code (Face.cl):

/*
 * Clamp an integer to the 8-bit channel range.
 *
 * Returns 0 for negative input, 255 for input above 255, and the value
 * itself otherwise.
 */
int checkrange(int value){
    int floored = (value < 0) ? 0 : value;
    return (floored > 255) ? 255 : floored;
}

/*
 * Weight given to one sample when blurring around another.
 *
 * xi : value of the sample being weighted
 * x  : value it is compared against
 * T  : threshold; the weight falls linearly and reaches 0 once
 *      |xi - x| is 2.5 * T
 *
 * Returns 1 - |xi - x| / (2.5 * T), with negative results replaced by 0.
 *
 * NOTE(review): this uses double, which is an optional feature on OpenCL
 * devices (cl_khr_fp64) -- confirm the target device supports it.
 */
double BlurredProcessDown(int xi, int x, int T){
    double span = 2.5 * T;
    double weight = 1 - (abs_diff(xi, x) / span);

    return (weight < 0) ? 0 : weight;
}

/*
 * Surface blur of a planar RGB image, one work-item per pixel.
 *
 * sR, sG, sB : source channel planes, indexed by the global id
 * tR, tG, tB : destination channel planes, same indexing
 *
 * The global id is split into row = id / 512 and col = id % 512. Pixels
 * closer than `offset` to any edge are copied unchanged. Every other pixel
 * becomes the weighted mean of the (2 * offset + 1)^2 window centred on it,
 * where each neighbour's weight is BlurredProcessDown(neighbour, centre, 15),
 * computed per channel.
 */
__kernel void Face(__global uchar* sR, __global uchar* sG, __global uchar* sB,__global uchar* tR, __global uchar* tG, __global uchar* tB)
{
    const int side = 512;      /* row stride used to split the id */
    const int offset = 7;      /* window radius */
    const int threshold = 15;  /* T passed to BlurredProcessDown */

    int id = get_global_id(0);
    int row = id / side;
    int col = id % side;

    /* Border pixels have no complete window: pass them through. */
    if (row < offset || col < offset ||
        row > side - 1 - offset || col > side - 1 - offset) {
        tR[id] = sR[id];
        tG[id] = sG[id];
        tB[id] = sB[id];
        return;
    }

    int centreR = (int)sR[id];
    int centreG = (int)sG[id];
    int centreB = (int)sB[id];

    /* xD accumulates the weights, xU the weighted sample values. */
    double RD = 0, GD = 0, BD = 0;
    double RU = 0, GU = 0, BU = 0;

    /*
     * Both loops run to +offset inclusive. Stopping at offset - 1 would
     * leave the window lopsided (-7..6); the border test above already
     * guarantees that row + offset and col + offset stay inside the image.
     */
    for (int i = -offset; i <= offset; ++i) {
        for (int j = -offset; j <= offset; ++j) {
            int index = id + i * side + j;
            double w;

            w = BlurredProcessDown((int)sR[index], centreR, threshold);
            RD += w;
            RU += w * (double)sR[index];

            w = BlurredProcessDown((int)sG[index], centreG, threshold);
            GD += w;
            GU += w * (double)sG[index];

            w = BlurredProcessDown((int)sB[index], centreB, threshold);
            BD += w;
            BU += w * (double)sB[index];
        }
    }

    /* The centre sample always contributes weight 1, so RD/GD/BD >= 1. */
    tR[id] = (uchar)checkrange((int)(RU / RD));
    tG[id] = (uchar)checkrange((int)(GU / GD));
    tB[id] = (uchar)checkrange((int)(BU / BD));
}

Host code:

/*
 * Image buffers and dimensions shared by the processing routines.
 * IMGObj is the struct type, pImgObj a pointer to it.
 */
typedef struct IMG_obj {
    /* Input image: 2048x2048 */
    unsigned char *InputSrc;
    unsigned char *InputData;

    /* Input data */
    unsigned char *sR;
    unsigned char *sG;
    unsigned char *sB;
    unsigned char *sY;

    /* Input size */
    unsigned short IMGW;
    unsigned short IMGH;
} IMGObj, *pImgObj;
int OCL_Blurred_Surface(IMGObj *IObj){
    int     width = IMG_W, height = IMG_H;//512*512
    int     x, y, index=0, off=0;
    // read RGB Data


    for (y = 0; y != height; y++)
    {

        for (x = 0; x != width; x++)
        {

            IObj->sR[off] = IObj->InputSrc[index + 2];
            IObj->sG[off] = IObj->InputSrc[index + 1];
            IObj->sB[off] = IObj->InputSrc[index + 0];

            off++;
            index+=3;
        }
    }

    //
    //===========OpenCL============pre process
    cl_uint status;
    cl_platform_id platform;
    status=clGetPlatformIDs(1,&platform,NULL);

    cl_device_id device;
    clGetDeviceIDs(platform,CL_DEVICE_TYPE_GPU,1,&device,NULL);

    cl_context context=clCreateContext(NULL,1,&device,NULL,NULL,NULL);

    cl_command_queue queue =clCreateCommandQueue(context,device,CL_QUEUE_PROFILING_ENABLE,NULL);
    //===========OpenCL============pre process

    //read only rgb data
    cl_mem clbufR=clCreateBuffer(context,
    CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR,
    IMG_Size*sizeof(cl_uchar),IObj->sR,NULL);

    cl_mem clbufG=clCreateBuffer(context,
    CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR,
    IMG_Size*sizeof(cl_uchar),IObj->sG,NULL);

    cl_mem clbufB=clCreateBuffer(context,
    CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR,
    IMG_Size*sizeof(cl_uchar),IObj->sB,NULL);
    //read only rgb data


    //write only rgb data
    cl_mem clbufWOR=clCreateBuffer(context,
    CL_MEM_WRITE_ONLY,
    IMG_Size*sizeof(cl_uchar),NULL,NULL);

    cl_mem clbufWOG=clCreateBuffer(context,
    CL_MEM_WRITE_ONLY,
    IMG_Size*sizeof(cl_uchar),NULL,NULL);

    cl_mem clbufWOB=clCreateBuffer(context,
    CL_MEM_WRITE_ONLY,
    IMG_Size*sizeof(cl_uchar),NULL,NULL);

    //write only rgb data

    const char * filename ="Face.cl";
    std::string sourceStr;
    status =convertToString(filename,sourceStr);
    const char * source=sourceStr.c_str();
    size_t sourceSize[]={strlen(source)};
    //創建程序對象

    cl_program program=clCreateProgramWithSource(context,
                                                 1,
                                                 &source,
                                                 sourceSize,
                                                 NULL);
    //編譯程序對象
    status =clBuildProgram(program,1,&device,NULL,NULL,NULL);
    if(status!=0){
        printf("clBuild failed:%d\n",status);
        char tbuf[0x10000];
        clGetProgramBuildInfo(program,device,CL_PROGRAM_BUILD_LOG,0x10000,tbuf,NULL);
        printf("%s\n",tbuf);
        return -1;
    }else{
       // printf("clBuild sucess\n");
    }
    //創建kernel對象
    cl_kernel kernel =clCreateKernel(program,"Face",NULL);

    //set arg
    cl_int clnum=IMG_Size;
    //source rgb
    clSetKernelArg(kernel,0,sizeof(cl_mem),(void*)&clbufR);
    clSetKernelArg(kernel,1,sizeof(cl_mem),(void*)&clbufG);
    clSetKernelArg(kernel,2,sizeof(cl_mem),(void*)&clbufB);
    //target rgb
    clSetKernelArg(kernel,3,sizeof(cl_mem),(void*)&clbufWOR);
    clSetKernelArg(kernel,4,sizeof(cl_mem),(void*)&clbufWOG);
    clSetKernelArg(kernel,5,sizeof(cl_mem),(void*)&clbufWOB);

    //printf("set arg sucess\n");
    //run
    double Start_Time, End_Time, All_Time;
    Start_Time = clock();

    cl_event ev;
    size_t global_work_size=IMG_Size;
    clEnqueueNDRangeKernel(queue,
                           kernel,
                           1,
                           NULL,
                           &global_work_size,
                           NULL,0,NULL,&ev);
    clFinish(queue);
    End_Time= clock();
    All_Time = (End_Time-Start_Time) / CLOCKS_PER_SEC;

    printf("GPU Time:%10.5fsec\n", All_Time);
    //Copy to host
    cl_uchar *tR,*tG,*tB;

    tR=(cl_uchar*)clEnqueueMapBuffer(queue,
        clbufWOR,CL_TRUE,CL_MAP_READ,0,
        IMG_Size*sizeof(cl_uchar),
        0,NULL,NULL,NULL);

    tG=(cl_uchar*)clEnqueueMapBuffer(queue,
        clbufWOG,CL_TRUE,CL_MAP_READ,0,
        IMG_Size*sizeof(cl_uchar),
        0,NULL,NULL,NULL);

    tB=(cl_uchar*)clEnqueueMapBuffer(queue,
        clbufWOB,CL_TRUE,CL_MAP_READ,0,
        IMG_Size*sizeof(cl_uchar),
        0,NULL,NULL,NULL);
    //save
    SSaveIMGOCL(tR,tG,tB,"OCL.bmp");

    return 0;
}

Thank you for reading QAQ

c++
opencl
asked on Stack Overflow Jan 6, 2020 by TC code • edited Jan 7, 2020 by TC code

0 Answers

Nobody has answered this question yet.


User contributions licensed under CC BY-SA 3.0