CUDA: Define a struct array within a struct to pass to a kernel

Question

CUDA: Define a struct array within a struct to pass to a kernel

My plan is simple, I want to render some polygons on the GPU. For that I have a SceneData struct which holds the necessary data and then passed to the kernel for further work. So these are the structs

struct SceneData
{
    int numOfObjectsToRender = 0;
    float(*cameraToWorld)[4] = nullptr;
    float(*cameraInverseProjection)[4] = nullptr;
    GameObject**objectsToRender = nullptr;
};

struct GameObject
{
    int numOfVertices;
    float(*matrix)[4] = nullptr;
    int numOfTriangles;
    float3 *vertices = nullptr;
    float3 *worldVertices = nullptr;
    int* triangles = nullptr;
    int id;
};

I have a global variable SceneData sd, which has a variable objectsToRender that should be an array of GameObjects. I wrote the following function which builds the array:

void setGameObjects(int n, GameObject* go)
{
    sd.numOfObjectsToRender = n;

    GameObject**gs;// = (GameObject**)malloc(n * sizeof(GameObject));
    cudaMalloc((void**)&gs, n * sizeof(GameObject));

    for (int i = 0; i < n; i++)
    {
        // Copy triangles to GPU
        int *triangles;
        cudaMalloc((void**)&triangles, go[i].numOfTriangles * sizeof(int));
        cudaMemcpy(triangles, go[i].triangles, go[i].numOfTriangles * sizeof(int), cudaMemcpyHostToDevice);

        // Copy vertices to GPU
        float3 *worldVertices;
        cudaMalloc((void**)&worldVertices, go[i].numOfVertices * sizeof(float3));
        cudaMemcpy(worldVertices, go[i].worldVertices, go[i].numOfVertices * sizeof(float3), cudaMemcpyHostToDevice);

        // Copy transformation matrix
        float(*matrix)[4];
        cudaMalloc((void**)&matrix, 16 * sizeof(float));
        cudaMemcpy(worldVertices, go[i].matrix, 16 * sizeof(float), cudaMemcpyHostToDevice);

        GameObject *g, p_g;
        p_g = GameObject();
        p_g.id = go[i].id;
        p_g.numOfTriangles = go[i].numOfTriangles;
        p_g.numOfVertices = go[i].numOfVertices;

        p_g.matrix = matrix;
        p_g.triangles = triangles;
        p_g.worldVertices = worldVertices;

        cudaMalloc((void**)&g, sizeof(GameObject));
        cudaMemcpy(g, &p_g, sizeof(GameObject), cudaMemcpyHostToDevice);

        cudaMalloc((void**)&gs[i], sizeof(GameObject));

        //This is where I get an error
        cudaMemcpy(gs[i], g, sizeof(GameObject), cudaMemcpyHostToDevice);
    }

    sd.objectsToRender = gs;
}

It all compiles, but crashed at runtime on the last line of the for loop with the following error

Exception thrown at 0x00007FF98EA947EF (nvcuda.dll) in PathTracing3.exe: 0xC0000005: Access violation writing location 0x0000000603E00400.

If there is a handler for this exception, the program may be safely continued.

I tried a lot of different ways to do this and this is my latest attempted version.. I also tried to define the variable objectsToRender as both GameObject *objectsToRender and GameObject**objectsToRender, but they both crash pretty similarly...

EDIT:

I got it to work, thanks to talonmies' help. I modified the function to be as follows

void setGameObjects(int n, GameObject* go)
{
    sceneData.numOfObjectsToRender = n;

    GameObject**p_gs = (GameObject**)malloc(n * sizeof(GameObject));

    for (int i = 0; i < n; i++)
    {
        // Copy triangles to GPU
        int *triangles;
        cudaMalloc((void**)&triangles, go[i].numOfTriangles * sizeof(int));
        cudaMemcpy(triangles, go[i].triangles, go[i].numOfTriangles * sizeof(int), cudaMemcpyHostToDevice);

        // Copy vertices to GPU
        float3 *worldVertices;
        cudaMalloc((void**)&worldVertices, go[i].numOfVertices * sizeof(float3));
        cudaMemcpy(worldVertices, go[i].worldVertices, go[i].numOfVertices * sizeof(float3), cudaMemcpyHostToDevice);

        // Copy transformation matrix
        float(*matrix)[4];
        cudaMalloc((void**)&matrix, 16 * sizeof(float));
        cudaMemcpy(worldVertices, go[i].matrix, 16 * sizeof(float), cudaMemcpyHostToDevice);

        GameObject *g, p_g;
        p_g = GameObject();
        p_g.id = go[i].id;
        p_g.numOfTriangles = go[i].numOfTriangles;
        p_g.numOfVertices = go[i].numOfVertices;

        p_g.matrix = matrix;
        p_g.triangles = triangles;
        p_g.worldVertices = worldVertices;

        cudaMalloc((void**)&g, sizeof(GameObject));
        cudaMemcpy(g, &p_g, sizeof(GameObject), cudaMemcpyHostToDevice);

        //cudaMalloc((void**)&gs[i], sizeof(GameObject));
        p_gs[i] = g;
    }
    cudaMalloc((void**)&(sceneData.objectsToRender), n * sizeof(GameObject));
    cudaMemcpy(sceneData.objectsToRender, &p_gs, sizeof(GameObject), cudaMemcpyHostToDevice);
}

Now my problem is to access the data from within the kernel.. I don't really know how to debug the kernel so I'm using printf to diagnose my issues.. For example I want to see if the data was passed successfully, so I'm trying the following kernel:

__global__ void render(SceneData sd)
{
    GameObject**p = sd.objectsToRender;

    printf("Number of vertices: &d\n", (*p[0]).numOfTriangles);
}

but it doesn't print anything.. is it because it encountered a crash? If I run the next kernel it is executed properly

__global__ void render(SceneData sd)
{
    GameObject**p = sd.objectsToRender;

    printf("TESTING \n");
}

A lot of lines with the text TESTING is printed onto the console. What am I missing?

c++

arrays

struct

cuda

asked on Stack Overflow Oct 16, 2018 by

Roman • edited Oct 16, 2018 by

Roman

0 Answers

Nobody has answered this question yet.

User contributions licensed under CC BY-SA 3.0