My plan is simple, I want to render some polygons on the GPU. For that I have a SceneData
struct which holds the necessary data and then passed to the kernel for further work. So these are the structs
struct SceneData
{
int numOfObjectsToRender = 0;
float(*cameraToWorld)[4] = nullptr;
float(*cameraInverseProjection)[4] = nullptr;
GameObject**objectsToRender = nullptr;
};
struct GameObject
{
int numOfVertices;
float(*matrix)[4] = nullptr;
int numOfTriangles;
float3 *vertices = nullptr;
float3 *worldVertices = nullptr;
int* triangles = nullptr;
int id;
};
I have a global variable SceneData sd
, which has a variable objectsToRender
that should be an array of GameObject
s.
I wrote the following function which builds the array:
void setGameObjects(int n, GameObject* go)
{
sd.numOfObjectsToRender = n;
GameObject**gs;// = (GameObject**)malloc(n * sizeof(GameObject));
cudaMalloc((void**)&gs, n * sizeof(GameObject));
for (int i = 0; i < n; i++)
{
// Copy triangles to GPU
int *triangles;
cudaMalloc((void**)&triangles, go[i].numOfTriangles * sizeof(int));
cudaMemcpy(triangles, go[i].triangles, go[i].numOfTriangles * sizeof(int), cudaMemcpyHostToDevice);
// Copy vertices to GPU
float3 *worldVertices;
cudaMalloc((void**)&worldVertices, go[i].numOfVertices * sizeof(float3));
cudaMemcpy(worldVertices, go[i].worldVertices, go[i].numOfVertices * sizeof(float3), cudaMemcpyHostToDevice);
// Copy transformation matrix
float(*matrix)[4];
cudaMalloc((void**)&matrix, 16 * sizeof(float));
cudaMemcpy(worldVertices, go[i].matrix, 16 * sizeof(float), cudaMemcpyHostToDevice);
GameObject *g, p_g;
p_g = GameObject();
p_g.id = go[i].id;
p_g.numOfTriangles = go[i].numOfTriangles;
p_g.numOfVertices = go[i].numOfVertices;
p_g.matrix = matrix;
p_g.triangles = triangles;
p_g.worldVertices = worldVertices;
cudaMalloc((void**)&g, sizeof(GameObject));
cudaMemcpy(g, &p_g, sizeof(GameObject), cudaMemcpyHostToDevice);
cudaMalloc((void**)&gs[i], sizeof(GameObject));
//This is where I get an error
cudaMemcpy(gs[i], g, sizeof(GameObject), cudaMemcpyHostToDevice);
}
sd.objectsToRender = gs;
}
It all compiles, but crashed at runtime on the last line of the for loop with the following error
Exception thrown at 0x00007FF98EA947EF (nvcuda.dll) in PathTracing3.exe: 0xC0000005: Access violation writing location 0x0000000603E00400.
If there is a handler for this exception, the program may be safely continued.
I tried a lot of different ways to do this and this is my latest attempted version.. I also tried to define the variable objectsToRender
as both GameObject *objectsToRender
and GameObject**objectsToRender
, but they both crash pretty similarly...
EDIT:
I got it to work, thanks to talonmies' help. I modified the function to be as follows
void setGameObjects(int n, GameObject* go)
{
sceneData.numOfObjectsToRender = n;
GameObject**p_gs = (GameObject**)malloc(n * sizeof(GameObject));
for (int i = 0; i < n; i++)
{
// Copy triangles to GPU
int *triangles;
cudaMalloc((void**)&triangles, go[i].numOfTriangles * sizeof(int));
cudaMemcpy(triangles, go[i].triangles, go[i].numOfTriangles * sizeof(int), cudaMemcpyHostToDevice);
// Copy vertices to GPU
float3 *worldVertices;
cudaMalloc((void**)&worldVertices, go[i].numOfVertices * sizeof(float3));
cudaMemcpy(worldVertices, go[i].worldVertices, go[i].numOfVertices * sizeof(float3), cudaMemcpyHostToDevice);
// Copy transformation matrix
float(*matrix)[4];
cudaMalloc((void**)&matrix, 16 * sizeof(float));
cudaMemcpy(worldVertices, go[i].matrix, 16 * sizeof(float), cudaMemcpyHostToDevice);
GameObject *g, p_g;
p_g = GameObject();
p_g.id = go[i].id;
p_g.numOfTriangles = go[i].numOfTriangles;
p_g.numOfVertices = go[i].numOfVertices;
p_g.matrix = matrix;
p_g.triangles = triangles;
p_g.worldVertices = worldVertices;
cudaMalloc((void**)&g, sizeof(GameObject));
cudaMemcpy(g, &p_g, sizeof(GameObject), cudaMemcpyHostToDevice);
//cudaMalloc((void**)&gs[i], sizeof(GameObject));
p_gs[i] = g;
}
cudaMalloc((void**)&(sceneData.objectsToRender), n * sizeof(GameObject));
cudaMemcpy(sceneData.objectsToRender, &p_gs, sizeof(GameObject), cudaMemcpyHostToDevice);
}
Now my problem is to access the data from within the kernel..
I don't really know how to debug the kernel so I'm using printf
to diagnose my issues.. For example I want to see if the data was passed successfully, so I'm trying the following kernel:
__global__ void render(SceneData sd)
{
GameObject**p = sd.objectsToRender;
printf("Number of vertices: &d\n", (*p[0]).numOfTriangles);
}
but it doesn't print anything.. is it because it encountered a crash? If I run the next kernel it is executed properly
__global__ void render(SceneData sd)
{
GameObject**p = sd.objectsToRender;
printf("TESTING \n");
}
A lot of lines with the text TESTING
is printed onto the console.
What am I missing?
User contributions licensed under CC BY-SA 3.0