I have a D3D12 compute shader (Shader Model 6.0) that compiles successfully. When I compile it with optimization disabled (/Od), it runs fine and the outputs are correct.
When I enable optimization, it also compiles, but the application crashes when I create the compute pipeline state. I get the following output when I call CreateComputePipelineState:
Exception thrown at 0x00007FFE1A2FC800 (nvwgf2umx.dll) in D3DCompute.exe: 0xC0000005: Access violation reading location 0x0000000000000008.
The debug layer doesn't complain about anything either. Other shaders I wrote work fine, and I have checked this problematic shader carefully — I am sure there is no out-of-bounds behaviour. Why does this happen?
Edit: here is a minimal shader that reproduces the problem
// Configuration macros for the radix-sort digit-scan repro.
// NOTE(review): MACRO_MAX and LOG_PACKTING_RATIO are unused in this minimal
// shader ("PACKTING" looks like a typo for "PACKING"); kept verbatim since
// this is a bug reproduction.
#define MACRO_MAX(a, b) (((b) > (a)) ? (b) : (a))
// Number of digit bins each thread is responsible for.
#define BINS_TRACKED_PER_THREAD 2
// Thread-group size (must match the [numthreads] attribute below).
#define BLOCK_THREADS 64
// 7-bit radix => 2^7 = 128 digit values.
#define RADIX_BITS 7
#define RADIX_DIGITS 128
#define LOG_PACKTING_RATIO 1
// COUNTER_LANES = 2^LOG_COUNTER_LANES = 64 lanes of packed counters.
#define LOG_COUNTER_LANES 6
#define COUNTER_LANES 64
// Mask for one packed 16-bit counter within a 32-bit groupshared word.
#define MASK ((1 << 16) - 1)
// Shared scratch: 8192 uints = 32 KiB, the D3D12 groupshared limit per group.
groupshared uint TempStorage[8192];
// Minimal compute shader reproducing a crash inside the NVIDIA driver
// (nvwgf2umx.dll) during CreateComputePipelineState when the shader is
// compiled with optimizations enabled; it runs correctly when built with /Od.
// Each thread extracts BINS_TRACKED_PER_THREAD (2) packed 16-bit counters
// from groupshared memory, then writes them back at a different layout.
// NOTE(review): TempStorage is read here before anything visible writes it —
// presumably fine for a repro, but the values read are undefined.
[numthreads(BLOCK_THREADS, 1, 1)]
void main(uint blockIdx : SV_GroupID, uint threadIdx : SV_GroupThreadID)
{
// Per-thread copies of the digit prefixes unpacked from TempStorage.
uint exDigitPrefix[BINS_TRACKED_PER_THREAD];
{
[unroll(BINS_TRACKED_PER_THREAD)]
for (uint track = 0; track < BINS_TRACKED_PER_THREAD; ++track) {
// This thread owns bins threadIdx*2 and threadIdx*2 + 1 (max 127).
uint binIdx = (threadIdx * BINS_TRACKED_PER_THREAD) + track;
// BLOCK_THREADS (64) != RADIX_DIGITS (128), so the left operand is a
// compile-time false; binIdx < 128 always holds given threadIdx < 64,
// so the guard is always taken in this configuration.
if ((BLOCK_THREADS == RADIX_DIGITS) || (binIdx < RADIX_DIGITS)) {
// Low 6 bits select the counter lane; the remaining bit selects
// which 16-bit half of the packed 32-bit word to extract.
uint counterLane = binIdx & (COUNTER_LANES - 1);
uint subCounter = binIdx >> (LOG_COUNTER_LANES);
// Shift by 0 or 16, then mask off one packed 16-bit counter.
exDigitPrefix[track] = (TempStorage[counterLane * BLOCK_THREADS] >> (16 * subCounter)) & MASK;
}
}
}
// All reads above must complete before the writes below reuse TempStorage.
GroupMemoryBarrierWithGroupSync();
{
[unroll(BINS_TRACKED_PER_THREAD)]
for (uint track = 0; track < BINS_TRACKED_PER_THREAD; ++track) {
uint binIdx = (threadIdx * BINS_TRACKED_PER_THREAD) + track;
// Same always-true guard as in the first loop.
if ((BLOCK_THREADS == RADIX_DIGITS) || (binIdx < RADIX_DIGITS))
// Scatter the unpacked prefixes back, one uint per bin.
TempStorage[binIdx] = exDigitPrefix[track];
}
}
GroupMemoryBarrierWithGroupSync();
}
User contributions licensed under CC BY-SA 3.0