What is the problem with my generated SPIR-V code, and how can I verify it?


I have some generated SPIR-V code which I want to use with the Vulkan API, but when trying to create the pipeline with vkCreateComputePipelines I get: Exception thrown at 0x00007FFB68D933CB (nvoglv64.dll) in vulkanCompute.exe: 0xC0000005: Access violation reading location 0x0000000000000008.

The API calls should be fine, because the same code works with a shader compiled with glslangValidator. Therefore I assume that the generated SPIR-V code must be ill-formed somehow.

I've checked the SPIR-V code with the validator tool from Khronos, using spirv-val --target-env vulkan1.1 mainV.spv, which exited without error. However, it is known that this tool is still incomplete.
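For completeness, the same validation can also be run in-process against the exact words that are later handed to vkCreateShaderModule. A minimal sketch using the SPIRV-Tools C API (assuming the SPIRV-Tools headers and library are installed):

#include <spirv-tools/libspirv.h>

#include <cstdint>
#include <vector>

// Validates a SPIR-V module against the Vulkan 1.1 environment and prints
// the first diagnostic to stderr on failure.
bool validateSpirv(const std::vector<uint32_t> &words)
{
  spv_context context = spvContextCreate(SPV_ENV_VULKAN_1_1);
  spv_diagnostic diagnostic = nullptr;
  const spv_result_t result = spvValidateBinary(context, words.data(), words.size(), &diagnostic);
  if (result != SPV_SUCCESS)
    spvDiagnosticPrint(diagnostic);
  spvDiagnosticDestroy(diagnostic);
  spvContextDestroy(context);
  return result == SPV_SUCCESS;
}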

I've also tried to compile my SPIR-V code with the Radeon GPU Analyzer, which is also available online in the Shader Playground. This tool throws the error Error: Error: internal error: Bil::BilInstructionConvert::Create(60) Code Not Tested!, which is not really helpful but reinforces the assumption that the code is malformed.

The SPIR-V code is unfortunately too long to post here, but it is available through the Shader Playground link.

Does anyone know what the problem is with my setup, or have an idea how I can verify my SPIR-V code more thoroughly, without checking all 700 lines of code manually?
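One more idea I have not fully explored: decompiling the module back to GLSL with SPIRV-Cross (or disassembling it with spirv-dis) and reading the result can turn a structural problem into a readable error instead of a driver crash. A rough sketch, assuming the SPIRV-Cross library is available:

#include <spirv_cross/spirv_glsl.hpp>

#include <cstdint>
#include <iostream>
#include <vector>

// Tries to decompile the SPIR-V words back to GLSL; SPIRV-Cross walks the
// whole module, so problematic constructs usually surface as an exception
// with a message rather than an access violation in the driver.
void dumpAsGlsl(std::vector<uint32_t> words)
{
  try
  {
    spirv_cross::CompilerGLSL compiler(std::move(words));
    std::cout << compiler.compile() << std::endl;
  }
  catch (const std::exception &err)
  {
    std::cout << "SPIRV-Cross failed: " << err.what() << std::endl;
  }
}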

I don't think the problem is there, but here is the C++ host code anyway:

#include "vulkan/vulkan.hpp"

#include <iostream>
#include <fstream>
#include <vector>
#define BAIL_ON_BAD_RESULT(result)                             \
if (VK_SUCCESS != (result))                                  \
{                                                            \
  fprintf(stderr, "Failure at %u %s\n", __LINE__, __FILE__); \
  exit(-1);                                                  \
}

VkResult vkGetBestComputeQueueNPH(vk::PhysicalDevice &physicalDevice, uint32_t &queueFamilyIndex)
{

  auto properties = physicalDevice.getQueueFamilyProperties();
  int i = 0;
  for (auto prop : properties)
  {
    vk::QueueFlags maskedFlags = (~(vk::QueueFlagBits::eTransfer | vk::QueueFlagBits::eSparseBinding) & prop.queueFlags);
    if (!(vk::QueueFlagBits::eGraphics & maskedFlags) && (vk::QueueFlagBits::eCompute & maskedFlags))
    {
      queueFamilyIndex = i;
      return VK_SUCCESS;
    }
    i++;
  }
  i = 0;
  for (auto prop : properties)
  {
    vk::QueueFlags maskedFlags = (~(vk::QueueFlagBits::eTransfer | vk::QueueFlagBits::eSparseBinding) & prop.queueFlags);
    if (vk::QueueFlagBits::eCompute & maskedFlags)
    {
      queueFamilyIndex = i;
      return VK_SUCCESS;
    }
    i++;
  }
  return VK_ERROR_INITIALIZATION_FAILED;
}

int main(int argc, const char *const argv[])
{
  (void)argc;
  (void)argv;

  try
  {

    // initialize the vk::ApplicationInfo structure
    vk::ApplicationInfo applicationInfo("VecAdd", 1, "Vulkan.hpp", 1, VK_API_VERSION_1_1);
    
    // initialize the vk::InstanceCreateInfo
    std::vector<const char *> layers = {
      "VK_LAYER_LUNARG_api_dump",
      "VK_LAYER_KHRONOS_validation"
    };
    vk::InstanceCreateInfo instanceCreateInfo({}, &applicationInfo, static_cast<uint32_t>(layers.size()), layers.data());

    // create a UniqueInstance
    vk::UniqueInstance instance = vk::createInstanceUnique(instanceCreateInfo);

    auto physicalDevices = instance->enumeratePhysicalDevices();

    for (auto &physicalDevice : physicalDevices)
    {
    
      auto props = physicalDevice.getProperties();

      // get the QueueFamilyProperties of the first PhysicalDevice
      std::vector<vk::QueueFamilyProperties> queueFamilyProperties = physicalDevice.getQueueFamilyProperties();
      uint32_t computeQueueFamilyIndex = 0;

      // get the best index into queueFamiliyProperties which supports compute and stuff
      BAIL_ON_BAD_RESULT(vkGetBestComputeQueueNPH(physicalDevice, computeQueueFamilyIndex));

      std::vector<const char *> extensions = {"VK_EXT_external_memory_host", "VK_KHR_shader_float16_int8"};
      // create a UniqueDevice
      float queuePriority = 0.0f;

      vk::DeviceQueueCreateInfo deviceQueueCreateInfo(vk::DeviceQueueCreateFlags(), static_cast<uint32_t>(computeQueueFamilyIndex), 1, &queuePriority);
      vk::StructureChain<vk::DeviceCreateInfo, vk::PhysicalDeviceFeatures2, vk::PhysicalDeviceShaderFloat16Int8Features> createDeviceInfo = {
        vk::DeviceCreateInfo(vk::DeviceCreateFlags(), 1, &deviceQueueCreateInfo, 0, nullptr, static_cast<uint32_t>(extensions.size()), extensions.data()),
        vk::PhysicalDeviceFeatures2(),
        vk::PhysicalDeviceShaderFloat16Int8Features()
      };
      createDeviceInfo.get<vk::PhysicalDeviceFeatures2>().features.setShaderInt64(true);
      createDeviceInfo.get<vk::PhysicalDeviceShaderFloat16Int8Features>().setShaderInt8(true);
      vk::UniqueDevice device = physicalDevice.createDeviceUnique(createDeviceInfo.get<vk::DeviceCreateInfo>());

      auto memoryProperties2 = physicalDevice.getMemoryProperties2();

      vk::PhysicalDeviceMemoryProperties const &memoryProperties = memoryProperties2.memoryProperties;

      const int32_t bufferLength = 16384;

      const uint32_t bufferSize = sizeof(int32_t) * bufferLength;

      // we are going to need two buffers from this one memory
      const vk::DeviceSize memorySize = bufferSize * 3;

      // set memoryTypeIndex to an invalid entry in the properties.memoryTypes array
      uint32_t memoryTypeIndex = VK_MAX_MEMORY_TYPES;

      const vk::MemoryPropertyFlags wantedFlags = vk::MemoryPropertyFlagBits::eHostVisible | vk::MemoryPropertyFlagBits::eHostCoherent;
      for (uint32_t k = 0; k < memoryProperties.memoryTypeCount; k++)
      {
        // require both host-visible and host-coherent, not just one of the two bits
        if ((memoryProperties.memoryTypes[k].propertyFlags & wantedFlags) == wantedFlags &&
            (memorySize < memoryProperties.memoryHeaps[memoryProperties.memoryTypes[k].heapIndex].size))
        {
          memoryTypeIndex = k;
          std::cout << "found memory " << memoryTypeIndex + 1 << " out of " << memoryProperties.memoryTypeCount << std::endl;
          break;
        }
      }

      BAIL_ON_BAD_RESULT(memoryTypeIndex == VK_MAX_MEMORY_TYPES ? VK_ERROR_OUT_OF_HOST_MEMORY : VK_SUCCESS);

      auto memory = device->allocateMemoryUnique(vk::MemoryAllocateInfo(memorySize, memoryTypeIndex));
      auto in_buffer = device->createBufferUnique(vk::BufferCreateInfo(vk::BufferCreateFlags(), bufferSize, vk::BufferUsageFlagBits::eStorageBuffer, vk::SharingMode::eExclusive));
      device->bindBufferMemory(in_buffer.get(), memory.get(), 0);
      // create a DescriptorSetLayout
      std::vector<vk::DescriptorSetLayoutBinding> descriptorSetLayoutBinding{
          {0, vk::DescriptorType::eStorageBuffer, 1, vk::ShaderStageFlagBits::eCompute}};
      vk::UniqueDescriptorSetLayout descriptorSetLayout = device->createDescriptorSetLayoutUnique(vk::DescriptorSetLayoutCreateInfo(vk::DescriptorSetLayoutCreateFlags(), static_cast<uint32_t>(descriptorSetLayoutBinding.size()), descriptorSetLayoutBinding.data()));

      std::cout << "Memory bound" << std::endl;

      std::ifstream myfile;
      myfile.open("shaders/MainV.spv", std::ios::ate | std::ios::binary);

      if (!myfile.is_open())
      {
        std::cout << "File not found" << std::endl;
        return EXIT_FAILURE;
      }

      auto size = myfile.tellg();
      std::vector<unsigned int> shader_spv(size / sizeof(unsigned int));
      myfile.seekg(0);
      myfile.read(reinterpret_cast<char *>(shader_spv.data()), size);
      myfile.close();

      std::cout << "Shader size: " << shader_spv.size() << std::endl;

      auto shaderModule = device->createShaderModuleUnique(vk::ShaderModuleCreateInfo(vk::ShaderModuleCreateFlags(), shader_spv.size() * sizeof(unsigned int), shader_spv.data()));

      // create a PipelineLayout using that DescriptorSetLayout
      vk::UniquePipelineLayout pipelineLayout = device->createPipelineLayoutUnique(vk::PipelineLayoutCreateInfo(vk::PipelineLayoutCreateFlags(), 1, &descriptorSetLayout.get()));

      vk::ComputePipelineCreateInfo computePipelineInfo(
          vk::PipelineCreateFlags(),
          vk::PipelineShaderStageCreateInfo(
              vk::PipelineShaderStageCreateFlags(),
              vk::ShaderStageFlagBits::eCompute,
              shaderModule.get(),
              "_ZTSZZ4mainENK3$_0clERN2cl4sycl7handlerEE6VecAdd"),
          pipelineLayout.get());

      auto pipeline = device->createComputePipelineUnique(nullptr, computePipelineInfo);

      auto descriptorPoolSize = vk::DescriptorPoolSize(vk::DescriptorType::eStorageBuffer, 2);
      auto descriptorPool = device->createDescriptorPool(vk::DescriptorPoolCreateInfo(vk::DescriptorPoolCreateFlags(), 1, 1, &descriptorPoolSize));

      auto commandPool = device->createCommandPoolUnique(vk::CommandPoolCreateInfo(vk::CommandPoolCreateFlags(), computeQueueFamilyIndex));

      auto commandBuffer = std::move(device->allocateCommandBuffersUnique(vk::CommandBufferAllocateInfo(commandPool.get(), vk::CommandBufferLevel::ePrimary, 1)).front());

      commandBuffer->begin(vk::CommandBufferBeginInfo(vk::CommandBufferUsageFlags(vk::CommandBufferUsageFlagBits::eOneTimeSubmit)));

      commandBuffer->bindPipeline(vk::PipelineBindPoint::eCompute, pipeline.get());
    
      commandBuffer->dispatch(bufferSize / sizeof(int32_t), 1, 1);

      commandBuffer->end();

      auto queue = device->getQueue(computeQueueFamilyIndex, 0);

      vk::SubmitInfo submitInfo(0, nullptr, nullptr, 1, &commandBuffer.get(), 0, nullptr);
      queue.submit(1, &submitInfo, vk::Fence());

      queue.waitIdle();

      printf("all done\nWoohooo!!!\n\n");
    }
  }
  catch (vk::SystemError &err)
  {
    std::cout << "vk::SystemError: " << err.what() << std::endl;
    exit(-1);
  }
  catch (std::runtime_error &err)
  {
    std::cout << "std::runtime_error: " << err.what() << std::endl;
    exit(-1);
  }
  catch (...)
  {
    std::cout << "unknown error\n";
    exit(-1);
  }

  return EXIT_SUCCESS;
}
vulkan
spir-v
asked on Stack Overflow Jul 3, 2020 by tade

1 Answer


Well, after checking line by line, it turned out that the problem occurs when working with pointers to pointers. To me it is still not clear from the specification that this is not allowed, but it is understandable that it cannot work with logical pointers. It is still strange that the validator does not flag this, and that compiling the SPIR-V code crashes instead of producing a clear error message. So in the end, it was the shader code that was wrong.
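For anyone running into the same thing: the pattern can be flagged mechanically without reading the whole disassembly. A small sketch that scans the binary for OpTypePointer instructions whose pointee is itself a pointer type (assuming the module is loaded into a std::vector<uint32_t> as in the question):

#include <cstdint>
#include <iostream>
#include <unordered_set>
#include <vector>

// Walks the SPIR-V instruction stream (it starts at word 5, after the header)
// and reports every OpTypePointer whose pointee type is itself a pointer,
// i.e. the pointer-to-pointer pattern that is not usable with the Logical
// addressing model.
void reportPointersToPointers(const std::vector<uint32_t> &words)
{
  const uint32_t OpTypePointer = 32; // opcode from the SPIR-V specification
  std::unordered_set<uint32_t> pointerTypeIds;

  // First pass: collect the result ids of all pointer types.
  for (size_t i = 5; i < words.size();)
  {
    const uint32_t wordCount = words[i] >> 16;
    if (wordCount == 0 || i + wordCount > words.size())
      break; // malformed or truncated stream
    if ((words[i] & 0xFFFFu) == OpTypePointer)
      pointerTypeIds.insert(words[i + 1]); // word 1 is the Result <id>
    i += wordCount;
  }

  // Second pass: flag pointer types whose pointee (word 3) is also a pointer.
  for (size_t i = 5; i < words.size();)
  {
    const uint32_t wordCount = words[i] >> 16;
    if (wordCount == 0 || i + wordCount > words.size())
      break;
    if ((words[i] & 0xFFFFu) == OpTypePointer && pointerTypeIds.count(words[i + 3]) > 0)
      std::cout << "pointer to pointer type: %" << words[i + 1] << std::endl;
    i += wordCount;
  }
}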

answered on Stack Overflow Jul 8, 2020 by tade
