We chain in vkPhysicalDeviceRayTracingPipelineFeatures because we're using ray tracing pipelines and not ray queries. We also need vkPhysicalDeviceBufferAddressFeatures because KHR_raytracing fundamentally needs buffer device addresses all over the place, for almost everything. Finally, deviceDescriptorIndexingFeature (pretty much bindless...) is a staple of any ray tracing setup.
std::vector<const char*> enabledDeviceExtensions = { VK_KHR_SWAPCHAIN_EXTENSION_NAME,
VK_KHR_MAINTENANCE3_EXTENSION_NAME,
VK_EXT_DESCRIPTOR_INDEXING_EXTENSION_NAME,
VK_KHR_GET_MEMORY_REQUIREMENTS_2_EXTENSION_NAME,
VK_KHR_DESCRIPTOR_UPDATE_TEMPLATE_EXTENSION_NAME,
VK_NV_RAY_TRACING_EXTENSION_NAME };
VkPhysicalDeviceFeatures deviceFeatures = {};
deviceFeatures.samplerAnisotropy = VK_TRUE;
deviceFeatures.depthClamp = VK_TRUE;
deviceFeatures.geometryShader = VK_TRUE;
deviceFeatures.shaderStorageImageExtendedFormats = VK_TRUE;
deviceFeatures.tessellationShader = VK_TRUE;
deviceFeatures.vertexPipelineStoresAndAtomics = VK_TRUE;
deviceFeatures.fragmentStoresAndAtomics = VK_TRUE;
VkPhysicalDeviceDescriptorIndexingFeaturesEXT deviceDescriptorIndexingFeature = {};
deviceDescriptorIndexingFeature.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_INDEXING_FEATURES_EXT;
deviceDescriptorIndexingFeature.descriptorBindingVariableDescriptorCount = VK_TRUE;
deviceDescriptorIndexingFeature.runtimeDescriptorArray = VK_TRUE;
deviceDescriptorIndexingFeature.shaderSampledImageArrayNonUniformIndexing = VK_TRUE;
deviceDescriptorIndexingFeature.shaderStorageBufferArrayNonUniformIndexing = VK_TRUE;
VkDeviceCreateInfo deviceCreateInfo = {};
deviceCreateInfo.sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO;
deviceCreateInfo.pNext = &deviceDescriptorIndexingFeature;
deviceCreateInfo.queueCreateInfoCount = 1;
deviceCreateInfo.pQueueCreateInfos = queueCreateInfo;
deviceCreateInfo.pEnabledFeatures = &deviceFeatures;
std::vector<const char*> enabledDeviceExtensions = { VK_KHR_SWAPCHAIN_EXTENSION_NAME,
VK_KHR_MAINTENANCE3_EXTENSION_NAME,
VK_EXT_DESCRIPTOR_INDEXING_EXTENSION_NAME,
VK_KHR_DESCRIPTOR_UPDATE_TEMPLATE_EXTENSION_NAME,
VK_KHR_GET_MEMORY_REQUIREMENTS_2_EXTENSION_NAME,
VK_KHR_SPIRV_1_4_EXTENSION_NAME,
VK_KHR_SHADER_FLOAT_CONTROLS_EXTENSION_NAME,
VK_KHR_ACCELERATION_STRUCTURE_EXTENSION_NAME,
VK_KHR_RAY_TRACING_PIPELINE_EXTENSION_NAME,
VK_KHR_PIPELINE_LIBRARY_EXTENSION_NAME,
VK_KHR_DEFERRED_HOST_OPERATIONS_EXTENSION_NAME,
VK_KHR_BUFFER_DEVICE_ADDRESS_EXTENSION_NAME };
VkPhysicalDeviceFeatures deviceFeatures = {};
deviceFeatures.samplerAnisotropy = VK_TRUE;
deviceFeatures.depthClamp = VK_TRUE;
deviceFeatures.geometryShader = VK_TRUE;
deviceFeatures.shaderStorageImageExtendedFormats = VK_TRUE;
deviceFeatures.tessellationShader = VK_TRUE;
deviceFeatures.vertexPipelineStoresAndAtomics = VK_TRUE;
deviceFeatures.fragmentStoresAndAtomics = VK_TRUE;
VkPhysicalDeviceAccelerationStructureFeaturesKHR vkPhysicalDeviceAccelerationStructureFeatures = {};
vkPhysicalDeviceAccelerationStructureFeatures.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ACCELERATION_STRUCTURE_FEATURES_KHR;
vkPhysicalDeviceAccelerationStructureFeatures.accelerationStructure = VK_TRUE;
vkPhysicalDeviceAccelerationStructureFeatures.accelerationStructureCaptureReplay = VK_FALSE;
vkPhysicalDeviceAccelerationStructureFeatures.accelerationStructureHostCommands = VK_FALSE;
vkPhysicalDeviceAccelerationStructureFeatures.accelerationStructureIndirectBuild = VK_FALSE;
vkPhysicalDeviceAccelerationStructureFeatures.descriptorBindingAccelerationStructureUpdateAfterBind = VK_TRUE;
VkPhysicalDeviceRayTracingPipelineFeaturesKHR vkPhysicalDeviceRayTracingPipelineFeatures = {};
vkPhysicalDeviceRayTracingPipelineFeatures.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_RAY_TRACING_PIPELINE_FEATURES_KHR;
vkPhysicalDeviceRayTracingPipelineFeatures.pNext = &vkPhysicalDeviceAccelerationStructureFeatures;
vkPhysicalDeviceRayTracingPipelineFeatures.rayTracingPipeline = VK_TRUE;
vkPhysicalDeviceRayTracingPipelineFeatures.rayTracingPipelineShaderGroupHandleCaptureReplay = VK_FALSE;
vkPhysicalDeviceRayTracingPipelineFeatures.rayTracingPipelineShaderGroupHandleCaptureReplayMixed = VK_FALSE;
vkPhysicalDeviceRayTracingPipelineFeatures.rayTracingPipelineTraceRaysIndirect = VK_FALSE;
vkPhysicalDeviceRayTracingPipelineFeatures.rayTraversalPrimitiveCulling = VK_FALSE;
VkPhysicalDeviceBufferDeviceAddressFeatures vkPhysicalDeviceBufferAddressFeatures = {};
vkPhysicalDeviceBufferAddressFeatures.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_BUFFER_DEVICE_ADDRESS_FEATURES;
vkPhysicalDeviceBufferAddressFeatures.pNext = &vkPhysicalDeviceRayTracingPipelineFeatures;
vkPhysicalDeviceBufferAddressFeatures.bufferDeviceAddress = VK_TRUE;
vkPhysicalDeviceBufferAddressFeatures.bufferDeviceAddressCaptureReplay = VK_FALSE;
vkPhysicalDeviceBufferAddressFeatures.bufferDeviceAddressMultiDevice = VK_FALSE;
VkPhysicalDeviceDescriptorIndexingFeaturesEXT deviceDescriptorIndexingFeature = {};
deviceDescriptorIndexingFeature.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_INDEXING_FEATURES_EXT;
deviceDescriptorIndexingFeature.pNext = &vkPhysicalDeviceBufferAddressFeatures;
deviceDescriptorIndexingFeature.descriptorBindingVariableDescriptorCount = VK_TRUE;
deviceDescriptorIndexingFeature.runtimeDescriptorArray = VK_TRUE;
deviceDescriptorIndexingFeature.shaderSampledImageArrayNonUniformIndexing = VK_TRUE;
deviceDescriptorIndexingFeature.shaderStorageBufferArrayNonUniformIndexing = VK_TRUE;
VkDeviceCreateInfo deviceCreateInfo = {};
deviceCreateInfo.sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO;
deviceCreateInfo.pNext = &deviceDescriptorIndexingFeature;
deviceCreateInfo.queueCreateInfoCount = 1;
deviceCreateInfo.pQueueCreateInfos = queueCreateInfo;
deviceCreateInfo.pEnabledFeatures = &deviceFeatures;
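Before blindly turning these features on, it's worth querying support first. Here's a minimal sketch (assuming a valid physicalDevice handle; not part of the original code) that chains the same structs through VkPhysicalDeviceFeatures2 and checks the returned booleans:
VkPhysicalDeviceAccelerationStructureFeaturesKHR asFeatures = {};
asFeatures.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ACCELERATION_STRUCTURE_FEATURES_KHR;
VkPhysicalDeviceRayTracingPipelineFeaturesKHR rtFeatures = {};
rtFeatures.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_RAY_TRACING_PIPELINE_FEATURES_KHR;
rtFeatures.pNext = &asFeatures;
VkPhysicalDeviceBufferDeviceAddressFeatures bdaFeatures = {};
bdaFeatures.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_BUFFER_DEVICE_ADDRESS_FEATURES;
bdaFeatures.pNext = &rtFeatures;
VkPhysicalDeviceFeatures2 features2 = {};
features2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2;
features2.pNext = &bdaFeatures;
vkGetPhysicalDeviceFeatures2(physicalDevice, &features2); // the driver fills in the chained structs
bool rtUsable = rtFeatures.rayTracingPipeline && asFeatures.accelerationStructure && bdaFeatures.bufferDeviceAddress;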
Both wrappers load their extension function pointers via ::Enable(...), and you can check whether ray tracing is enabled hereafter with ::Enabled(). Please see the inline comments regarding the queried maximum ray recursion depth.
namespace NVIDIA_RTX
{
class RTXInstance
{
private:
static bool rtxEnabled;
public:
static PFN_vkCreateAccelerationStructureNV vkCreateAccelerationStructureNV;
static PFN_vkDestroyAccelerationStructureNV vkDestroyAccelerationStructureNV;
static PFN_vkGetAccelerationStructureMemoryRequirementsNV vkGetAccelerationStructureMemoryRequirementsNV;
static PFN_vkCmdCopyAccelerationStructureNV vkCmdCopyAccelerationStructureNV;
static PFN_vkBindAccelerationStructureMemoryNV vkBindAccelerationStructureMemoryNV;
static PFN_vkCmdBuildAccelerationStructureNV vkCmdBuildAccelerationStructureNV;
static PFN_vkCmdTraceRaysNV vkCmdTraceRaysNV;
static PFN_vkGetRayTracingShaderGroupHandlesNV vkGetRayTracingShaderGroupHandlesNV;
static PFN_vkCreateRayTracingPipelinesNV vkCreateRayTracingPipelinesNV;
static PFN_vkGetAccelerationStructureHandleNV vkGetAccelerationStructureHandleNV;
static VkPhysicalDeviceRayTracingPropertiesNV raytracingProperties;
static void Enable(InstanceClass & inpInstance);
static bool Enabled();
};
// Just a bunch of forward declarations... as they're part of this namespace
class RTXPipelineStateClass;
class RTXAccelStruct;
class RTXScene;
class RTXTracelet;
}
...
bool RTXInstance::rtxEnabled = false;
PFN_vkCreateAccelerationStructureNV RTXInstance::vkCreateAccelerationStructureNV = VK_NULL_HANDLE;
PFN_vkDestroyAccelerationStructureNV RTXInstance::vkDestroyAccelerationStructureNV = VK_NULL_HANDLE;
PFN_vkGetAccelerationStructureMemoryRequirementsNV RTXInstance::vkGetAccelerationStructureMemoryRequirementsNV = VK_NULL_HANDLE;
PFN_vkCmdCopyAccelerationStructureNV RTXInstance::vkCmdCopyAccelerationStructureNV = VK_NULL_HANDLE;
PFN_vkBindAccelerationStructureMemoryNV RTXInstance::vkBindAccelerationStructureMemoryNV = VK_NULL_HANDLE;
PFN_vkCmdBuildAccelerationStructureNV RTXInstance::vkCmdBuildAccelerationStructureNV = VK_NULL_HANDLE;
PFN_vkCmdTraceRaysNV RTXInstance::vkCmdTraceRaysNV = VK_NULL_HANDLE;
PFN_vkGetRayTracingShaderGroupHandlesNV RTXInstance::vkGetRayTracingShaderGroupHandlesNV = VK_NULL_HANDLE;
PFN_vkCreateRayTracingPipelinesNV RTXInstance::vkCreateRayTracingPipelinesNV = VK_NULL_HANDLE;
PFN_vkGetAccelerationStructureHandleNV RTXInstance::vkGetAccelerationStructureHandleNV = VK_NULL_HANDLE;
VkPhysicalDeviceRayTracingPropertiesNV RTXInstance::raytracingProperties = { };
...
void NVIDIA_RTX::RTXInstance::Enable(InstanceClass & inpInstance)
{
vkCreateAccelerationStructureNV = reinterpret_cast<PFN_vkCreateAccelerationStructureNV>(vkGetDeviceProcAddr(inpInstance.device, "vkCreateAccelerationStructureNV"));
vkDestroyAccelerationStructureNV = reinterpret_cast<PFN_vkDestroyAccelerationStructureNV>(vkGetDeviceProcAddr(inpInstance.device, "vkDestroyAccelerationStructureNV"));
vkGetAccelerationStructureMemoryRequirementsNV = reinterpret_cast<PFN_vkGetAccelerationStructureMemoryRequirementsNV>(vkGetDeviceProcAddr(inpInstance.device, "vkGetAccelerationStructureMemoryRequirementsNV"));
vkCmdCopyAccelerationStructureNV = reinterpret_cast<PFN_vkCmdCopyAccelerationStructureNV>(vkGetDeviceProcAddr(inpInstance.device, "vkCmdCopyAccelerationStructureNV"));
vkBindAccelerationStructureMemoryNV = reinterpret_cast<PFN_vkBindAccelerationStructureMemoryNV>(vkGetDeviceProcAddr(inpInstance.device, "vkBindAccelerationStructureMemoryNV"));
vkCmdBuildAccelerationStructureNV = reinterpret_cast<PFN_vkCmdBuildAccelerationStructureNV>(vkGetDeviceProcAddr(inpInstance.device, "vkCmdBuildAccelerationStructureNV"));
vkCmdTraceRaysNV = reinterpret_cast<PFN_vkCmdTraceRaysNV>(vkGetDeviceProcAddr(inpInstance.device, "vkCmdTraceRaysNV"));
vkGetRayTracingShaderGroupHandlesNV = reinterpret_cast<PFN_vkGetRayTracingShaderGroupHandlesNV>(vkGetDeviceProcAddr(inpInstance.device, "vkGetRayTracingShaderGroupHandlesNV"));
vkCreateRayTracingPipelinesNV = reinterpret_cast<PFN_vkCreateRayTracingPipelinesNV>(vkGetDeviceProcAddr(inpInstance.device, "vkCreateRayTracingPipelinesNV"));
vkGetAccelerationStructureHandleNV = reinterpret_cast<PFN_vkGetAccelerationStructureHandleNV>(vkGetDeviceProcAddr(inpInstance.device, "vkGetAccelerationStructureHandleNV"));
raytracingProperties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_RAY_TRACING_PROPERTIES_NV;
raytracingProperties.maxRecursionDepth = 8; /* This can be as high as 31 (all nVidia RTX cards) */
raytracingProperties.shaderGroupHandleSize = 0;
VkPhysicalDeviceProperties2 props = {};
props.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2;
props.pNext = &raytracingProperties;
props.properties = { };
vkGetPhysicalDeviceProperties2(inpInstance.physicalDevice, &props);
rtxEnabled = true;
}
bool NVIDIA_RTX::RTXInstance::Enabled()
{
return rtxEnabled;
}
namespace KHR_RT
{
class RTInstance
{
private:
static bool rtEnabled;
public:
static PFN_vkCreateAccelerationStructureKHR vkCreateAccelerationStructureKHR;
static PFN_vkDestroyAccelerationStructureKHR vkDestroyAccelerationStructureKHR;
static PFN_vkCmdBuildAccelerationStructuresKHR vkCmdBuildAccelerationStructuresKHR;
static PFN_vkCmdTraceRaysKHR vkCmdTraceRaysKHR;
static PFN_vkGetBufferDeviceAddressKHR vkGetBufferDeviceAddressKHR;
static PFN_vkCreateRayTracingPipelinesKHR vkCreateRayTracingPipelinesKHR;
static PFN_vkGetAccelerationStructureBuildSizesKHR vkGetAccelerationStructureBuildSizesKHR;
static PFN_vkGetAccelerationStructureDeviceAddressKHR vkGetAccelerationStructureDeviceAddressKHR;
static PFN_vkGetRayTracingShaderGroupHandlesKHR vkGetRayTracingShaderGroupHandlesKHR;
static VkPhysicalDeviceRayTracingPipelinePropertiesKHR raytracingPipelineProperties;
static void Enable(InstanceClass & inpInstance);
static bool Enabled();
};
class RTPipelineStateClass;
class RTAccelStruct;
class RTScene;
class RTTracelet;
}
...
bool RTInstance::rtEnabled = false;
PFN_vkCreateAccelerationStructureKHR RTInstance::vkCreateAccelerationStructureKHR = VK_NULL_HANDLE;
PFN_vkDestroyAccelerationStructureKHR RTInstance::vkDestroyAccelerationStructureKHR = VK_NULL_HANDLE;
PFN_vkCmdBuildAccelerationStructuresKHR RTInstance::vkCmdBuildAccelerationStructuresKHR = VK_NULL_HANDLE;
PFN_vkCmdTraceRaysKHR RTInstance::vkCmdTraceRaysKHR = VK_NULL_HANDLE;
PFN_vkGetBufferDeviceAddressKHR RTInstance::vkGetBufferDeviceAddressKHR = VK_NULL_HANDLE;
PFN_vkCreateRayTracingPipelinesKHR RTInstance::vkCreateRayTracingPipelinesKHR = VK_NULL_HANDLE;
PFN_vkGetAccelerationStructureBuildSizesKHR RTInstance::vkGetAccelerationStructureBuildSizesKHR = VK_NULL_HANDLE;
PFN_vkGetAccelerationStructureDeviceAddressKHR RTInstance::vkGetAccelerationStructureDeviceAddressKHR = VK_NULL_HANDLE;
PFN_vkGetRayTracingShaderGroupHandlesKHR RTInstance::vkGetRayTracingShaderGroupHandlesKHR = VK_NULL_HANDLE;
VkPhysicalDeviceRayTracingPipelinePropertiesKHR RTInstance::raytracingPipelineProperties = { };
...
void KHR_RT::RTInstance::Enable(InstanceClass & inpInstance)
{
vkCreateAccelerationStructureKHR = reinterpret_cast<PFN_vkCreateAccelerationStructureKHR>(vkGetDeviceProcAddr(inpInstance.device, "vkCreateAccelerationStructureKHR"));
vkDestroyAccelerationStructureKHR = reinterpret_cast<PFN_vkDestroyAccelerationStructureKHR>(vkGetDeviceProcAddr(inpInstance.device, "vkDestroyAccelerationStructureKHR"));
vkCmdBuildAccelerationStructuresKHR = reinterpret_cast<PFN_vkCmdBuildAccelerationStructuresKHR>(vkGetDeviceProcAddr(inpInstance.device, "vkCmdBuildAccelerationStructuresKHR"));
vkCmdTraceRaysKHR = reinterpret_cast<PFN_vkCmdTraceRaysKHR>(vkGetDeviceProcAddr(inpInstance.device, "vkCmdTraceRaysKHR"));
vkGetBufferDeviceAddressKHR = reinterpret_cast<PFN_vkGetBufferDeviceAddressKHR>(vkGetDeviceProcAddr(inpInstance.device, "vkGetBufferDeviceAddressKHR"));
vkCreateRayTracingPipelinesKHR = reinterpret_cast<PFN_vkCreateRayTracingPipelinesKHR>(vkGetDeviceProcAddr(inpInstance.device, "vkCreateRayTracingPipelinesKHR"));
vkGetAccelerationStructureBuildSizesKHR = reinterpret_cast<PFN_vkGetAccelerationStructureBuildSizesKHR>(vkGetDeviceProcAddr(inpInstance.device, "vkGetAccelerationStructureBuildSizesKHR"));
vkGetAccelerationStructureDeviceAddressKHR = reinterpret_cast<PFN_vkGetAccelerationStructureDeviceAddressKHR>(vkGetDeviceProcAddr(inpInstance.device, "vkGetAccelerationStructureDeviceAddressKHR"));
vkGetRayTracingShaderGroupHandlesKHR = reinterpret_cast<PFN_vkGetRayTracingShaderGroupHandlesKHR>(vkGetDeviceProcAddr(inpInstance.device, "vkGetRayTracingShaderGroupHandlesKHR"));
raytracingPipelineProperties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_RAY_TRACING_PIPELINE_PROPERTIES_KHR;
raytracingPipelineProperties.maxRayRecursionDepth = 8; /* This can be anywhere from 31 (all nVidia RTX cards)... to just 1 (AMD devices with certain software stacks) */
raytracingPipelineProperties.shaderGroupHandleSize = 0;
VkPhysicalDeviceProperties2 props = {};
props.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2;
props.pNext = &raytracingPipelineProperties;
props.properties = { };
vkGetPhysicalDeviceProperties2(inpInstance.physicalDevice, &props);
rtEnabled = true;
}
bool KHR_RT::RTInstance::Enabled()
{
return rtEnabled;
}
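Since maxRayRecursionDepth can legally be as low as 1, here's a sketch (variable names hypothetical) of clamping the renderer's desired depth against the queried limit before creating the pipeline:
// Clamp our desired recursion depth to what the device actually reports
uint32_t desiredRecursionDepth = 8;
uint32_t usableRecursionDepth = std::min(desiredRecursionDepth, RTInstance::raytracingPipelineProperties.maxRayRecursionDepth); // std::min from <algorithm>
// later, at pipeline creation: rayPipelineInfo.maxPipelineRayRecursionDepth = usableRecursionDepth;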
The memory manager allocates addedChunkRef.mem here, now with the VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_BIT flag. In my case this is manual, as I usually like to roll my own :). If you're using VMA, its documentation on buffer device addresses might be of help; see also the sketch after the snippet below.
// Chunks are allocated using one of these usage flags (not used in this snippet)
enum MEMORY_MAP_TYPE
{
IMAGE = 0,
BUFFER,
RTX
};
...
allocInfo.allocationSize = chunkMaxSize;
result = vkAllocateMemory(inpDev, &allocInfo, nullptr, &addedChunkRef.mem);
if (result != VK_SUCCESS) throw std::runtime_error("Could not allocate memory chunk");
// Basically the RTX chunk is now anything that needs device address
enum MEMORY_MAP_TYPE
{
IMAGE = 0,
BUFFER,
DEV_ADDRESS
};
...
allocInfo.allocationSize = chunkMaxSize;
VkMemoryAllocateFlagsInfo memFlagInfo = {};
if (RTInstance::Enabled() && memoryMapType == DEV_ADDRESS) // Requested chunk is an RT-type chunk... requester needs device address
{
memFlagInfo.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_FLAGS_INFO_KHR;
memFlagInfo.flags = VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_BIT;
memFlagInfo.deviceMask = 0u;
allocInfo.pNext = &memFlagInfo;
}
result = vkAllocateMemory(inpDev, &allocInfo, nullptr, &addedChunkRef.mem);
if (result != VK_SUCCESS) throw std::runtime_error("Could not allocate memory chunk");
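For VMA users, a rough equivalent (a sketch, assuming a recent VMA release where this flag exists) is to opt the allocator in once and let it tag allocations for you:
VmaAllocatorCreateInfo allocatorInfo = {};
allocatorInfo.flags = VMA_ALLOCATOR_CREATE_BUFFER_DEVICE_ADDRESS_BIT; // allocator-wide opt-in
// ... instance / physicalDevice / device members filled as usual ...
// Any buffer then created with VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT gets
// VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_BIT applied to its backing memory automatically.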
Buffers are created through Buffer(...). If you call BufferClassWithStaging(...) on a BufferClass instance, it will create a GPU-side-only buffer using a temporary staging buffer; it will no longer have any host-side visibility.
We have new buffer usage modes, and USAGE_DEVICE_ADDRESS decides whether or not we'll ask the memory manager to place our buffer on memory objects dedicated to ray tracing (see the inline comments).
BufferClassWithStaging(...) is not used here and is only provided for reference; it doesn't change due to this transition.
enum MEMORY_USAGE
{
USAGE_SRC = VK_BUFFER_USAGE_TRANSFER_SRC_BIT,
USAGE_DST = VK_BUFFER_USAGE_TRANSFER_DST_BIT,
USAGE_VERT = VK_BUFFER_USAGE_VERTEX_BUFFER_BIT,
USAGE_INDEX = VK_BUFFER_USAGE_INDEX_BUFFER_BIT,
USAGE_SSBO = VK_BUFFER_USAGE_STORAGE_BUFFER_BIT,
USAGE_UBO = VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT,
USAGE_RTX = VK_BUFFER_USAGE_RAY_TRACING_BIT_NV
};
...
void BufferClass::Buffer(MEMORY_OPTIONS inpMemOpts, BUFFER_SHARING inpSharing, BUFFER_MODE inpMode, MEMORY_USAGE inpUsage, InstanceClass & inpInstance, void *inpData, VkDeviceSize inpDataSize)
{
if (totalDataSize > 0) RemovePast();
instanceRef = &inpInstance;
VkMemoryRequirements memReqs = {};
usage = inpUsage;
memOpts = inpMemOpts;
sharing = inpSharing;
mode = inpMode;
totalDataSize = inpDataSize;
VkBufferCreateInfo bufferInfo = {};
allocInfo = {};
allocInfo.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO;
allocInfo.allocationSize = 0;
allocInfo.memoryTypeIndex = 0;
if (mode == MODE_CREATE) bufferInfo.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO;
bufferInfo.size = totalDataSize;
bufferInfo.usage = inpUsage;
if (sharing == SHARING_EXCLUSIVE) bufferInfo.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
VkResult result = vkCreateBuffer(inpInstance.device, &bufferInfo, nullptr, &buffer);
if (result != VK_SUCCESS) { RemovePast(); throw std::runtime_error("Could not create buffer"); }
haveBuffer = true;
vkGetBufferMemoryRequirements(inpInstance.device, buffer, &memReqs);
getMemoryType(&inpInstance, memReqs.memoryTypeBits, memOpts, &allocInfo.memoryTypeIndex);
try
{
/* Please note the buffer flags passed */
subAllocId = AllocMem(inpInstance.device, allocInfo, memReqs, BUFFER, &memPtr, &offset, &len);
typeForSubAlloc = allocInfo.memoryTypeIndex;
haveSubAlloc = true;
}
catch (...)
{
RemovePast();
throw std::runtime_error("Could not allocate buffer memory");
}
result = vkBindBufferMemory(inpInstance.device, buffer, memPtr, offset);
if (result != VK_SUCCESS) { RemovePast(); throw std::runtime_error("Could not bind buffer to memory"); }
descriptor.buffer = buffer;
descriptor.offset = 0;
descriptor.range = totalDataSize;
if (inpData != nullptr)
{
try
{
UploadSubData(0, inpData, totalDataSize);
}
catch (...) { RemovePast(); throw std::runtime_error("Could not upload buffer data"); }
}
}
void BufferClass::BufferClassWithStaging(BUFFER_SHARING inpSharing, BUFFER_MODE inpMode, MEMORY_USAGE inpUsage, InstanceClass & inpInstance, void * inpData, VkDeviceSize inpDataSize)
{
totalDataSize = inpDataSize;
BufferClass bufferStaging(MEMORY_HOST_VISIBLE, inpSharing, inpMode, USAGE_SRC, inpInstance, inpData, totalDataSize);
Buffer(MEMORY_DEVICE_LOCAL, inpSharing, inpMode, USAGE_DST | inpUsage, inpInstance, nullptr, totalDataSize);
VkBufferCopy copyRegion = {};
try { BeginCommandBuffer(inpInstance); }
catch (...) { RemovePast(); throw std::runtime_error("Could not begin setup cmd buffer for buffer creation"); }
copyRegion.size = totalDataSize;
vkCmdCopyBuffer(cmdBuffers[0], bufferStaging.buffer, buffer, 1, &copyRegion);
try { EndCommandBuffer(); }
catch (...) { RemovePast(); throw std::runtime_error("Could not end setup cmd buffer for buffer creation"); }
try { SubmitCommandBuffer(); }
catch (...) { RemovePast(); throw std::runtime_error("Could not submit setup cmd buffer for buffer creation"); }
}
// We have new buffer usage modes
enum MEMORY_USAGE
{
USAGE_SRC = VK_BUFFER_USAGE_TRANSFER_SRC_BIT,
USAGE_DST = VK_BUFFER_USAGE_TRANSFER_DST_BIT,
USAGE_VERT = VK_BUFFER_USAGE_VERTEX_BUFFER_BIT,
USAGE_INDEX = VK_BUFFER_USAGE_INDEX_BUFFER_BIT,
USAGE_SSBO = VK_BUFFER_USAGE_STORAGE_BUFFER_BIT,
USAGE_UBO = VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT,
USAGE_ACCEL_STRUCT = VK_BUFFER_USAGE_ACCELERATION_STRUCTURE_STORAGE_BIT_KHR,
USAGE_SBT = VK_BUFFER_USAGE_SHADER_BINDING_TABLE_BIT_KHR,
USAGE_DEVICE_ADDRESS = VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT
};
...
void BufferClass::Buffer(MEMORY_OPTIONS inpMemOpts, BUFFER_SHARING inpSharing, BUFFER_MODE inpMode, MEMORY_USAGE inpUsage, InstanceClass & inpInstance, void *inpData, VkDeviceSize inpDataSize)
{
if (totalDataSize > 0) RemovePast();
instanceRef = &inpInstance;
VkMemoryRequirements memReqs = {};
usage = inpUsage;
memOpts = inpMemOpts;
sharing = inpSharing;
mode = inpMode;
totalDataSize = inpDataSize;
VkBufferCreateInfo bufferInfo = {};
allocInfo = {};
allocInfo.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO;
allocInfo.allocationSize = 0;
allocInfo.memoryTypeIndex = 0;
if (mode == MODE_CREATE) bufferInfo.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO;
bufferInfo.size = totalDataSize;
bufferInfo.usage = inpUsage;
if (sharing == SHARING_EXCLUSIVE) bufferInfo.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
VkResult result = vkCreateBuffer(inpInstance.device, &bufferInfo, nullptr, &buffer);
if (result != VK_SUCCESS) { RemovePast(); throw std::runtime_error("Could not create buffer"); }
haveBuffer = true;
vkGetBufferMemoryRequirements(inpInstance.device, buffer, &memReqs);
getMemoryType(&inpInstance, memReqs.memoryTypeBits, memOpts, &allocInfo.memoryTypeIndex);
try
{
/* Please note the buffer flags passed */
subAllocId = AllocMem(inpInstance.device, allocInfo, memReqs, ((usage & USAGE_DEVICE_ADDRESS) != 0) ? DEV_ADDRESS : BUFFER, &memPtr, &offset, &len);
typeForSubAlloc = allocInfo.memoryTypeIndex;
haveSubAlloc = true;
}
catch (...)
{
RemovePast();
throw std::runtime_error("Could not allocate buffer memory");
}
result = vkBindBufferMemory(inpInstance.device, buffer, memPtr, offset);
if (result != VK_SUCCESS) { RemovePast(); throw std::runtime_error("Could not bind buffer to memory"); }
descriptor.buffer = buffer;
descriptor.offset = 0;
descriptor.range = totalDataSize;
if (inpData != nullptr)
{
try
{
UploadSubData(0, inpData, totalDataSize);
}
catch (...) { RemovePast(); throw std::runtime_error("Could not upload buffer data"); }
}
}
void BufferClass::BufferClassWithStaging(BUFFER_SHARING inpSharing, BUFFER_MODE inpMode, MEMORY_USAGE inpUsage, InstanceClass & inpInstance, void * inpData, VkDeviceSize inpDataSize)
{
totalDataSize = inpDataSize;
BufferClass bufferStaging(MEMORY_HOST_VISIBLE, inpSharing, inpMode, USAGE_SRC, inpInstance, inpData, totalDataSize);
Buffer(MEMORY_DEVICE_LOCAL, inpSharing, inpMode, USAGE_DST | inpUsage, inpInstance, nullptr, totalDataSize);
VkBufferCopy copyRegion = {};
try { BeginCommandBuffer(inpInstance); }
catch (...) { RemovePast(); throw std::runtime_error("Could not begin setup cmd buffer for buffer creation"); }
copyRegion.size = totalDataSize;
vkCmdCopyBuffer(cmdBuffers[0], bufferStaging.buffer, buffer, 1, &copyRegion);
try { EndCommandBuffer(); }
catch (...) { RemovePast(); throw std::runtime_error("Could not end setup cmd buffer for buffer creation"); }
try { SubmitCommandBuffer(); }
catch (...) { RemovePast(); throw std::runtime_error("Could not submit setup cmd buffer for buffer creation"); }
}
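As a hypothetical usage example (variable names made up), a device-local vertex buffer that an acceleration structure build can read through its device address would be created like so:
BufferClass vertexBuffer;
vertexBuffer.BufferClassWithStaging(SHARING_EXCLUSIVE, MODE_CREATE,
    (MEMORY_USAGE)(USAGE_VERT | USAGE_DEVICE_ADDRESS), inpInstance, vertexData, vertexDataSize);
// Note: buffers fed to vkCmdBuildAccelerationStructuresKHR as geometry input also want
// VK_BUFFER_USAGE_ACCELERATION_STRUCTURE_BUILD_INPUT_READ_ONLY_BIT_KHR per the KHR spec.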
CreateAccelStruct(...) creates the acceleration structure, while SubmitCreateAccelStruct(...) is meant for updating it; a call to SubmitCreateAccelStruct(...) following CreateAccelStruct(...) should look familiar to you.
Our new version renames these to reflect their intended use more closely: CreateAccelStruct(...) and UpdateAccelStruct(...).
As you can see, GetScratchBufferSize(...) is gone and we no longer call the memory manager (i.e. AllocMem(...)) directly: one less class interfacing the memory manager directly!
Also, geometry is fed via VkAccelerationStructureGeometryKHR rather than VkGeometryNV, and the user-side instance data struct VkGeometryInstance is no longer needed since VkAccelerationStructureInstanceKHR is provided by Vulkan itself.
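For reference, the NV path needed a hand-rolled, user-side struct along these lines (this matches the layout the NV extension expects; early headers never declared it):
struct VkGeometryInstance
{
    float    transform[12];               // row-major 3x4 transform
    uint32_t instanceId : 24;             // becomes gl_InstanceCustomIndexNV
    uint32_t mask : 8;                    // visibility mask
    uint32_t instanceOffset : 24;         // hit group (SBT) offset
    uint32_t flags : 8;                   // VkGeometryInstanceFlagBitsNV
    uint64_t accelerationStructureHandle; // from vkGetAccelerationStructureHandleNV
};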
void NVIDIA_RTX::RTXAccelStruct::RemovePast()
{
if (!ptrToInstance) return;
if (hasAccelStruct) RTXInstance::vkDestroyAccelerationStructureNV(ptrToInstance->device, accelStruct, nullptr);
if (hasSubAlloc) FreeMem(subAllocId, RTX, typeForSubAlloc, ptrToInstance->device);
hasAccelStruct = false;
hasSubAlloc = false;
ptrToInstance = nullptr;
}
VkDeviceSize NVIDIA_RTX::RTXAccelStruct::GetScratchBufferSize(VkAccelerationStructureNV handle, bool updateAction)
{
if (!ptrToInstance) throw std::runtime_error("No way to get scratch buffer size: no ptr to Instance.");
VkAccelerationStructureMemoryRequirementsInfoNV memoryRequirementsInfo = {};
memoryRequirementsInfo.sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_MEMORY_REQUIREMENTS_INFO_NV;
memoryRequirementsInfo.accelerationStructure = handle;
memoryRequirementsInfo.type = updateAction ? VK_ACCELERATION_STRUCTURE_MEMORY_REQUIREMENTS_TYPE_UPDATE_SCRATCH_NV : VK_ACCELERATION_STRUCTURE_MEMORY_REQUIREMENTS_TYPE_BUILD_SCRATCH_NV;
VkMemoryRequirements2 memoryRequirements = {};
RTXInstance::vkGetAccelerationStructureMemoryRequirementsNV(ptrToInstance->device, &memoryRequirementsInfo, &memoryRequirements);
VkDeviceSize result = memoryRequirements.memoryRequirements.size;
return result;
}
void NVIDIA_RTX::RTXAccelStruct::CreateAccelStruct(bool isBlas, VkGeometryNV * inpGeom, std::vector<VkGeometryInstance> * instanceData, InstanceClass & inpInstance)
{
ptrToInstance = &inpInstance;
VkAccelerationStructureCreateInfoNV accelerationStructureInfo = {};
accelerationStructureInfo.sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_CREATE_INFO_NV;
accelerationStructureInfo.compactedSize = 0;
accelerationStructureInfo.info.sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_INFO_NV;
accelerationStructureInfo.info.type = isBlas ? VK_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL_NV : VK_ACCELERATION_STRUCTURE_TYPE_TOP_LEVEL_NV;
accelerationStructureInfo.info.flags = VK_BUILD_ACCELERATION_STRUCTURE_ALLOW_UPDATE_BIT_NV;
accelerationStructureInfo.info.instanceCount = isBlas ? 0 : (uint32_t)instanceData->size();
accelerationStructureInfo.info.geometryCount = isBlas ? 1 : 0;
accelerationStructureInfo.info.pGeometries = isBlas ? inpGeom : nullptr;
VkResult code = RTXInstance::vkCreateAccelerationStructureNV(ptrToInstance->device, &accelerationStructureInfo, nullptr, &accelStruct);
if (code != VK_SUCCESS) { RemovePast(); throw std::runtime_error("Could not create acceleration structure"); }
hasAccelStruct = true;
VkAccelerationStructureMemoryRequirementsInfoNV memoryRequirementsInfo = {};
memoryRequirementsInfo.sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_MEMORY_REQUIREMENTS_INFO_NV;
memoryRequirementsInfo.accelerationStructure = accelStruct;
memoryRequirementsInfo.type = VK_ACCELERATION_STRUCTURE_MEMORY_REQUIREMENTS_TYPE_OBJECT_NV;
VkMemoryRequirements2 memoryRequirements = {};
RTXInstance::vkGetAccelerationStructureMemoryRequirementsNV(ptrToInstance->device, &memoryRequirementsInfo, &memoryRequirements);
VkMemoryAllocateInfo memoryAllocateInfo = {};
memoryAllocateInfo.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO;
memoryAllocateInfo.allocationSize = memoryRequirements.memoryRequirements.size;
getMemoryType(ptrToInstance, memoryRequirements.memoryRequirements.memoryTypeBits, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, &memoryAllocateInfo.memoryTypeIndex);
try
{
subAllocId = AllocMem(ptrToInstance->device, memoryAllocateInfo, memoryRequirements.memoryRequirements, RTX, &memPtr, &offset, &len);
typeForSubAlloc = memoryAllocateInfo.memoryTypeIndex;
}
catch (...)
{
RemovePast();
throw std::runtime_error("Could not create acceleration structure memory");
}
hasSubAlloc = true;
VkBindAccelerationStructureMemoryInfoNV bindInfo = {};
bindInfo.sType = VK_STRUCTURE_TYPE_BIND_ACCELERATION_STRUCTURE_MEMORY_INFO_NV;
bindInfo.accelerationStructure = accelStruct;
bindInfo.memory = memPtr;
bindInfo.memoryOffset = offset;
bindInfo.deviceIndexCount = 0;
bindInfo.pDeviceIndices = nullptr;
code = RTXInstance::vkBindAccelerationStructureMemoryNV(ptrToInstance->device, 1, &bindInfo);
if (code != VK_SUCCESS) { RemovePast(); throw std::runtime_error("Could not bind acceleration structure memory"); }
SubmitCreateAccelStruct(isBlas, inpGeom, instanceData);
}
void NVIDIA_RTX::RTXAccelStruct::SubmitCreateAccelStruct(bool isBlas, VkGeometryNV * inpGeom, std::vector<VkGeometryInstance> * instanceData, bool updateAction)
{
if (!ptrToInstance) throw std::runtime_error("Cannot submit create accel struct request since a ptr to instance was not found");
VkAccelerationStructureInfoNV asInfo = {};
asInfo.sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_INFO_NV;
asInfo.type = isBlas ? VK_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL_NV : VK_ACCELERATION_STRUCTURE_TYPE_TOP_LEVEL_NV;
asInfo.flags = VK_BUILD_ACCELERATION_STRUCTURE_ALLOW_UPDATE_BIT_NV;
VkDeviceSize scratchBufferSize = GetScratchBufferSize(accelStruct, updateAction);
if (scratchBufferSize > scratchBuffer.getSize())
{
scratchBuffer.Buffer(MEMORY_DEVICE_LOCAL, SHARING_EXCLUSIVE, MODE_CREATE, USAGE_RTX, *ptrToInstance, nullptr, scratchBufferSize);
}
VkDeviceSize curInstanceBufferSize = 0;
if (!isBlas)
{
curInstanceBufferSize = (VkDeviceSize)((((instanceData->size() / 1000) + 1) * 1000) * sizeof(VkGeometryInstance));
}
BeginCommandBuffer(*ptrToInstance);
if (isBlas)
{
asInfo.instanceCount = 0;
asInfo.geometryCount = 1;
asInfo.pGeometries = inpGeom;
RTXInstance::vkCmdBuildAccelerationStructureNV(cmdBuffers[0], &asInfo, VK_NULL_HANDLE, 0, updateAction ? VK_TRUE : VK_FALSE, accelStruct, updateAction ? accelStruct : VK_NULL_HANDLE, scratchBuffer.buffer, 0);
}
else
{
if (instanceBuffer.getSize() < curInstanceBufferSize)
{
instanceBuffer.Buffer(MEMORY_HOST_VISIBLE | MEMORY_HOST_COHERENT, SHARING_EXCLUSIVE, MODE_CREATE, USAGE_RTX, *ptrToInstance, nullptr, curInstanceBufferSize);
}
instanceBuffer.UploadSubData(0, instanceData->data(), (unsigned int)instanceData->size() * sizeof(VkGeometryInstance));
asInfo.instanceCount = (uint32_t)instanceData->size();
asInfo.geometryCount = 0;
asInfo.pGeometries = nullptr;
RTXInstance::vkCmdBuildAccelerationStructureNV(cmdBuffers[0], &asInfo, instanceBuffer.buffer, 0, updateAction ? VK_TRUE : VK_FALSE, accelStruct, updateAction ? accelStruct : VK_NULL_HANDLE, scratchBuffer.buffer, 0);
}
EndCommandBuffer();
SubmitCommandBuffer();
}
NVIDIA_RTX::RTXAccelStruct::RTXAccelStruct()
{
hasAccelStruct = false;
hasSubAlloc = false;
ptrToInstance = nullptr;
}
NVIDIA_RTX::RTXAccelStruct::~RTXAccelStruct()
{
RemovePast();
}
void KHR_RT::RTAccelStruct::RemovePast()
{
if (!ptrToInstance) return;
if (hasAccelStruct) RTInstance::vkDestroyAccelerationStructureKHR(ptrToInstance->device, accelStruct, nullptr);
hasAccelStruct = false;
ptrToInstance = nullptr;
}
void KHR_RT::RTAccelStruct::CreateAccelStruct(bool isBlas, VkAccelerationStructureGeometryKHR * inpGeom, VkAccelerationStructureBuildRangeInfoKHR * inpGeomOffset, std::vector<VkAccelerationStructureInstanceKHR> * instanceData, InstanceClass & inpInstance)
{
ptrToInstance = &inpInstance;
if (isBlas)
{
VkAccelerationStructureBuildGeometryInfoKHR accelerationStructureBuildGeomInfo{ VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_GEOMETRY_INFO_KHR };
accelerationStructureBuildGeomInfo.flags = VK_BUILD_ACCELERATION_STRUCTURE_PREFER_FAST_TRACE_BIT_KHR | VK_BUILD_ACCELERATION_STRUCTURE_ALLOW_UPDATE_BIT_KHR;
accelerationStructureBuildGeomInfo.geometryCount = 1;
accelerationStructureBuildGeomInfo.pGeometries = inpGeom;
accelerationStructureBuildGeomInfo.mode = VK_BUILD_ACCELERATION_STRUCTURE_MODE_BUILD_KHR;
accelerationStructureBuildGeomInfo.type = VK_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL_KHR;
VkAccelerationStructureBuildSizesInfoKHR sizeInfo{ VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_SIZES_INFO_KHR };
RTInstance::vkGetAccelerationStructureBuildSizesKHR(ptrToInstance->device, VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR, &accelerationStructureBuildGeomInfo, &inpGeomOffset->primitiveCount, &sizeInfo);
VkAccelerationStructureCreateInfoKHR createInfo{ VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_CREATE_INFO_KHR };
createInfo.type = VK_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL_KHR;
createInfo.size = sizeInfo.accelerationStructureSize;
accelStructBuffer.Buffer(MEMORY_DEVICE_LOCAL, SHARING_EXCLUSIVE, MODE_CREATE, USAGE_DEVICE_ADDRESS | USAGE_ACCEL_STRUCT, *ptrToInstance, nullptr, createInfo.size);
createInfo.buffer = accelStructBuffer.buffer;
VkResult result = RTInstance::vkCreateAccelerationStructureKHR(ptrToInstance->device, &createInfo, nullptr, &accelStruct);
if (result != VK_SUCCESS) { RemovePast(); throw std::runtime_error("Could not create accel struct"); }
hasAccelStruct = true;
accelerationStructureBuildGeomInfo.dstAccelerationStructure = accelStruct;
BufferClass *curScratchBuffer = new BufferClass(MEMORY_DEVICE_LOCAL, SHARING_EXCLUSIVE, MODE_CREATE, USAGE_DEVICE_ADDRESS | USAGE_ACCEL_STRUCT | USAGE_SSBO, *ptrToInstance, nullptr, (unsigned int)sizeInfo.buildScratchSize);
VkBufferDeviceAddressInfo scratchBufferInfo{ VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_INFO };
scratchBufferInfo.buffer = curScratchBuffer->buffer;
VkDeviceAddress scratchAddress = RTInstance::vkGetBufferDeviceAddressKHR(ptrToInstance->device, &scratchBufferInfo);
accelerationStructureBuildGeomInfo.scratchData.deviceAddress = scratchAddress;
BeginCommandBuffer(*ptrToInstance);
std::array<VkAccelerationStructureBuildRangeInfoKHR*, 1> inpGeomOffsetArray = { inpGeomOffset };
RTInstance::vkCmdBuildAccelerationStructuresKHR(cmdBuffers[0], 1, &accelerationStructureBuildGeomInfo, inpGeomOffsetArray.data());
EndCommandBuffer();
SubmitCommandBuffer();
delete curScratchBuffer;
}
else
{
VkDeviceSize curInstanceBufSize = (VkDeviceSize)(instanceData->size() * sizeof(VkAccelerationStructureInstanceKHR));
if (instanceBuffer.getSize() < curInstanceBufSize)
{
instanceBuffer.Buffer(MEMORY_HOST_VISIBLE | MEMORY_HOST_COHERENT, SHARING_EXCLUSIVE, MODE_CREATE, USAGE_DEVICE_ADDRESS | USAGE_ACCEL_STRUCT | USAGE_SSBO, *ptrToInstance, instanceData->data(), curInstanceBufSize);
}
VkBufferDeviceAddressInfo bufferInfo{ VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_INFO };
bufferInfo.buffer = instanceBuffer.buffer;
VkDeviceAddress instanceAddress = RTInstance::vkGetBufferDeviceAddressKHR(ptrToInstance->device, &bufferInfo);
BeginCommandBuffer(*ptrToInstance);
VkAccelerationStructureGeometryInstancesDataKHR instancesVk{ VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_INSTANCES_DATA_KHR };
instancesVk.arrayOfPointers = VK_FALSE;
instancesVk.data.deviceAddress = instanceAddress;
VkAccelerationStructureGeometryKHR topASGeometry{ VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_KHR };
topASGeometry.geometryType = VK_GEOMETRY_TYPE_INSTANCES_KHR;
topASGeometry.geometry.instances = instancesVk;
VkAccelerationStructureBuildGeometryInfoKHR accelerationStructureBuildGeomInfo{ VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_GEOMETRY_INFO_KHR };
accelerationStructureBuildGeomInfo.flags = VK_BUILD_ACCELERATION_STRUCTURE_PREFER_FAST_TRACE_BIT_KHR | VK_BUILD_ACCELERATION_STRUCTURE_ALLOW_UPDATE_BIT_KHR;
accelerationStructureBuildGeomInfo.geometryCount = 1;
accelerationStructureBuildGeomInfo.pGeometries = &topASGeometry;
accelerationStructureBuildGeomInfo.mode = VK_BUILD_ACCELERATION_STRUCTURE_MODE_BUILD_KHR;
accelerationStructureBuildGeomInfo.type = VK_ACCELERATION_STRUCTURE_TYPE_TOP_LEVEL_KHR;
uint32_t instanceCount = (uint32_t)instanceData->size();
VkAccelerationStructureBuildSizesInfoKHR sizeInfo{ VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_SIZES_INFO_KHR };
RTInstance::vkGetAccelerationStructureBuildSizesKHR(ptrToInstance->device, VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR, &accelerationStructureBuildGeomInfo, &instanceCount, &sizeInfo);
VkAccelerationStructureCreateInfoKHR createInfo{ VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_CREATE_INFO_KHR };
createInfo.type = VK_ACCELERATION_STRUCTURE_TYPE_TOP_LEVEL_KHR;
createInfo.size = sizeInfo.accelerationStructureSize;
accelStructBuffer.Buffer(MEMORY_DEVICE_LOCAL, SHARING_EXCLUSIVE, MODE_CREATE, USAGE_DEVICE_ADDRESS | USAGE_ACCEL_STRUCT, *ptrToInstance, nullptr, createInfo.size);
createInfo.buffer = accelStructBuffer.buffer;
VkResult result = RTInstance::vkCreateAccelerationStructureKHR(ptrToInstance->device, &createInfo, nullptr, &accelStruct);
if (result != VK_SUCCESS) { RemovePast(); throw std::runtime_error("Could not create accel struct"); }
hasAccelStruct = true;
accelerationStructureBuildGeomInfo.dstAccelerationStructure = accelStruct;
BufferClass *curScratchBuffer = new BufferClass(MEMORY_DEVICE_LOCAL, SHARING_EXCLUSIVE, MODE_CREATE, USAGE_DEVICE_ADDRESS | USAGE_ACCEL_STRUCT | USAGE_SSBO, *ptrToInstance, nullptr, (unsigned int)sizeInfo.buildScratchSize);
VkBufferDeviceAddressInfo scratchBufferInfo{ VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_INFO };
scratchBufferInfo.buffer = curScratchBuffer->buffer;
VkDeviceAddress scratchAddress = RTInstance::vkGetBufferDeviceAddressKHR(ptrToInstance->device, &scratchBufferInfo);
accelerationStructureBuildGeomInfo.scratchData.deviceAddress = scratchAddress;
VkAccelerationStructureBuildRangeInfoKHR buildOffsetInfo{ static_cast<uint32_t>(instanceCount), 0, 0, 0 };
const VkAccelerationStructureBuildRangeInfoKHR* pBuildOffsetInfo = &buildOffsetInfo;
RTInstance::vkCmdBuildAccelerationStructuresKHR(cmdBuffers[0], 1, &accelerationStructureBuildGeomInfo, &pBuildOffsetInfo);
EndCommandBuffer();
SubmitCommandBuffer();
delete curScratchBuffer;
}
}
void KHR_RT::RTAccelStruct::UpdateAccelStruct(bool isBlas, VkAccelerationStructureGeometryKHR * inpGeom, VkAccelerationStructureBuildRangeInfoKHR * inpGeomOffset, std::vector<VkAccelerationStructureInstanceKHR> * instanceData)
{
if (!ptrToInstance) throw std::runtime_error("Cannot submit create accel struct request since a ptr to instance was not found");
if (isBlas)
{
VkAccelerationStructureBuildGeometryInfoKHR accelerationStructureBuildGeomInfo{ VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_GEOMETRY_INFO_KHR };
accelerationStructureBuildGeomInfo.flags = VK_BUILD_ACCELERATION_STRUCTURE_PREFER_FAST_TRACE_BIT_KHR | VK_BUILD_ACCELERATION_STRUCTURE_ALLOW_UPDATE_BIT_KHR;
accelerationStructureBuildGeomInfo.geometryCount = 1;
accelerationStructureBuildGeomInfo.pGeometries = inpGeom;
accelerationStructureBuildGeomInfo.mode = VK_BUILD_ACCELERATION_STRUCTURE_MODE_UPDATE_KHR;
accelerationStructureBuildGeomInfo.type = VK_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL_KHR;
accelerationStructureBuildGeomInfo.srcAccelerationStructure = accelStruct;
accelerationStructureBuildGeomInfo.dstAccelerationStructure = accelStruct;
VkAccelerationStructureBuildSizesInfoKHR sizeInfo{ VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_SIZES_INFO_KHR };
RTInstance::vkGetAccelerationStructureBuildSizesKHR(ptrToInstance->device, VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR, &accelerationStructureBuildGeomInfo, &inpGeomOffset->primitiveCount, &sizeInfo);
BufferClass *curScratchBuffer = new BufferClass(MEMORY_DEVICE_LOCAL, SHARING_EXCLUSIVE, MODE_CREATE, USAGE_DEVICE_ADDRESS | USAGE_ACCEL_STRUCT, *ptrToInstance, nullptr, (unsigned int)sizeInfo.buildScratchSize);
VkBufferDeviceAddressInfo scratchBufferInfo{ VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_INFO };
scratchBufferInfo.buffer = curScratchBuffer->buffer;
VkDeviceAddress scratchAddress = RTInstance::vkGetBufferDeviceAddressKHR(ptrToInstance->device, &scratchBufferInfo);
accelerationStructureBuildGeomInfo.scratchData.deviceAddress = scratchAddress;
BeginCommandBuffer(*ptrToInstance);
RTInstance::vkCmdBuildAccelerationStructuresKHR(cmdBuffers[0], 1, &accelerationStructureBuildGeomInfo, &inpGeomOffset);
EndCommandBuffer();
SubmitCommandBuffer();
delete curScratchBuffer;
}
else
{
VkDeviceSize curInstanceBufSize = (VkDeviceSize)(instanceData->size() * sizeof(VkAccelerationStructureInstanceKHR));
if (instanceBuffer.getSize() < curInstanceBufSize)
{
instanceBuffer.Buffer(MEMORY_HOST_VISIBLE | MEMORY_HOST_COHERENT, SHARING_EXCLUSIVE, MODE_CREATE, USAGE_DEVICE_ADDRESS | USAGE_ACCEL_STRUCT, *ptrToInstance, instanceData->data(), curInstanceBufSize);
}
VkBufferDeviceAddressInfo bufferInfo{ VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_INFO };
bufferInfo.buffer = instanceBuffer.buffer;
VkDeviceAddress instanceAddress = RTInstance::vkGetBufferDeviceAddressKHR(ptrToInstance->device, &bufferInfo);
BeginCommandBuffer(*ptrToInstance);
VkAccelerationStructureGeometryInstancesDataKHR instancesVk{ VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_INSTANCES_DATA_KHR };
instancesVk.arrayOfPointers = VK_FALSE;
instancesVk.data.deviceAddress = instanceAddress;
VkAccelerationStructureGeometryKHR topASGeometry{ VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_KHR };
topASGeometry.geometryType = VK_GEOMETRY_TYPE_INSTANCES_KHR;
topASGeometry.geometry.instances = instancesVk;
VkAccelerationStructureBuildGeometryInfoKHR accelerationStructureBuildGeomInfo{ VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_GEOMETRY_INFO_KHR };
accelerationStructureBuildGeomInfo.flags = VK_BUILD_ACCELERATION_STRUCTURE_PREFER_FAST_TRACE_BIT_KHR | VK_BUILD_ACCELERATION_STRUCTURE_ALLOW_UPDATE_BIT_KHR;
accelerationStructureBuildGeomInfo.geometryCount = 1;
accelerationStructureBuildGeomInfo.pGeometries = &topASGeometry;
accelerationStructureBuildGeomInfo.mode = VK_BUILD_ACCELERATION_STRUCTURE_MODE_UPDATE_KHR;
accelerationStructureBuildGeomInfo.type = VK_ACCELERATION_STRUCTURE_TYPE_TOP_LEVEL_KHR;
uint32_t instanceCount = (uint32_t)instanceData->size();
VkAccelerationStructureBuildSizesInfoKHR sizeInfo{ VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_SIZES_INFO_KHR };
RTInstance::vkGetAccelerationStructureBuildSizesKHR(ptrToInstance->device, VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR, &accelerationStructureBuildGeomInfo, &instanceCount, &sizeInfo);
accelerationStructureBuildGeomInfo.srcAccelerationStructure = accelStruct;
accelerationStructureBuildGeomInfo.dstAccelerationStructure = accelStruct;
BufferClass *curScratchBuffer = new BufferClass(MEMORY_DEVICE_LOCAL, SHARING_EXCLUSIVE, MODE_CREATE, USAGE_DEVICE_ADDRESS | USAGE_ACCEL_STRUCT, *ptrToInstance, nullptr, (unsigned int)sizeInfo.buildScratchSize);
VkBufferDeviceAddressInfo scratchBufferInfo{ VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_INFO };
scratchBufferInfo.buffer = curScratchBuffer->buffer;
VkDeviceAddress scratchAddress = RTInstance::vkGetBufferDeviceAddressKHR(ptrToInstance->device, &scratchBufferInfo);
accelerationStructureBuildGeomInfo.scratchData.deviceAddress = scratchAddress;
VkAccelerationStructureBuildRangeInfoKHR buildOffsetInfo{ static_cast<uint32_t>(instanceCount), 0, 0, 0 };
const VkAccelerationStructureBuildRangeInfoKHR* pBuildOffsetInfo = &buildOffsetInfo;
RTInstance::vkCmdBuildAccelerationStructuresKHR(cmdBuffers[0], 1, &accelerationStructureBuildGeomInfo, &pBuildOffsetInfo);
EndCommandBuffer();
SubmitCommandBuffer();
delete curScratchBuffer;
}
}
KHR_RT::RTAccelStruct::RTAccelStruct()
{
hasAccelStruct = false;
ptrToInstance = nullptr;
}
KHR_RT::RTAccelStruct::~RTAccelStruct()
{
RemovePast();
}
On the NV side traceGeom is a VkGeometryNV, and on the KHR side it is a VkAccelerationStructureGeometryKHR.
Please note where I erroneously set traceGeom.geometry.aabbs without really providing any further data. I can't recall why I ever set this; I'm tempted to blame an old tutorial copy-paste situation.
Doing this with KHR_raytracing will give you a nasty data corruption case that can take days to debug, as traceGeom.geometry.aabbs overlaps, as a union, with traceGeom.geometry.triangles.
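For context on why the corruption happens: in the KHR structs the geometry payload really is a C union, so writing aabbs stomps on the triangle data. From vulkan_core.h, all three members share the same storage:
typedef union VkAccelerationStructureGeometryDataKHR {
    VkAccelerationStructureGeometryTrianglesDataKHR triangles;
    VkAccelerationStructureGeometryAabbsDataKHR     aabbs;
    VkAccelerationStructureGeometryInstancesDataKHR instances;
} VkAccelerationStructureGeometryDataKHR;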
NVIDIA_RTX::RTXGeometry::RTXGeometry()
{
ptrToInstance = nullptr;
created = false;
dirty = false;
}
NVIDIA_RTX::RTXGeometry::~RTXGeometry()
{
ptrToInstance = nullptr;
created = false;
dirty = false;
}
void NVIDIA_RTX::RTXGeometry::SetGeom(BufferClass & vertBuffer, unsigned int vertexSize, bool isAlphaKeyed, InstanceClass & inpInstance)
{
if (vertBuffer.getSize() == 0) return;
completeVertexBufferRef = &vertBuffer;
ptrToInstance = &inpInstance;
traceGeom = {};
traceGeom.sType = VK_STRUCTURE_TYPE_GEOMETRY_NV;
traceGeom.geometryType = VK_GEOMETRY_TYPE_TRIANGLES_NV;
traceGeom.geometry.triangles.sType = VK_STRUCTURE_TYPE_GEOMETRY_TRIANGLES_NV;
traceGeom.geometry.triangles.vertexData = vertBuffer.buffer;
traceGeom.geometry.triangles.vertexOffset = 0;
traceGeom.geometry.triangles.vertexCount = vertBuffer.getSize() / vertexSize;
traceGeom.geometry.triangles.vertexStride = vertexSize;
traceGeom.geometry.triangles.vertexFormat = (VkFormat)R32G32B32F;
traceGeom.geometry.triangles.indexOffset = 0;
traceGeom.geometry.triangles.indexCount = 0;
traceGeom.geometry.triangles.indexType = VK_INDEX_TYPE_NONE_NV;
traceGeom.geometry.triangles.transformOffset = 0;
traceGeom.geometry.aabbs = { }; // Do not write any of these two lines... remove!
traceGeom.geometry.aabbs.sType = VK_STRUCTURE_TYPE_GEOMETRY_AABB_NV; // Yeah, remove this too!
if (!isAlphaKeyed) traceGeom.flags = VK_GEOMETRY_OPAQUE_BIT_NV;
dirty = true;
}
void NVIDIA_RTX::RTXGeometry::CreateOrUpdate()
{
if (!created)
{
CreateAccelStruct(true, &traceGeom, nullptr, *ptrToInstance);
created = true;
dirty = false;
}
if (dirty)
{
SubmitCreateAccelStruct(true, &traceGeom, nullptr, true);
dirty = false;
}
}
void NVIDIA_RTX::RTXGeometry::SetDirty()
{
// This should be called when geometry changes inside a shader...
dirty = true;
}
KHR_RT::RTGeometry::RTGeometry()
{
ptrToInstance = nullptr;
created = false;
dirty = false;
}
KHR_RT::RTGeometry::~RTGeometry()
{
ptrToInstance = nullptr;
created = false;
dirty = false;
}
void KHR_RT::RTGeometry::SetGeom(BufferClass & vertBuffer, unsigned int vertexSize, bool isAlphaKeyed, InstanceClass & inpInstance)
{
if (vertBuffer.getSize() == 0) return;
completeVertexBufferRef = &vertBuffer;
ptrToInstance = &inpInstance;
VkBufferDeviceAddressInfo bufDevAdInfo = {};
bufDevAdInfo.sType = VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_INFO;
bufDevAdInfo.buffer = vertBuffer.buffer;
bufferDeviceAddress = RTInstance::vkGetBufferDeviceAddressKHR(ptrToInstance->device, &bufDevAdInfo);
traceGeom = {};
traceGeom.sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_KHR;
traceGeom.geometryType = VK_GEOMETRY_TYPE_TRIANGLES_KHR;
traceGeom.geometry.triangles.sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_TRIANGLES_DATA_KHR;
traceGeom.geometry.triangles.vertexFormat = (VkFormat)R32G32B32F;
traceGeom.geometry.triangles.vertexData.deviceAddress = bufferDeviceAddress;
traceGeom.geometry.triangles.vertexStride = vertexSize;
traceGeom.geometry.triangles.maxVertex = (vertBuffer.getSize() / vertexSize) - 1;
traceGeom.geometry.triangles.indexData.deviceAddress = (VkDeviceAddress)0;
traceGeom.geometry.triangles.indexType = VK_INDEX_TYPE_NONE_KHR;
traceGeom.geometry.triangles.transformData.deviceAddress = (VkDeviceAddress)0;
if (!isAlphaKeyed) traceGeom.flags = VK_GEOMETRY_OPAQUE_BIT_KHR;
traceGeomOffset = {};
traceGeomOffset.primitiveCount = vertBuffer.getSize() / (vertexSize * 3);
dirty = true;
}
void KHR_RT::RTGeometry::CreateOrUpdate()
{
if (!created)
{
CreateAccelStruct(true, &traceGeom, &traceGeomOffset, nullptr, *ptrToInstance);
created = true;
dirty = false;
}
if (dirty)
{
UpdateAccelStruct(true, &traceGeom, &traceGeomOffset, nullptr);
dirty = false;
}
}
void KHR_RT::RTGeometry::SetDirty()
{
// This should be called when geometry changes inside a shader...
dirty = true;
}
InstanceClass is a Vulkan instance wrapper and InstanceProperties is a geometry instance property struct; make sure not to confuse the two.
Additionally, allInstanceData is populated and used as an instance property buffer (i.e. for property fetching during ray hits, for example).
You won't necessarily see what happens to material here; it will be used later to build a variable-count descriptor set for materials.
Here's something that might catch your attention: in both Add(...) and Update(...) the instance transform is set to the identity matrix.
Why, you might ask? All instances in this application are backed by memory... for reasons beyond the scope of this writing. The reasoning is, however, somewhat related to how skinned crowds are transformed and re-fit.
If you do instance your BLASes, modifying this code to supply the 3x4 affine transformation matrix should be trivial (a sketch follows below).
In terms of the Vulkan-related takeaways, most of what you should focus on is how curInst is set, especially the fact that a buffer device address is fed in rather than an acceleration structure handle.
The call to vkGetAccelerationStructureHandleNV is more or less defensive programming... just making sure the acceleration structure handle is fetched in case it wasn't already.
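On the instance-transform note, here's a sketch (helper name hypothetical) of filling VkTransformMatrixKHR from a row-major 4x4 by dropping its last row:
// VkTransformMatrixKHR is 3 rows x 4 columns, row-major
void FillInstanceTransform(VkAccelerationStructureInstanceKHR & inst, const float rowMajor4x4[16])
{
    for (int row = 0; row != 3; row++)
        for (int col = 0; col != 4; col++)
            inst.transform.matrix[row][col] = rowMajor4x4[row * 4 + col];
}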
void NVIDIA_RTX::RTXScene::RemovePast()
{
if (!ptrToInstance) return;
needUpdate = false;
needReCreation = false;
ptrToInstance = nullptr;
createdEssentials = false;
signifyNewScene = false;
}
NVIDIA_RTX::RTXScene::RTXScene()
{
needUpdate = false;
needReCreation = false;
ptrToInstance = nullptr;
createdEssentials = false;
signifyNewScene = false;
}
NVIDIA_RTX::RTXScene::~RTXScene()
{
RemovePast();
}
TraceItem NVIDIA_RTX::RTXScene::Add(std::vector & material, RTXGeometry & geom, InstanceProperties & inpInstanceProps, InstanceClass & inpInstance)
{
if (!ptrToInstance) ptrToInstance = &inpInstance;
if (geom.dirty || !geom.created)
{
if (!geom.created) needReCreation = true;
needUpdate = true;
geom.CreateOrUpdate();
}
TraceItem retVal;
retVal.geomRef = &geom;
retVal.material = material;
retVal.instanceData = inpInstanceProps;
retVal.itemId = mersenneTwister64BitPRNG();
uint64_t accelerationStructureHandle;
VkResult code = RTXInstance::vkGetAccelerationStructureHandleNV(ptrToInstance->device, geom.accelStruct, sizeof(uint64_t), &accelerationStructureHandle);
if (code != VK_SUCCESS) { throw std::runtime_error("Could not get opaque handle for accel struct"); }
VkGeometryInstance curInst;
for (int i = 0; i != 12; i++)
curInst.transform[i] = ((i % 4) == (i / 4)) ? 1.0f : 0.0f;
curInst.instanceId = 0; // This will be assigned later
curInst.mask = 0xff;
curInst.instanceOffset = 0;
curInst.flags = VK_GEOMETRY_INSTANCE_TRIANGLE_CULL_DISABLE_BIT_NV;
curInst.accelerationStructureHandle = accelerationStructureHandle;
retVal.rtxInstanceData = curInst;
allTraceItems[retVal.itemId] = retVal;
needReCreation = true;
return retVal;
}
void NVIDIA_RTX::RTXScene::Update(TraceItem inpTraceItem, InstanceProperties & inpInstanceProps)
{
TraceItem & curItem = allTraceItems[inpTraceItem.itemId];
for (int i = 0; i != 12; i++)
curItem.rtxInstanceData.transform[i] = ((i % 4) == (i / 4)) ? 1.0f : 0.0f;
curItem.instanceData = inpInstanceProps;
needUpdate = true;
}
void NVIDIA_RTX::RTXScene::Remove(TraceItem inpTraceItem)
{
allTraceItems.erase(inpTraceItem.itemId);
needReCreation = true;
}
void NVIDIA_RTX::RTXScene::CreateOrRecreate()
{
if (createdEssentials) RTXAccelStruct::~RTXAccelStruct(); // explicit destructor call: tears down the previous TLAS and its memory before recreation
createdEssentials = true;
CreateInstanceData(instances);
try { CreateAccelStruct(false, nullptr, &instances, *ptrToInstance); }
catch (...) { RemovePast(); throw; }
}
void NVIDIA_RTX::RTXScene::CreateInstanceData(std::vector<VkGeometryInstance> & instances)
{
instances.clear();
instances.reserve(allTraceItems.size());
unsigned int bufferInstanceCount = (((unsigned int)allTraceItems.size() / 1000) + 1) * 1000;
allInstanceData.clear();
allInstanceData.reserve(bufferInstanceCount);
if (((unsigned int)instancePropertiesBuffer.getSize()) / ((unsigned int)sizeof(InstanceProperties)) < bufferInstanceCount)
{
instancePropertiesBuffer.Buffer(MEMORY_HOST_VISIBLE, SHARING_EXCLUSIVE, MODE_CREATE, USAGE_SSBO, *ptrToInstance, nullptr, (VkDeviceSize)(bufferInstanceCount * (unsigned int)sizeof(InstanceProperties)));
}
unsigned int instanceIdCount = 0;
for (auto & traceItemKV : allTraceItems)
{
traceItemKV.second.rtxInstanceData.instanceId = instanceIdCount;
instances.push_back(traceItemKV.second.rtxInstanceData);
allInstanceData.push_back(traceItemKV.second.instanceData);
instanceIdCount++;
}
instancePropertiesBuffer.UploadSubData(0, allInstanceData.data(), (unsigned int)allInstanceData.size() * sizeof(InstanceProperties));
}
void NVIDIA_RTX::RTXScene::Refresh()
{
for (auto & curTraceItemKV : allTraceItems)
{
TraceItem & curTraceItem = curTraceItemKV.second;
if (curTraceItem.geomRef->dirty || !curTraceItem.geomRef->created)
{
bool thisPieceNeedsRecreation = false;
if (!curTraceItem.geomRef->created)
{
needReCreation = true;
thisPieceNeedsRecreation = true;
}
needUpdate = true;
curTraceItem.geomRef->CreateOrUpdate();
if (thisPieceNeedsRecreation)
{
uint64_t accelerationStructureHandle;
VkResult code = RTXInstance::vkGetAccelerationStructureHandleNV(ptrToInstance->device, curTraceItem.geomRef->accelStruct, sizeof(uint64_t), &accelerationStructureHandle);
if (code != VK_SUCCESS) { throw std::runtime_error("Could not get opaque handle for accel struct"); }
curTraceItem.rtxInstanceData.accelerationStructureHandle = accelerationStructureHandle;
}
}
}
if (needReCreation)
{
CreateOrRecreate();
needReCreation = false;
needUpdate = false;
signifyNewScene = true;
}
if (needUpdate)
{
CreateInstanceData(instances);
SubmitCreateAccelStruct(false, nullptr, &instances, true);
needUpdate = false;
}
}
bool NVIDIA_RTX::RTXScene::IsSceneNew()
{
return signifyNewScene;
}
void NVIDIA_RTX::RTXScene::MarkSceneOld()
{
signifyNewScene = false;
}
void KHR_RT::RTScene::RemovePast()
{
if (!ptrToInstance) return;
needUpdate = false;
needReCreation = false;
ptrToInstance = nullptr;
createdEssentials = false;
signifyNewScene = false;
}
KHR_RT::RTScene::RTScene()
{
needUpdate = false;
needReCreation = false;
ptrToInstance = nullptr;
createdEssentials = false;
signifyNewScene = false;
}
KHR_RT::RTScene::~RTScene()
{
RemovePast();
}
TraceItem KHR_RT::RTScene::Add(std::vector & material, RTGeometry & geom, InstanceProperties & inpInstanceProps, InstanceClass & inpInstance)
{
if (!ptrToInstance) ptrToInstance = &inpInstance;
if (geom.dirty || !geom.created)
{
if (!geom.created) needReCreation = true;
needUpdate = true;
geom.CreateOrUpdate();
}
TraceItem retVal;
retVal.geomRef = &geom;
retVal.material = material;
retVal.instanceData = inpInstanceProps;
retVal.itemId = mersenneTwister64BitPRNG();
VkAccelerationStructureDeviceAddressInfoKHR addressInfo{VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_DEVICE_ADDRESS_INFO_KHR };
addressInfo.accelerationStructure = geom.accelStruct;
VkDeviceAddress blasAddress = RTInstance::vkGetAccelerationStructureDeviceAddressKHR(inpInstance.device, &addressInfo);
VkAccelerationStructureInstanceKHR curInst;
for (int i = 0; i != 3; i++)
for (int j = 0; j != 4; j++)
curInst.transform.matrix[i][j] = (i == j) ? 1.0f : 0.0f;
curInst.instanceCustomIndex = 0; // This will be assigned later
curInst.mask = 0xff;
curInst.instanceShaderBindingTableRecordOffset = 0;
curInst.flags = VK_GEOMETRY_INSTANCE_TRIANGLE_FACING_CULL_DISABLE_BIT_KHR;
curInst.accelerationStructureReference = blasAddress;
retVal.rtInstanceData = curInst;
allTraceItems[retVal.itemId] = retVal;
needReCreation = true;
return retVal;
}
void KHR_RT::RTScene::Update(TraceItem inpTraceItem, InstanceProperties & inpInstanceProps)
{
TraceItem & curItem = allTraceItems[inpTraceItem.itemId];
curItem.instanceData = inpInstanceProps;
needUpdate = true;
}
void KHR_RT::RTScene::Remove(TraceItem inpTraceItem)
{
allTraceItems.erase(inpTraceItem.itemId);
needReCreation = true;
}
void KHR_RT::RTScene::CreateOrRecreate()
{
if (createdEssentials) RTAccelStruct::~RTAccelStruct(); // explicit destructor call: tears down the previous TLAS and its buffers before recreation
createdEssentials = true;
CreateInstanceData(instances);
try { CreateAccelStruct(false, nullptr, nullptr, &instances, *ptrToInstance); }
catch (...) { RemovePast(); throw; }
}
void KHR_RT::RTScene::CreateInstanceData(std::vector<VkAccelerationStructureInstanceKHR> & instances)
{
instances.clear();
instances.reserve(allTraceItems.size());
unsigned int bufferInstanceCount = (((unsigned int)allTraceItems.size() / 1000) + 1) * 1000;
allInstanceData.clear();
allInstanceData.reserve(bufferInstanceCount);
if (((unsigned int)instancePropertiesBuffer.getSize()) / ((unsigned int)sizeof(InstanceProperties)) < bufferInstanceCount)
{
instancePropertiesBuffer.Buffer(MEMORY_HOST_VISIBLE, SHARING_EXCLUSIVE, MODE_CREATE, USAGE_SSBO, *ptrToInstance, nullptr, (VkDeviceSize)(bufferInstanceCount * (unsigned int)sizeof(InstanceProperties)));
}
unsigned int instanceIdCount = 0;
for (auto & traceItemKV : allTraceItems)
{
traceItemKV.second.rtInstanceData.instanceCustomIndex = instanceIdCount;
instances.push_back(traceItemKV.second.rtInstanceData);
allInstanceData.push_back(traceItemKV.second.instanceData);
instanceIdCount++;
}
instancePropertiesBuffer.UploadSubData(0, allInstanceData.data(), (unsigned int)allInstanceData.size() * sizeof(InstanceProperties));
}
void KHR_RT::RTScene::Refresh()
{
for (auto & curTraceItemKV : allTraceItems)
{
TraceItem & curTraceItem = curTraceItemKV.second;
if (curTraceItem.geomRef->dirty || !curTraceItem.geomRef->created)
{
bool thisPieceNeedsRecreation = false;
if (!curTraceItem.geomRef->created)
{
needReCreation = true;
thisPieceNeedsRecreation = true;
}
needUpdate = true;
curTraceItem.geomRef->CreateOrUpdate();
if (thisPieceNeedsRecreation)
{
VkAccelerationStructureDeviceAddressInfoKHR addressInfo{ VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_DEVICE_ADDRESS_INFO_KHR };
addressInfo.accelerationStructure = curTraceItem.geomRef->accelStruct;
curTraceItem.rtInstanceData.accelerationStructureReference = RTInstance::vkGetAccelerationStructureDeviceAddressKHR(ptrToInstance->device, &addressInfo);
}
}
}
if (needReCreation)
{
CreateOrRecreate();
needReCreation = false;
needUpdate = false;
signifyNewScene = true;
}
if (needUpdate)
{
CreateInstanceData(instances);
UpdateAccelStruct(false, nullptr, nullptr, &instances);
needUpdate = false;
}
}
bool KHR_RT::RTScene::IsSceneNew()
{
return signifyNewScene;
}
void KHR_RT::RTScene::MarkSceneOld()
{
signifyNewScene = false;
}
void NVIDIA_RTX::RTXPipelineStateClass::ErasePipelineState()
{
if (!ptrToInstance) return;
if (haveRTXPipeline) vkDestroyPipeline(ptrToInstance->device, pipeline, nullptr);
if (havePipelineLayout) vkDestroyPipelineLayout(ptrToInstance->device, pipelineLayout, nullptr);
if (havePipelineCache) vkDestroyPipelineCache(ptrToInstance->device, pipelineCache, nullptr);
havePipelineLayout = false;
haveRTXPipeline = false;
havePipelineCache = false;
ptrToInstance = nullptr;
}
NVIDIA_RTX::RTXPipelineStateClass::RTXPipelineStateClass()
{
havePipelineLayout = false;
haveRTXPipeline = false;
havePipelineCache = false;
ptrToInstance = nullptr;
}
NVIDIA_RTX::RTXPipelineStateClass::RTXPipelineStateClass(InstanceClass & renderInst, DescriptorSetLayout & DescSetLayout, ShaderResourceSet & inpShader)
{
ptrToInstance = &renderInst;
VkPipelineLayoutCreateInfo pPipelineLayoutCreateInfo = {};
pPipelineLayoutCreateInfo.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO;
pPipelineLayoutCreateInfo.setLayoutCount = (uint32_t)DescSetLayout.descriptorSetLayouts.size();
pPipelineLayoutCreateInfo.pSetLayouts = DescSetLayout.descriptorSetLayouts.data();
VkResult result = vkCreatePipelineLayout(ptrToInstance->device, &pPipelineLayoutCreateInfo, nullptr, &pipelineLayout);
if (result != VK_SUCCESS) { ErasePipelineState(); throw std::runtime_error("Could not create pipeline layout"); }
havePipelineLayout = true;
try { shaderStages.push_back(AddOrFindCachedShaderStage(*ptrToInstance, inpShader.rtx_raygen_shader, inpShader.getRaygenEntry(), VK_SHADER_STAGE_RAYGEN_BIT_NV)->elem.stage); }
catch (...) { ErasePipelineState(); throw std::runtime_error("Could not create raygen shader"); }
try { shaderStages.push_back(AddOrFindCachedShaderStage(*ptrToInstance, inpShader.rtx_raymiss_shader, inpShader.getRaymissEntry(), VK_SHADER_STAGE_MISS_BIT_NV)->elem.stage); }
catch (...) { ErasePipelineState(); throw std::runtime_error("Could not create raymiss shader"); }
try { shaderStages.push_back(AddOrFindCachedShaderStage(*ptrToInstance, inpShader.rtx_raychit_shader, inpShader.getRaychitEntry(), VK_SHADER_STAGE_CLOSEST_HIT_BIT_NV)->elem.stage); }
catch (...) { ErasePipelineState(); throw std::runtime_error("Could not create raychit shader"); }
if (inpShader.getRayahitEntry())
{
try { shaderStages.push_back(AddOrFindCachedShaderStage(*ptrToInstance, inpShader.rtx_rayahit_shader, inpShader.getRayahitEntry(), VK_SHADER_STAGE_ANY_HIT_BIT_NV)->elem.stage); }
catch (...) { ErasePipelineState(); throw std::runtime_error("Could not create rayahit shader"); }
}
shaderGroups.push_back({ VK_STRUCTURE_TYPE_RAY_TRACING_SHADER_GROUP_CREATE_INFO_NV, nullptr, VK_RAY_TRACING_SHADER_GROUP_TYPE_GENERAL_NV, 0, VK_SHADER_UNUSED_NV, VK_SHADER_UNUSED_NV, VK_SHADER_UNUSED_NV });
shaderGroups.push_back({ VK_STRUCTURE_TYPE_RAY_TRACING_SHADER_GROUP_CREATE_INFO_NV, nullptr, VK_RAY_TRACING_SHADER_GROUP_TYPE_GENERAL_NV, 1, VK_SHADER_UNUSED_NV, VK_SHADER_UNUSED_NV, VK_SHADER_UNUSED_NV });
shaderGroups.push_back({ VK_STRUCTURE_TYPE_RAY_TRACING_SHADER_GROUP_CREATE_INFO_NV, nullptr, VK_RAY_TRACING_SHADER_GROUP_TYPE_TRIANGLES_HIT_GROUP_NV, VK_SHADER_UNUSED_NV, 2, inpShader.getRayahitEntry() ? 3 : VK_SHADER_UNUSED_NV, VK_SHADER_UNUSED_NV });
VkRayTracingPipelineCreateInfoNV pipelineCreateInfo = {};
pipelineCreateInfo.stageCount = (uint32_t)shaderStages.size();
pipelineCreateInfo.pStages = shaderStages.data();
pipelineCreateInfo.groupCount = (uint32_t)shaderGroups.size();
pipelineCreateInfo.pGroups = shaderGroups.data();
pipelineCreateInfo.maxRecursionDepth = 8;
pipelineCreateInfo.sType = VK_STRUCTURE_TYPE_RAY_TRACING_PIPELINE_CREATE_INFO_NV;
pipelineCreateInfo.layout = pipelineLayout;
VkPipelineCacheCreateInfo pipelineCacheCreateInfo = {};
pipelineCacheCreateInfo.sType = VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO;
result = vkCreatePipelineCache(ptrToInstance->device, &pipelineCacheCreateInfo, nullptr, &pipelineCache);
if (result != VK_SUCCESS) { throw std::runtime_error("Could not create a pipeline cache"); }
havePipelineCache = true;
result = RTXInstance::vkCreateRayTracingPipelinesNV(ptrToInstance->device, pipelineCache, 1, &pipelineCreateInfo, nullptr, &pipeline);
if (result != VK_SUCCESS) { ErasePipelineState(); throw std::runtime_error("Could not create rtx pipeline"); }
haveRTXPipeline = true;
/* Bug: we were not aligning here. The handles are packed at shaderGroupHandleSize stride,
   but the offsets passed to vkCmdTraceRaysNV must be multiples of shaderGroupBaseAlignment. */
uint32_t shaderBindingTableSize = RTXInstance::raytracingProperties.shaderGroupHandleSize * (uint32_t)shaderGroups.size();
shaderBindingTable.Buffer(MEMORY_HOST_VISIBLE, SHARING_EXCLUSIVE, MODE_CREATE, USAGE_SRC, *ptrToInstance, nullptr, (VkDeviceSize)shaderBindingTableSize);
unsigned char *sbtData = new unsigned char[shaderBindingTableSize];
result = RTXInstance::vkGetRayTracingShaderGroupHandlesNV(ptrToInstance->device, pipeline, 0, (uint32_t)shaderGroups.size(), shaderBindingTableSize, sbtData);
if (result != VK_SUCCESS) { ErasePipelineState(); throw std::runtime_error("Could not get shader group handle"); }
shaderBindingTable.UploadSubData(0, sbtData, shaderBindingTableSize);
delete[] sbtData;
}
bool NVIDIA_RTX::RTXPipelineStateClass::IsInitialized()
{
return haveRTXPipeline;
}
NVIDIA_RTX::RTXPipelineStateClass::~RTXPipelineStateClass()
{
ErasePipelineState();
}
void NVIDIA_RTX::RTXPipelineStateClass::ErasePipelineState()
{
if (!ptrToInstance) return;
if (haveRTXPipeline) vkDestroyPipeline(ptrToInstance->device, pipeline, nullptr);
if (havePipelineLayout) vkDestroyPipelineLayout(ptrToInstance->device, pipelineLayout, nullptr);
if (havePipelineCache) vkDestroyPipelineCache(ptrToInstance->device, pipelineCache, nullptr);
havePipelineLayout = false;
haveRTXPipeline = false;
havePipelineCache = false;
ptrToInstance = nullptr;
}
NVIDIA_RTX::RTXPipelineStateClass::RTXPipelineStateClass()
{
havePipelineLayout = false;
haveRTXPipeline = false;
havePipelineCache = false;
ptrToInstance = nullptr;
}
NVIDIA_RTX::RTXPipelineStateClass::RTXPipelineStateClass(InstanceClass & renderInst, DescriptorSetLayout & DescSetLayout, ShaderResourceSet & inpShader)
{
ptrToInstance = &renderInst;
VkPipelineLayoutCreateInfo pPipelineLayoutCreateInfo = {};
pPipelineLayoutCreateInfo.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO;
pPipelineLayoutCreateInfo.setLayoutCount = (uint32_t)DescSetLayout.descriptorSetLayouts.size();
pPipelineLayoutCreateInfo.pSetLayouts = DescSetLayout.descriptorSetLayouts.data();
VkResult result = vkCreatePipelineLayout(ptrToInstance->device, &pPipelineLayoutCreateInfo, nullptr, &pipelineLayout);
if (result != VK_SUCCESS) { ErasePipelineState(); throw std::runtime_error("Could not create pipeline layout"); }
havePipelineLayout = true;
try { shaderStages.push_back(AddOrFindCachedShaderStage(*ptrToInstance, inpShader.rtx_raygen_shader, inpShader.getRaygenEntry(), VK_SHADER_STAGE_RAYGEN_BIT_NV)->elem.stage); }
catch (...) { ErasePipelineState(); throw std::runtime_error("Could not create raygen shader"); }
try { shaderStages.push_back(AddOrFindCachedShaderStage(*ptrToInstance, inpShader.rtx_raymiss_shader, inpShader.getRaymissEntry(), VK_SHADER_STAGE_MISS_BIT_NV)->elem.stage); }
catch (...) { ErasePipelineState(); throw std::runtime_error("Could not create raymiss shader"); }
try { shaderStages.push_back(AddOrFindCachedShaderStage(*ptrToInstance, inpShader.rtx_raychit_shader, inpShader.getRaychitEntry(), VK_SHADER_STAGE_CLOSEST_HIT_BIT_NV)->elem.stage); }
catch (...) { ErasePipelineState(); throw std::runtime_error("Could not create raychit shader"); }
if (inpShader.getRayahitEntry())
{
try { shaderStages.push_back(AddOrFindCachedShaderStage(*ptrToInstance, inpShader.rtx_rayahit_shader, inpShader.getRayahitEntry(), VK_SHADER_STAGE_ANY_HIT_BIT_NV)->elem.stage); }
catch (...) { ErasePipelineState(); throw std::runtime_error("Could not create rayahit shader"); }
}
shaderGroups.push_back({ VK_STRUCTURE_TYPE_RAY_TRACING_SHADER_GROUP_CREATE_INFO_NV, nullptr, VK_RAY_TRACING_SHADER_GROUP_TYPE_GENERAL_NV, 0, VK_SHADER_UNUSED_NV, VK_SHADER_UNUSED_NV, VK_SHADER_UNUSED_NV });
shaderGroups.push_back({ VK_STRUCTURE_TYPE_RAY_TRACING_SHADER_GROUP_CREATE_INFO_NV, nullptr, VK_RAY_TRACING_SHADER_GROUP_TYPE_GENERAL_NV, 1, VK_SHADER_UNUSED_NV, VK_SHADER_UNUSED_NV, VK_SHADER_UNUSED_NV });
shaderGroups.push_back({ VK_STRUCTURE_TYPE_RAY_TRACING_SHADER_GROUP_CREATE_INFO_NV, nullptr, VK_RAY_TRACING_SHADER_GROUP_TYPE_TRIANGLES_HIT_GROUP_NV, VK_SHADER_UNUSED_NV, 2, inpShader.getRayahitEntry() ? 3 : VK_SHADER_UNUSED_NV, VK_SHADER_UNUSED_NV });
VkRayTracingPipelineCreateInfoNV pipelineCreateInfo = {};
pipelineCreateInfo.stageCount = (uint32_t)shaderStages.size();
pipelineCreateInfo.pStages = shaderStages.data();
pipelineCreateInfo.groupCount = (uint32_t)shaderGroups.size();
pipelineCreateInfo.pGroups = shaderGroups.data();
pipelineCreateInfo.maxRecursionDepth = 8;
pipelineCreateInfo.sType = VK_STRUCTURE_TYPE_RAY_TRACING_PIPELINE_CREATE_INFO_NV;
pipelineCreateInfo.layout = pipelineLayout;
VkPipelineCacheCreateInfo pipelineCacheCreateInfo = {};
pipelineCacheCreateInfo.sType = VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO;
result = vkCreatePipelineCache(ptrToInstance->device, &pipelineCacheCreateInfo, nullptr, &pipelineCache);
if (result != VK_SUCCESS) { throw std::runtime_error("Could not create a pipeline cache"); }
havePipelineCache = true;
result = RTXInstance::vkCreateRayTracingPipelinesNV(ptrToInstance->device, pipelineCache, 1, &pipelineCreateInfo, nullptr, &pipeline);
if (result != VK_SUCCESS) { ErasePipelineState(); throw std::runtime_error("Could not create rtx pipeline"); }
haveRTXPipeline = true;
/* We are now aligning! Each group handle gets its own shaderGroupBaseAlignment-sized chunk. */
unsigned int sbtChunkSize = (unsigned int)ceil((double)RTXInstance::raytracingProperties.shaderGroupHandleSize / (double)RTXInstance::raytracingProperties.shaderGroupBaseAlignment) * RTXInstance::raytracingProperties.shaderGroupBaseAlignment;
uint32_t shaderBindingTableSize = RTXInstance::raytracingProperties.shaderGroupHandleSize * (uint32_t)shaderGroups.size();
uint32_t shaderBindingTableSizeAligned = sbtChunkSize * (uint32_t)shaderGroups.size();
shaderBindingTable.Buffer(MEMORY_HOST_VISIBLE, SHARING_EXCLUSIVE, MODE_CREATE, USAGE_SRC, *ptrToInstance, nullptr, (VkDeviceSize)shaderBindingTableSizeAligned);
unsigned char *sbtData = new unsigned char[shaderBindingTableSize];
unsigned char *sbtDataAligned = new unsigned char[shaderBindingTableSizeAligned];
result = RTXInstance::vkGetRayTracingShaderGroupHandlesNV(ptrToInstance->device, pipeline, 0, (uint32_t)shaderGroups.size(), shaderBindingTableSize, sbtData);
if (result != VK_SUCCESS) { ErasePipelineState(); throw std::runtime_error("Could not get shader group handle"); }
// Copy each tightly-packed handle into the start of its aligned chunk
for (uint32_t i = 0; i != (uint32_t)shaderGroups.size(); i++)
memcpy(&sbtDataAligned[i * sbtChunkSize], &sbtData[i * RTXInstance::raytracingProperties.shaderGroupHandleSize], RTXInstance::raytracingProperties.shaderGroupHandleSize);
shaderBindingTable.UploadSubData(0, sbtDataAligned, shaderBindingTableSizeAligned);
delete[] sbtData;
delete[] sbtDataAligned;
}
bool NVIDIA_RTX::RTXPipelineStateClass::IsInitialized()
{
return haveRTXPipeline;
}
NVIDIA_RTX::RTXPipelineStateClass::~RTXPipelineStateClass()
{
ErasePipelineState();
}
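A quick aside on the two align-up formulas: the ceil-based one above and the bitmask one used in the KHR version below are equivalent whenever shaderGroupBaseAlignment is a power of two, which alignment limits in practice are. A tiny standalone check, with hypothetical property values:
#include <cassert>
#include <cmath>
#include <cstdint>
int main()
{
	// Hypothetical values; the real ones come from the driver's ray-tracing properties.
	uint32_t handleSize = 32, baseAlignment = 64;
	uint32_t viaCeil = (uint32_t)std::ceil((double)handleSize / (double)baseAlignment) * baseAlignment;
	uint32_t viaMask = (handleSize + (baseAlignment - 1)) & ~(baseAlignment - 1);
	assert(viaCeil == viaMask && viaMask == 64); // each 32-byte handle occupies one 64-byte chunk
	return 0;
}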
void KHR_RT::RTPipelineStateClass::ErasePipelineState()
{
if (!ptrToInstance) return;
if (haveRTPipeline) vkDestroyPipeline(ptrToInstance->device, pipeline, nullptr);
if (havePipelineLayout) vkDestroyPipelineLayout(ptrToInstance->device, pipelineLayout, nullptr);
if (havePipelineCache) vkDestroyPipelineCache(ptrToInstance->device, pipelineCache, nullptr);
havePipelineLayout = false;
haveRTPipeline = false;
havePipelineCache = false;
ptrToInstance = nullptr;
}
KHR_RT::RTPipelineStateClass::RTPipelineStateClass()
{
havePipelineLayout = false;
haveRTPipeline = false;
havePipelineCache = false;
ptrToInstance = nullptr;
}
KHR_RT::RTPipelineStateClass::RTPipelineStateClass(InstanceClass & renderInst, DescriptorSetLayout & DescSetLayout, ShaderResourceSet & inpShader)
{
ptrToInstance = &renderInst;
VkPipelineLayoutCreateInfo pPipelineLayoutCreateInfo = {};
pPipelineLayoutCreateInfo.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO;
pPipelineLayoutCreateInfo.setLayoutCount = (uint32_t)DescSetLayout.descriptorSetLayouts.size();
pPipelineLayoutCreateInfo.pSetLayouts = DescSetLayout.descriptorSetLayouts.data();
VkResult result = vkCreatePipelineLayout(ptrToInstance->device, &pPipelineLayoutCreateInfo, nullptr, &pipelineLayout);
if (result != VK_SUCCESS) { ErasePipelineState(); throw std::runtime_error("Could not create pipeline layout"); }
havePipelineLayout = true;
try { shaderStages.push_back(AddOrFindCachedShaderStage(*ptrToInstance, inpShader.rt_raygen_shader, inpShader.getRaygenEntry(), VK_SHADER_STAGE_RAYGEN_BIT_KHR)->elem.stage); }
catch (...) { ErasePipelineState(); throw std::runtime_error("Could not create raygen shader"); }
try { shaderStages.push_back(AddOrFindCachedShaderStage(*ptrToInstance, inpShader.rt_raymiss_shader, inpShader.getRaymissEntry(), VK_SHADER_STAGE_MISS_BIT_KHR)->elem.stage); }
catch (...) { ErasePipelineState(); throw std::runtime_error("Could not create raymiss shader"); }
try { shaderStages.push_back(AddOrFindCachedShaderStage(*ptrToInstance, inpShader.rt_raychit_shader, inpShader.getRaychitEntry(), VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR)->elem.stage); }
catch (...) { ErasePipelineState(); throw std::runtime_error("Could not create raychit shader"); }
if (inpShader.getRayahitEntry())
{
try { shaderStages.push_back(AddOrFindCachedShaderStage(*ptrToInstance, inpShader.rt_rayahit_shader, inpShader.getRayahitEntry(), VK_SHADER_STAGE_ANY_HIT_BIT_KHR)->elem.stage); }
catch (...) { ErasePipelineState(); throw std::runtime_error("Could not create rayahit shader"); }
}
shaderGroups.push_back({ VK_STRUCTURE_TYPE_RAY_TRACING_SHADER_GROUP_CREATE_INFO_KHR, nullptr, VK_RAY_TRACING_SHADER_GROUP_TYPE_GENERAL_KHR, 0, VK_SHADER_UNUSED_KHR, VK_SHADER_UNUSED_KHR, VK_SHADER_UNUSED_KHR });
shaderGroups.push_back({ VK_STRUCTURE_TYPE_RAY_TRACING_SHADER_GROUP_CREATE_INFO_KHR, nullptr, VK_RAY_TRACING_SHADER_GROUP_TYPE_GENERAL_KHR, 1, VK_SHADER_UNUSED_KHR, VK_SHADER_UNUSED_KHR, VK_SHADER_UNUSED_KHR });
shaderGroups.push_back({ VK_STRUCTURE_TYPE_RAY_TRACING_SHADER_GROUP_CREATE_INFO_KHR, nullptr, VK_RAY_TRACING_SHADER_GROUP_TYPE_TRIANGLES_HIT_GROUP_KHR, VK_SHADER_UNUSED_KHR, 2, inpShader.getRayahitEntry() ? 3 : VK_SHADER_UNUSED_KHR, VK_SHADER_UNUSED_KHR });
VkRayTracingPipelineCreateInfoKHR pipelineCreateInfo = {};
pipelineCreateInfo.stageCount = (uint32_t)shaderStages.size();
pipelineCreateInfo.pStages = shaderStages.data();
pipelineCreateInfo.groupCount = (uint32_t)shaderGroups.size();
pipelineCreateInfo.pGroups = shaderGroups.data();
pipelineCreateInfo.maxPipelineRayRecursionDepth = 8;
pipelineCreateInfo.sType = VK_STRUCTURE_TYPE_RAY_TRACING_PIPELINE_CREATE_INFO_KHR;
pipelineCreateInfo.layout = pipelineLayout;
VkPipelineCacheCreateInfo pipelineCacheCreateInfo = {};
pipelineCacheCreateInfo.sType = VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO;
result = vkCreatePipelineCache(ptrToInstance->device, &pipelineCacheCreateInfo, nullptr, &pipelineCache);
if (result != VK_SUCCESS) { throw std::runtime_error("Could not create a pipeline cache"); }
havePipelineCache = true;
result = RTInstance::vkCreateRayTracingPipelinesKHR(ptrToInstance->device, VK_NULL_HANDLE, pipelineCache, 1, &pipelineCreateInfo, nullptr, &pipeline);
if (result != VK_SUCCESS) { ErasePipelineState(); throw std::runtime_error("Could not create rt pipeline"); }
haveRTPipeline = true;
/* Aligning as per the spec. */
unsigned int sbtChunkSize = (RTInstance::raytracingPipelineProperties.shaderGroupHandleSize + (RTInstance::raytracingPipelineProperties.shaderGroupBaseAlignment - 1)) & (~(RTInstance::raytracingPipelineProperties.shaderGroupBaseAlignment - 1));
uint32_t shaderBindingTableSize = RTInstance::raytracingPipelineProperties.shaderGroupHandleSize * (uint32_t)shaderGroups.size();
uint32_t shaderBindingTableSizeAligned = sbtChunkSize * (uint32_t)shaderGroups.size();
shaderBindingTable.Buffer(MEMORY_HOST_VISIBLE | MEMORY_HOST_COHERENT, SHARING_EXCLUSIVE, MODE_CREATE, USAGE_SRC | USAGE_SBT | USAGE_DEVICE_ADDRESS, *ptrToInstance, nullptr, (VkDeviceSize)shaderBindingTableSizeAligned);
unsigned char *sbtData = new unsigned char[shaderBindingTableSize];
unsigned char *sbtDataAligned = new unsigned char[shaderBindingTableSizeAligned];
result = RTInstance::vkGetRayTracingShaderGroupHandlesKHR(ptrToInstance->device, pipeline, 0, (uint32_t)shaderGroups.size(), shaderBindingTableSize, sbtData);
if (result != VK_SUCCESS) { ErasePipelineState(); throw std::runtime_error("Could not get shader group handle"); }
// Copy each tightly-packed handle into the start of its aligned chunk
for (uint32_t i = 0; i != (uint32_t)shaderGroups.size(); i++)
memcpy(&sbtDataAligned[i * sbtChunkSize], &sbtData[i * RTInstance::raytracingPipelineProperties.shaderGroupHandleSize], RTInstance::raytracingPipelineProperties.shaderGroupHandleSize);
shaderBindingTable.UploadSubData(0, sbtDataAligned, shaderBindingTableSizeAligned);
delete[] sbtData;
delete[] sbtDataAligned;
}
bool KHR_RT::RTPipelineStateClass::IsInitialized()
{
return haveRTPipeline;
}
KHR_RT::RTPipelineStateClass::~RTPipelineStateClass()
{
ErasePipelineState();
}
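Since each group handle was copied to the start of its own sbtChunkSize slot, the SBT layout is now fixed, and the tracelets below simply address groups by chunk index. A sketch of the offsets they rely on (the any-hit stage lives inside the hit group, so it does not get a chunk of its own):
// SBT layout produced by the copy loop above, one aligned chunk per group:
VkDeviceSize raygenOffset = 0 * sbtChunkSize; // group 0: raygen
VkDeviceSize missOffset   = 1 * sbtChunkSize; // group 1: miss
VkDeviceSize hitOffset    = 2 * sbtChunkSize; // group 2: triangle hit (closest-hit + optional any-hit)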
NVIDIA_RTX::RTXTracelet::RTXTracelet()
{
ptrToInstance = nullptr;
}
NVIDIA_RTX::RTXTracelet::RTXTracelet(InstanceClass & inpInstance)
{
ptrToInstance = &inpInstance;
traceSem.Semaphore(ptrToInstance);
}
void NVIDIA_RTX::RTXTracelet::Submit(unsigned int inpWidth, unsigned int inpHeight, unsigned int inpDepth, std::vector & inpTracingResources, bool updateResources, ShaderResourceSet & inpRTXShaderResourceSet)
{
if (!ptrToInstance) return;
if (updateResources) recordedCmdBuf = false;
if (!recordedCmdBuf)
{
if (!recordedPSO)
{
DSL.CreateDescriptorSetLayout(inpTracingResources, ptrToInstance);
PSO.RTXPipelineState(*ptrToInstance, DSL, inpRTXShaderResourceSet);
DS.Make(&DSL);
DS.WriteDescriptorSets(inpTracingResources);
recordedPSO = true;
}
else if (updateResources)
{
DS.RewriteDescriptorSets(inpTracingResources);
}
BeginCommandBuffer(*ptrToInstance);
vkCmdBindPipeline(cmdBuffers[0], VK_PIPELINE_BIND_POINT_RAY_TRACING_NV, PSO.pipeline);
vkCmdBindDescriptorSets(cmdBuffers[0], VK_PIPELINE_BIND_POINT_RAY_TRACING_NV, PSO.pipelineLayout, 0, (uint32_t)DS.descriptorSets.size(), DS.descriptorSets.data(), 0, 0);
unsigned int sbtChunkSize = (unsigned int)ceil((double)RTXInstance::raytracingProperties.shaderGroupHandleSize / (double)RTXInstance::raytracingProperties.shaderGroupBaseAlignment) * RTXInstance::raytracingProperties.shaderGroupBaseAlignment;
// The NV entry point takes (buffer, offset, stride) arguments directly:
// raygen at offset 0, miss at one chunk, hit at two chunks, no callable table
RTXInstance::vkCmdTraceRaysNV(cmdBuffers[0],
PSO.shaderBindingTable.buffer, 0,
PSO.shaderBindingTable.buffer, 1 * sbtChunkSize, sbtChunkSize,
PSO.shaderBindingTable.buffer, 2 * sbtChunkSize, sbtChunkSize,
VK_NULL_HANDLE, 0, 0,
inpWidth, inpHeight, inpDepth);
EndCommandBuffer();
recordedCmdBuf = true;
}
WaitSubmitSignalCommandBuffer(submissionMode, traceSem);
}
void NVIDIA_RTX::RTXTracelet::makeAsync()
{
submissionMode = SUBMIT_ASYNC;
}
void NVIDIA_RTX::RTXTracelet::makeSerial()
{
submissionMode = SUBMIT_SERIAL;
}
KHR_RT::RTTracelet::RTTracelet()
{
ptrToInstance = nullptr;
}
KHR_RT::RTTracelet::RTTracelet(InstanceClass & inpInstance)
{
ptrToInstance = &inpInstance;
traceSem.Semaphore(ptrToInstance);
}
void KHR_RT::RTTracelet::Submit(unsigned int inpWidth, unsigned int inpHeight, unsigned int inpDepth, std::vector & inpTracingResources, bool updateResources, ShaderResourceSet & inpRTShaderResourceSet)
{
if (!ptrToInstance) return;
if (updateResources) recordedCmdBuf = false;
if (!recordedCmdBuf)
{
if (!recordedPSO)
{
DSL.CreateDescriptorSetLayout(inpTracingResources, ptrToInstance);
PSO.RTPipelineState(*ptrToInstance, DSL, inpRTShaderResourceSet);
DS.Make(&DSL);
DS.WriteDescriptorSets(inpTracingResources);
recordedPSO = true;
}
else if (updateResources)
{
DS.RewriteDescriptorSets(inpTracingResources);
}
BeginCommandBuffer(*ptrToInstance);
vkCmdBindPipeline(cmdBuffers[0], VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR, PSO.pipeline);
vkCmdBindDescriptorSets(cmdBuffers[0], VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR, PSO.pipelineLayout, 0, (uint32_t)DS.descriptorSets.size(), DS.descriptorSets.data(), 0, 0);
unsigned int sbtChunkSize = (RTInstance::raytracingPipelineProperties.shaderGroupHandleSize + (RTInstance::raytracingPipelineProperties.shaderGroupBaseAlignment - 1)) & (~(RTInstance::raytracingPipelineProperties.shaderGroupBaseAlignment - 1));
VkBufferDeviceAddressInfo bufDevAdInfo{ VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_INFO };
bufDevAdInfo.buffer = PSO.shaderBindingTable.buffer;
VkDeviceAddress sbtDeviceAddress = RTInstance::vkGetBufferDeviceAddressKHR(ptrToInstance->device, &bufDevAdInfo);
// The KHR entry point takes device-address regions instead of (buffer, offset) pairs:
// raygen, miss, and hit regions one chunk apart, plus an empty callable region
std::array<VkStridedDeviceAddressRegionKHR, 4> strideAddresses{
VkStridedDeviceAddressRegionKHR{ sbtDeviceAddress + 0u * sbtChunkSize, sbtChunkSize, sbtChunkSize },
VkStridedDeviceAddressRegionKHR{ sbtDeviceAddress + 1u * sbtChunkSize, sbtChunkSize, sbtChunkSize },
VkStridedDeviceAddressRegionKHR{ sbtDeviceAddress + 2u * sbtChunkSize, sbtChunkSize, sbtChunkSize },
VkStridedDeviceAddressRegionKHR{ 0u, 0u, 0u } };
RTInstance::vkCmdTraceRaysKHR(cmdBuffers[0], &strideAddresses[0], &strideAddresses[1], &strideAddresses[2], &strideAddresses[3], inpWidth, inpHeight, inpDepth);
EndCommandBuffer();
recordedCmdBuf = true;
}
WaitSubmitSignalCommandBuffer(submissionMode, traceSem);
}
void KHR_RT::RTTracelet::makeAsync()
{
submissionMode = SUBMIT_ASYNC;
}
void KHR_RT::RTTracelet::makeSerial()
{
submissionMode = SUBMIT_SERIAL;
}
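With the pipeline and SBT in place, a tracelet is driven once per frame, roughly as follows. This is a sketch; renderInstance, tracingResources, rtShaders, and the scene object are hypothetical stand-ins for whatever the engine feeds the descriptor-set layer:
// Hypothetical frame-loop usage of the KHR tracelet.
KHR_RT::RTTracelet tracer(renderInstance); // records the command buffer on first Submit, replays it afterwards
tracer.makeAsync();                        // submit without blocking (SUBMIT_ASYNC)
bool rewrite = scene.IsSceneNew();         // e.g. the TLAS was rebuilt this frame
tracer.Submit(width, height, 1, tracingResources, rewrite, rtShaders);
if (rewrite) scene.MarkSceneOld();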
For the NV path, compiling the shaders is a simple batch loop over each stage:
for %%A in (*.rgen) do glslangValidator -V %%A -o %%A.spv
for %%A in (*.rchit) do glslangValidator -V %%A -o %%A.spv
for %%A in (*.rmiss) do glslangValidator -V %%A -o %%A.spv
for %%A in (*.rahit) do glslangValidator -V %%A -o %%A.spv
The KHR ray-tracing pipeline requires SPIR-V 1.4 or newer, so the same loops need --target-env spirv1.4:
for %%A in (*.rgen) do glslangValidator --target-env spirv1.4 -V %%A -o %%A.spv
for %%A in (*.rchit) do glslangValidator --target-env spirv1.4 -V %%A -o %%A.spv
for %%A in (*.rmiss) do glslangValidator --target-env spirv1.4 -V %%A -o %%A.spv
for %%A in (*.rahit) do glslangValidator --target-env spirv1.4 -V %%A -o %%A.spv
In the shaders themselves, replace #extension GL_NV_ray_tracing : require with #extension GL_EXT_ray_tracing : require. traceNV is now traceRayEXT and takes exactly the same parameters. ignoreIntersectionNV(); is now ignoreIntersectionEXT; (it became a statement rather than a function call), and the same goes for terminateRayNV() becoming terminateRayEXT;. More generally, everything that used to end in NV should now end in EXT: rayPayloadNV becomes rayPayloadEXT, hitAttributeNV becomes hitAttributeEXT, gl_LaunchIDNV becomes gl_LaunchIDEXT, and so on.