diff options
Diffstat (limited to 'src/layer.cc')
| -rw-r--r-- | src/layer.cc | 586 |
1 file changed, 336 insertions, 250 deletions
diff --git a/src/layer.cc b/src/layer.cc index 5e652f0..cead7cd 100644 --- a/src/layer.cc +++ b/src/layer.cc @@ -5,6 +5,9 @@ #include <unordered_map> #include <utility> +// hack +#include <deque> + #include <vulkan/utility/vk_dispatch_table.h> #include <vulkan/vk_layer.h> #include <vulkan/vk_platform.h> @@ -16,6 +19,7 @@ #include "instance_context.hh" #include "layer_context.hh" #include "queue_context.hh" +#include "timestamp_pool.hh" namespace low_latency { @@ -25,8 +29,9 @@ LayerContext layer_context; } // namespace -template <typename T, typename sType> -static T* get_link_info(const void* const head, const sType& stype) { +template <typename T, typename sType, typename fType> +static T* get_link_info(const void* const head, const sType& stype, + const fType& ftype) { for (auto i = reinterpret_cast<const VkBaseInStructure*>(head); i; i = i->pNext) { @@ -35,7 +40,7 @@ static T* get_link_info(const void* const head, const sType& stype) { } const auto info = reinterpret_cast<const T*>(i); - if (info->function != VK_LAYER_LINK_INFO) { + if (info->function != ftype) { continue; } @@ -49,7 +54,8 @@ CreateInstance(const VkInstanceCreateInfo* pCreateInfo, const VkAllocationCallbacks* pAllocator, VkInstance* pInstance) { const auto link_info = get_link_info<VkLayerInstanceCreateInfo>( - pCreateInfo->pNext, VK_STRUCTURE_TYPE_LOADER_INSTANCE_CREATE_INFO); + pCreateInfo->pNext, VK_STRUCTURE_TYPE_LOADER_INSTANCE_CREATE_INFO, + VK_LAYER_LINK_INFO); if (!link_info || !link_info->u.pLayerInfo) { return VK_ERROR_INITIALIZATION_FAILED; @@ -78,23 +84,23 @@ CreateInstance(const VkInstanceCreateInfo* pCreateInfo, } const auto key = layer_context.get_key(*pInstance); + +#define INSTANCE_VTABLE_LOAD(name) \ + .name = reinterpret_cast<PFN_vk##name>(gipa(*pInstance, "vk" #name)) auto vtable = VkuInstanceDispatchTable{ - .DestroyInstance = reinterpret_cast<PFN_vkDestroyInstance>( - gipa(*pInstance, "vkDestroyInstance")), - .EnumeratePhysicalDevices = - 
reinterpret_cast<PFN_vkEnumeratePhysicalDevices>( - gipa(*pInstance, "vkEnumeratePhysicalDevices")), - .GetInstanceProcAddr = reinterpret_cast<PFN_vkGetInstanceProcAddr>( - gipa(*pInstance, "vkGetInstanceProcAddr")), - .EnumerateDeviceExtensionProperties = - reinterpret_cast<PFN_vkEnumerateDeviceExtensionProperties>( - gipa(*pInstance, "vkEnumerateDeviceExtensionProperties")), + INSTANCE_VTABLE_LOAD(DestroyInstance), + INSTANCE_VTABLE_LOAD(EnumeratePhysicalDevices), + INSTANCE_VTABLE_LOAD(GetInstanceProcAddr), + INSTANCE_VTABLE_LOAD(CreateDevice), + INSTANCE_VTABLE_LOAD(EnumerateDeviceExtensionProperties), }; +#undef INSTANCE_VTABLE_LOAD const auto lock = std::scoped_lock{layer_context.mutex}; assert(!layer_context.contexts.contains(key)); + layer_context.contexts.try_emplace( - key, std::make_unique<InstanceContext>(*pInstance, std::move(vtable))); + key, std::make_shared<InstanceContext>(*pInstance, std::move(vtable))); return VK_SUCCESS; } @@ -102,11 +108,55 @@ CreateInstance(const VkInstanceCreateInfo* pCreateInfo, static VKAPI_ATTR void VKAPI_CALL DestroyInstance(VkInstance instance, const VkAllocationCallbacks* allocator) { + const auto destroy_instance_func = [&]() -> auto { + const auto context = layer_context.get_context(instance); + const auto lock = std::scoped_lock{layer_context.mutex}; + + // Erase our physical devices owned by this instance from the global + // context. + for (const auto& [key, _] : context->phys_devices) { + assert(layer_context.contexts.erase(key)); + } + + const auto key = layer_context.get_key(instance); + assert(layer_context.contexts.erase(key)); + + // Should be the last ptr now like DestroyDevice. 
+ assert(context.unique()); + return context->vtable.DestroyInstance; + }(); + + destroy_instance_func(instance, allocator); +} + +static VKAPI_ATTR VkResult VKAPI_CALL EnumeratePhysicalDevices( + VkInstance instance, std::uint32_t* count, VkPhysicalDevice* devices) { + + const auto context = layer_context.get_context(instance); + + if (const auto result = + context->vtable.EnumeratePhysicalDevices(instance, count, devices); + !devices || !count || result != VK_SUCCESS) { + + return result; + } + const auto lock = std::scoped_lock{layer_context.mutex}; + const auto C = *count; + for (auto i = std::uint32_t{0}; i < C; ++i) { + const auto& device = devices[i]; + + const auto key = layer_context.get_key(device); + const auto [it, inserted] = + layer_context.contexts.try_emplace(key, nullptr); - const auto key = layer_context.get_key(instance); - assert(layer_context.contexts.contains(key)); - layer_context.contexts.erase(key); + if (inserted) { + it->second = + std::make_shared<PhysicalDeviceContext>(*context, device); + } + } + + return VK_SUCCESS; } static VKAPI_ATTR VkResult VKAPI_CALL CreateDevice( @@ -114,56 +164,64 @@ static VKAPI_ATTR VkResult VKAPI_CALL CreateDevice( const VkAllocationCallbacks* pAllocator, VkDevice* pDevice) { const auto create_info = get_link_info<VkLayerDeviceCreateInfo>( - pCreateInfo->pNext, VK_STRUCTURE_TYPE_LOADER_DEVICE_CREATE_INFO); + pCreateInfo->pNext, VK_STRUCTURE_TYPE_LOADER_DEVICE_CREATE_INFO, + VK_LAYER_LINK_INFO); if (!create_info || !create_info->u.pLayerInfo) { return VK_ERROR_INITIALIZATION_FAILED; } + const auto callback_info = get_link_info<VkLayerDeviceCreateInfo>( + pCreateInfo->pNext, VK_STRUCTURE_TYPE_LOADER_DEVICE_CREATE_INFO, + VK_LOADER_DATA_CALLBACK); + if (!callback_info || !callback_info->u.pLayerInfo) { + return VK_ERROR_INITIALIZATION_FAILED; + } + + const auto sdld = callback_info->u.pfnSetDeviceLoaderData; const auto gipa = create_info->u.pLayerInfo->pfnNextGetInstanceProcAddr; const auto gdpa = 
create_info->u.pLayerInfo->pfnNextGetDeviceProcAddr; - if (!gipa || !gdpa) { + if (!sdld || !gipa || !gdpa) { return VK_ERROR_INITIALIZATION_FAILED; } create_info->u.pLayerInfo = create_info->u.pLayerInfo->pNext; - const auto lock = std::scoped_lock{layer_context.mutex}; - - auto& context = layer_context.get_context<InstanceContext>(physical_device); + const auto physical_device_context = + layer_context.get_context(physical_device); + auto& instance_context = physical_device_context->instance; const auto next_extensions = [&]() -> std::optional<std::vector<const char*>> { - const auto supported_extensions = - [&]() -> std::optional<std::vector<VkExtensionProperties>> { - const auto enumerate_device_extensions = - reinterpret_cast<PFN_vkEnumerateDeviceExtensionProperties>(gipa( - context.instance, "vkEnumerateDeviceExtensionProperties")); - if (!enumerate_device_extensions) { - return std::nullopt; - } + const auto enumerate_device_extensions = + reinterpret_cast<PFN_vkEnumerateDeviceExtensionProperties>( + gipa(instance_context.instance, + "vkEnumerateDeviceExtensionProperties")); + if (!enumerate_device_extensions) { + return std::nullopt; + } - auto count = std::uint32_t{}; - if (enumerate_device_extensions(physical_device, nullptr, &count, - nullptr) != VK_SUCCESS) { + auto count = std::uint32_t{}; + if (enumerate_device_extensions(physical_device, nullptr, &count, + nullptr) != VK_SUCCESS) { - return std::nullopt; - } + return std::nullopt; + } - auto supported_extensions = - std::vector<VkExtensionProperties>(count); - if (enumerate_device_extensions(physical_device, nullptr, &count, - std::data(supported_extensions)) != - VK_SUCCESS) { + auto supported_extensions = std::vector<VkExtensionProperties>(count); + if (enumerate_device_extensions(physical_device, nullptr, &count, + std::data(supported_extensions)) != + VK_SUCCESS) { - return std::nullopt; - } + return std::nullopt; + } - return supported_extensions; - }(); + auto next_extensions = 
std::vector<const char*>{}; + if (pCreateInfo->enabledExtensionCount && + pCreateInfo->ppEnabledExtensionNames) { - auto next_extensions = - std::vector{*pCreateInfo->ppEnabledExtensionNames, - std::next(*pCreateInfo->ppEnabledExtensionNames + - pCreateInfo->enabledExtensionCount)}; + std::ranges::copy_n(pCreateInfo->ppEnabledExtensionNames, + pCreateInfo->enabledExtensionCount, + std::back_inserter(next_extensions)); + } const auto wanted_extensions = { VK_KHR_SYNCHRONIZATION_2_EXTENSION_NAME, @@ -180,12 +238,11 @@ static VKAPI_ATTR VkResult VKAPI_CALL CreateDevice( continue; // Already included, ignore it. } - if (std::ranges::none_of(*supported_extensions, - [&](const auto& supported_extension) { - return !std::strcmp( - supported_extension.extensionName, - wanted); - })) { + if (std::ranges::none_of( + supported_extensions, [&](const auto& supported_extension) { + return !std::strcmp(supported_extension.extensionName, + wanted); + })) { return std::nullopt; // We don't support it, the layer can't // work. 
@@ -201,8 +258,7 @@ static VKAPI_ATTR VkResult VKAPI_CALL CreateDevice( return VK_ERROR_INITIALIZATION_FAILED; } - const auto create_device = reinterpret_cast<PFN_vkCreateDevice>( - gipa(VK_NULL_HANDLE, "vkCreateDevice")); + const auto create_device = instance_context.vtable.CreateDevice; if (!create_device) { return VK_ERROR_INITIALIZATION_FAILED; } @@ -221,164 +277,199 @@ static VKAPI_ATTR VkResult VKAPI_CALL CreateDevice( return result; } +#define DEVICE_VTABLE_LOAD(name) \ + .name = reinterpret_cast<PFN_vk##name>(gdpa(*pDevice, "vk" #name)) auto vtable = VkuDeviceDispatchTable{ - .GetDeviceProcAddr = reinterpret_cast<PFN_vkGetDeviceProcAddr>( - gdpa(*pDevice, "vkGetDeviceProcAddr")), - .DestroyDevice = reinterpret_cast<PFN_vkDestroyDevice>( - gdpa(*pDevice, "vkDestroyDevice")), - .GetDeviceQueue = reinterpret_cast<PFN_vkGetDeviceQueue>( - gdpa(*pDevice, "vkGetDeviceQueue")), - .QueueSubmit = reinterpret_cast<PFN_vkQueueSubmit>( - gdpa(*pDevice, "vkQueueSubmit")), - .CreateSemaphore = reinterpret_cast<PFN_vkCreateSemaphore>( - gdpa(*pDevice, "vkCreateSemaphore")), - .DestroySemaphore = reinterpret_cast<PFN_vkDestroySemaphore>( - gdpa(*pDevice, "vkDestroySemaphore")), - .CreateQueryPool = reinterpret_cast<PFN_vkCreateQueryPool>( - gdpa(*pDevice, "vkCreateQueryPool")), - .DestroyQueryPool = reinterpret_cast<PFN_vkDestroyQueryPool>( - gdpa(*pDevice, "vkDestroyQueryPool")), - .GetQueryPoolResults = reinterpret_cast<PFN_vkGetQueryPoolResults>( - gdpa(*pDevice, "vkGetQueryPoolResults")), - .CreateCommandPool = reinterpret_cast<PFN_vkCreateCommandPool>( - gdpa(*pDevice, "vkCreateCommandPool")), - .DestroyCommandPool = reinterpret_cast<PFN_vkDestroyCommandPool>( - gdpa(*pDevice, "vkDestroyCommandPool")), - .AllocateCommandBuffers = - reinterpret_cast<PFN_vkAllocateCommandBuffers>( - gdpa(*pDevice, "vkAllocateCommandBuffers")), - .FreeCommandBuffers = reinterpret_cast<PFN_vkFreeCommandBuffers>( - gdpa(*pDevice, "vkFreeCommandBuffers")), - .BeginCommandBuffer = 
reinterpret_cast<PFN_vkBeginCommandBuffer>( - gdpa(*pDevice, "vkBeginCommandBuffer")), - .EndCommandBuffer = reinterpret_cast<PFN_vkEndCommandBuffer>( - gdpa(*pDevice, "vkEndCommandBuffer")), - .ResetCommandBuffer = reinterpret_cast<PFN_vkResetCommandBuffer>( - gdpa(*pDevice, "vkResetCommandBuffer")), - .CmdDraw = reinterpret_cast<PFN_vkCmdDraw>(gdpa(*pDevice, "vkCmdDraw")), - .CmdDrawIndexed = reinterpret_cast<PFN_vkCmdDrawIndexed>( - gdpa(*pDevice, "vkCmdDrawIndexed")), - .CmdResetQueryPool = reinterpret_cast<PFN_vkCmdResetQueryPool>( - gdpa(*pDevice, "vkCmdResetQueryPool")), - .GetDeviceQueue2 = reinterpret_cast<PFN_vkGetDeviceQueue2>( - gdpa(*pDevice, "vkGetDeviceQueue2")), - .QueueSubmit2 = reinterpret_cast<PFN_vkQueueSubmit2>( - gdpa(*pDevice, "vkQueueSubmit2")), - .QueuePresentKHR = reinterpret_cast<PFN_vkQueuePresentKHR>( - gdpa(*pDevice, "vkQueuePresentKHR")), - .GetSemaphoreCounterValueKHR = - reinterpret_cast<PFN_vkGetSemaphoreCounterValueKHR>( - gdpa(*pDevice, "vkGetSemaphoreCounterValueKHR")), - .CmdWriteTimestamp2KHR = reinterpret_cast<PFN_vkCmdWriteTimestamp2KHR>( - gdpa(*pDevice, "vkCmdWriteTimestamp2KHR")), - .QueueSubmit2KHR = reinterpret_cast<PFN_vkQueueSubmit2KHR>( - gdpa(*pDevice, "vkQueueSubmit2KHR")), + DEVICE_VTABLE_LOAD(GetDeviceProcAddr), + DEVICE_VTABLE_LOAD(DestroyDevice), + DEVICE_VTABLE_LOAD(GetDeviceQueue), + DEVICE_VTABLE_LOAD(QueueSubmit), + DEVICE_VTABLE_LOAD(CreateSemaphore), + DEVICE_VTABLE_LOAD(DestroySemaphore), + DEVICE_VTABLE_LOAD(CreateQueryPool), + DEVICE_VTABLE_LOAD(DestroyQueryPool), + DEVICE_VTABLE_LOAD(GetQueryPoolResults), + DEVICE_VTABLE_LOAD(CreateCommandPool), + DEVICE_VTABLE_LOAD(DestroyCommandPool), + DEVICE_VTABLE_LOAD(AllocateCommandBuffers), + DEVICE_VTABLE_LOAD(FreeCommandBuffers), + DEVICE_VTABLE_LOAD(BeginCommandBuffer), + DEVICE_VTABLE_LOAD(EndCommandBuffer), + DEVICE_VTABLE_LOAD(ResetCommandBuffer), + DEVICE_VTABLE_LOAD(CmdResetQueryPool), + DEVICE_VTABLE_LOAD(CmdDraw), + 
DEVICE_VTABLE_LOAD(CmdDrawIndexed), + DEVICE_VTABLE_LOAD(GetDeviceQueue2), + DEVICE_VTABLE_LOAD(QueueSubmit2), + DEVICE_VTABLE_LOAD(AcquireNextImageKHR), + DEVICE_VTABLE_LOAD(QueuePresentKHR), + DEVICE_VTABLE_LOAD(AcquireNextImage2KHR), + DEVICE_VTABLE_LOAD(GetSemaphoreCounterValueKHR), + DEVICE_VTABLE_LOAD(CmdWriteTimestamp2KHR), + DEVICE_VTABLE_LOAD(QueueSubmit2KHR), }; +#undef DEVICE_VTABLE_LOAD const auto key = layer_context.get_key(*pDevice); + const auto lock = std::scoped_lock{layer_context.mutex}; assert(!layer_context.contexts.contains(key)); + layer_context.contexts.try_emplace( - key, - std::make_unique<DeviceContext>(context, *pDevice, std::move(vtable))); + key, std::make_shared<DeviceContext>(instance_context, *pDevice, sdld, + std::move(vtable))); return VK_SUCCESS; } static VKAPI_ATTR void VKAPI_CALL DestroyDevice(VkDevice device, const VkAllocationCallbacks* allocator) { - const auto lock = std::scoped_lock{layer_context.mutex}; - const auto key = layer_context.get_key(device); - assert(layer_context.contexts.contains(key)); - layer_context.contexts.erase(key); + + const auto destroy_device_func = [&]() -> auto { + const auto device_context = layer_context.get_context(device); + + const auto func = device_context->vtable.DestroyDevice; + const auto lock = std::scoped_lock{layer_context.mutex}; + // Remove all owned queues from our global context pool. + for (const auto& [queue, _] : device_context->queues) { + const auto key = layer_context.get_key(queue); + assert(layer_context.contexts.erase(key)); + } + + const auto key = layer_context.get_key(device); + assert(layer_context.contexts.erase(key)); + + // should be the last shared ptr now, so its destructor can be called. + // the destructor should expect its owned queues to be unique as well! + assert(device_context.unique()); + + return func; + }(); + + destroy_device_func(device, allocator); } -// Small amount of duplication, we can't assume gdq2 is available apparently. 
static VKAPI_ATTR void VKAPI_CALL GetDeviceQueue(VkDevice device, std::uint32_t queue_family_index, std::uint32_t queue_index, VkQueue* queue) { - const auto lock = std::scoped_lock{layer_context.mutex}; - - auto& device_context = layer_context.get_context<DeviceContext>(device); + const auto device_context = layer_context.get_context(device); - device_context.vtable.GetDeviceQueue(device, queue_family_index, - queue_index, queue); + device_context->vtable.GetDeviceQueue(device, queue_family_index, + queue_index, queue); if (!queue || !*queue) { return; } - auto& queue_contexts = device_context.queue_contexts; - if (!queue_contexts.contains(*queue)) { - queue_contexts.try_emplace( - *queue, std::make_unique<QueueContext>(device_context, *queue, - queue_family_index)); + // Look in our layer context, which has everything. If we were able to + // insert a nullptr key, then it didn't already exist so we should + // construct a new one. + const auto key = layer_context.get_key(*queue); + const auto lock = std::scoped_lock{layer_context.mutex}; + const auto [it, inserted] = layer_context.contexts.try_emplace(key); + if (inserted) { + it->second = std::make_shared<QueueContext>(*device_context, *queue, + queue_family_index); } + + // it->second should be QueueContext, also it might already be there + // but this is expected. + const auto ptr = std::dynamic_pointer_cast<QueueContext>(it->second); + assert(ptr); + device_context->queues.emplace(*queue, ptr); } +// Identical logic to gdq so some amount of duplication, we can't assume gdq1 is +// available apparently, what do I know? 
static VKAPI_ATTR void VKAPI_CALL GetDeviceQueue2( VkDevice device, const VkDeviceQueueInfo2* info, VkQueue* queue) { - const auto lock = std::scoped_lock{layer_context.mutex}; - auto& device_context = layer_context.get_context<DeviceContext>(device); + const auto device_context = layer_context.get_context(device); - device_context.vtable.GetDeviceQueue2(device, info, queue); + device_context->vtable.GetDeviceQueue2(device, info, queue); if (!queue || !*queue) { return; } - auto& queue_contexts = device_context.queue_contexts; - if (!queue_contexts.contains(*queue)) { - queue_contexts.try_emplace( - *queue, std::make_unique<QueueContext>(device_context, *queue, - info->queueFamilyIndex)); + const auto key = layer_context.get_key(*queue); + const auto lock = std::scoped_lock{layer_context.mutex}; + const auto [it, inserted] = layer_context.contexts.try_emplace(key); + if (inserted) { + it->second = std::make_shared<QueueContext>(*device_context, *queue, + info->queueFamilyIndex); + } + + const auto ptr = std::dynamic_pointer_cast<QueueContext>(it->second); + assert(ptr); + device_context->queues.emplace(*queue, ptr); +} + +static VKAPI_ATTR VkResult VKAPI_CALL vkAcquireNextImageKHR( + VkDevice device, VkSwapchainKHR swapchain, std::uint64_t timeout, + VkSemaphore semaphore, VkFence fence, std::uint32_t* pImageIndex) { + + const auto context = layer_context.get_context(device); + if (const auto result = context->vtable.AcquireNextImageKHR( + device, swapchain, timeout, semaphore, fence, pImageIndex); + result != VK_SUCCESS) { + + return result; + } + + return VK_SUCCESS; +} + +static VKAPI_ATTR VkResult VKAPI_CALL vkAcquireNextImage2KHR( + VkDevice device, const VkAcquireNextImageInfoKHR* pAcquireInfo, + std::uint32_t* pImageIndex) { + + const auto context = layer_context.get_context(device); + if (const auto result = context->vtable.AcquireNextImage2KHR( + device, pAcquireInfo, pImageIndex); + result != VK_SUCCESS) { + + return result; } + + return VK_SUCCESS; } 
static VKAPI_ATTR VkResult VKAPI_CALL vkQueueSubmit(VkQueue queue, std::uint32_t submit_count, const VkSubmitInfo* submit_info, VkFence fence) { - const auto lock = std::scoped_lock{layer_context.mutex}; - - auto& queue_context = layer_context.get_context<QueueContext>(queue); - const auto& vtable = queue_context.device_context.vtable; + const auto& queue_context = layer_context.get_context(queue); + const auto& vtable = queue_context->device_context.vtable; if (!submit_count) { // no-op submit we shouldn't worry about return vtable.QueueSubmit(queue, submit_count, submit_info, fence); } - // Create a new vector of submit infos, copy their existing ones. + // Create a new vector of submit infos. auto next_submit_infos = std::vector<VkSubmitInfo>{}; - next_submit_infos.reserve(submit_count + 2); - auto timestamp_handle = queue_context.timestamp_pool->acquire(); + auto timestamp_handle = queue_context->timestamp_pool->acquire(); timestamp_handle->setup_command_buffers(vtable); const auto& [head_cb, tail_cb] = timestamp_handle->command_buffers; - // The first submit info we use will steal their wait semaphores. - next_submit_infos.push_back(VkSubmitInfo{ - .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO, - .pNext = submit_info->pNext, - .waitSemaphoreCount = submit_info[0].waitSemaphoreCount, - .pWaitSemaphores = submit_info[0].pWaitSemaphores, - .pWaitDstStageMask = submit_info[0].pWaitDstStageMask, - .commandBufferCount = 1, - .pCommandBuffers = &head_cb, - }); + const auto next_command_buffers = [&]() -> auto { + auto next_command_buffers = std::vector<VkCommandBuffer>{head_cb}; + std::ranges::copy_n(submit_info[0].pCommandBuffers, + submit_info[0].commandBufferCount, + std::back_inserter(next_command_buffers)); + return next_command_buffers; + }(); - // Fill in original submit infos but erase the wait semaphores on the - // first because we stole them earlier. 
std::ranges::copy_n(submit_info, submit_count, std::back_inserter(next_submit_infos)); - next_submit_infos[1].pWaitSemaphores = nullptr; - next_submit_infos[1].waitSemaphoreCount = 0u; + next_submit_infos[0].pCommandBuffers = std::data(next_command_buffers); + next_submit_infos[0].commandBufferCount = std::size(next_command_buffers); - const auto TODO_next = std::uint64_t{layer_context.current_frame + 1}; + const auto next_signal = queue_context->semaphore_sequence + 1; const auto tail_tssi = VkTimelineSemaphoreSubmitInfo{ .sType = VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO_KHR, .signalSemaphoreValueCount = 1, - .pSignalSemaphoreValues = &TODO_next, + .pSignalSemaphoreValues = &next_signal, }; next_submit_infos.push_back(VkSubmitInfo{ .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO, @@ -386,7 +477,7 @@ vkQueueSubmit(VkQueue queue, std::uint32_t submit_count, .commandBufferCount = 1, .pCommandBuffers = &tail_cb, .signalSemaphoreCount = 1, - .pSignalSemaphores = &queue_context.semaphore, + .pSignalSemaphores = &queue_context->semaphore, }); if (const auto res = @@ -397,6 +488,14 @@ vkQueueSubmit(VkQueue queue, std::uint32_t submit_count, return res; } + // Hack for now, store timestamp handles. 
+ queue_context->handle_hack.push_front(std::move(timestamp_handle)); + if (std::size(queue_context->handle_hack) > 250) { + queue_context->handle_hack.pop_back(); + } + + ++queue_context->semaphore_sequence; + return VK_SUCCESS; } @@ -405,55 +504,69 @@ static VKAPI_ATTR VkResult VKAPI_CALL vkQueueSubmit2(VkQueue queue, std::uint32_t submit_count, const VkSubmitInfo2* submit_infos, VkFence fence) { - const auto lock = std::scoped_lock{layer_context.mutex}; - auto& queue_context = layer_context.get_context<QueueContext>(queue); - const auto& vtable = queue_context.device_context.vtable; + const auto queue_context = layer_context.get_context(queue); + const auto& vtable = queue_context->device_context.vtable; - if (!submit_count) { // another no-op submit + if (!submit_count) { return vtable.QueueSubmit2(queue, submit_count, submit_infos, fence); } - auto next_submit_infos = std::vector<VkSubmitInfo2>(); - next_submit_infos.reserve(submit_count + 2); - - auto timestamp_handle = queue_context.timestamp_pool->acquire(); + auto timestamp_handle = queue_context->timestamp_pool->acquire(); timestamp_handle->setup_command_buffers(vtable); const auto& [head_cb, tail_cb] = timestamp_handle->command_buffers; - const auto head_cb_info = VkCommandBufferSubmitInfo{ - .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_SUBMIT_INFO, - .commandBuffer = head_cb, - }; - next_submit_infos.push_back(VkSubmitInfo2{ - .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO_2, - .waitSemaphoreInfoCount = submit_infos[0].waitSemaphoreInfoCount, - .pWaitSemaphoreInfos = submit_infos[0].pWaitSemaphoreInfos, - .commandBufferInfoCount = 1, - .pCommandBufferInfos = &head_cb_info, - }); + const auto next_command_buffers = [&]() -> auto { + auto next_command_buffers = std::vector<VkCommandBufferSubmitInfo>{}; + next_command_buffers.push_back(VkCommandBufferSubmitInfo{ + .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_SUBMIT_INFO, + .commandBuffer = head_cb, + }); + std::ranges::copy_n(submit_infos[0].pCommandBufferInfos, + 
submit_infos[0].commandBufferInfoCount, + std::back_inserter(next_command_buffers)); + return next_command_buffers; + }(); + + auto next_submit_infos = std::vector<VkSubmitInfo2>(); std::ranges::copy_n(submit_infos, submit_count, std::back_inserter(next_submit_infos)); - next_submit_infos[1].pWaitSemaphoreInfos = nullptr; - next_submit_infos[1].waitSemaphoreInfoCount = 0; - - const auto tail_cb_info = VkCommandBufferSubmitInfo{ + next_submit_infos[0].pCommandBufferInfos = std::data(next_command_buffers); + next_submit_infos[0].commandBufferInfoCount = + std::size(next_command_buffers); + + const auto tail_ssi = VkSemaphoreSubmitInfo{ + .sType = VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO, + .semaphore = queue_context->semaphore, + .value = queue_context->semaphore_sequence + 1, + .stageMask = VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, + }; + const auto tail_cbsi = VkCommandBufferSubmitInfo{ .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_SUBMIT_INFO, .commandBuffer = tail_cb, }; next_submit_infos.push_back(VkSubmitInfo2{ .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO_2, - .waitSemaphoreInfoCount = submit_infos[0].waitSemaphoreInfoCount, - .pWaitSemaphoreInfos = submit_infos[0].pWaitSemaphoreInfos, .commandBufferInfoCount = 1, - .pCommandBufferInfos = &tail_cb_info, + .pCommandBufferInfos = &tail_cbsi, + .signalSemaphoreInfoCount = 1, + .pSignalSemaphoreInfos = &tail_ssi, }); if (const auto res = - vtable.QueueSubmit2(queue, submit_count, submit_infos, fence); + vtable.QueueSubmit2(queue, std::size(next_submit_infos), + std::data(next_submit_infos), fence); res != VK_SUCCESS) { return res; } + // hack + queue_context->handle_hack.push_front(std::move(timestamp_handle)); + if (std::size(queue_context->handle_hack) > 250) { + queue_context->handle_hack.pop_back(); + } + + ++queue_context->semaphore_sequence; + return VK_SUCCESS; } @@ -467,9 +580,8 @@ vkQueueSubmit2KHR(VkQueue queue, std::uint32_t submit_count, static VKAPI_ATTR VkResult VKAPI_CALL vkQueuePresentKHR(VkQueue queue, const 
VkPresentInfoKHR* present_info) { - const auto lock = std::scoped_lock{layer_context.mutex}; - auto& queue_context = layer_context.get_context<QueueContext>(queue); - const auto& vtable = queue_context.device_context.vtable; + const auto& vtable = + layer_context.get_context(queue)->device_context.vtable; if (const auto res = vtable.QueuePresentKHR(queue, present_info); res != VK_SUCCESS) { @@ -477,69 +589,49 @@ vkQueuePresentKHR(VkQueue queue, const VkPresentInfoKHR* present_info) { return res; } - std::cout << "queuePresentKHR called for queue " << queue << '\n'; - - // Update all of our information about this queue's timestamp pool! - queue_context.timestamp_pool->poll(); - - // While we might be submitting on this queue, let's see what our timeline - // semaphore says we're at. - uint64_t value = 0; - if (const auto res = vtable.GetSemaphoreCounterValueKHR( - queue_context.device_context.device, queue_context.semaphore, - &value); - res != VK_SUCCESS) { - - return res; - } - - std::cout << " frame_index: " << layer_context.current_frame << '\n'; - std::cout << " semaphore: " << value << '\n'; - std::cout << " queue: " << queue << '\n'; - - ++layer_context.current_frame; return VK_SUCCESS; } } // namespace low_latency -static const auto instance_functions = - std::unordered_map<std::string_view, const PFN_vkVoidFunction>{ - {"vkGetInstanceProcAddr", - reinterpret_cast<PFN_vkVoidFunction>(LowLatency_GetInstanceProcAddr)}, +using func_map_t = std::unordered_map<std::string_view, PFN_vkVoidFunction>; +#define HOOK_ENTRY(vk_name_literal, fn_sym) \ + {vk_name_literal, reinterpret_cast<PFN_vkVoidFunction>(fn_sym)} +static const auto instance_functions = func_map_t{ + HOOK_ENTRY("vkCreateDevice", low_latency::CreateDevice), - {"vkCreateInstance", - reinterpret_cast<PFN_vkVoidFunction>(low_latency::CreateInstance)}, - {"vkDestroyInstance", - reinterpret_cast<PFN_vkVoidFunction>(low_latency::DestroyInstance)}, - }; + HOOK_ENTRY("vkGetInstanceProcAddr", 
LowLatency_GetInstanceProcAddr), + HOOK_ENTRY("vkGetDeviceProcAddr", LowLatency_GetDeviceProcAddr), -static const auto device_functions = - std::unordered_map<std::string_view, const PFN_vkVoidFunction>{ - {"vkGetDeviceProcAddr", - reinterpret_cast<PFN_vkVoidFunction>(LowLatency_GetDeviceProcAddr)}, + HOOK_ENTRY("vkEnumeratePhysicalDevices", + low_latency::EnumeratePhysicalDevices), - {"vkCreateDevice", - reinterpret_cast<PFN_vkVoidFunction>(low_latency::CreateDevice)}, - {"vkDestroyDevice", - reinterpret_cast<PFN_vkVoidFunction>(low_latency::DestroyDevice)}, + HOOK_ENTRY("vkCreateInstance", low_latency::CreateInstance), + HOOK_ENTRY("vkDestroyInstance", low_latency::DestroyInstance), +}; +static const auto device_functions = func_map_t{ + HOOK_ENTRY("vkGetDeviceProcAddr", LowLatency_GetDeviceProcAddr), - {"vkGetDeviceQueue", - reinterpret_cast<PFN_vkVoidFunction>(low_latency::GetDeviceQueue)}, - {"vkGetDeviceQueue2", - reinterpret_cast<PFN_vkVoidFunction>(low_latency::GetDeviceQueue2)}, + HOOK_ENTRY("vkDestroyDevice", low_latency::DestroyDevice), - {"vkQueueSubmit", - reinterpret_cast<PFN_vkVoidFunction>(low_latency::vkQueueSubmit)}, - {"vkQueueSubmit2", - reinterpret_cast<PFN_vkVoidFunction>(low_latency::vkQueueSubmit2)}, + HOOK_ENTRY("vkGetDeviceQueue", low_latency::GetDeviceQueue), + HOOK_ENTRY("vkGetDeviceQueue2", low_latency::GetDeviceQueue2), - {"vkQueuePresentKHR", - reinterpret_cast<PFN_vkVoidFunction>(low_latency::vkQueuePresentKHR)}, - }; + HOOK_ENTRY("vkQueueSubmit", low_latency::vkQueueSubmit), + HOOK_ENTRY("vkQueueSubmit2", low_latency::vkQueueSubmit2), + + HOOK_ENTRY("vkQueuePresentKHR", low_latency::vkQueuePresentKHR), + + HOOK_ENTRY("vkAcquireNextImageKHR", low_latency::vkAcquireNextImageKHR), + HOOK_ENTRY("vkAcquireNextImage2KHR", low_latency::vkAcquireNextImage2KHR), +}; +#undef HOOK_ENTRY VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL LowLatency_GetDeviceProcAddr(VkDevice device, const char* const pName) { + if (!pName || !device) { + return nullptr; 
+ } if (const auto it = device_functions.find(pName); it != std::end(device_functions)) { @@ -547,26 +639,20 @@ LowLatency_GetDeviceProcAddr(VkDevice device, const char* const pName) { return it->second; } - const auto lock = std::scoped_lock{low_latency::layer_context.mutex}; - using namespace low_latency; - const auto& context = layer_context.get_context<DeviceContext>(device); - return context.vtable.GetDeviceProcAddr(device, pName); + const auto& vtable = layer_context.get_context(device)->vtable; + return vtable.GetDeviceProcAddr(device, pName); } VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL LowLatency_GetInstanceProcAddr(VkInstance instance, const char* const pName) { + if (const auto it = instance_functions.find(pName); + it != std::end(instance_functions)) { - for (const auto& functions : {device_functions, instance_functions}) { - - if (const auto it = functions.find(pName); it != std::end(functions)) { - return it->second; - } + return it->second; } - const auto lock = std::scoped_lock{low_latency::layer_context.mutex}; - using namespace low_latency; - const auto& context = layer_context.get_context<InstanceContext>(instance); - return context.vtable.GetInstanceProcAddr(instance, pName); -}
\ No newline at end of file + const auto& vtable = layer_context.get_context(instance)->vtable; + return vtable.GetInstanceProcAddr(instance, pName); +} |
