diff options
| author | Nicolas James <Eele1Ephe7uZahRie@tutanota.com> | 2026-02-11 23:19:15 +1100 |
|---|---|---|
| committer | Nicolas James <Eele1Ephe7uZahRie@tutanota.com> | 2026-02-11 23:19:15 +1100 |
| commit | 76f3ef1d7c2b4393a8e8b402deb924e606448d27 (patch) | |
| tree | a291bec0544f007536a41ec1f590338aee1163e9 /src | |
| parent | 77e2be172718878b38999efc247ce7571435fcc8 (diff) | |
More cleanup, fix lifetime and mutex issues
Diffstat (limited to 'src')
| -rw-r--r-- | src/context.cc | 9 | ||||
| -rw-r--r-- | src/context.hh | 21 | ||||
| -rw-r--r-- | src/device_context.cc | 12 | ||||
| -rw-r--r-- | src/device_context.hh | 15 | ||||
| -rw-r--r-- | src/instance_context.cc | 9 | ||||
| -rw-r--r-- | src/instance_context.hh | 17 | ||||
| -rw-r--r-- | src/latency_controller.hh | 15 | ||||
| -rw-r--r-- | src/layer.cc | 586 | ||||
| -rw-r--r-- | src/layer_context.hh | 72 | ||||
| -rw-r--r-- | src/physical_device_context.cc | 11 | ||||
| -rw-r--r-- | src/physical_device_context.hh | 26 | ||||
| -rw-r--r-- | src/queue_context.cc | 5 | ||||
| -rw-r--r-- | src/queue_context.hh | 16 | ||||
| -rw-r--r-- | src/timestamp_pool.cc | 7 | ||||
| -rw-r--r-- | src/timestamp_pool.hh | 18 |
15 files changed, 503 insertions, 336 deletions
diff --git a/src/context.cc b/src/context.cc new file mode 100644 index 0000000..ff93c36 --- /dev/null +++ b/src/context.cc @@ -0,0 +1,9 @@ +#include "context.hh" + +namespace low_latency { + +Context::Context() {} + +Context::~Context() {} + +} // namespace low_latency
\ No newline at end of file diff --git a/src/context.hh b/src/context.hh new file mode 100644 index 0000000..5972740 --- /dev/null +++ b/src/context.hh @@ -0,0 +1,21 @@ +#ifndef CONTEXT_HH_ +#define CONTEXT_HH_ + +// The purpose of this class is to provide a base class for Context classes. + +namespace low_latency { + +class Context { + +public: + Context(); + Context(const Context& context) = delete; + Context(Context&& context) = delete; + Context operator=(const Context& context) = delete; + Context operator=(Context&& context) = delete; + virtual ~Context(); +}; + +} // namespace low_latency + +#endif
\ No newline at end of file diff --git a/src/device_context.cc b/src/device_context.cc index 4be1872..5f5c1f7 100644 --- a/src/device_context.cc +++ b/src/device_context.cc @@ -7,9 +7,17 @@ namespace low_latency { DeviceContext::DeviceContext(InstanceContext& parent_instance, const VkDevice& device, + const PFN_vkSetDeviceLoaderData& sdld, VkuDeviceDispatchTable&& vtable) - : instance(parent_instance), device(device), vtable(std::move(vtable)) + : instance(parent_instance), device(device), sdld(sdld), + vtable(std::move(vtable)) {} -{} +DeviceContext::~DeviceContext() { + // We will let the destructor handle clearing here, but they should be + // unique by now (ie, removed from the layer's context map). + for (const auto& [queue, queue_context] : this->queues) { + assert(queue_context.unique()); + } +} } // namespace low_latency
\ No newline at end of file diff --git a/src/device_context.hh b/src/device_context.hh index a936d6d..3406da1 100644 --- a/src/device_context.hh +++ b/src/device_context.hh @@ -5,29 +5,32 @@ #include <unordered_map> #include <vulkan/utility/vk_dispatch_table.h> +#include <vulkan/vk_layer.h> #include <vulkan/vulkan.hpp> +#include "context.hh" #include "instance_context.hh" namespace low_latency { class QueueContext; -struct DeviceContext { +struct DeviceContext final : public Context { InstanceContext& instance; const VkDevice device; const VkuDeviceDispatchTable vtable; - std::unordered_map<VkQueue, std::unique_ptr<QueueContext>> queue_contexts; + // Do we need to use this unless we wrap dispatchable objects? + const PFN_vkSetDeviceLoaderData sdld; + + std::unordered_map<VkQueue, std::shared_ptr<QueueContext>> queues; public: DeviceContext(InstanceContext& parent_instance, const VkDevice& device, + const PFN_vkSetDeviceLoaderData& sdld, VkuDeviceDispatchTable&& vtable); - DeviceContext(const DeviceContext&) = delete; - DeviceContext(DeviceContext&&) = delete; - DeviceContext operator==(const DeviceContext&) = delete; - DeviceContext operator==(DeviceContext&&) = delete; + virtual ~DeviceContext(); }; }; // namespace low_latency diff --git a/src/instance_context.cc b/src/instance_context.cc index 36d2c66..d12766f 100644 --- a/src/instance_context.cc +++ b/src/instance_context.cc @@ -1,5 +1,6 @@ #include "instance_context.hh" +#include <cassert> #include <utility> namespace low_latency { @@ -8,6 +9,12 @@ InstanceContext::InstanceContext(const VkInstance& instance, VkuInstanceDispatchTable&& vtable) : instance(instance), vtable(std::move(vtable)) {} -InstanceContext::~InstanceContext() {} +InstanceContext::~InstanceContext() { + // Similar to devices, we should own the only shared ptr at this point so + // they destruct now. + for (const auto& [device, device_context] : this->phys_devices) { + assert(device_context.unique()); + } +} } // namespace low_latency
\ No newline at end of file diff --git a/src/instance_context.hh b/src/instance_context.hh index 0a0b999..3b71a82 100644 --- a/src/instance_context.hh +++ b/src/instance_context.hh @@ -3,21 +3,26 @@ #include <vulkan/utility/vk_dispatch_table.h> +#include <memory> +#include <unordered_map> + +#include "context.hh" + namespace low_latency { -struct InstanceContext { +class PhysicalDeviceContext; + +struct InstanceContext final : public Context { const VkInstance instance; const VkuInstanceDispatchTable vtable; + std::unordered_map<void*, std::shared_ptr<PhysicalDeviceContext>> phys_devices; + public: InstanceContext(const VkInstance& instance, VkuInstanceDispatchTable&& vtable); - InstanceContext(const InstanceContext&) = delete; - InstanceContext(InstanceContext&&) = delete; - InstanceContext operator==(const InstanceContext&) = delete; - InstanceContext operator==(InstanceContext&&) = delete; - ~InstanceContext(); + virtual ~InstanceContext(); }; }; // namespace low_latency diff --git a/src/latency_controller.hh b/src/latency_controller.hh deleted file mode 100644 index 6672d5a..0000000 --- a/src/latency_controller.hh +++ /dev/null @@ -1,15 +0,0 @@ -#ifndef LATENCY_CONTROLLER_HH_ -#define LATENCY_CONTROLLER_HH_ - -// The purpose of this file is to provide - -namespace low_latency { - -class LatencyController final { - - -}; - -}; - -#endif
\ No newline at end of file diff --git a/src/layer.cc b/src/layer.cc index 5e652f0..cead7cd 100644 --- a/src/layer.cc +++ b/src/layer.cc @@ -5,6 +5,9 @@ #include <unordered_map> #include <utility> +// hack +#include <deque> + #include <vulkan/utility/vk_dispatch_table.h> #include <vulkan/vk_layer.h> #include <vulkan/vk_platform.h> @@ -16,6 +19,7 @@ #include "instance_context.hh" #include "layer_context.hh" #include "queue_context.hh" +#include "timestamp_pool.hh" namespace low_latency { @@ -25,8 +29,9 @@ LayerContext layer_context; } // namespace -template <typename T, typename sType> -static T* get_link_info(const void* const head, const sType& stype) { +template <typename T, typename sType, typename fType> +static T* get_link_info(const void* const head, const sType& stype, + const fType& ftype) { for (auto i = reinterpret_cast<const VkBaseInStructure*>(head); i; i = i->pNext) { @@ -35,7 +40,7 @@ static T* get_link_info(const void* const head, const sType& stype) { } const auto info = reinterpret_cast<const T*>(i); - if (info->function != VK_LAYER_LINK_INFO) { + if (info->function != ftype) { continue; } @@ -49,7 +54,8 @@ CreateInstance(const VkInstanceCreateInfo* pCreateInfo, const VkAllocationCallbacks* pAllocator, VkInstance* pInstance) { const auto link_info = get_link_info<VkLayerInstanceCreateInfo>( - pCreateInfo->pNext, VK_STRUCTURE_TYPE_LOADER_INSTANCE_CREATE_INFO); + pCreateInfo->pNext, VK_STRUCTURE_TYPE_LOADER_INSTANCE_CREATE_INFO, + VK_LAYER_LINK_INFO); if (!link_info || !link_info->u.pLayerInfo) { return VK_ERROR_INITIALIZATION_FAILED; @@ -78,23 +84,23 @@ CreateInstance(const VkInstanceCreateInfo* pCreateInfo, } const auto key = layer_context.get_key(*pInstance); + +#define INSTANCE_VTABLE_LOAD(name) \ + .name = reinterpret_cast<PFN_vk##name>(gipa(*pInstance, "vk" #name)) auto vtable = VkuInstanceDispatchTable{ - .DestroyInstance = reinterpret_cast<PFN_vkDestroyInstance>( - gipa(*pInstance, "vkDestroyInstance")), - .EnumeratePhysicalDevices = - 
reinterpret_cast<PFN_vkEnumeratePhysicalDevices>( - gipa(*pInstance, "vkEnumeratePhysicalDevices")), - .GetInstanceProcAddr = reinterpret_cast<PFN_vkGetInstanceProcAddr>( - gipa(*pInstance, "vkGetInstanceProcAddr")), - .EnumerateDeviceExtensionProperties = - reinterpret_cast<PFN_vkEnumerateDeviceExtensionProperties>( - gipa(*pInstance, "vkEnumerateDeviceExtensionProperties")), + INSTANCE_VTABLE_LOAD(DestroyInstance), + INSTANCE_VTABLE_LOAD(EnumeratePhysicalDevices), + INSTANCE_VTABLE_LOAD(GetInstanceProcAddr), + INSTANCE_VTABLE_LOAD(CreateDevice), + INSTANCE_VTABLE_LOAD(EnumerateDeviceExtensionProperties), }; +#undef INSTANCE_VTABLE_LOAD const auto lock = std::scoped_lock{layer_context.mutex}; assert(!layer_context.contexts.contains(key)); + layer_context.contexts.try_emplace( - key, std::make_unique<InstanceContext>(*pInstance, std::move(vtable))); + key, std::make_shared<InstanceContext>(*pInstance, std::move(vtable))); return VK_SUCCESS; } @@ -102,11 +108,55 @@ CreateInstance(const VkInstanceCreateInfo* pCreateInfo, static VKAPI_ATTR void VKAPI_CALL DestroyInstance(VkInstance instance, const VkAllocationCallbacks* allocator) { + const auto destroy_instance_func = [&]() -> auto { + const auto context = layer_context.get_context(instance); + const auto lock = std::scoped_lock{layer_context.mutex}; + + // Erase our physical devices owned by this instance from the global + // context. + for (const auto& [key, _] : context->phys_devices) { + assert(layer_context.contexts.erase(key)); + } + + const auto key = layer_context.get_key(instance); + assert(layer_context.contexts.erase(key)); + + // Should be the last ptr now like DestroyDevice. 
+ assert(context.unique()); + return context->vtable.DestroyInstance; + }(); + + destroy_instance_func(instance, allocator); +} + +static VKAPI_ATTR VkResult VKAPI_CALL EnumeratePhysicalDevices( + VkInstance instance, std::uint32_t* count, VkPhysicalDevice* devices) { + + const auto context = layer_context.get_context(instance); + + if (const auto result = + context->vtable.EnumeratePhysicalDevices(instance, count, devices); + !devices || !count || result != VK_SUCCESS) { + + return result; + } + const auto lock = std::scoped_lock{layer_context.mutex}; + const auto C = *count; + for (auto i = std::uint32_t{0}; i < C; ++i) { + const auto& device = devices[i]; + + const auto key = layer_context.get_key(device); + const auto [it, inserted] = + layer_context.contexts.try_emplace(key, nullptr); - const auto key = layer_context.get_key(instance); - assert(layer_context.contexts.contains(key)); - layer_context.contexts.erase(key); + if (inserted) { + it->second = + std::make_shared<PhysicalDeviceContext>(*context, device); + } + } + + return VK_SUCCESS; } static VKAPI_ATTR VkResult VKAPI_CALL CreateDevice( @@ -114,56 +164,64 @@ static VKAPI_ATTR VkResult VKAPI_CALL CreateDevice( const VkAllocationCallbacks* pAllocator, VkDevice* pDevice) { const auto create_info = get_link_info<VkLayerDeviceCreateInfo>( - pCreateInfo->pNext, VK_STRUCTURE_TYPE_LOADER_DEVICE_CREATE_INFO); + pCreateInfo->pNext, VK_STRUCTURE_TYPE_LOADER_DEVICE_CREATE_INFO, + VK_LAYER_LINK_INFO); if (!create_info || !create_info->u.pLayerInfo) { return VK_ERROR_INITIALIZATION_FAILED; } + const auto callback_info = get_link_info<VkLayerDeviceCreateInfo>( + pCreateInfo->pNext, VK_STRUCTURE_TYPE_LOADER_DEVICE_CREATE_INFO, + VK_LOADER_DATA_CALLBACK); + if (!callback_info || !callback_info->u.pLayerInfo) { + return VK_ERROR_INITIALIZATION_FAILED; + } + + const auto sdld = callback_info->u.pfnSetDeviceLoaderData; const auto gipa = create_info->u.pLayerInfo->pfnNextGetInstanceProcAddr; const auto gdpa = 
create_info->u.pLayerInfo->pfnNextGetDeviceProcAddr; - if (!gipa || !gdpa) { + if (!sdld || !gipa || !gdpa) { return VK_ERROR_INITIALIZATION_FAILED; } create_info->u.pLayerInfo = create_info->u.pLayerInfo->pNext; - const auto lock = std::scoped_lock{layer_context.mutex}; - - auto& context = layer_context.get_context<InstanceContext>(physical_device); + const auto physical_device_context = + layer_context.get_context(physical_device); + auto& instance_context = physical_device_context->instance; const auto next_extensions = [&]() -> std::optional<std::vector<const char*>> { - const auto supported_extensions = - [&]() -> std::optional<std::vector<VkExtensionProperties>> { - const auto enumerate_device_extensions = - reinterpret_cast<PFN_vkEnumerateDeviceExtensionProperties>(gipa( - context.instance, "vkEnumerateDeviceExtensionProperties")); - if (!enumerate_device_extensions) { - return std::nullopt; - } + const auto enumerate_device_extensions = + reinterpret_cast<PFN_vkEnumerateDeviceExtensionProperties>( + gipa(instance_context.instance, + "vkEnumerateDeviceExtensionProperties")); + if (!enumerate_device_extensions) { + return std::nullopt; + } - auto count = std::uint32_t{}; - if (enumerate_device_extensions(physical_device, nullptr, &count, - nullptr) != VK_SUCCESS) { + auto count = std::uint32_t{}; + if (enumerate_device_extensions(physical_device, nullptr, &count, + nullptr) != VK_SUCCESS) { - return std::nullopt; - } + return std::nullopt; + } - auto supported_extensions = - std::vector<VkExtensionProperties>(count); - if (enumerate_device_extensions(physical_device, nullptr, &count, - std::data(supported_extensions)) != - VK_SUCCESS) { + auto supported_extensions = std::vector<VkExtensionProperties>(count); + if (enumerate_device_extensions(physical_device, nullptr, &count, + std::data(supported_extensions)) != + VK_SUCCESS) { - return std::nullopt; - } + return std::nullopt; + } - return supported_extensions; - }(); + auto next_extensions = 
std::vector<const char*>{}; + if (pCreateInfo->enabledExtensionCount && + pCreateInfo->ppEnabledExtensionNames) { - auto next_extensions = - std::vector{*pCreateInfo->ppEnabledExtensionNames, - std::next(*pCreateInfo->ppEnabledExtensionNames + - pCreateInfo->enabledExtensionCount)}; + std::ranges::copy_n(pCreateInfo->ppEnabledExtensionNames, + pCreateInfo->enabledExtensionCount, + std::back_inserter(next_extensions)); + } const auto wanted_extensions = { VK_KHR_SYNCHRONIZATION_2_EXTENSION_NAME, @@ -180,12 +238,11 @@ static VKAPI_ATTR VkResult VKAPI_CALL CreateDevice( continue; // Already included, ignore it. } - if (std::ranges::none_of(*supported_extensions, - [&](const auto& supported_extension) { - return !std::strcmp( - supported_extension.extensionName, - wanted); - })) { + if (std::ranges::none_of( + supported_extensions, [&](const auto& supported_extension) { + return !std::strcmp(supported_extension.extensionName, + wanted); + })) { return std::nullopt; // We don't support it, the layer can't // work. 
@@ -201,8 +258,7 @@ static VKAPI_ATTR VkResult VKAPI_CALL CreateDevice( return VK_ERROR_INITIALIZATION_FAILED; } - const auto create_device = reinterpret_cast<PFN_vkCreateDevice>( - gipa(VK_NULL_HANDLE, "vkCreateDevice")); + const auto create_device = instance_context.vtable.CreateDevice; if (!create_device) { return VK_ERROR_INITIALIZATION_FAILED; } @@ -221,164 +277,199 @@ static VKAPI_ATTR VkResult VKAPI_CALL CreateDevice( return result; } +#define DEVICE_VTABLE_LOAD(name) \ + .name = reinterpret_cast<PFN_vk##name>(gdpa(*pDevice, "vk" #name)) auto vtable = VkuDeviceDispatchTable{ - .GetDeviceProcAddr = reinterpret_cast<PFN_vkGetDeviceProcAddr>( - gdpa(*pDevice, "vkGetDeviceProcAddr")), - .DestroyDevice = reinterpret_cast<PFN_vkDestroyDevice>( - gdpa(*pDevice, "vkDestroyDevice")), - .GetDeviceQueue = reinterpret_cast<PFN_vkGetDeviceQueue>( - gdpa(*pDevice, "vkGetDeviceQueue")), - .QueueSubmit = reinterpret_cast<PFN_vkQueueSubmit>( - gdpa(*pDevice, "vkQueueSubmit")), - .CreateSemaphore = reinterpret_cast<PFN_vkCreateSemaphore>( - gdpa(*pDevice, "vkCreateSemaphore")), - .DestroySemaphore = reinterpret_cast<PFN_vkDestroySemaphore>( - gdpa(*pDevice, "vkDestroySemaphore")), - .CreateQueryPool = reinterpret_cast<PFN_vkCreateQueryPool>( - gdpa(*pDevice, "vkCreateQueryPool")), - .DestroyQueryPool = reinterpret_cast<PFN_vkDestroyQueryPool>( - gdpa(*pDevice, "vkDestroyQueryPool")), - .GetQueryPoolResults = reinterpret_cast<PFN_vkGetQueryPoolResults>( - gdpa(*pDevice, "vkGetQueryPoolResults")), - .CreateCommandPool = reinterpret_cast<PFN_vkCreateCommandPool>( - gdpa(*pDevice, "vkCreateCommandPool")), - .DestroyCommandPool = reinterpret_cast<PFN_vkDestroyCommandPool>( - gdpa(*pDevice, "vkDestroyCommandPool")), - .AllocateCommandBuffers = - reinterpret_cast<PFN_vkAllocateCommandBuffers>( - gdpa(*pDevice, "vkAllocateCommandBuffers")), - .FreeCommandBuffers = reinterpret_cast<PFN_vkFreeCommandBuffers>( - gdpa(*pDevice, "vkFreeCommandBuffers")), - .BeginCommandBuffer = 
reinterpret_cast<PFN_vkBeginCommandBuffer>( - gdpa(*pDevice, "vkBeginCommandBuffer")), - .EndCommandBuffer = reinterpret_cast<PFN_vkEndCommandBuffer>( - gdpa(*pDevice, "vkEndCommandBuffer")), - .ResetCommandBuffer = reinterpret_cast<PFN_vkResetCommandBuffer>( - gdpa(*pDevice, "vkResetCommandBuffer")), - .CmdDraw = reinterpret_cast<PFN_vkCmdDraw>(gdpa(*pDevice, "vkCmdDraw")), - .CmdDrawIndexed = reinterpret_cast<PFN_vkCmdDrawIndexed>( - gdpa(*pDevice, "vkCmdDrawIndexed")), - .CmdResetQueryPool = reinterpret_cast<PFN_vkCmdResetQueryPool>( - gdpa(*pDevice, "vkCmdResetQueryPool")), - .GetDeviceQueue2 = reinterpret_cast<PFN_vkGetDeviceQueue2>( - gdpa(*pDevice, "vkGetDeviceQueue2")), - .QueueSubmit2 = reinterpret_cast<PFN_vkQueueSubmit2>( - gdpa(*pDevice, "vkQueueSubmit2")), - .QueuePresentKHR = reinterpret_cast<PFN_vkQueuePresentKHR>( - gdpa(*pDevice, "vkQueuePresentKHR")), - .GetSemaphoreCounterValueKHR = - reinterpret_cast<PFN_vkGetSemaphoreCounterValueKHR>( - gdpa(*pDevice, "vkGetSemaphoreCounterValueKHR")), - .CmdWriteTimestamp2KHR = reinterpret_cast<PFN_vkCmdWriteTimestamp2KHR>( - gdpa(*pDevice, "vkCmdWriteTimestamp2KHR")), - .QueueSubmit2KHR = reinterpret_cast<PFN_vkQueueSubmit2KHR>( - gdpa(*pDevice, "vkQueueSubmit2KHR")), + DEVICE_VTABLE_LOAD(GetDeviceProcAddr), + DEVICE_VTABLE_LOAD(DestroyDevice), + DEVICE_VTABLE_LOAD(GetDeviceQueue), + DEVICE_VTABLE_LOAD(QueueSubmit), + DEVICE_VTABLE_LOAD(CreateSemaphore), + DEVICE_VTABLE_LOAD(DestroySemaphore), + DEVICE_VTABLE_LOAD(CreateQueryPool), + DEVICE_VTABLE_LOAD(DestroyQueryPool), + DEVICE_VTABLE_LOAD(GetQueryPoolResults), + DEVICE_VTABLE_LOAD(CreateCommandPool), + DEVICE_VTABLE_LOAD(DestroyCommandPool), + DEVICE_VTABLE_LOAD(AllocateCommandBuffers), + DEVICE_VTABLE_LOAD(FreeCommandBuffers), + DEVICE_VTABLE_LOAD(BeginCommandBuffer), + DEVICE_VTABLE_LOAD(EndCommandBuffer), + DEVICE_VTABLE_LOAD(ResetCommandBuffer), + DEVICE_VTABLE_LOAD(CmdResetQueryPool), + DEVICE_VTABLE_LOAD(CmdDraw), + 
DEVICE_VTABLE_LOAD(CmdDrawIndexed), + DEVICE_VTABLE_LOAD(GetDeviceQueue2), + DEVICE_VTABLE_LOAD(QueueSubmit2), + DEVICE_VTABLE_LOAD(AcquireNextImageKHR), + DEVICE_VTABLE_LOAD(QueuePresentKHR), + DEVICE_VTABLE_LOAD(AcquireNextImage2KHR), + DEVICE_VTABLE_LOAD(GetSemaphoreCounterValueKHR), + DEVICE_VTABLE_LOAD(CmdWriteTimestamp2KHR), + DEVICE_VTABLE_LOAD(QueueSubmit2KHR), }; +#undef DEVICE_VTABLE_LOAD const auto key = layer_context.get_key(*pDevice); + const auto lock = std::scoped_lock{layer_context.mutex}; assert(!layer_context.contexts.contains(key)); + layer_context.contexts.try_emplace( - key, - std::make_unique<DeviceContext>(context, *pDevice, std::move(vtable))); + key, std::make_shared<DeviceContext>(instance_context, *pDevice, sdld, + std::move(vtable))); return VK_SUCCESS; } static VKAPI_ATTR void VKAPI_CALL DestroyDevice(VkDevice device, const VkAllocationCallbacks* allocator) { - const auto lock = std::scoped_lock{layer_context.mutex}; - const auto key = layer_context.get_key(device); - assert(layer_context.contexts.contains(key)); - layer_context.contexts.erase(key); + + const auto destroy_device_func = [&]() -> auto { + const auto device_context = layer_context.get_context(device); + + const auto func = device_context->vtable.DestroyDevice; + const auto lock = std::scoped_lock{layer_context.mutex}; + // Remove all owned queues from our global context pool. + for (const auto& [queue, _] : device_context->queues) { + const auto key = layer_context.get_key(queue); + assert(layer_context.contexts.erase(key)); + } + + const auto key = layer_context.get_key(device); + assert(layer_context.contexts.erase(key)); + + // should be the last shared ptr now, so its destructor can be called. + // the destructor should expect its owned queues to be unique as well! + assert(device_context.unique()); + + return func; + }(); + + destroy_device_func(device, allocator); } -// Small amount of duplication, we can't assume gdq2 is available apparently. 
static VKAPI_ATTR void VKAPI_CALL GetDeviceQueue(VkDevice device, std::uint32_t queue_family_index, std::uint32_t queue_index, VkQueue* queue) { - const auto lock = std::scoped_lock{layer_context.mutex}; - - auto& device_context = layer_context.get_context<DeviceContext>(device); + const auto device_context = layer_context.get_context(device); - device_context.vtable.GetDeviceQueue(device, queue_family_index, - queue_index, queue); + device_context->vtable.GetDeviceQueue(device, queue_family_index, + queue_index, queue); if (!queue || !*queue) { return; } - auto& queue_contexts = device_context.queue_contexts; - if (!queue_contexts.contains(*queue)) { - queue_contexts.try_emplace( - *queue, std::make_unique<QueueContext>(device_context, *queue, - queue_family_index)); + // Look in our layer context, which has everything. If we were able to + // insert a nullptr key, then it didn't already exist so we should + // construct a new one. + const auto key = layer_context.get_key(*queue); + const auto lock = std::scoped_lock{layer_context.mutex}; + const auto [it, inserted] = layer_context.contexts.try_emplace(key); + if (inserted) { + it->second = std::make_shared<QueueContext>(*device_context, *queue, + queue_family_index); } + + // it->second should be QueueContext, also it might already be there + // but this is expected. + const auto ptr = std::dynamic_pointer_cast<QueueContext>(it->second); + assert(ptr); + device_context->queues.emplace(*queue, ptr); } +// Identical logic to gdq so some amount of duplication, we can't assume gdq1 is +// available apparently, what do I know? 
static VKAPI_ATTR void VKAPI_CALL GetDeviceQueue2( VkDevice device, const VkDeviceQueueInfo2* info, VkQueue* queue) { - const auto lock = std::scoped_lock{layer_context.mutex}; - auto& device_context = layer_context.get_context<DeviceContext>(device); + const auto device_context = layer_context.get_context(device); - device_context.vtable.GetDeviceQueue2(device, info, queue); + device_context->vtable.GetDeviceQueue2(device, info, queue); if (!queue || !*queue) { return; } - auto& queue_contexts = device_context.queue_contexts; - if (!queue_contexts.contains(*queue)) { - queue_contexts.try_emplace( - *queue, std::make_unique<QueueContext>(device_context, *queue, - info->queueFamilyIndex)); + const auto key = layer_context.get_key(*queue); + const auto lock = std::scoped_lock{layer_context.mutex}; + const auto [it, inserted] = layer_context.contexts.try_emplace(key); + if (inserted) { + it->second = std::make_shared<QueueContext>(*device_context, *queue, + info->queueFamilyIndex); + } + + const auto ptr = std::dynamic_pointer_cast<QueueContext>(it->second); + assert(ptr); + device_context->queues.emplace(*queue, ptr); +} + +static VKAPI_ATTR VkResult VKAPI_CALL vkAcquireNextImageKHR( + VkDevice device, VkSwapchainKHR swapchain, std::uint64_t timeout, + VkSemaphore semaphore, VkFence fence, std::uint32_t* pImageIndex) { + + const auto context = layer_context.get_context(device); + if (const auto result = context->vtable.AcquireNextImageKHR( + device, swapchain, timeout, semaphore, fence, pImageIndex); + result != VK_SUCCESS) { + + return result; + } + + return VK_SUCCESS; +} + +static VKAPI_ATTR VkResult VKAPI_CALL vkAcquireNextImage2KHR( + VkDevice device, const VkAcquireNextImageInfoKHR* pAcquireInfo, + std::uint32_t* pImageIndex) { + + const auto context = layer_context.get_context(device); + if (const auto result = context->vtable.AcquireNextImage2KHR( + device, pAcquireInfo, pImageIndex); + result != VK_SUCCESS) { + + return result; } + + return VK_SUCCESS; } 
static VKAPI_ATTR VkResult VKAPI_CALL vkQueueSubmit(VkQueue queue, std::uint32_t submit_count, const VkSubmitInfo* submit_info, VkFence fence) { - const auto lock = std::scoped_lock{layer_context.mutex}; - - auto& queue_context = layer_context.get_context<QueueContext>(queue); - const auto& vtable = queue_context.device_context.vtable; + const auto& queue_context = layer_context.get_context(queue); + const auto& vtable = queue_context->device_context.vtable; if (!submit_count) { // no-op submit we shouldn't worry about return vtable.QueueSubmit(queue, submit_count, submit_info, fence); } - // Create a new vector of submit infos, copy their existing ones. + // Create a new vector of submit infos. auto next_submit_infos = std::vector<VkSubmitInfo>{}; - next_submit_infos.reserve(submit_count + 2); - auto timestamp_handle = queue_context.timestamp_pool->acquire(); + auto timestamp_handle = queue_context->timestamp_pool->acquire(); timestamp_handle->setup_command_buffers(vtable); const auto& [head_cb, tail_cb] = timestamp_handle->command_buffers; - // The first submit info we use will steal their wait semaphores. - next_submit_infos.push_back(VkSubmitInfo{ - .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO, - .pNext = submit_info->pNext, - .waitSemaphoreCount = submit_info[0].waitSemaphoreCount, - .pWaitSemaphores = submit_info[0].pWaitSemaphores, - .pWaitDstStageMask = submit_info[0].pWaitDstStageMask, - .commandBufferCount = 1, - .pCommandBuffers = &head_cb, - }); + const auto next_command_buffers = [&]() -> auto { + auto next_command_buffers = std::vector<VkCommandBuffer>{head_cb}; + std::ranges::copy_n(submit_info[0].pCommandBuffers, + submit_info[0].commandBufferCount, + std::back_inserter(next_command_buffers)); + return next_command_buffers; + }(); - // Fill in original submit infos but erase the wait semaphores on the - // first because we stole them earlier. 
std::ranges::copy_n(submit_info, submit_count, std::back_inserter(next_submit_infos)); - next_submit_infos[1].pWaitSemaphores = nullptr; - next_submit_infos[1].waitSemaphoreCount = 0u; + next_submit_infos[0].pCommandBuffers = std::data(next_command_buffers); + next_submit_infos[0].commandBufferCount = std::size(next_command_buffers); - const auto TODO_next = std::uint64_t{layer_context.current_frame + 1}; + const auto next_signal = queue_context->semaphore_sequence + 1; const auto tail_tssi = VkTimelineSemaphoreSubmitInfo{ .sType = VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO_KHR, .signalSemaphoreValueCount = 1, - .pSignalSemaphoreValues = &TODO_next, + .pSignalSemaphoreValues = &next_signal, }; next_submit_infos.push_back(VkSubmitInfo{ .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO, @@ -386,7 +477,7 @@ vkQueueSubmit(VkQueue queue, std::uint32_t submit_count, .commandBufferCount = 1, .pCommandBuffers = &tail_cb, .signalSemaphoreCount = 1, - .pSignalSemaphores = &queue_context.semaphore, + .pSignalSemaphores = &queue_context->semaphore, }); if (const auto res = @@ -397,6 +488,14 @@ vkQueueSubmit(VkQueue queue, std::uint32_t submit_count, return res; } + // Keep the timestamp handle alive until its GPU work completes: it owns + // the head/tail command buffers submitted above. The bounded deque below + // is a stopgap until handles are recycled off the timeline semaphore. 
+ queue_context->handle_hack.push_front(std::move(timestamp_handle)); + if (std::size(queue_context->handle_hack) > 250) { + queue_context->handle_hack.pop_back(); + } + + ++queue_context->semaphore_sequence; + return VK_SUCCESS; } @@ -405,55 +504,69 @@ static VKAPI_ATTR VkResult VKAPI_CALL vkQueueSubmit2(VkQueue queue, std::uint32_t submit_count, const VkSubmitInfo2* submit_infos, VkFence fence) { - const auto lock = std::scoped_lock{layer_context.mutex}; - auto& queue_context = layer_context.get_context<QueueContext>(queue); - const auto& vtable = queue_context.device_context.vtable; + const auto queue_context = layer_context.get_context(queue); + const auto& vtable = queue_context->device_context.vtable; - if (!submit_count) { // another no-op submit + if (!submit_count) { return vtable.QueueSubmit2(queue, submit_count, submit_infos, fence); } - auto next_submit_infos = std::vector<VkSubmitInfo2>(); - next_submit_infos.reserve(submit_count + 2); - - auto timestamp_handle = queue_context.timestamp_pool->acquire(); + auto timestamp_handle = queue_context->timestamp_pool->acquire(); timestamp_handle->setup_command_buffers(vtable); const auto& [head_cb, tail_cb] = timestamp_handle->command_buffers; - const auto head_cb_info = VkCommandBufferSubmitInfo{ - .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_SUBMIT_INFO, - .commandBuffer = head_cb, - }; - next_submit_infos.push_back(VkSubmitInfo2{ - .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO_2, - .waitSemaphoreInfoCount = submit_infos[0].waitSemaphoreInfoCount, - .pWaitSemaphoreInfos = submit_infos[0].pWaitSemaphoreInfos, - .commandBufferInfoCount = 1, - .pCommandBufferInfos = &head_cb_info, - }); + const auto next_command_buffers = [&]() -> auto { + auto next_command_buffers = std::vector<VkCommandBufferSubmitInfo>{}; + next_command_buffers.push_back(VkCommandBufferSubmitInfo{ + .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_SUBMIT_INFO, + .commandBuffer = head_cb, + }); + std::ranges::copy_n(submit_infos[0].pCommandBufferInfos, + 
submit_infos[0].commandBufferInfoCount, + std::back_inserter(next_command_buffers)); + return next_command_buffers; + }(); + + auto next_submit_infos = std::vector<VkSubmitInfo2>(); std::ranges::copy_n(submit_infos, submit_count, std::back_inserter(next_submit_infos)); - next_submit_infos[1].pWaitSemaphoreInfos = nullptr; - next_submit_infos[1].waitSemaphoreInfoCount = 0; - - const auto tail_cb_info = VkCommandBufferSubmitInfo{ + next_submit_infos[0].pCommandBufferInfos = std::data(next_command_buffers); + next_submit_infos[0].commandBufferInfoCount = + std::size(next_command_buffers); + + const auto tail_ssi = VkSemaphoreSubmitInfo{ + .sType = VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO, + .semaphore = queue_context->semaphore, + .value = queue_context->semaphore_sequence + 1, + .stageMask = VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, + }; + const auto tail_cbsi = VkCommandBufferSubmitInfo{ .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_SUBMIT_INFO, .commandBuffer = tail_cb, }; next_submit_infos.push_back(VkSubmitInfo2{ .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO_2, - .waitSemaphoreInfoCount = submit_infos[0].waitSemaphoreInfoCount, - .pWaitSemaphoreInfos = submit_infos[0].pWaitSemaphoreInfos, .commandBufferInfoCount = 1, - .pCommandBufferInfos = &tail_cb_info, + .pCommandBufferInfos = &tail_cbsi, + .signalSemaphoreInfoCount = 1, + .pSignalSemaphoreInfos = &tail_ssi, }); if (const auto res = - vtable.QueueSubmit2(queue, submit_count, submit_infos, fence); + vtable.QueueSubmit2(queue, std::size(next_submit_infos), + std::data(next_submit_infos), fence); res != VK_SUCCESS) { return res; } + // hack + queue_context->handle_hack.push_front(std::move(timestamp_handle)); + if (std::size(queue_context->handle_hack) > 250) { + queue_context->handle_hack.pop_back(); + } + + ++queue_context->semaphore_sequence; + return VK_SUCCESS; } @@ -467,9 +580,8 @@ vkQueueSubmit2KHR(VkQueue queue, std::uint32_t submit_count, static VKAPI_ATTR VkResult VKAPI_CALL vkQueuePresentKHR(VkQueue queue, const 
VkPresentInfoKHR* present_info) { - const auto lock = std::scoped_lock{layer_context.mutex}; - auto& queue_context = layer_context.get_context<QueueContext>(queue); - const auto& vtable = queue_context.device_context.vtable; + const auto& vtable = + layer_context.get_context(queue)->device_context.vtable; if (const auto res = vtable.QueuePresentKHR(queue, present_info); res != VK_SUCCESS) { @@ -477,69 +589,49 @@ vkQueuePresentKHR(VkQueue queue, const VkPresentInfoKHR* present_info) { return res; } - std::cout << "queuePresentKHR called for queue " << queue << '\n'; - - // Update all of our information about this queue's timestamp pool! - queue_context.timestamp_pool->poll(); - - // While we might be submitting on this queue, let's see what our timeline - // semaphore says we're at. - uint64_t value = 0; - if (const auto res = vtable.GetSemaphoreCounterValueKHR( - queue_context.device_context.device, queue_context.semaphore, - &value); - res != VK_SUCCESS) { - - return res; - } - - std::cout << " frame_index: " << layer_context.current_frame << '\n'; - std::cout << " semaphore: " << value << '\n'; - std::cout << " queue: " << queue << '\n'; - - ++layer_context.current_frame; return VK_SUCCESS; } } // namespace low_latency -static const auto instance_functions = - std::unordered_map<std::string_view, const PFN_vkVoidFunction>{ - {"vkGetInstanceProcAddr", - reinterpret_cast<PFN_vkVoidFunction>(LowLatency_GetInstanceProcAddr)}, +using func_map_t = std::unordered_map<std::string_view, PFN_vkVoidFunction>; +#define HOOK_ENTRY(vk_name_literal, fn_sym) \ + {vk_name_literal, reinterpret_cast<PFN_vkVoidFunction>(fn_sym)} +static const auto instance_functions = func_map_t{ + HOOK_ENTRY("vkCreateDevice", low_latency::CreateDevice), - {"vkCreateInstance", - reinterpret_cast<PFN_vkVoidFunction>(low_latency::CreateInstance)}, - {"vkDestroyInstance", - reinterpret_cast<PFN_vkVoidFunction>(low_latency::DestroyInstance)}, - }; + HOOK_ENTRY("vkGetInstanceProcAddr", 
LowLatency_GetInstanceProcAddr), + HOOK_ENTRY("vkGetDeviceProcAddr", LowLatency_GetDeviceProcAddr), -static const auto device_functions = - std::unordered_map<std::string_view, const PFN_vkVoidFunction>{ - {"vkGetDeviceProcAddr", - reinterpret_cast<PFN_vkVoidFunction>(LowLatency_GetDeviceProcAddr)}, + HOOK_ENTRY("vkEnumeratePhysicalDevices", + low_latency::EnumeratePhysicalDevices), - {"vkCreateDevice", - reinterpret_cast<PFN_vkVoidFunction>(low_latency::CreateDevice)}, - {"vkDestroyDevice", - reinterpret_cast<PFN_vkVoidFunction>(low_latency::DestroyDevice)}, + HOOK_ENTRY("vkCreateInstance", low_latency::CreateInstance), + HOOK_ENTRY("vkDestroyInstance", low_latency::DestroyInstance), +}; +static const auto device_functions = func_map_t{ + HOOK_ENTRY("vkGetDeviceProcAddr", LowLatency_GetDeviceProcAddr), - {"vkGetDeviceQueue", - reinterpret_cast<PFN_vkVoidFunction>(low_latency::GetDeviceQueue)}, - {"vkGetDeviceQueue2", - reinterpret_cast<PFN_vkVoidFunction>(low_latency::GetDeviceQueue2)}, + HOOK_ENTRY("vkDestroyDevice", low_latency::DestroyDevice), - {"vkQueueSubmit", - reinterpret_cast<PFN_vkVoidFunction>(low_latency::vkQueueSubmit)}, - {"vkQueueSubmit2", - reinterpret_cast<PFN_vkVoidFunction>(low_latency::vkQueueSubmit2)}, + HOOK_ENTRY("vkGetDeviceQueue", low_latency::GetDeviceQueue), + HOOK_ENTRY("vkGetDeviceQueue2", low_latency::GetDeviceQueue2), - {"vkQueuePresentKHR", - reinterpret_cast<PFN_vkVoidFunction>(low_latency::vkQueuePresentKHR)}, - }; + HOOK_ENTRY("vkQueueSubmit", low_latency::vkQueueSubmit), + HOOK_ENTRY("vkQueueSubmit2", low_latency::vkQueueSubmit2), + + HOOK_ENTRY("vkQueuePresentKHR", low_latency::vkQueuePresentKHR), + + HOOK_ENTRY("vkAcquireNextImageKHR", low_latency::vkAcquireNextImageKHR), + HOOK_ENTRY("vkAcquireNextImage2KHR", low_latency::vkAcquireNextImage2KHR), +}; +#undef HOOK_ENTRY VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL LowLatency_GetDeviceProcAddr(VkDevice device, const char* const pName) { + if (!pName || !device) { + return nullptr; 
+ } if (const auto it = device_functions.find(pName); it != std::end(device_functions)) { @@ -547,26 +639,20 @@ LowLatency_GetDeviceProcAddr(VkDevice device, const char* const pName) { return it->second; } - const auto lock = std::scoped_lock{low_latency::layer_context.mutex}; - using namespace low_latency; - const auto& context = layer_context.get_context<DeviceContext>(device); - return context.vtable.GetDeviceProcAddr(device, pName); + const auto& vtable = layer_context.get_context(device)->vtable; + return vtable.GetDeviceProcAddr(device, pName); } VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL LowLatency_GetInstanceProcAddr(VkInstance instance, const char* const pName) { + if (const auto it = instance_functions.find(pName); + it != std::end(instance_functions)) { - for (const auto& functions : {device_functions, instance_functions}) { - - if (const auto it = functions.find(pName); it != std::end(functions)) { - return it->second; - } + return it->second; } - const auto lock = std::scoped_lock{low_latency::layer_context.mutex}; - using namespace low_latency; - const auto& context = layer_context.get_context<InstanceContext>(instance); - return context.vtable.GetInstanceProcAddr(instance, pName); -}
\ No newline at end of file + const auto& vtable = layer_context.get_context(instance)->vtable; + return vtable.GetInstanceProcAddr(instance, pName); +} diff --git a/src/layer_context.hh b/src/layer_context.hh index 228efa3..59861a7 100644 --- a/src/layer_context.hh +++ b/src/layer_context.hh @@ -2,10 +2,13 @@ #define LAYER_CONTEXT_HH_ #include <mutex> -#include <variant> +#include <unordered_map> +#include <vulkan/vulkan_core.h> +#include "context.hh" #include "device_context.hh" #include "instance_context.hh" +#include "physical_device_context.hh" #include "queue_context.hh" // The purpose of this file is to provide a definition for the highest level @@ -19,63 +22,58 @@ namespace low_latency { +// All these templates do is make it so we can go from some DispatchableType +// to their respective context's with nice syntax. + template <typename T> concept DispatchableType = std::same_as<std::remove_cvref_t<T>, VkInstance> || - std::same_as<std::remove_cvref_t<T>, VkDevice> || std::same_as<std::remove_cvref_t<T>, VkPhysicalDevice> || + std::same_as<std::remove_cvref_t<T>, VkDevice> || std::same_as<std::remove_cvref_t<T>, VkQueue>; -struct LayerContext { - public: - using ContextVariant = std::variant<std::unique_ptr<DeviceContext>, - std::unique_ptr<InstanceContext>>; +template <class D> struct context_for_t; +template <> struct context_for_t<VkInstance> { + using context = InstanceContext; +}; +template <> struct context_for_t<VkPhysicalDevice> { + using context = PhysicalDeviceContext; +}; +template <> struct context_for_t<VkDevice> { + using context = DeviceContext; +}; +template <> struct context_for_t<VkQueue> { + using context = QueueContext; +}; +template <DispatchableType D> +using dispatch_context_t = typename context_for_t<D>::context; +struct LayerContext final : public Context { public: std::mutex mutex; - std::unordered_map<void*, ContextVariant> contexts; - std::uint64_t current_frame = 0; + std::unordered_map<void*, std::shared_ptr<Context>> contexts; 
public: LayerContext(); - LayerContext(const LayerContext&) = delete; - LayerContext(LayerContext&&) = delete; - LayerContext operator==(const LayerContext&) = delete; - LayerContext operator==(LayerContext&&) = delete; - ~LayerContext(); + virtual ~LayerContext(); public: - template <DispatchableType T> static void* get_key(const T& dt) { - return *reinterpret_cast<void**>(dt); + template <DispatchableType DT> static void* get_key(const DT& dt) { + return reinterpret_cast<void*>(dt); } - template <typename T, DispatchableType DispatchableType> - requires(!std::same_as<T, QueueContext>) - T& get_context(const DispatchableType& dt) { + template <DispatchableType DT> + std::shared_ptr<dispatch_context_t<DT>> get_context(const DT& dt) { const auto key = get_key(dt); + const auto lock = std::scoped_lock(this->mutex); const auto it = this->contexts.find(key); assert(it != std::end(this->contexts)); - const auto ptr = std::get_if<std::unique_ptr<T>>(&it->second); - assert(ptr && *ptr); - - return **ptr; - } - - // QueueContext's are actually owned by a device so look there instead. 
- template <typename T, DispatchableType DispatchableType> - requires(std::same_as<T, QueueContext>) - T& get_context(const DispatchableType& dt) { - - const auto& device_context = this->get_context<DeviceContext>(dt); - const auto& queue_context = device_context.queue_contexts; - - const auto it = device_context.queue_contexts.find(dt); - assert(it != std::end(queue_context)); - - const auto& ptr = it->second; - return *ptr; + using context_t = dispatch_context_t<DT>; + auto ptr = std::dynamic_pointer_cast<context_t>(it->second); + assert(ptr); + return ptr; } }; diff --git a/src/physical_device_context.cc b/src/physical_device_context.cc new file mode 100644 index 0000000..105b840 --- /dev/null +++ b/src/physical_device_context.cc @@ -0,0 +1,11 @@ +#include "physical_device_context.hh" + +namespace low_latency { + +PhysicalDeviceContext::PhysicalDeviceContext( + InstanceContext& instance_context, const VkPhysicalDevice& physical_device) + : instance(instance_context), physical_device(physical_device) {} + +PhysicalDeviceContext::~PhysicalDeviceContext() {} + +} // namespace low_latency
\ No newline at end of file diff --git a/src/physical_device_context.hh b/src/physical_device_context.hh new file mode 100644 index 0000000..639fa0f --- /dev/null +++ b/src/physical_device_context.hh @@ -0,0 +1,26 @@ +#ifndef PHYSICAL_DEVICE_CONTEXT_HH_ +#define PHYSICAL_DEVICE_CONTEXT_HH_ + +#include "instance_context.hh" + +#include <vulkan/vulkan.hpp> + +#include "context.hh" + +namespace low_latency { + +class PhysicalDeviceContext final : public Context { + public: + InstanceContext& instance; + + const VkPhysicalDevice physical_device; + + public: + PhysicalDeviceContext(InstanceContext& instance_context, + const VkPhysicalDevice& physical_device); + virtual ~PhysicalDeviceContext(); +}; + +} // namespace low_latency + +#endif
\ No newline at end of file diff --git a/src/queue_context.cc b/src/queue_context.cc index 8f7d571..930b0c5 100644 --- a/src/queue_context.cc +++ b/src/queue_context.cc @@ -51,12 +51,15 @@ QueueContext::QueueContext(DeviceContext& device_context, const VkQueue& queue, timestamp_pool(std::make_unique<TimestampPool>(*this)) {} QueueContext::~QueueContext() { + + // nuke our handles, so we avoid segfaults for now + this->handle_hack.clear(); + // Ugly - destructors of timestamp_pool should be called before we destroy // our vulkan objects. this->timestamp_pool.reset(); const auto& vtable = this->device_context.vtable; - vtable.DestroySemaphore(this->device_context.device, this->semaphore, nullptr); vtable.DestroyCommandPool(this->device_context.device, this->command_pool, diff --git a/src/queue_context.hh b/src/queue_context.hh index 49bfcdf..184e31d 100644 --- a/src/queue_context.hh +++ b/src/queue_context.hh @@ -1,37 +1,39 @@ #ifndef QUEUE_STATE_HH_ #define QUEUE_STATE_HH_ +#include "context.hh" #include "timestamp_pool.hh" #include <vulkan/utility/vk_dispatch_table.h> #include <vulkan/vulkan.hpp> #include <memory> +#include <deque> namespace low_latency { - + class DeviceContext; -class QueueContext final { +class QueueContext final : public Context { public: DeviceContext& device_context; const VkQueue queue; const std::uint32_t queue_family_index; + // this is incremented and tied to our semaphore + std::uint64_t semaphore_sequence = 0; VkSemaphore semaphore; + VkCommandPool command_pool; std::unique_ptr<TimestampPool> timestamp_pool; + std::deque<std::unique_ptr<TimestampPool::Handle>> handle_hack; public: QueueContext(DeviceContext& device_context, const VkQueue& queue, const std::uint32_t& queue_family_index); - QueueContext(const QueueContext&) = delete; - QueueContext(QueueContext&&) = delete; - QueueContext operator==(const QueueContext&) = delete; - QueueContext operator==(QueueContext&&) = delete; - ~QueueContext(); + virtual ~QueueContext(); }; }; // 
namespace low_latency diff --git a/src/timestamp_pool.cc b/src/timestamp_pool.cc index a70c299..e37dcd2 100644 --- a/src/timestamp_pool.cc +++ b/src/timestamp_pool.cc @@ -17,6 +17,7 @@ TimestampPool::Block TimestampPool::allocate() { .queryCount = this->TIMESTAMP_QUERY_POOL_SIZE}; auto query_pool = VkQueryPool{}; + device_context.vtable.CreateQueryPool(device_context.device, &qpci, nullptr, &query_pool); return query_pool; @@ -42,6 +43,9 @@ TimestampPool::Block TimestampPool::allocate() { }; device_context.vtable.AllocateCommandBuffers( device_context.device, &cbai, std::data(command_buffers)); + std::ranges::for_each(command_buffers, [&](const auto& cb) { + device_context.sdld(device_context.device, cb); + }); return std::make_unique<std::vector<VkCommandBuffer>>(command_buffers); }(); @@ -103,7 +107,7 @@ TimestampPool::Handle::Handle( command_buffers(command_buffers) {} TimestampPool::Handle::~Handle() { - this->index_origin.insert(this->query_index); + assert(this->index_origin.insert(this->query_index).second); } void TimestampPool::Handle::setup_command_buffers( @@ -174,7 +178,6 @@ std::uint64_t TimestampPool::get_polled(const Handle& handle) { TimestampPool::~TimestampPool() { const auto& device = this->queue_context.device_context.device; const auto& vtable = this->queue_context.device_context.vtable; - for (const auto& block : this->blocks) { vtable.FreeCommandBuffers(device, this->queue_context.command_pool, std::size(*block.command_buffers), diff --git a/src/timestamp_pool.hh b/src/timestamp_pool.hh index 82c4721..cc67b18 100644 --- a/src/timestamp_pool.hh +++ b/src/timestamp_pool.hh @@ -69,18 +69,18 @@ class TimestampPool final { public: // A handle represents two std::uint64_t blocks of timestamp memory and two // command buffers. 
- struct Handle { + struct Handle final { private: friend class TimestampPool; private: available_query_indicies_t& index_origin; - std::size_t block_index; + const std::size_t block_index; public: - VkQueryPool query_pool; - std::uint64_t query_index; - std::array<VkCommandBuffer, 2> command_buffers; + const VkQueryPool query_pool; + const std::uint64_t query_index; + const std::array<VkCommandBuffer, 2> command_buffers; public: Handle(TimestampPool::available_query_indicies_t& index_origin, @@ -89,8 +89,8 @@ class TimestampPool final { const std::array<VkCommandBuffer, 2>& command_buffers); Handle(const Handle& handle) = delete; Handle(Handle&&) = delete; - Handle operator==(const Handle& handle) = delete; - Handle operator==(Handle&&) = delete; + Handle operator=(const Handle& handle) = delete; + Handle operator=(Handle&&) = delete; ~Handle(); // frees from the pool public: @@ -104,8 +104,8 @@ class TimestampPool final { TimestampPool(QueueContext& queue_context); TimestampPool(const TimestampPool&) = delete; TimestampPool(TimestampPool&&) = delete; - TimestampPool operator==(const TimestampPool&) = delete; - TimestampPool operator==(TimestampPool&&) = delete; + TimestampPool operator=(const TimestampPool&) = delete; + TimestampPool operator=(TimestampPool&&) = delete; ~TimestampPool(); public: |
