diff options
| author | Nicolas James <nj3ahxac@gmail.com> | 2026-02-10 10:49:17 +1100 |
|---|---|---|
| committer | Nicolas James <nj3ahxac@gmail.com> | 2026-02-10 10:49:17 +1100 |
| commit | 77e2be172718878b38999efc247ce7571435fcc8 (patch) | |
| tree | 557344a614dd89ecec3ac5dbcd83dbcc1375bf55 /src | |
| parent | 5ab5046b643b04b9c31fd41cdfca39b9d5f6b99e (diff) | |
cleanup, wip
Diffstat (limited to 'src')
| -rw-r--r-- | src/device_context.cc | 15 | ||||
| -rw-r--r-- | src/device_context.hh | 35 | ||||
| -rw-r--r-- | src/instance_context.cc | 13 | ||||
| -rw-r--r-- | src/instance_context.hh | 25 | ||||
| -rw-r--r-- | src/layer.cc | 320 | ||||
| -rw-r--r-- | src/layer_context.cc | 9 | ||||
| -rw-r--r-- | src/layer_context.hh | 84 | ||||
| -rw-r--r-- | src/queue_context.cc | 73 | ||||
| -rw-r--r-- | src/queue_context.hh | 23 | ||||
| -rw-r--r-- | src/timestamp_pool.cc | 66 | ||||
| -rw-r--r-- | src/timestamp_pool.hh | 23 |
11 files changed, 427 insertions, 259 deletions
diff --git a/src/device_context.cc b/src/device_context.cc new file mode 100644 index 0000000..4be1872 --- /dev/null +++ b/src/device_context.cc @@ -0,0 +1,15 @@ +#include "device_context.hh" +#include "queue_context.hh" + +#include <utility> + +namespace low_latency { + +DeviceContext::DeviceContext(InstanceContext& parent_instance, + const VkDevice& device, + VkuDeviceDispatchTable&& vtable) + : instance(parent_instance), device(device), vtable(std::move(vtable)) + +{} + +} // namespace low_latency
\ No newline at end of file diff --git a/src/device_context.hh b/src/device_context.hh new file mode 100644 index 0000000..a936d6d --- /dev/null +++ b/src/device_context.hh @@ -0,0 +1,35 @@ +#ifndef DEVICE_CONTEXT_HH_ +#define DEVICE_CONTEXT_HH_ + +#include <memory> +#include <unordered_map> + +#include <vulkan/utility/vk_dispatch_table.h> +#include <vulkan/vulkan.hpp> + +#include "instance_context.hh" + +namespace low_latency { + +class QueueContext; + +struct DeviceContext { + InstanceContext& instance; + + const VkDevice device; + const VkuDeviceDispatchTable vtable; + + std::unordered_map<VkQueue, std::unique_ptr<QueueContext>> queue_contexts; + + public: + DeviceContext(InstanceContext& parent_instance, const VkDevice& device, + VkuDeviceDispatchTable&& vtable); + DeviceContext(const DeviceContext&) = delete; + DeviceContext(DeviceContext&&) = delete; + DeviceContext operator==(const DeviceContext&) = delete; + DeviceContext operator==(DeviceContext&&) = delete; +}; + +}; // namespace low_latency + +#endif
\ No newline at end of file diff --git a/src/instance_context.cc b/src/instance_context.cc new file mode 100644 index 0000000..36d2c66 --- /dev/null +++ b/src/instance_context.cc @@ -0,0 +1,13 @@ +#include "instance_context.hh" + +#include <utility> + +namespace low_latency { + +InstanceContext::InstanceContext(const VkInstance& instance, + VkuInstanceDispatchTable&& vtable) + : instance(instance), vtable(std::move(vtable)) {} + +InstanceContext::~InstanceContext() {} + +} // namespace low_latency
\ No newline at end of file diff --git a/src/instance_context.hh b/src/instance_context.hh new file mode 100644 index 0000000..0a0b999 --- /dev/null +++ b/src/instance_context.hh @@ -0,0 +1,25 @@ +#ifndef INSTANCE_CONTEXT_HH_ +#define INSTANCE_CONTEXT_HH_ + +#include <vulkan/utility/vk_dispatch_table.h> + +namespace low_latency { + +struct InstanceContext { + + const VkInstance instance; + const VkuInstanceDispatchTable vtable; + + public: + InstanceContext(const VkInstance& instance, + VkuInstanceDispatchTable&& vtable); + InstanceContext(const InstanceContext&) = delete; + InstanceContext(InstanceContext&&) = delete; + InstanceContext operator==(const InstanceContext&) = delete; + InstanceContext operator==(InstanceContext&&) = delete; + ~InstanceContext(); +}; + +}; // namespace low_latency + +#endif
\ No newline at end of file diff --git a/src/layer.cc b/src/layer.cc index 94b4969..5e652f0 100644 --- a/src/layer.cc +++ b/src/layer.cc @@ -1,6 +1,10 @@ #include "layer.hh" +#include <iostream> +#include <string_view> +#include <unordered_map> #include <utility> + #include <vulkan/utility/vk_dispatch_table.h> #include <vulkan/vk_layer.h> #include <vulkan/vk_platform.h> @@ -8,37 +12,18 @@ #include <vulkan/vulkan.hpp> #include <vulkan/vulkan_core.h> -#include <deque> -#include <iostream> -#include <mutex> -#include <string_view> -#include <unordered_map> -#include <unordered_set> - +#include "device_context.hh" +#include "instance_context.hh" +#include "layer_context.hh" #include "queue_context.hh" -#include "timestamp_pool.hh" namespace low_latency { -// Global mutex for layer data. -static auto mutex = std::mutex{}; - -// Mappings for device instances. -static std::unordered_map<VkPhysicalDevice, VkInstance> device_instances; -static std::unordered_map<void*, VkuInstanceDispatchTable> instance_vtables; -static std::unordered_map<void*, VkuDeviceDispatchTable> device_vtables; +namespace { -static std::uint64_t current_frame = 0; -static std::unordered_map<VkQueue, QueueContext> queue_contexts; +LayerContext layer_context; -template <typename T> -concept DispatchableType = - std::same_as<std::remove_cvref_t<T>, VkInstance> || - std::same_as<std::remove_cvref_t<T>, VkDevice> || - std::same_as<std::remove_cvref_t<T>, VkPhysicalDevice>; -template <DispatchableType T> void* get_key(const T& inst) { - return *reinterpret_cast<void**>(inst); -} +} // namespace template <typename T, typename sType> static T* get_link_info(const void* const head, const sType& stype) { @@ -92,23 +77,24 @@ CreateInstance(const VkInstanceCreateInfo* pCreateInfo, return result; } - const auto lock = std::scoped_lock{mutex}; - instance_vtables.emplace( - get_key(*pInstance), - VkuInstanceDispatchTable{ - .DestroyInstance = reinterpret_cast<PFN_vkDestroyInstance>( - gipa(*pInstance, "vkDestroyInstance")), - .EnumeratePhysicalDevices = - reinterpret_cast<PFN_vkEnumeratePhysicalDevices>( - gipa(*pInstance, "vkEnumeratePhysicalDevices")), - .GetInstanceProcAddr = reinterpret_cast<PFN_vkGetInstanceProcAddr>( - gipa(*pInstance, "vkGetInstanceProcAddr")), - .EnumerateDeviceExtensionProperties = - reinterpret_cast<PFN_vkEnumerateDeviceExtensionProperties>( - gipa(*pInstance, "vkEnumerateDeviceExtensionProperties")), - } + const auto key = layer_context.get_key(*pInstance); + auto vtable = VkuInstanceDispatchTable{ + .DestroyInstance = reinterpret_cast<PFN_vkDestroyInstance>( + gipa(*pInstance, "vkDestroyInstance")), + .EnumeratePhysicalDevices = + reinterpret_cast<PFN_vkEnumeratePhysicalDevices>( + gipa(*pInstance, "vkEnumeratePhysicalDevices")), + .GetInstanceProcAddr = reinterpret_cast<PFN_vkGetInstanceProcAddr>( + gipa(*pInstance, "vkGetInstanceProcAddr")), + .EnumerateDeviceExtensionProperties = + reinterpret_cast<PFN_vkEnumerateDeviceExtensionProperties>( + gipa(*pInstance, "vkEnumerateDeviceExtensionProperties")), + }; - ); + const auto lock = std::scoped_lock{layer_context.mutex}; + assert(!layer_context.contexts.contains(key)); + layer_context.contexts.try_emplace( + key, std::make_unique<InstanceContext>(*pInstance, std::move(vtable))); return VK_SUCCESS; } @@ -116,34 +102,11 @@ CreateInstance(const VkInstanceCreateInfo* pCreateInfo, static VKAPI_ATTR void VKAPI_CALL DestroyInstance(VkInstance instance, const VkAllocationCallbacks* allocator) { - const auto lock = std::scoped_lock{mutex}; + const auto lock = std::scoped_lock{layer_context.mutex}; - const auto key = get_key(instance); - assert(instance_vtables.contains(key)); - instance_vtables.erase(key); -} - -static VKAPI_ATTR VkResult VKAPI_CALL EnumeratePhysicalDevices( - VkInstance instance, std::uint32_t* count, VkPhysicalDevice* devices) { - - const auto lock = std::scoped_lock{mutex}; - - const auto it = instance_vtables.find(get_key(instance)); - assert(it != std::end(instance_vtables)); - const auto& vtable = it->second; - - if (const auto result = - vtable.EnumeratePhysicalDevices(instance, count, devices); - !devices || result != VK_SUCCESS) { - - return result; - } - - for (auto i = std::uint32_t{0}; i < *count; ++i) { - device_instances.emplace(devices[i], instance); - } - - return VK_SUCCESS; + const auto key = layer_context.get_key(instance); + assert(layer_context.contexts.contains(key)); + layer_context.contexts.erase(key); } static VKAPI_ATTR VkResult VKAPI_CALL CreateDevice( @@ -163,16 +126,17 @@ static VKAPI_ATTR VkResult VKAPI_CALL CreateDevice( } create_info->u.pLayerInfo = create_info->u.pLayerInfo->pNext; - const auto lock = std::scoped_lock{mutex}; + const auto lock = std::scoped_lock{layer_context.mutex}; + + auto& context = layer_context.get_context<InstanceContext>(physical_device); const auto next_extensions = [&]() -> std::optional<std::vector<const char*>> { const auto supported_extensions = [&]() -> std::optional<std::vector<VkExtensionProperties>> { const auto enumerate_device_extensions = - reinterpret_cast<PFN_vkEnumerateDeviceExtensionProperties>( - gipa(device_instances[physical_device], - "vkEnumerateDeviceExtensionProperties")); + reinterpret_cast<PFN_vkEnumerateDeviceExtensionProperties>(gipa( + context.instance, "vkEnumerateDeviceExtensionProperties")); if (!enumerate_device_extensions) { return std::nullopt; } @@ -257,67 +221,75 @@ static VKAPI_ATTR VkResult VKAPI_CALL CreateDevice( return result; } - device_vtables.emplace( - get_key(*pDevice), - VkuDeviceDispatchTable{ - .GetDeviceProcAddr = reinterpret_cast<PFN_vkGetDeviceProcAddr>( - gdpa(*pDevice, "vkGetDeviceProcAddr")), - .DestroyDevice = reinterpret_cast<PFN_vkDestroyDevice>( - gdpa(*pDevice, "vkDestroyDevice")), - .GetDeviceQueue = reinterpret_cast<PFN_vkGetDeviceQueue>( - gdpa(*pDevice, "vkGetDeviceQueue")), - .QueueSubmit = reinterpret_cast<PFN_vkQueueSubmit>( - gdpa(*pDevice, "vkQueueSubmit")), - .CreateSemaphore = reinterpret_cast<PFN_vkCreateSemaphore>( - gdpa(*pDevice, "vkCreateSemaphore")), - .CreateQueryPool = reinterpret_cast<PFN_vkCreateQueryPool>( - gdpa(*pDevice, "vkCreateQueryPool")), - .GetQueryPoolResults = reinterpret_cast<PFN_vkGetQueryPoolResults>( - gdpa(*pDevice, "vkGetQueryPoolResults")), - .CreateCommandPool = reinterpret_cast<PFN_vkCreateCommandPool>( - gdpa(*pDevice, "vkCreateCommandPool")), - .AllocateCommandBuffers = - reinterpret_cast<PFN_vkAllocateCommandBuffers>( - gdpa(*pDevice, "vkAllocateCommandBuffers")), - .BeginCommandBuffer = reinterpret_cast<PFN_vkBeginCommandBuffer>( - gdpa(*pDevice, "vkBeginCommandBuffer")), - .EndCommandBuffer = reinterpret_cast<PFN_vkEndCommandBuffer>( - gdpa(*pDevice, "vkEndCommandBuffer")), - .ResetCommandBuffer = reinterpret_cast<PFN_vkResetCommandBuffer>( - gdpa(*pDevice, "vkResetCommandBuffer")), - .CmdDraw = - reinterpret_cast<PFN_vkCmdDraw>(gdpa(*pDevice, "vkCmdDraw")), - .CmdDrawIndexed = reinterpret_cast<PFN_vkCmdDrawIndexed>( - gdpa(*pDevice, "vkCmdDrawIndexed")), - .CmdResetQueryPool = reinterpret_cast<PFN_vkCmdResetQueryPool>( - gdpa(*pDevice, "vkCmdResetQueryPool")), - .GetDeviceQueue2 = reinterpret_cast<PFN_vkGetDeviceQueue2>( - gdpa(*pDevice, "vkGetDeviceQueue2")), - .QueueSubmit2 = reinterpret_cast<PFN_vkQueueSubmit2>( - gdpa(*pDevice, "vkQueueSubmit2")), - .QueuePresentKHR = reinterpret_cast<PFN_vkQueuePresentKHR>( - gdpa(*pDevice, "vkQueuePresentKHR")), - .GetSemaphoreCounterValueKHR = - reinterpret_cast<PFN_vkGetSemaphoreCounterValueKHR>( - gdpa(*pDevice, "vkGetSemaphoreCounterValueKHR")), - .CmdWriteTimestamp2KHR = - reinterpret_cast<PFN_vkCmdWriteTimestamp2KHR>( - gdpa(*pDevice, "vkCmdWriteTimestamp2KHR")), - .QueueSubmit2KHR = reinterpret_cast<PFN_vkQueueSubmit2KHR>( - gdpa(*pDevice, "vkQueueSubmit2KHR")), - - }); + auto vtable = VkuDeviceDispatchTable{ + .GetDeviceProcAddr = reinterpret_cast<PFN_vkGetDeviceProcAddr>( + gdpa(*pDevice, "vkGetDeviceProcAddr")), + .DestroyDevice = reinterpret_cast<PFN_vkDestroyDevice>( + gdpa(*pDevice, "vkDestroyDevice")), + .GetDeviceQueue = reinterpret_cast<PFN_vkGetDeviceQueue>( + gdpa(*pDevice, "vkGetDeviceQueue")), + .QueueSubmit = reinterpret_cast<PFN_vkQueueSubmit>( + gdpa(*pDevice, "vkQueueSubmit")), + .CreateSemaphore = reinterpret_cast<PFN_vkCreateSemaphore>( + gdpa(*pDevice, "vkCreateSemaphore")), + .DestroySemaphore = reinterpret_cast<PFN_vkDestroySemaphore>( + gdpa(*pDevice, "vkDestroySemaphore")), + .CreateQueryPool = reinterpret_cast<PFN_vkCreateQueryPool>( + gdpa(*pDevice, "vkCreateQueryPool")), + .DestroyQueryPool = reinterpret_cast<PFN_vkDestroyQueryPool>( + gdpa(*pDevice, "vkDestroyQueryPool")), + .GetQueryPoolResults = reinterpret_cast<PFN_vkGetQueryPoolResults>( + gdpa(*pDevice, "vkGetQueryPoolResults")), + .CreateCommandPool = reinterpret_cast<PFN_vkCreateCommandPool>( + gdpa(*pDevice, "vkCreateCommandPool")), + .DestroyCommandPool = reinterpret_cast<PFN_vkDestroyCommandPool>( + gdpa(*pDevice, "vkDestroyCommandPool")), + .AllocateCommandBuffers = + reinterpret_cast<PFN_vkAllocateCommandBuffers>( + gdpa(*pDevice, "vkAllocateCommandBuffers")), + .FreeCommandBuffers = reinterpret_cast<PFN_vkFreeCommandBuffers>( + gdpa(*pDevice, "vkFreeCommandBuffers")), + .BeginCommandBuffer = reinterpret_cast<PFN_vkBeginCommandBuffer>( + gdpa(*pDevice, "vkBeginCommandBuffer")), + .EndCommandBuffer = reinterpret_cast<PFN_vkEndCommandBuffer>( + gdpa(*pDevice, "vkEndCommandBuffer")), + .ResetCommandBuffer = reinterpret_cast<PFN_vkResetCommandBuffer>( + gdpa(*pDevice, "vkResetCommandBuffer")), + .CmdDraw = reinterpret_cast<PFN_vkCmdDraw>(gdpa(*pDevice, "vkCmdDraw")), + .CmdDrawIndexed = reinterpret_cast<PFN_vkCmdDrawIndexed>( + gdpa(*pDevice, "vkCmdDrawIndexed")), + .CmdResetQueryPool = reinterpret_cast<PFN_vkCmdResetQueryPool>( + gdpa(*pDevice, "vkCmdResetQueryPool")), + .GetDeviceQueue2 = reinterpret_cast<PFN_vkGetDeviceQueue2>( + gdpa(*pDevice, "vkGetDeviceQueue2")), + .QueueSubmit2 = reinterpret_cast<PFN_vkQueueSubmit2>( + gdpa(*pDevice, "vkQueueSubmit2")), + .QueuePresentKHR = reinterpret_cast<PFN_vkQueuePresentKHR>( + gdpa(*pDevice, "vkQueuePresentKHR")), + .GetSemaphoreCounterValueKHR = + reinterpret_cast<PFN_vkGetSemaphoreCounterValueKHR>( + gdpa(*pDevice, "vkGetSemaphoreCounterValueKHR")), + .CmdWriteTimestamp2KHR = reinterpret_cast<PFN_vkCmdWriteTimestamp2KHR>( + gdpa(*pDevice, "vkCmdWriteTimestamp2KHR")), + .QueueSubmit2KHR = reinterpret_cast<PFN_vkQueueSubmit2KHR>( + gdpa(*pDevice, "vkQueueSubmit2KHR")), + }; + + const auto key = layer_context.get_key(*pDevice); + assert(!layer_context.contexts.contains(key)); + layer_context.contexts.try_emplace( + key, + std::make_unique<DeviceContext>(context, *pDevice, std::move(vtable))); return VK_SUCCESS; } static VKAPI_ATTR void VKAPI_CALL DestroyDevice(VkDevice device, const VkAllocationCallbacks* allocator) { - - const auto lock = std::scoped_lock{mutex}; - const auto key = get_key(device); - assert(device_vtables.contains(key)); - device_vtables.erase(key); + const auto lock = std::scoped_lock{layer_context.mutex}; + const auto key = layer_context.get_key(device); + assert(layer_context.contexts.contains(key)); + layer_context.contexts.erase(key); } // Small amount of duplication, we can't assume gdq2 is available apparently. @@ -325,37 +297,40 @@ static VKAPI_ATTR void VKAPI_CALL GetDeviceQueue(VkDevice device, std::uint32_t queue_family_index, std::uint32_t queue_index, VkQueue* queue) { - const auto lock = std::scoped_lock{mutex}; - const auto& vtable = device_vtables[get_key(device)]; + const auto lock = std::scoped_lock{layer_context.mutex}; + + auto& device_context = layer_context.get_context<DeviceContext>(device); - vtable.GetDeviceQueue(device, queue_family_index, queue_index, queue); + device_context.vtable.GetDeviceQueue(device, queue_family_index, + queue_index, queue); if (!queue || !*queue) { return; } + auto& queue_contexts = device_context.queue_contexts; if (!queue_contexts.contains(*queue)) { - queue_contexts.emplace( - std::piecewise_construct, std::forward_as_tuple(*queue), - std::forward_as_tuple(device, *queue, queue_family_index, vtable)); + queue_contexts.try_emplace( + *queue, std::make_unique<QueueContext>(device_context, *queue, + queue_family_index)); } } static VKAPI_ATTR void VKAPI_CALL GetDeviceQueue2( VkDevice device, const VkDeviceQueueInfo2* info, VkQueue* queue) { - const auto lock = std::scoped_lock{mutex}; - const auto& vtable = device_vtables[get_key(device)]; + const auto lock = std::scoped_lock{layer_context.mutex}; + auto& device_context = layer_context.get_context<DeviceContext>(device); - vtable.GetDeviceQueue2(device, info, queue); + device_context.vtable.GetDeviceQueue2(device, info, queue); if (!queue || !*queue) { return; } + auto& queue_contexts = device_context.queue_contexts; if (!queue_contexts.contains(*queue)) { - queue_contexts.emplace( - std::piecewise_construct, std::forward_as_tuple(*queue), - std::forward_as_tuple(device, *queue, info->queueFamilyIndex, - vtable)); + queue_contexts.try_emplace( + *queue, std::make_unique<QueueContext>(device_context, *queue, + info->queueFamilyIndex)); } } @@ -363,14 +338,10 @@ static VKAPI_ATTR VkResult VKAPI_CALL vkQueueSubmit(VkQueue queue, std::uint32_t submit_count, const VkSubmitInfo* submit_info, VkFence fence) { - const auto lock = std::scoped_lock{mutex}; + const auto lock = std::scoped_lock{layer_context.mutex}; - auto& queue_context = [&]() -> auto& { - const auto& queue_context_it = queue_contexts.find(queue); - assert(queue_context_it != std::end(queue_contexts)); - return queue_context_it->second; - }(); - const auto& vtable = device_vtables[get_key(queue_context.device)]; + auto& queue_context = layer_context.get_context<QueueContext>(queue); + const auto& vtable = queue_context.device_context.vtable; if (!submit_count) { // no-op submit we shouldn't worry about return vtable.QueueSubmit(queue, submit_count, submit_info, fence); @@ -380,7 +351,7 @@ vkQueueSubmit(VkQueue queue, std::uint32_t submit_count, auto next_submit_infos = std::vector<VkSubmitInfo>{}; next_submit_infos.reserve(submit_count + 2); - auto timestamp_handle = queue_context.timestamp_pool.acquire(); + auto timestamp_handle = queue_context.timestamp_pool->acquire(); timestamp_handle->setup_command_buffers(vtable); const auto& [head_cb, tail_cb] = timestamp_handle->command_buffers; @@ -403,7 +374,7 @@ vkQueueSubmit(VkQueue queue, std::uint32_t submit_count, next_submit_infos[1].pWaitSemaphores = nullptr; next_submit_infos[1].waitSemaphoreCount = 0u; - const auto TODO_next = std::uint64_t{current_frame + 1}; + const auto TODO_next = std::uint64_t{layer_context.current_frame + 1}; const auto tail_tssi = VkTimelineSemaphoreSubmitInfo{ .sType = VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO_KHR, .signalSemaphoreValueCount = 1, @@ -434,13 +405,9 @@ static VKAPI_ATTR VkResult VKAPI_CALL vkQueueSubmit2(VkQueue queue, std::uint32_t submit_count, const VkSubmitInfo2* submit_infos, VkFence fence) { - const auto lock = std::scoped_lock{mutex}; - auto& queue_context = [&]() -> auto& { - const auto& queue_context_it = queue_contexts.find(queue); - assert(queue_context_it != std::end(queue_contexts)); - return queue_context_it->second; - }(); - const auto& vtable = device_vtables[get_key(queue_context.device)]; + const auto lock = std::scoped_lock{layer_context.mutex}; + auto& queue_context = layer_context.get_context<QueueContext>(queue); + const auto& vtable = queue_context.device_context.vtable; if (!submit_count) { // another no-op submit return vtable.QueueSubmit2(queue, submit_count, submit_infos, fence); @@ -449,7 +416,7 @@ vkQueueSubmit2(VkQueue queue, std::uint32_t submit_count, auto next_submit_infos = std::vector<VkSubmitInfo2>(); next_submit_infos.reserve(submit_count + 2); - auto timestamp_handle = queue_context.timestamp_pool.acquire(); + auto timestamp_handle = queue_context.timestamp_pool->acquire(); timestamp_handle->setup_command_buffers(vtable); const auto& [head_cb, tail_cb] = timestamp_handle->command_buffers; @@ -500,13 +467,9 @@ vkQueueSubmit2KHR(VkQueue queue, std::uint32_t submit_count, static VKAPI_ATTR VkResult VKAPI_CALL vkQueuePresentKHR(VkQueue queue, const VkPresentInfoKHR* present_info) { - const auto lock = std::scoped_lock{mutex}; - auto& queue_context = [&]() -> auto& { - const auto& queue_context_it = queue_contexts.find(queue); - assert(queue_context_it != std::end(queue_contexts)); - return queue_context_it->second; - }(); - const auto& vtable = device_vtables[get_key(queue_context.device)]; + const auto lock = std::scoped_lock{layer_context.mutex}; + auto& queue_context = layer_context.get_context<QueueContext>(queue); + const auto& vtable = queue_context.device_context.vtable; if (const auto res = vtable.QueuePresentKHR(queue, present_info); res != VK_SUCCESS) { @@ -517,23 +480,24 @@ vkQueuePresentKHR(VkQueue queue, const VkPresentInfoKHR* present_info) { std::cout << "queuePresentKHR called for queue " << queue << '\n'; // Update all of our information about this queue's timestamp pool! - queue_context.timestamp_pool.poll(); + queue_context.timestamp_pool->poll(); // While we might be submitting on this queue, let's see what our timeline // semaphore says we're at. uint64_t value = 0; if (const auto res = vtable.GetSemaphoreCounterValueKHR( - queue_context.device, queue_context.semaphore, &value); + queue_context.device_context.device, queue_context.semaphore, + &value); res != VK_SUCCESS) { return res; } - std::cout << " frame_index: " << current_frame << '\n'; + std::cout << " frame_index: " << layer_context.current_frame << '\n'; std::cout << " semaphore: " << value << '\n'; std::cout << " queue: " << queue << '\n'; - ++current_frame; + ++layer_context.current_frame; return VK_SUCCESS; } @@ -548,10 +512,6 @@ static const auto instance_functions = reinterpret_cast<PFN_vkVoidFunction>(low_latency::CreateInstance)}, {"vkDestroyInstance", reinterpret_cast<PFN_vkVoidFunction>(low_latency::DestroyInstance)}, - - {"vkEnumeratePhysicalDevices", - reinterpret_cast<PFN_vkVoidFunction>( - low_latency::EnumeratePhysicalDevices)}, }; static const auto device_functions = @@ -587,9 +547,11 @@ LowLatency_GetDeviceProcAddr(VkDevice device, const char* const pName) { return it->second; } - const auto lock = std::scoped_lock{low_latency::mutex}; - return low_latency::device_vtables[low_latency::get_key(device)] - .GetDeviceProcAddr(device, pName); + const auto lock = std::scoped_lock{low_latency::layer_context.mutex}; + + using namespace low_latency; + const auto& context = layer_context.get_context<DeviceContext>(device); + return context.vtable.GetDeviceProcAddr(device, pName); } VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL @@ -602,7 +564,9 @@ LowLatency_GetInstanceProcAddr(VkInstance instance, const char* const pName) { } } - const auto lock = std::scoped_lock{low_latency::mutex}; - return low_latency::instance_vtables[low_latency::get_key(instance)] - .GetInstanceProcAddr(instance, pName); + const auto lock = std::scoped_lock{low_latency::layer_context.mutex}; + + using namespace low_latency; + const auto& context = layer_context.get_context<InstanceContext>(instance); + return context.vtable.GetInstanceProcAddr(instance, pName); }
\ No newline at end of file diff --git a/src/layer_context.cc b/src/layer_context.cc new file mode 100644 index 0000000..ceb0030 --- /dev/null +++ b/src/layer_context.cc @@ -0,0 +1,9 @@ +#include "layer_context.hh" + +namespace low_latency { + +LayerContext::LayerContext() {} + +LayerContext::~LayerContext() {} + +} // namespace low_latency
\ No newline at end of file diff --git a/src/layer_context.hh b/src/layer_context.hh new file mode 100644 index 0000000..228efa3 --- /dev/null +++ b/src/layer_context.hh @@ -0,0 +1,84 @@ +#ifndef LAYER_CONTEXT_HH_ +#define LAYER_CONTEXT_HH_ + +#include <mutex> +#include <variant> + +#include "device_context.hh" +#include "instance_context.hh" +#include "queue_context.hh" + +// The purpose of this file is to provide a definition for the highest level +// entry point struct of our vulkan state. +// +// All Context structs have deleted copy/move constructors. This is because we +// want to be extremely explicit with how/when we delete things, and this allows +// us to use destructors for cleanup without much worry about weird copies +// floating around. Most contexts will probably live inside std::unique_ptr's as +// a result so they can be used in standard containers. + +namespace low_latency { + +template <typename T> +concept DispatchableType = + std::same_as<std::remove_cvref_t<T>, VkInstance> || + std::same_as<std::remove_cvref_t<T>, VkDevice> || + std::same_as<std::remove_cvref_t<T>, VkPhysicalDevice> || + std::same_as<std::remove_cvref_t<T>, VkQueue>; + +struct LayerContext { + public: + using ContextVariant = std::variant<std::unique_ptr<DeviceContext>, + std::unique_ptr<InstanceContext>>; + + public: + std::mutex mutex; + std::unordered_map<void*, ContextVariant> contexts; + std::uint64_t current_frame = 0; + + public: + LayerContext(); + LayerContext(const LayerContext&) = delete; + LayerContext(LayerContext&&) = delete; + LayerContext operator==(const LayerContext&) = delete; + LayerContext operator==(LayerContext&&) = delete; + ~LayerContext(); + + public: + template <DispatchableType T> static void* get_key(const T& dt) { + return *reinterpret_cast<void**>(dt); + } + + template <typename T, DispatchableType DispatchableType> + requires(!std::same_as<T, QueueContext>) + T& get_context(const DispatchableType& dt) { + const auto key = get_key(dt); + + const auto it = this->contexts.find(key); + assert(it != std::end(this->contexts)); + + const auto ptr = std::get_if<std::unique_ptr<T>>(&it->second); + assert(ptr && *ptr); + + return **ptr; + } + + // QueueContext's are actually owned by a device so look there instead. + template <typename T, DispatchableType DispatchableType> + requires(std::same_as<T, QueueContext>) + T& get_context(const DispatchableType& dt) { + + const auto& device_context = this->get_context<DeviceContext>(dt); + const auto& queue_context = device_context.queue_contexts; + + const auto it = device_context.queue_contexts.find(dt); + assert(it != std::end(queue_context)); + + const auto& ptr = it->second; + return *ptr; + } +}; + +}; // namespace low_latency + +#endif
\ No newline at end of file diff --git a/src/queue_context.cc b/src/queue_context.cc index dbae4c0..8f7d571 100644 --- a/src/queue_context.cc +++ b/src/queue_context.cc @@ -1,10 +1,12 @@ #include "queue_context.hh" +#include "device_context.hh" +#include "timestamp_pool.hh" namespace low_latency { -static VkCommandPool make_command_pool(const VkDevice& device, - const std::uint32_t& queue_family_index, - const VkuDeviceDispatchTable& vtable) { +static VkCommandPool +make_command_pool(const DeviceContext& device_context, + const std::uint32_t& queue_family_index) { const auto cpci = VkCommandPoolCreateInfo{ .sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO, @@ -14,38 +16,51 @@ static VkCommandPool make_command_pool(const VkDevice& device, }; auto command_pool = VkCommandPool{}; - vtable.CreateCommandPool(device, &cpci, nullptr, &command_pool); + device_context.vtable.CreateCommandPool(device_context.device, &cpci, + nullptr, &command_pool); return command_pool; } -QueueContext::QueueContext(const VkDevice& device, const VkQueue queue, - const std::uint32_t& queue_family_index, - const VkuDeviceDispatchTable& vtable) - : device(device), queue(queue), queue_family_index(queue_family_index), - vtable(vtable), - // Important we make the command pool before the timestamp pool, because it's a dependency. - command_pool(make_command_pool(device, queue_family_index, vtable)), - timestamp_pool(device, vtable, command_pool) { - - this->semaphore = [&]() -> VkSemaphore { - const auto stci = VkSemaphoreTypeCreateInfo{ - .sType = VK_STRUCTURE_TYPE_SEMAPHORE_TYPE_CREATE_INFO, - .semaphoreType = VK_SEMAPHORE_TYPE_TIMELINE, - .initialValue = 0, - }; - - const auto sci = VkSemaphoreCreateInfo{ - .sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO, - .pNext = &stci, - }; - - auto semaphore = VkSemaphore{}; - vtable.CreateSemaphore(device, &sci, nullptr, &semaphore); - return semaphore; - }(); +static VkSemaphore make_semaphore(const DeviceContext& device_context) { + + const auto stci = VkSemaphoreTypeCreateInfo{ + .sType = VK_STRUCTURE_TYPE_SEMAPHORE_TYPE_CREATE_INFO, + .semaphoreType = VK_SEMAPHORE_TYPE_TIMELINE, + .initialValue = 0, + }; + + const auto sci = VkSemaphoreCreateInfo{ + .sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO, + .pNext = &stci, + }; + + auto semaphore = VkSemaphore{}; + device_context.vtable.CreateSemaphore(device_context.device, &sci, nullptr, + &semaphore); + return semaphore; } +QueueContext::QueueContext(DeviceContext& device_context, const VkQueue& queue, + const std::uint32_t& queue_family_index) + : device_context(device_context), queue(queue), + queue_family_index(queue_family_index), + // Important we make the command pool before the timestamp pool, because + // it's a dependency. + command_pool(make_command_pool(device_context, queue_family_index)), + semaphore(make_semaphore(device_context)), + timestamp_pool(std::make_unique<TimestampPool>(*this)) {} + QueueContext::~QueueContext() { + // Ugly - destructors of timestamp_pool should be called before we destroy + // our vulkan objects. + this->timestamp_pool.reset(); + + const auto& vtable = this->device_context.vtable; + + vtable.DestroySemaphore(this->device_context.device, this->semaphore, + nullptr); + vtable.DestroyCommandPool(this->device_context.device, this->command_pool, + nullptr); } } // namespace low_latency
\ No newline at end of file diff --git a/src/queue_context.hh b/src/queue_context.hh index eb3f2ea..49bfcdf 100644 --- a/src/queue_context.hh +++ b/src/queue_context.hh @@ -6,32 +6,27 @@ #include <vulkan/utility/vk_dispatch_table.h> #include <vulkan/vulkan.hpp> -#include <deque> -#include <vector> +#include <memory> namespace low_latency { + +class DeviceContext; class QueueContext final { public: - VkDevice device; - VkuDeviceDispatchTable vtable; + DeviceContext& device_context; - VkQueue queue; - std::uint32_t queue_family_index; + const VkQueue queue; + const std::uint32_t queue_family_index; VkSemaphore semaphore; VkCommandPool command_pool; - TimestampPool timestamp_pool; - - std::deque< - std::vector<std::pair<TimestampPool::Handle, TimestampPool::Handle>>> - tracked_queues; + std::unique_ptr<TimestampPool> timestamp_pool; public: - QueueContext(const VkDevice& device, const VkQueue queue, - const std::uint32_t& queue_family_index, - const VkuDeviceDispatchTable& vtable); + QueueContext(DeviceContext& device_context, const VkQueue& queue, + const std::uint32_t& queue_family_index); QueueContext(const QueueContext&) = delete; QueueContext(QueueContext&&) = delete; QueueContext operator==(const QueueContext&) = delete; diff --git a/src/timestamp_pool.cc b/src/timestamp_pool.cc index 1dc37b2..a70c299 100644 --- a/src/timestamp_pool.cc +++ b/src/timestamp_pool.cc @@ -1,11 +1,15 @@ #include "timestamp_pool.hh" +#include "device_context.hh" +#include "queue_context.hh" #include <ranges> #include <vulkan/vulkan_core.h> namespace low_latency { -TimestampPool::block TimestampPool::allocate() { +TimestampPool::Block TimestampPool::allocate() { + const auto& device_context = this->queue_context.device_context; + const auto query_pool = [&]() -> VkQueryPool { const auto qpci = VkQueryPoolCreateInfo{ .sType = VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO, @@ -13,7 +17,8 @@ TimestampPool::block TimestampPool::allocate() { .queryCount = this->TIMESTAMP_QUERY_POOL_SIZE}; auto query_pool = VkQueryPool{}; - vtable.CreateQueryPool(device, &qpci, nullptr, &query_pool); + device_context.vtable.CreateQueryPool(device_context.device, &qpci, + nullptr, &query_pool); return query_pool; }(); @@ -21,34 +26,32 @@ TimestampPool::block TimestampPool::allocate() { std::views::iota(0u, this->TIMESTAMP_QUERY_POOL_SIZE / 2) | std::views::transform([](const std::uint64_t& i) { return 2 * i; }); - const auto available_keys = std::make_shared<available_query_indicies_t>( + auto available_indices = std::make_unique<available_query_indicies_t>( available_query_indicies_t{std::begin(key_range), std::end(key_range)}); - auto command_buffers = [this]() -> auto { + auto command_buffers = [&, this]() -> auto { auto command_buffers = std::vector<VkCommandBuffer>(this->TIMESTAMP_QUERY_POOL_SIZE); const auto cbai = VkCommandBufferAllocateInfo{ .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO, - .commandPool = this->command_pool, + .commandPool = this->queue_context.command_pool, .level = VK_COMMAND_BUFFER_LEVEL_PRIMARY, .commandBufferCount = static_cast<std::uint32_t>(std::size(command_buffers)), }; - vtable.AllocateCommandBuffers(device, &cbai, - std::data(command_buffers)); + device_context.vtable.AllocateCommandBuffers( + device_context.device, &cbai, std::data(command_buffers)); return std::make_unique<std::vector<VkCommandBuffer>>(command_buffers); }(); - return block{.query_pool = query_pool, - .available_indicies = available_keys, + return Block{.query_pool = query_pool, + .available_indicies = std::move(available_indices), .command_buffers = std::move(command_buffers)}; } -TimestampPool::TimestampPool(const VkDevice& device, - const VkuDeviceDispatchTable& vtable, - const VkCommandPool& command_pool) - : device(device), vtable(vtable), command_pool(command_pool) { +TimestampPool::TimestampPool(QueueContext& queue_context) + : queue_context(queue_context) { // Allocate one block on construction, it's likely more than enough! this->blocks.emplace_back(this->allocate()); @@ -69,11 +72,11 @@ std::unique_ptr<TimestampPool::Handle> TimestampPool::acquire() { }(); const auto query_pool = vacant_iter->query_pool; - auto& available_indices = vacant_iter->available_indicies; + auto& available_indices = *vacant_iter->available_indicies; // Grab any element from our set and erase it immediately after. - const auto query_index = *std::begin(*available_indices); - available_indices->erase(std::begin(*available_indices)); + const auto query_index = *std::begin(available_indices); + available_indices.erase(std::begin(available_indices)); const auto command_buffers = [&]() -> auto { auto command_buffers = std::array<VkCommandBuffer, 2>{}; @@ -91,8 +94,7 @@ std::unique_ptr<TimestampPool::Handle> TimestampPool::acquire() { } TimestampPool::Handle::Handle( - const std::weak_ptr<TimestampPool::available_query_indicies_t>& - index_origin, + TimestampPool::available_query_indicies_t& index_origin, const std::size_t block_index, const VkQueryPool& query_pool, const std::uint64_t query_index, const std::array<VkCommandBuffer, 2>& command_buffers) @@ -101,10 +103,7 @@ TimestampPool::Handle::Handle( command_buffers(command_buffers) {} TimestampPool::Handle::~Handle() { - if (const auto origin = this->index_origin.lock(); origin) { - assert(!origin->contains(this->query_index)); - origin->insert(this->query_index); - } + this->index_origin.insert(this->query_index); } void TimestampPool::Handle::setup_command_buffers( @@ -136,6 +135,8 @@ void TimestampPool::poll() { this->cached_timestamps.clear(); this->cached_timestamps.reserve(std::size(this->blocks)); + const auto& device_context = this->queue_context.device_context; + std::ranges::transform( this->blocks, std::back_inserter(this->cached_timestamps), [&, this](const auto& block) { @@ -144,14 +145,15 @@ void TimestampPool::poll() { auto timestamps = std::make_unique<std::vector<std::uint64_t>>( this->TIMESTAMP_QUERY_POOL_SIZE); - const auto result = vtable.GetQueryPoolResults( - this->device, query_pool, 0, this->TIMESTAMP_QUERY_POOL_SIZE, + const auto result = device_context.vtable.GetQueryPoolResults( + device_context.device, query_pool, 0, + this->TIMESTAMP_QUERY_POOL_SIZE, this->TIMESTAMP_QUERY_POOL_SIZE * sizeof(std::uint64_t), std::data(*timestamps), sizeof(uint64_t), VK_QUERY_RESULT_64_BIT); - // Might return not ready when any of them aren't ready, which is - // not an error for our use case. + // Might return not ready when any of them aren't ready, which + // is not an error for our use case. assert(result == VK_SUCCESS || result == VK_NOT_READY); return timestamps; @@ -169,4 +171,16 @@ std::uint64_t TimestampPool::get_polled(const Handle& handle) { return handle.query_index; } +TimestampPool::~TimestampPool() { + const auto& device = this->queue_context.device_context.device; + const auto& vtable = this->queue_context.device_context.vtable; + + for (const auto& block : this->blocks) { + vtable.FreeCommandBuffers(device, this->queue_context.command_pool, + std::size(*block.command_buffers), + std::data(*block.command_buffers)); + vtable.DestroyQueryPool(device, block.query_pool, nullptr); + } +} + } // namespace low_latency
\ No newline at end of file diff --git a/src/timestamp_pool.hh b/src/timestamp_pool.hh index 7efa4ee..82c4721 100644 --- a/src/timestamp_pool.hh +++ b/src/timestamp_pool.hh @@ -43,25 +43,25 @@ namespace low_latency { +class QueueContext; + class TimestampPool final { private: static constexpr auto TIMESTAMP_QUERY_POOL_SIZE = 512u; static_assert(TIMESTAMP_QUERY_POOL_SIZE % 2 == 0); private: - VkuDeviceDispatchTable vtable; - VkDevice device; - VkCommandPool command_pool; + QueueContext& queue_context; // VkQueryPool with an unordered set of keys available for reading. using available_query_indicies_t = std::unordered_set<std::uint64_t>; - struct block { + struct Block { VkQueryPool query_pool; - std::shared_ptr<available_query_indicies_t> available_indicies; + std::unique_ptr<available_query_indicies_t> available_indicies; std::unique_ptr<std::vector<VkCommandBuffer>> command_buffers; }; - std::vector<block> blocks; // multiple blocks + std::vector<Block> blocks; // multiple blocks // A snapshot of all available blocks for reading after each poll. std::vector<std::unique_ptr<std::vector<std::uint64_t>>> cached_timestamps; @@ -74,7 +74,7 @@ class TimestampPool final { friend class TimestampPool; private: - std::weak_ptr<available_query_indicies_t> index_origin; + available_query_indicies_t& index_origin; std::size_t block_index; public: @@ -83,8 +83,7 @@ class TimestampPool final { std::array<VkCommandBuffer, 2> command_buffers; public: - Handle(const std::weak_ptr<TimestampPool::available_query_indicies_t>& - index_origin, + Handle(TimestampPool::available_query_indicies_t& index_origin, const std::size_t block_index, const VkQueryPool& query_pool, const std::uint64_t query_index, const std::array<VkCommandBuffer, 2>& command_buffers); @@ -99,15 +98,15 @@ class TimestampPool final { }; private: - block allocate(); + Block allocate(); public: - TimestampPool(const VkDevice& device, const VkuDeviceDispatchTable& vtable, - const VkCommandPool& command_pool); + TimestampPool(QueueContext& queue_context); TimestampPool(const TimestampPool&) = delete; TimestampPool(TimestampPool&&) = delete; TimestampPool operator==(const TimestampPool&) = delete; TimestampPool operator==(TimestampPool&&) = delete; + ~TimestampPool(); public: // Hands out a Handle with a pool and index of two uint64_t's. |
