aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--src/device_context.cc15
-rw-r--r--src/device_context.hh35
-rw-r--r--src/instance_context.cc13
-rw-r--r--src/instance_context.hh25
-rw-r--r--src/layer.cc320
-rw-r--r--src/layer_context.cc9
-rw-r--r--src/layer_context.hh84
-rw-r--r--src/queue_context.cc73
-rw-r--r--src/queue_context.hh23
-rw-r--r--src/timestamp_pool.cc66
-rw-r--r--src/timestamp_pool.hh23
11 files changed, 427 insertions, 259 deletions
diff --git a/src/device_context.cc b/src/device_context.cc
new file mode 100644
index 0000000..4be1872
--- /dev/null
+++ b/src/device_context.cc
@@ -0,0 +1,15 @@
+#include "device_context.hh"
+#include "queue_context.hh"
+
+#include <utility>
+
+namespace low_latency {
+
+DeviceContext::DeviceContext(InstanceContext& parent_instance,
+ const VkDevice& device,
+ VkuDeviceDispatchTable&& vtable)
+ : instance(parent_instance), device(device), vtable(std::move(vtable))
+
+{}
+
+} // namespace low_latency \ No newline at end of file
diff --git a/src/device_context.hh b/src/device_context.hh
new file mode 100644
index 0000000..a936d6d
--- /dev/null
+++ b/src/device_context.hh
@@ -0,0 +1,35 @@
+#ifndef DEVICE_CONTEXT_HH_
+#define DEVICE_CONTEXT_HH_
+
+#include <memory>
+#include <unordered_map>
+
+#include <vulkan/utility/vk_dispatch_table.h>
+#include <vulkan/vulkan.hpp>
+
+#include "instance_context.hh"
+
+namespace low_latency {
+
+class QueueContext;
+
+struct DeviceContext {
+ InstanceContext& instance;
+
+ const VkDevice device;
+ const VkuDeviceDispatchTable vtable;
+
+ std::unordered_map<VkQueue, std::unique_ptr<QueueContext>> queue_contexts;
+
+ public:
+ DeviceContext(InstanceContext& parent_instance, const VkDevice& device,
+ VkuDeviceDispatchTable&& vtable);
+ DeviceContext(const DeviceContext&) = delete;
+ DeviceContext(DeviceContext&&) = delete;
+ DeviceContext& operator=(const DeviceContext&) = delete;
+ DeviceContext& operator=(DeviceContext&&) = delete;
+};
+
+}; // namespace low_latency
+
+#endif \ No newline at end of file
diff --git a/src/instance_context.cc b/src/instance_context.cc
new file mode 100644
index 0000000..36d2c66
--- /dev/null
+++ b/src/instance_context.cc
@@ -0,0 +1,13 @@
+#include "instance_context.hh"
+
+#include <utility>
+
+namespace low_latency {
+
+InstanceContext::InstanceContext(const VkInstance& instance,
+ VkuInstanceDispatchTable&& vtable)
+ : instance(instance), vtable(std::move(vtable)) {}
+
+InstanceContext::~InstanceContext() {}
+
+} // namespace low_latency \ No newline at end of file
diff --git a/src/instance_context.hh b/src/instance_context.hh
new file mode 100644
index 0000000..0a0b999
--- /dev/null
+++ b/src/instance_context.hh
@@ -0,0 +1,25 @@
+#ifndef INSTANCE_CONTEXT_HH_
+#define INSTANCE_CONTEXT_HH_
+
+#include <vulkan/utility/vk_dispatch_table.h>
+
+namespace low_latency {
+
+struct InstanceContext {
+
+ const VkInstance instance;
+ const VkuInstanceDispatchTable vtable;
+
+ public:
+ InstanceContext(const VkInstance& instance,
+ VkuInstanceDispatchTable&& vtable);
+ InstanceContext(const InstanceContext&) = delete;
+ InstanceContext(InstanceContext&&) = delete;
+ InstanceContext& operator=(const InstanceContext&) = delete;
+ InstanceContext& operator=(InstanceContext&&) = delete;
+ ~InstanceContext();
+};
+
+}; // namespace low_latency
+
+#endif \ No newline at end of file
diff --git a/src/layer.cc b/src/layer.cc
index 94b4969..5e652f0 100644
--- a/src/layer.cc
+++ b/src/layer.cc
@@ -1,6 +1,10 @@
#include "layer.hh"
+#include <iostream>
+#include <string_view>
+#include <unordered_map>
#include <utility>
+
#include <vulkan/utility/vk_dispatch_table.h>
#include <vulkan/vk_layer.h>
#include <vulkan/vk_platform.h>
@@ -8,37 +12,18 @@
#include <vulkan/vulkan.hpp>
#include <vulkan/vulkan_core.h>
-#include <deque>
-#include <iostream>
-#include <mutex>
-#include <string_view>
-#include <unordered_map>
-#include <unordered_set>
-
+#include "device_context.hh"
+#include "instance_context.hh"
+#include "layer_context.hh"
#include "queue_context.hh"
-#include "timestamp_pool.hh"
namespace low_latency {
-// Global mutex for layer data.
-static auto mutex = std::mutex{};
-
-// Mappings for device instances.
-static std::unordered_map<VkPhysicalDevice, VkInstance> device_instances;
-static std::unordered_map<void*, VkuInstanceDispatchTable> instance_vtables;
-static std::unordered_map<void*, VkuDeviceDispatchTable> device_vtables;
+namespace {
-static std::uint64_t current_frame = 0;
-static std::unordered_map<VkQueue, QueueContext> queue_contexts;
+LayerContext layer_context;
-template <typename T>
-concept DispatchableType =
- std::same_as<std::remove_cvref_t<T>, VkInstance> ||
- std::same_as<std::remove_cvref_t<T>, VkDevice> ||
- std::same_as<std::remove_cvref_t<T>, VkPhysicalDevice>;
-template <DispatchableType T> void* get_key(const T& inst) {
- return *reinterpret_cast<void**>(inst);
-}
+} // namespace
template <typename T, typename sType>
static T* get_link_info(const void* const head, const sType& stype) {
@@ -92,23 +77,24 @@ CreateInstance(const VkInstanceCreateInfo* pCreateInfo,
return result;
}
- const auto lock = std::scoped_lock{mutex};
- instance_vtables.emplace(
- get_key(*pInstance),
- VkuInstanceDispatchTable{
- .DestroyInstance = reinterpret_cast<PFN_vkDestroyInstance>(
- gipa(*pInstance, "vkDestroyInstance")),
- .EnumeratePhysicalDevices =
- reinterpret_cast<PFN_vkEnumeratePhysicalDevices>(
- gipa(*pInstance, "vkEnumeratePhysicalDevices")),
- .GetInstanceProcAddr = reinterpret_cast<PFN_vkGetInstanceProcAddr>(
- gipa(*pInstance, "vkGetInstanceProcAddr")),
- .EnumerateDeviceExtensionProperties =
- reinterpret_cast<PFN_vkEnumerateDeviceExtensionProperties>(
- gipa(*pInstance, "vkEnumerateDeviceExtensionProperties")),
- }
+ const auto key = layer_context.get_key(*pInstance);
+ auto vtable = VkuInstanceDispatchTable{
+ .DestroyInstance = reinterpret_cast<PFN_vkDestroyInstance>(
+ gipa(*pInstance, "vkDestroyInstance")),
+ .EnumeratePhysicalDevices =
+ reinterpret_cast<PFN_vkEnumeratePhysicalDevices>(
+ gipa(*pInstance, "vkEnumeratePhysicalDevices")),
+ .GetInstanceProcAddr = reinterpret_cast<PFN_vkGetInstanceProcAddr>(
+ gipa(*pInstance, "vkGetInstanceProcAddr")),
+ .EnumerateDeviceExtensionProperties =
+ reinterpret_cast<PFN_vkEnumerateDeviceExtensionProperties>(
+ gipa(*pInstance, "vkEnumerateDeviceExtensionProperties")),
+ };
- );
+ const auto lock = std::scoped_lock{layer_context.mutex};
+ assert(!layer_context.contexts.contains(key));
+ layer_context.contexts.try_emplace(
+ key, std::make_unique<InstanceContext>(*pInstance, std::move(vtable)));
return VK_SUCCESS;
}
@@ -116,34 +102,11 @@ CreateInstance(const VkInstanceCreateInfo* pCreateInfo,
static VKAPI_ATTR void VKAPI_CALL
DestroyInstance(VkInstance instance, const VkAllocationCallbacks* allocator) {
- const auto lock = std::scoped_lock{mutex};
+ const auto lock = std::scoped_lock{layer_context.mutex};
- const auto key = get_key(instance);
- assert(instance_vtables.contains(key));
- instance_vtables.erase(key);
-}
-
-static VKAPI_ATTR VkResult VKAPI_CALL EnumeratePhysicalDevices(
- VkInstance instance, std::uint32_t* count, VkPhysicalDevice* devices) {
-
- const auto lock = std::scoped_lock{mutex};
-
- const auto it = instance_vtables.find(get_key(instance));
- assert(it != std::end(instance_vtables));
- const auto& vtable = it->second;
-
- if (const auto result =
- vtable.EnumeratePhysicalDevices(instance, count, devices);
- !devices || result != VK_SUCCESS) {
-
- return result;
- }
-
- for (auto i = std::uint32_t{0}; i < *count; ++i) {
- device_instances.emplace(devices[i], instance);
- }
-
- return VK_SUCCESS;
+ const auto key = layer_context.get_key(instance);
+ assert(layer_context.contexts.contains(key));
+ layer_context.contexts.erase(key);
}
static VKAPI_ATTR VkResult VKAPI_CALL CreateDevice(
@@ -163,16 +126,17 @@ static VKAPI_ATTR VkResult VKAPI_CALL CreateDevice(
}
create_info->u.pLayerInfo = create_info->u.pLayerInfo->pNext;
- const auto lock = std::scoped_lock{mutex};
+ const auto lock = std::scoped_lock{layer_context.mutex};
+
+ auto& context = layer_context.get_context<InstanceContext>(physical_device);
const auto next_extensions =
[&]() -> std::optional<std::vector<const char*>> {
const auto supported_extensions =
[&]() -> std::optional<std::vector<VkExtensionProperties>> {
const auto enumerate_device_extensions =
- reinterpret_cast<PFN_vkEnumerateDeviceExtensionProperties>(
- gipa(device_instances[physical_device],
- "vkEnumerateDeviceExtensionProperties"));
+ reinterpret_cast<PFN_vkEnumerateDeviceExtensionProperties>(gipa(
+ context.instance, "vkEnumerateDeviceExtensionProperties"));
if (!enumerate_device_extensions) {
return std::nullopt;
}
@@ -257,67 +221,75 @@ static VKAPI_ATTR VkResult VKAPI_CALL CreateDevice(
return result;
}
- device_vtables.emplace(
- get_key(*pDevice),
- VkuDeviceDispatchTable{
- .GetDeviceProcAddr = reinterpret_cast<PFN_vkGetDeviceProcAddr>(
- gdpa(*pDevice, "vkGetDeviceProcAddr")),
- .DestroyDevice = reinterpret_cast<PFN_vkDestroyDevice>(
- gdpa(*pDevice, "vkDestroyDevice")),
- .GetDeviceQueue = reinterpret_cast<PFN_vkGetDeviceQueue>(
- gdpa(*pDevice, "vkGetDeviceQueue")),
- .QueueSubmit = reinterpret_cast<PFN_vkQueueSubmit>(
- gdpa(*pDevice, "vkQueueSubmit")),
- .CreateSemaphore = reinterpret_cast<PFN_vkCreateSemaphore>(
- gdpa(*pDevice, "vkCreateSemaphore")),
- .CreateQueryPool = reinterpret_cast<PFN_vkCreateQueryPool>(
- gdpa(*pDevice, "vkCreateQueryPool")),
- .GetQueryPoolResults = reinterpret_cast<PFN_vkGetQueryPoolResults>(
- gdpa(*pDevice, "vkGetQueryPoolResults")),
- .CreateCommandPool = reinterpret_cast<PFN_vkCreateCommandPool>(
- gdpa(*pDevice, "vkCreateCommandPool")),
- .AllocateCommandBuffers =
- reinterpret_cast<PFN_vkAllocateCommandBuffers>(
- gdpa(*pDevice, "vkAllocateCommandBuffers")),
- .BeginCommandBuffer = reinterpret_cast<PFN_vkBeginCommandBuffer>(
- gdpa(*pDevice, "vkBeginCommandBuffer")),
- .EndCommandBuffer = reinterpret_cast<PFN_vkEndCommandBuffer>(
- gdpa(*pDevice, "vkEndCommandBuffer")),
- .ResetCommandBuffer = reinterpret_cast<PFN_vkResetCommandBuffer>(
- gdpa(*pDevice, "vkResetCommandBuffer")),
- .CmdDraw =
- reinterpret_cast<PFN_vkCmdDraw>(gdpa(*pDevice, "vkCmdDraw")),
- .CmdDrawIndexed = reinterpret_cast<PFN_vkCmdDrawIndexed>(
- gdpa(*pDevice, "vkCmdDrawIndexed")),
- .CmdResetQueryPool = reinterpret_cast<PFN_vkCmdResetQueryPool>(
- gdpa(*pDevice, "vkCmdResetQueryPool")),
- .GetDeviceQueue2 = reinterpret_cast<PFN_vkGetDeviceQueue2>(
- gdpa(*pDevice, "vkGetDeviceQueue2")),
- .QueueSubmit2 = reinterpret_cast<PFN_vkQueueSubmit2>(
- gdpa(*pDevice, "vkQueueSubmit2")),
- .QueuePresentKHR = reinterpret_cast<PFN_vkQueuePresentKHR>(
- gdpa(*pDevice, "vkQueuePresentKHR")),
- .GetSemaphoreCounterValueKHR =
- reinterpret_cast<PFN_vkGetSemaphoreCounterValueKHR>(
- gdpa(*pDevice, "vkGetSemaphoreCounterValueKHR")),
- .CmdWriteTimestamp2KHR =
- reinterpret_cast<PFN_vkCmdWriteTimestamp2KHR>(
- gdpa(*pDevice, "vkCmdWriteTimestamp2KHR")),
- .QueueSubmit2KHR = reinterpret_cast<PFN_vkQueueSubmit2KHR>(
- gdpa(*pDevice, "vkQueueSubmit2KHR")),
-
- });
+ auto vtable = VkuDeviceDispatchTable{
+ .GetDeviceProcAddr = reinterpret_cast<PFN_vkGetDeviceProcAddr>(
+ gdpa(*pDevice, "vkGetDeviceProcAddr")),
+ .DestroyDevice = reinterpret_cast<PFN_vkDestroyDevice>(
+ gdpa(*pDevice, "vkDestroyDevice")),
+ .GetDeviceQueue = reinterpret_cast<PFN_vkGetDeviceQueue>(
+ gdpa(*pDevice, "vkGetDeviceQueue")),
+ .QueueSubmit = reinterpret_cast<PFN_vkQueueSubmit>(
+ gdpa(*pDevice, "vkQueueSubmit")),
+ .CreateSemaphore = reinterpret_cast<PFN_vkCreateSemaphore>(
+ gdpa(*pDevice, "vkCreateSemaphore")),
+ .DestroySemaphore = reinterpret_cast<PFN_vkDestroySemaphore>(
+ gdpa(*pDevice, "vkDestroySemaphore")),
+ .CreateQueryPool = reinterpret_cast<PFN_vkCreateQueryPool>(
+ gdpa(*pDevice, "vkCreateQueryPool")),
+ .DestroyQueryPool = reinterpret_cast<PFN_vkDestroyQueryPool>(
+ gdpa(*pDevice, "vkDestroyQueryPool")),
+ .GetQueryPoolResults = reinterpret_cast<PFN_vkGetQueryPoolResults>(
+ gdpa(*pDevice, "vkGetQueryPoolResults")),
+ .CreateCommandPool = reinterpret_cast<PFN_vkCreateCommandPool>(
+ gdpa(*pDevice, "vkCreateCommandPool")),
+ .DestroyCommandPool = reinterpret_cast<PFN_vkDestroyCommandPool>(
+ gdpa(*pDevice, "vkDestroyCommandPool")),
+ .AllocateCommandBuffers =
+ reinterpret_cast<PFN_vkAllocateCommandBuffers>(
+ gdpa(*pDevice, "vkAllocateCommandBuffers")),
+ .FreeCommandBuffers = reinterpret_cast<PFN_vkFreeCommandBuffers>(
+ gdpa(*pDevice, "vkFreeCommandBuffers")),
+ .BeginCommandBuffer = reinterpret_cast<PFN_vkBeginCommandBuffer>(
+ gdpa(*pDevice, "vkBeginCommandBuffer")),
+ .EndCommandBuffer = reinterpret_cast<PFN_vkEndCommandBuffer>(
+ gdpa(*pDevice, "vkEndCommandBuffer")),
+ .ResetCommandBuffer = reinterpret_cast<PFN_vkResetCommandBuffer>(
+ gdpa(*pDevice, "vkResetCommandBuffer")),
+ .CmdDraw = reinterpret_cast<PFN_vkCmdDraw>(gdpa(*pDevice, "vkCmdDraw")),
+ .CmdDrawIndexed = reinterpret_cast<PFN_vkCmdDrawIndexed>(
+ gdpa(*pDevice, "vkCmdDrawIndexed")),
+ .CmdResetQueryPool = reinterpret_cast<PFN_vkCmdResetQueryPool>(
+ gdpa(*pDevice, "vkCmdResetQueryPool")),
+ .GetDeviceQueue2 = reinterpret_cast<PFN_vkGetDeviceQueue2>(
+ gdpa(*pDevice, "vkGetDeviceQueue2")),
+ .QueueSubmit2 = reinterpret_cast<PFN_vkQueueSubmit2>(
+ gdpa(*pDevice, "vkQueueSubmit2")),
+ .QueuePresentKHR = reinterpret_cast<PFN_vkQueuePresentKHR>(
+ gdpa(*pDevice, "vkQueuePresentKHR")),
+ .GetSemaphoreCounterValueKHR =
+ reinterpret_cast<PFN_vkGetSemaphoreCounterValueKHR>(
+ gdpa(*pDevice, "vkGetSemaphoreCounterValueKHR")),
+ .CmdWriteTimestamp2KHR = reinterpret_cast<PFN_vkCmdWriteTimestamp2KHR>(
+ gdpa(*pDevice, "vkCmdWriteTimestamp2KHR")),
+ .QueueSubmit2KHR = reinterpret_cast<PFN_vkQueueSubmit2KHR>(
+ gdpa(*pDevice, "vkQueueSubmit2KHR")),
+ };
+
+ const auto key = layer_context.get_key(*pDevice);
+ assert(!layer_context.contexts.contains(key));
+ layer_context.contexts.try_emplace(
+ key,
+ std::make_unique<DeviceContext>(context, *pDevice, std::move(vtable)));
return VK_SUCCESS;
}
static VKAPI_ATTR void VKAPI_CALL
DestroyDevice(VkDevice device, const VkAllocationCallbacks* allocator) {
-
- const auto lock = std::scoped_lock{mutex};
- const auto key = get_key(device);
- assert(device_vtables.contains(key));
- device_vtables.erase(key);
+ const auto lock = std::scoped_lock{layer_context.mutex};
+ const auto key = layer_context.get_key(device);
+ assert(layer_context.contexts.contains(key));
+ layer_context.contexts.erase(key);
}
// Small amount of duplication, we can't assume gdq2 is available apparently.
@@ -325,37 +297,40 @@ static VKAPI_ATTR void VKAPI_CALL
GetDeviceQueue(VkDevice device, std::uint32_t queue_family_index,
std::uint32_t queue_index, VkQueue* queue) {
- const auto lock = std::scoped_lock{mutex};
- const auto& vtable = device_vtables[get_key(device)];
+ const auto lock = std::scoped_lock{layer_context.mutex};
+
+ auto& device_context = layer_context.get_context<DeviceContext>(device);
- vtable.GetDeviceQueue(device, queue_family_index, queue_index, queue);
+ device_context.vtable.GetDeviceQueue(device, queue_family_index,
+ queue_index, queue);
if (!queue || !*queue) {
return;
}
+ auto& queue_contexts = device_context.queue_contexts;
if (!queue_contexts.contains(*queue)) {
- queue_contexts.emplace(
- std::piecewise_construct, std::forward_as_tuple(*queue),
- std::forward_as_tuple(device, *queue, queue_family_index, vtable));
+ queue_contexts.try_emplace(
+ *queue, std::make_unique<QueueContext>(device_context, *queue,
+ queue_family_index));
}
}
static VKAPI_ATTR void VKAPI_CALL GetDeviceQueue2(
VkDevice device, const VkDeviceQueueInfo2* info, VkQueue* queue) {
- const auto lock = std::scoped_lock{mutex};
- const auto& vtable = device_vtables[get_key(device)];
+ const auto lock = std::scoped_lock{layer_context.mutex};
+ auto& device_context = layer_context.get_context<DeviceContext>(device);
- vtable.GetDeviceQueue2(device, info, queue);
+ device_context.vtable.GetDeviceQueue2(device, info, queue);
if (!queue || !*queue) {
return;
}
+ auto& queue_contexts = device_context.queue_contexts;
if (!queue_contexts.contains(*queue)) {
- queue_contexts.emplace(
- std::piecewise_construct, std::forward_as_tuple(*queue),
- std::forward_as_tuple(device, *queue, info->queueFamilyIndex,
- vtable));
+ queue_contexts.try_emplace(
+ *queue, std::make_unique<QueueContext>(device_context, *queue,
+ info->queueFamilyIndex));
}
}
@@ -363,14 +338,10 @@ static VKAPI_ATTR VkResult VKAPI_CALL
vkQueueSubmit(VkQueue queue, std::uint32_t submit_count,
const VkSubmitInfo* submit_info, VkFence fence) {
- const auto lock = std::scoped_lock{mutex};
+ const auto lock = std::scoped_lock{layer_context.mutex};
- auto& queue_context = [&]() -> auto& {
- const auto& queue_context_it = queue_contexts.find(queue);
- assert(queue_context_it != std::end(queue_contexts));
- return queue_context_it->second;
- }();
- const auto& vtable = device_vtables[get_key(queue_context.device)];
+ auto& queue_context = layer_context.get_context<QueueContext>(queue);
+ const auto& vtable = queue_context.device_context.vtable;
if (!submit_count) { // no-op submit we shouldn't worry about
return vtable.QueueSubmit(queue, submit_count, submit_info, fence);
@@ -380,7 +351,7 @@ vkQueueSubmit(VkQueue queue, std::uint32_t submit_count,
auto next_submit_infos = std::vector<VkSubmitInfo>{};
next_submit_infos.reserve(submit_count + 2);
- auto timestamp_handle = queue_context.timestamp_pool.acquire();
+ auto timestamp_handle = queue_context.timestamp_pool->acquire();
timestamp_handle->setup_command_buffers(vtable);
const auto& [head_cb, tail_cb] = timestamp_handle->command_buffers;
@@ -403,7 +374,7 @@ vkQueueSubmit(VkQueue queue, std::uint32_t submit_count,
next_submit_infos[1].pWaitSemaphores = nullptr;
next_submit_infos[1].waitSemaphoreCount = 0u;
- const auto TODO_next = std::uint64_t{current_frame + 1};
+ const auto TODO_next = std::uint64_t{layer_context.current_frame + 1};
const auto tail_tssi = VkTimelineSemaphoreSubmitInfo{
.sType = VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO_KHR,
.signalSemaphoreValueCount = 1,
@@ -434,13 +405,9 @@ static VKAPI_ATTR VkResult VKAPI_CALL
vkQueueSubmit2(VkQueue queue, std::uint32_t submit_count,
const VkSubmitInfo2* submit_infos, VkFence fence) {
- const auto lock = std::scoped_lock{mutex};
- auto& queue_context = [&]() -> auto& {
- const auto& queue_context_it = queue_contexts.find(queue);
- assert(queue_context_it != std::end(queue_contexts));
- return queue_context_it->second;
- }();
- const auto& vtable = device_vtables[get_key(queue_context.device)];
+ const auto lock = std::scoped_lock{layer_context.mutex};
+ auto& queue_context = layer_context.get_context<QueueContext>(queue);
+ const auto& vtable = queue_context.device_context.vtable;
if (!submit_count) { // another no-op submit
return vtable.QueueSubmit2(queue, submit_count, submit_infos, fence);
@@ -449,7 +416,7 @@ vkQueueSubmit2(VkQueue queue, std::uint32_t submit_count,
auto next_submit_infos = std::vector<VkSubmitInfo2>();
next_submit_infos.reserve(submit_count + 2);
- auto timestamp_handle = queue_context.timestamp_pool.acquire();
+ auto timestamp_handle = queue_context.timestamp_pool->acquire();
timestamp_handle->setup_command_buffers(vtable);
const auto& [head_cb, tail_cb] = timestamp_handle->command_buffers;
@@ -500,13 +467,9 @@ vkQueueSubmit2KHR(VkQueue queue, std::uint32_t submit_count,
static VKAPI_ATTR VkResult VKAPI_CALL
vkQueuePresentKHR(VkQueue queue, const VkPresentInfoKHR* present_info) {
- const auto lock = std::scoped_lock{mutex};
- auto& queue_context = [&]() -> auto& {
- const auto& queue_context_it = queue_contexts.find(queue);
- assert(queue_context_it != std::end(queue_contexts));
- return queue_context_it->second;
- }();
- const auto& vtable = device_vtables[get_key(queue_context.device)];
+ const auto lock = std::scoped_lock{layer_context.mutex};
+ auto& queue_context = layer_context.get_context<QueueContext>(queue);
+ const auto& vtable = queue_context.device_context.vtable;
if (const auto res = vtable.QueuePresentKHR(queue, present_info);
res != VK_SUCCESS) {
@@ -517,23 +480,24 @@ vkQueuePresentKHR(VkQueue queue, const VkPresentInfoKHR* present_info) {
std::cout << "queuePresentKHR called for queue " << queue << '\n';
// Update all of our information about this queue's timestamp pool!
- queue_context.timestamp_pool.poll();
+ queue_context.timestamp_pool->poll();
// While we might be submitting on this queue, let's see what our timeline
// semaphore says we're at.
uint64_t value = 0;
if (const auto res = vtable.GetSemaphoreCounterValueKHR(
- queue_context.device, queue_context.semaphore, &value);
+ queue_context.device_context.device, queue_context.semaphore,
+ &value);
res != VK_SUCCESS) {
return res;
}
- std::cout << " frame_index: " << current_frame << '\n';
+ std::cout << " frame_index: " << layer_context.current_frame << '\n';
std::cout << " semaphore: " << value << '\n';
std::cout << " queue: " << queue << '\n';
- ++current_frame;
+ ++layer_context.current_frame;
return VK_SUCCESS;
}
@@ -548,10 +512,6 @@ static const auto instance_functions =
reinterpret_cast<PFN_vkVoidFunction>(low_latency::CreateInstance)},
{"vkDestroyInstance",
reinterpret_cast<PFN_vkVoidFunction>(low_latency::DestroyInstance)},
-
- {"vkEnumeratePhysicalDevices",
- reinterpret_cast<PFN_vkVoidFunction>(
- low_latency::EnumeratePhysicalDevices)},
};
static const auto device_functions =
@@ -587,9 +547,11 @@ LowLatency_GetDeviceProcAddr(VkDevice device, const char* const pName) {
return it->second;
}
- const auto lock = std::scoped_lock{low_latency::mutex};
- return low_latency::device_vtables[low_latency::get_key(device)]
- .GetDeviceProcAddr(device, pName);
+ const auto lock = std::scoped_lock{low_latency::layer_context.mutex};
+
+ using namespace low_latency;
+ const auto& context = layer_context.get_context<DeviceContext>(device);
+ return context.vtable.GetDeviceProcAddr(device, pName);
}
VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL
@@ -602,7 +564,9 @@ LowLatency_GetInstanceProcAddr(VkInstance instance, const char* const pName) {
}
}
- const auto lock = std::scoped_lock{low_latency::mutex};
- return low_latency::instance_vtables[low_latency::get_key(instance)]
- .GetInstanceProcAddr(instance, pName);
+ const auto lock = std::scoped_lock{low_latency::layer_context.mutex};
+
+ using namespace low_latency;
+ const auto& context = layer_context.get_context<InstanceContext>(instance);
+ return context.vtable.GetInstanceProcAddr(instance, pName);
} \ No newline at end of file
diff --git a/src/layer_context.cc b/src/layer_context.cc
new file mode 100644
index 0000000..ceb0030
--- /dev/null
+++ b/src/layer_context.cc
@@ -0,0 +1,9 @@
+#include "layer_context.hh"
+
+namespace low_latency {
+
+LayerContext::LayerContext() {}
+
+LayerContext::~LayerContext() {}
+
+} // namespace low_latency \ No newline at end of file
diff --git a/src/layer_context.hh b/src/layer_context.hh
new file mode 100644
index 0000000..228efa3
--- /dev/null
+++ b/src/layer_context.hh
@@ -0,0 +1,84 @@
+#ifndef LAYER_CONTEXT_HH_
+#define LAYER_CONTEXT_HH_
+
+#include <mutex>
+#include <variant>
+
+#include "device_context.hh"
+#include "instance_context.hh"
+#include "queue_context.hh"
+
+// The purpose of this file is to provide a definition for the highest level
+// entry point struct of our vulkan state.
+//
+// All Context structs have deleted copy/move constructors. This is because we
+// want to be extremely explicit with how/when we delete things, and this allows
+// us to use destructors for cleanup without much worry about weird copies
+// floating around. Most contexts will probably live inside std::unique_ptr's as
+// a result so they can be used in standard containers.
+
+namespace low_latency {
+
+template <typename T>
+concept DispatchableType =
+ std::same_as<std::remove_cvref_t<T>, VkInstance> ||
+ std::same_as<std::remove_cvref_t<T>, VkDevice> ||
+ std::same_as<std::remove_cvref_t<T>, VkPhysicalDevice> ||
+ std::same_as<std::remove_cvref_t<T>, VkQueue>;
+
+struct LayerContext {
+ public:
+ using ContextVariant = std::variant<std::unique_ptr<DeviceContext>,
+ std::unique_ptr<InstanceContext>>;
+
+ public:
+ std::mutex mutex;
+ std::unordered_map<void*, ContextVariant> contexts;
+ std::uint64_t current_frame = 0;
+
+ public:
+ LayerContext();
+ LayerContext(const LayerContext&) = delete;
+ LayerContext(LayerContext&&) = delete;
+ LayerContext& operator=(const LayerContext&) = delete;
+ LayerContext& operator=(LayerContext&&) = delete;
+ ~LayerContext();
+
+ public:
+ template <DispatchableType T> static void* get_key(const T& dt) {
+ return *reinterpret_cast<void**>(dt);
+ }
+
+ template <typename T, DispatchableType DispatchableType>
+ requires(!std::same_as<T, QueueContext>)
+ T& get_context(const DispatchableType& dt) {
+ const auto key = get_key(dt);
+
+ const auto it = this->contexts.find(key);
+ assert(it != std::end(this->contexts));
+
+ const auto ptr = std::get_if<std::unique_ptr<T>>(&it->second);
+ assert(ptr && *ptr);
+
+ return **ptr;
+ }
+
+ // QueueContext's are actually owned by a device so look there instead.
+ template <typename T, DispatchableType DispatchableType>
+ requires(std::same_as<T, QueueContext>)
+ T& get_context(const DispatchableType& dt) {
+
+ const auto& device_context = this->get_context<DeviceContext>(dt);
+ const auto& queue_context = device_context.queue_contexts;
+
+ const auto it = device_context.queue_contexts.find(dt);
+ assert(it != std::end(queue_context));
+
+ const auto& ptr = it->second;
+ return *ptr;
+ }
+};
+
+}; // namespace low_latency
+
+#endif \ No newline at end of file
diff --git a/src/queue_context.cc b/src/queue_context.cc
index dbae4c0..8f7d571 100644
--- a/src/queue_context.cc
+++ b/src/queue_context.cc
@@ -1,10 +1,12 @@
#include "queue_context.hh"
+#include "device_context.hh"
+#include "timestamp_pool.hh"
namespace low_latency {
-static VkCommandPool make_command_pool(const VkDevice& device,
- const std::uint32_t& queue_family_index,
- const VkuDeviceDispatchTable& vtable) {
+static VkCommandPool
+make_command_pool(const DeviceContext& device_context,
+ const std::uint32_t& queue_family_index) {
const auto cpci = VkCommandPoolCreateInfo{
.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO,
@@ -14,38 +16,51 @@ static VkCommandPool make_command_pool(const VkDevice& device,
};
auto command_pool = VkCommandPool{};
- vtable.CreateCommandPool(device, &cpci, nullptr, &command_pool);
+ device_context.vtable.CreateCommandPool(device_context.device, &cpci,
+ nullptr, &command_pool);
return command_pool;
}
-QueueContext::QueueContext(const VkDevice& device, const VkQueue queue,
- const std::uint32_t& queue_family_index,
- const VkuDeviceDispatchTable& vtable)
- : device(device), queue(queue), queue_family_index(queue_family_index),
- vtable(vtable),
- // Important we make the command pool before the timestamp pool, because it's a dependency.
- command_pool(make_command_pool(device, queue_family_index, vtable)),
- timestamp_pool(device, vtable, command_pool) {
-
- this->semaphore = [&]() -> VkSemaphore {
- const auto stci = VkSemaphoreTypeCreateInfo{
- .sType = VK_STRUCTURE_TYPE_SEMAPHORE_TYPE_CREATE_INFO,
- .semaphoreType = VK_SEMAPHORE_TYPE_TIMELINE,
- .initialValue = 0,
- };
-
- const auto sci = VkSemaphoreCreateInfo{
- .sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO,
- .pNext = &stci,
- };
-
- auto semaphore = VkSemaphore{};
- vtable.CreateSemaphore(device, &sci, nullptr, &semaphore);
- return semaphore;
- }();
+static VkSemaphore make_semaphore(const DeviceContext& device_context) {
+
+ const auto stci = VkSemaphoreTypeCreateInfo{
+ .sType = VK_STRUCTURE_TYPE_SEMAPHORE_TYPE_CREATE_INFO,
+ .semaphoreType = VK_SEMAPHORE_TYPE_TIMELINE,
+ .initialValue = 0,
+ };
+
+ const auto sci = VkSemaphoreCreateInfo{
+ .sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO,
+ .pNext = &stci,
+ };
+
+ auto semaphore = VkSemaphore{};
+ device_context.vtable.CreateSemaphore(device_context.device, &sci, nullptr,
+ &semaphore);
+ return semaphore;
}
+QueueContext::QueueContext(DeviceContext& device_context, const VkQueue& queue,
+ const std::uint32_t& queue_family_index)
+ : device_context(device_context), queue(queue),
+ queue_family_index(queue_family_index),
+ // Important we make the command pool before the timestamp pool, because
+ // it's a dependency.
+ command_pool(make_command_pool(device_context, queue_family_index)),
+ semaphore(make_semaphore(device_context)),
+ timestamp_pool(std::make_unique<TimestampPool>(*this)) {}
+
QueueContext::~QueueContext() {
+ // Ugly - destructors of timestamp_pool should be called before we destroy
+ // our vulkan objects.
+ this->timestamp_pool.reset();
+
+ const auto& vtable = this->device_context.vtable;
+
+ vtable.DestroySemaphore(this->device_context.device, this->semaphore,
+ nullptr);
+ vtable.DestroyCommandPool(this->device_context.device, this->command_pool,
+ nullptr);
}
} // namespace low_latency \ No newline at end of file
diff --git a/src/queue_context.hh b/src/queue_context.hh
index eb3f2ea..49bfcdf 100644
--- a/src/queue_context.hh
+++ b/src/queue_context.hh
@@ -6,32 +6,27 @@
#include <vulkan/utility/vk_dispatch_table.h>
#include <vulkan/vulkan.hpp>
-#include <deque>
-#include <vector>
+#include <memory>
namespace low_latency {
+
+class DeviceContext;
class QueueContext final {
public:
- VkDevice device;
- VkuDeviceDispatchTable vtable;
+ DeviceContext& device_context;
- VkQueue queue;
- std::uint32_t queue_family_index;
+ const VkQueue queue;
+ const std::uint32_t queue_family_index;
VkSemaphore semaphore;
VkCommandPool command_pool;
- TimestampPool timestamp_pool;
-
- std::deque<
- std::vector<std::pair<TimestampPool::Handle, TimestampPool::Handle>>>
- tracked_queues;
+ std::unique_ptr<TimestampPool> timestamp_pool;
public:
- QueueContext(const VkDevice& device, const VkQueue queue,
- const std::uint32_t& queue_family_index,
- const VkuDeviceDispatchTable& vtable);
+ QueueContext(DeviceContext& device_context, const VkQueue& queue,
+ const std::uint32_t& queue_family_index);
QueueContext(const QueueContext&) = delete;
QueueContext(QueueContext&&) = delete;
QueueContext operator==(const QueueContext&) = delete;
diff --git a/src/timestamp_pool.cc b/src/timestamp_pool.cc
index 1dc37b2..a70c299 100644
--- a/src/timestamp_pool.cc
+++ b/src/timestamp_pool.cc
@@ -1,11 +1,15 @@
#include "timestamp_pool.hh"
+#include "device_context.hh"
+#include "queue_context.hh"
#include <ranges>
#include <vulkan/vulkan_core.h>
namespace low_latency {
-TimestampPool::block TimestampPool::allocate() {
+TimestampPool::Block TimestampPool::allocate() {
+ const auto& device_context = this->queue_context.device_context;
+
const auto query_pool = [&]() -> VkQueryPool {
const auto qpci = VkQueryPoolCreateInfo{
.sType = VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO,
@@ -13,7 +17,8 @@ TimestampPool::block TimestampPool::allocate() {
.queryCount = this->TIMESTAMP_QUERY_POOL_SIZE};
auto query_pool = VkQueryPool{};
- vtable.CreateQueryPool(device, &qpci, nullptr, &query_pool);
+ device_context.vtable.CreateQueryPool(device_context.device, &qpci,
+ nullptr, &query_pool);
return query_pool;
}();
@@ -21,34 +26,32 @@ TimestampPool::block TimestampPool::allocate() {
std::views::iota(0u, this->TIMESTAMP_QUERY_POOL_SIZE / 2) |
std::views::transform([](const std::uint64_t& i) { return 2 * i; });
- const auto available_keys = std::make_shared<available_query_indicies_t>(
+ auto available_indices = std::make_unique<available_query_indicies_t>(
available_query_indicies_t{std::begin(key_range), std::end(key_range)});
- auto command_buffers = [this]() -> auto {
+ auto command_buffers = [&, this]() -> auto {
auto command_buffers =
std::vector<VkCommandBuffer>(this->TIMESTAMP_QUERY_POOL_SIZE);
const auto cbai = VkCommandBufferAllocateInfo{
.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO,
- .commandPool = this->command_pool,
+ .commandPool = this->queue_context.command_pool,
.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY,
.commandBufferCount =
static_cast<std::uint32_t>(std::size(command_buffers)),
};
- vtable.AllocateCommandBuffers(device, &cbai,
- std::data(command_buffers));
+ device_context.vtable.AllocateCommandBuffers(
+ device_context.device, &cbai, std::data(command_buffers));
return std::make_unique<std::vector<VkCommandBuffer>>(command_buffers);
}();
- return block{.query_pool = query_pool,
- .available_indicies = available_keys,
+ return Block{.query_pool = query_pool,
+ .available_indicies = std::move(available_indices),
.command_buffers = std::move(command_buffers)};
}
-TimestampPool::TimestampPool(const VkDevice& device,
- const VkuDeviceDispatchTable& vtable,
- const VkCommandPool& command_pool)
- : device(device), vtable(vtable), command_pool(command_pool) {
+TimestampPool::TimestampPool(QueueContext& queue_context)
+ : queue_context(queue_context) {
// Allocate one block on construction, it's likely more than enough!
this->blocks.emplace_back(this->allocate());
@@ -69,11 +72,11 @@ std::unique_ptr<TimestampPool::Handle> TimestampPool::acquire() {
}();
const auto query_pool = vacant_iter->query_pool;
- auto& available_indices = vacant_iter->available_indicies;
+ auto& available_indices = *vacant_iter->available_indicies;
// Grab any element from our set and erase it immediately after.
- const auto query_index = *std::begin(*available_indices);
- available_indices->erase(std::begin(*available_indices));
+ const auto query_index = *std::begin(available_indices);
+ available_indices.erase(std::begin(available_indices));
const auto command_buffers = [&]() -> auto {
auto command_buffers = std::array<VkCommandBuffer, 2>{};
@@ -91,8 +94,7 @@ std::unique_ptr<TimestampPool::Handle> TimestampPool::acquire() {
}
TimestampPool::Handle::Handle(
- const std::weak_ptr<TimestampPool::available_query_indicies_t>&
- index_origin,
+ TimestampPool::available_query_indicies_t& index_origin,
const std::size_t block_index, const VkQueryPool& query_pool,
const std::uint64_t query_index,
const std::array<VkCommandBuffer, 2>& command_buffers)
@@ -101,10 +103,7 @@ TimestampPool::Handle::Handle(
command_buffers(command_buffers) {}
TimestampPool::Handle::~Handle() {
- if (const auto origin = this->index_origin.lock(); origin) {
- assert(!origin->contains(this->query_index));
- origin->insert(this->query_index);
- }
+ this->index_origin.insert(this->query_index);
}
void TimestampPool::Handle::setup_command_buffers(
@@ -136,6 +135,8 @@ void TimestampPool::poll() {
this->cached_timestamps.clear();
this->cached_timestamps.reserve(std::size(this->blocks));
+ const auto& device_context = this->queue_context.device_context;
+
std::ranges::transform(
this->blocks, std::back_inserter(this->cached_timestamps),
[&, this](const auto& block) {
@@ -144,14 +145,15 @@ void TimestampPool::poll() {
auto timestamps = std::make_unique<std::vector<std::uint64_t>>(
this->TIMESTAMP_QUERY_POOL_SIZE);
- const auto result = vtable.GetQueryPoolResults(
- this->device, query_pool, 0, this->TIMESTAMP_QUERY_POOL_SIZE,
+ const auto result = device_context.vtable.GetQueryPoolResults(
+ device_context.device, query_pool, 0,
+ this->TIMESTAMP_QUERY_POOL_SIZE,
this->TIMESTAMP_QUERY_POOL_SIZE * sizeof(std::uint64_t),
std::data(*timestamps), sizeof(uint64_t),
VK_QUERY_RESULT_64_BIT);
- // Might return not ready when any of them aren't ready, which is
- // not an error for our use case.
+ // Might return not ready when any of them aren't ready, which
+ // is not an error for our use case.
assert(result == VK_SUCCESS || result == VK_NOT_READY);
return timestamps;
@@ -169,4 +171,16 @@ std::uint64_t TimestampPool::get_polled(const Handle& handle) {
return handle.query_index;
}
+TimestampPool::~TimestampPool() {
+ const auto& device = this->queue_context.device_context.device;
+ const auto& vtable = this->queue_context.device_context.vtable;
+
+ for (const auto& block : this->blocks) {
+ vtable.FreeCommandBuffers(device, this->queue_context.command_pool,
+ std::size(*block.command_buffers),
+ std::data(*block.command_buffers));
+ vtable.DestroyQueryPool(device, block.query_pool, nullptr);
+ }
+}
+
} // namespace low_latency \ No newline at end of file
diff --git a/src/timestamp_pool.hh b/src/timestamp_pool.hh
index 7efa4ee..82c4721 100644
--- a/src/timestamp_pool.hh
+++ b/src/timestamp_pool.hh
@@ -43,25 +43,25 @@
namespace low_latency {
+class QueueContext;
+
class TimestampPool final {
private:
static constexpr auto TIMESTAMP_QUERY_POOL_SIZE = 512u;
static_assert(TIMESTAMP_QUERY_POOL_SIZE % 2 == 0);
private:
- VkuDeviceDispatchTable vtable;
- VkDevice device;
- VkCommandPool command_pool;
+ QueueContext& queue_context;
// VkQueryPool with an unordered set of keys available for reading.
using available_query_indicies_t = std::unordered_set<std::uint64_t>;
- struct block {
+ struct Block {
VkQueryPool query_pool;
- std::shared_ptr<available_query_indicies_t> available_indicies;
+ std::unique_ptr<available_query_indicies_t> available_indicies;
std::unique_ptr<std::vector<VkCommandBuffer>> command_buffers;
};
- std::vector<block> blocks; // multiple blocks
+ std::vector<Block> blocks; // multiple blocks
// A snapshot of all available blocks for reading after each poll.
std::vector<std::unique_ptr<std::vector<std::uint64_t>>> cached_timestamps;
@@ -74,7 +74,7 @@ class TimestampPool final {
friend class TimestampPool;
private:
- std::weak_ptr<available_query_indicies_t> index_origin;
+ available_query_indicies_t& index_origin;
std::size_t block_index;
public:
@@ -83,8 +83,7 @@ class TimestampPool final {
std::array<VkCommandBuffer, 2> command_buffers;
public:
- Handle(const std::weak_ptr<TimestampPool::available_query_indicies_t>&
- index_origin,
+ Handle(TimestampPool::available_query_indicies_t& index_origin,
const std::size_t block_index, const VkQueryPool& query_pool,
const std::uint64_t query_index,
const std::array<VkCommandBuffer, 2>& command_buffers);
@@ -99,15 +98,15 @@ class TimestampPool final {
};
private:
- block allocate();
+ Block allocate();
public:
- TimestampPool(const VkDevice& device, const VkuDeviceDispatchTable& vtable,
- const VkCommandPool& command_pool);
+ TimestampPool(QueueContext& queue_context);
TimestampPool(const TimestampPool&) = delete;
TimestampPool(TimestampPool&&) = delete;
TimestampPool operator==(const TimestampPool&) = delete;
TimestampPool operator==(TimestampPool&&) = delete;
+ ~TimestampPool();
public:
// Hands out a Handle with a pool and index of two uint64_t's.