aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorNicolas James <Eele1Ephe7uZahRie@tutanota.com>2026-02-11 23:19:15 +1100
committerNicolas James <Eele1Ephe7uZahRie@tutanota.com>2026-02-11 23:19:15 +1100
commit76f3ef1d7c2b4393a8e8b402deb924e606448d27 (patch)
treea291bec0544f007536a41ec1f590338aee1163e9
parent77e2be172718878b38999efc247ce7571435fcc8 (diff)
More cleanup, fix lifetime and mutex issues
-rw-r--r--CMakeLists.txt19
-rw-r--r--src/context.cc9
-rw-r--r--src/context.hh21
-rw-r--r--src/device_context.cc12
-rw-r--r--src/device_context.hh15
-rw-r--r--src/instance_context.cc9
-rw-r--r--src/instance_context.hh17
-rw-r--r--src/latency_controller.hh15
-rw-r--r--src/layer.cc586
-rw-r--r--src/layer_context.hh72
-rw-r--r--src/physical_device_context.cc11
-rw-r--r--src/physical_device_context.hh26
-rw-r--r--src/queue_context.cc5
-rw-r--r--src/queue_context.hh16
-rw-r--r--src/timestamp_pool.cc7
-rw-r--r--src/timestamp_pool.hh18
16 files changed, 522 insertions, 336 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 09daa4c..021f56b 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -39,3 +39,22 @@ add_custom_command(TARGET ${LIBRARY_NAME} POST_BUILD
"${CMAKE_CURRENT_SOURCE_DIR}/low_latency_layer.json"
"${OUTPUT_DIR}/"
)
+
+set(SANITIZE_FLAGS
+ -fsanitize=address,undefined,leak
+ -fno-omit-frame-pointer
+ -fno-optimize-sibling-calls
+ -fno-sanitize-recover=all
+ -fsanitize-address-use-after-scope
+)
+
+target_compile_options(${LIBRARY_NAME} PRIVATE
+ #${SANITIZE_FLAGS}
+ -g3
+ -O1
+ -D_GLIBCXX_ASSERTIONS
+)
+
+target_link_options(${LIBRARY_NAME} PRIVATE
+ #${SANITIZE_FLAGS}
+)
diff --git a/src/context.cc b/src/context.cc
new file mode 100644
index 0000000..ff93c36
--- /dev/null
+++ b/src/context.cc
@@ -0,0 +1,9 @@
+#include "context.hh"
+
+namespace low_latency {
+
+Context::Context() {}
+
+Context::~Context() {}
+
+} // namespace low_latency \ No newline at end of file
diff --git a/src/context.hh b/src/context.hh
new file mode 100644
index 0000000..5972740
--- /dev/null
+++ b/src/context.hh
@@ -0,0 +1,21 @@
+#ifndef CONTEXT_HH_
+#define CONTEXT_HH_
+
+// Base class for all Context types: non-copyable, non-movable, with a
+// virtual destructor so derived contexts can be deleted through a base pointer.
+
+namespace low_latency {
+
+class Context {
+
+public:
+ Context();
+ Context(const Context& context) = delete;
+ Context(Context&& context) = delete;
+ Context operator=(const Context& context) = delete;
+ Context operator=(Context&& context) = delete;
+ virtual ~Context();
+};
+
+} // namespace low_latency
+
+#endif \ No newline at end of file
diff --git a/src/device_context.cc b/src/device_context.cc
index 4be1872..5f5c1f7 100644
--- a/src/device_context.cc
+++ b/src/device_context.cc
@@ -7,9 +7,17 @@ namespace low_latency {
DeviceContext::DeviceContext(InstanceContext& parent_instance,
const VkDevice& device,
+ const PFN_vkSetDeviceLoaderData& sdld,
VkuDeviceDispatchTable&& vtable)
- : instance(parent_instance), device(device), vtable(std::move(vtable))
+ : instance(parent_instance), device(device), sdld(sdld),
+ vtable(std::move(vtable)) {}
-{}
+DeviceContext::~DeviceContext() {
+    // Let the destructor handle clearing; the queue contexts should be
+    // uniquely owned by now (i.e., already removed from the layer's context map).
+ for (const auto& [queue, queue_context] : this->queues) {
+ assert(queue_context.unique());
+ }
+}
} // namespace low_latency \ No newline at end of file
diff --git a/src/device_context.hh b/src/device_context.hh
index a936d6d..3406da1 100644
--- a/src/device_context.hh
+++ b/src/device_context.hh
@@ -5,29 +5,32 @@
#include <unordered_map>
#include <vulkan/utility/vk_dispatch_table.h>
+#include <vulkan/vk_layer.h>
#include <vulkan/vulkan.hpp>
+#include "context.hh"
#include "instance_context.hh"
namespace low_latency {
class QueueContext;
-struct DeviceContext {
+struct DeviceContext final : public Context {
InstanceContext& instance;
const VkDevice device;
const VkuDeviceDispatchTable vtable;
- std::unordered_map<VkQueue, std::unique_ptr<QueueContext>> queue_contexts;
+ // Do we need to use this unless we wrap dispatchable objects?
+ const PFN_vkSetDeviceLoaderData sdld;
+
+ std::unordered_map<VkQueue, std::shared_ptr<QueueContext>> queues;
public:
DeviceContext(InstanceContext& parent_instance, const VkDevice& device,
+ const PFN_vkSetDeviceLoaderData& sdld,
VkuDeviceDispatchTable&& vtable);
- DeviceContext(const DeviceContext&) = delete;
- DeviceContext(DeviceContext&&) = delete;
- DeviceContext operator==(const DeviceContext&) = delete;
- DeviceContext operator==(DeviceContext&&) = delete;
+ virtual ~DeviceContext();
};
}; // namespace low_latency
diff --git a/src/instance_context.cc b/src/instance_context.cc
index 36d2c66..d12766f 100644
--- a/src/instance_context.cc
+++ b/src/instance_context.cc
@@ -1,5 +1,6 @@
#include "instance_context.hh"
+#include <cassert>
#include <utility>
namespace low_latency {
@@ -8,6 +9,12 @@ InstanceContext::InstanceContext(const VkInstance& instance,
VkuInstanceDispatchTable&& vtable)
: instance(instance), vtable(std::move(vtable)) {}
-InstanceContext::~InstanceContext() {}
+InstanceContext::~InstanceContext() {
+    // As with devices, we should hold the only shared_ptr to each physical
+    // device context at this point, so they are destroyed here.
+ for (const auto& [device, device_context] : this->phys_devices) {
+ assert(device_context.unique());
+ }
+}
} // namespace low_latency \ No newline at end of file
diff --git a/src/instance_context.hh b/src/instance_context.hh
index 0a0b999..3b71a82 100644
--- a/src/instance_context.hh
+++ b/src/instance_context.hh
@@ -3,21 +3,26 @@
#include <vulkan/utility/vk_dispatch_table.h>
+#include <memory>
+#include <unordered_map>
+
+#include "context.hh"
+
namespace low_latency {
-struct InstanceContext {
+class PhysicalDeviceContext;
+
+struct InstanceContext final : public Context {
const VkInstance instance;
const VkuInstanceDispatchTable vtable;
+ std::unordered_map<void*, std::shared_ptr<PhysicalDeviceContext>> phys_devices;
+
public:
InstanceContext(const VkInstance& instance,
VkuInstanceDispatchTable&& vtable);
- InstanceContext(const InstanceContext&) = delete;
- InstanceContext(InstanceContext&&) = delete;
- InstanceContext operator==(const InstanceContext&) = delete;
- InstanceContext operator==(InstanceContext&&) = delete;
- ~InstanceContext();
+ virtual ~InstanceContext();
};
}; // namespace low_latency
diff --git a/src/latency_controller.hh b/src/latency_controller.hh
deleted file mode 100644
index 6672d5a..0000000
--- a/src/latency_controller.hh
+++ /dev/null
@@ -1,15 +0,0 @@
-#ifndef LATENCY_CONTROLLER_HH_
-#define LATENCY_CONTROLLER_HH_
-
-// The purpose of this file is to provide
-
-namespace low_latency {
-
-class LatencyController final {
-
-
-};
-
-};
-
-#endif \ No newline at end of file
diff --git a/src/layer.cc b/src/layer.cc
index 5e652f0..cead7cd 100644
--- a/src/layer.cc
+++ b/src/layer.cc
@@ -5,6 +5,9 @@
#include <unordered_map>
#include <utility>
+// hack
+#include <deque>
+
#include <vulkan/utility/vk_dispatch_table.h>
#include <vulkan/vk_layer.h>
#include <vulkan/vk_platform.h>
@@ -16,6 +19,7 @@
#include "instance_context.hh"
#include "layer_context.hh"
#include "queue_context.hh"
+#include "timestamp_pool.hh"
namespace low_latency {
@@ -25,8 +29,9 @@ LayerContext layer_context;
} // namespace
-template <typename T, typename sType>
-static T* get_link_info(const void* const head, const sType& stype) {
+template <typename T, typename sType, typename fType>
+static T* get_link_info(const void* const head, const sType& stype,
+ const fType& ftype) {
for (auto i = reinterpret_cast<const VkBaseInStructure*>(head); i;
i = i->pNext) {
@@ -35,7 +40,7 @@ static T* get_link_info(const void* const head, const sType& stype) {
}
const auto info = reinterpret_cast<const T*>(i);
- if (info->function != VK_LAYER_LINK_INFO) {
+ if (info->function != ftype) {
continue;
}
@@ -49,7 +54,8 @@ CreateInstance(const VkInstanceCreateInfo* pCreateInfo,
const VkAllocationCallbacks* pAllocator, VkInstance* pInstance) {
const auto link_info = get_link_info<VkLayerInstanceCreateInfo>(
- pCreateInfo->pNext, VK_STRUCTURE_TYPE_LOADER_INSTANCE_CREATE_INFO);
+ pCreateInfo->pNext, VK_STRUCTURE_TYPE_LOADER_INSTANCE_CREATE_INFO,
+ VK_LAYER_LINK_INFO);
if (!link_info || !link_info->u.pLayerInfo) {
return VK_ERROR_INITIALIZATION_FAILED;
@@ -78,23 +84,23 @@ CreateInstance(const VkInstanceCreateInfo* pCreateInfo,
}
const auto key = layer_context.get_key(*pInstance);
+
+#define INSTANCE_VTABLE_LOAD(name) \
+ .name = reinterpret_cast<PFN_vk##name>(gipa(*pInstance, "vk" #name))
auto vtable = VkuInstanceDispatchTable{
- .DestroyInstance = reinterpret_cast<PFN_vkDestroyInstance>(
- gipa(*pInstance, "vkDestroyInstance")),
- .EnumeratePhysicalDevices =
- reinterpret_cast<PFN_vkEnumeratePhysicalDevices>(
- gipa(*pInstance, "vkEnumeratePhysicalDevices")),
- .GetInstanceProcAddr = reinterpret_cast<PFN_vkGetInstanceProcAddr>(
- gipa(*pInstance, "vkGetInstanceProcAddr")),
- .EnumerateDeviceExtensionProperties =
- reinterpret_cast<PFN_vkEnumerateDeviceExtensionProperties>(
- gipa(*pInstance, "vkEnumerateDeviceExtensionProperties")),
+ INSTANCE_VTABLE_LOAD(DestroyInstance),
+ INSTANCE_VTABLE_LOAD(EnumeratePhysicalDevices),
+ INSTANCE_VTABLE_LOAD(GetInstanceProcAddr),
+ INSTANCE_VTABLE_LOAD(CreateDevice),
+ INSTANCE_VTABLE_LOAD(EnumerateDeviceExtensionProperties),
};
+#undef INSTANCE_VTABLE_LOAD
const auto lock = std::scoped_lock{layer_context.mutex};
assert(!layer_context.contexts.contains(key));
+
layer_context.contexts.try_emplace(
- key, std::make_unique<InstanceContext>(*pInstance, std::move(vtable)));
+ key, std::make_shared<InstanceContext>(*pInstance, std::move(vtable)));
return VK_SUCCESS;
}
@@ -102,11 +108,55 @@ CreateInstance(const VkInstanceCreateInfo* pCreateInfo,
static VKAPI_ATTR void VKAPI_CALL
DestroyInstance(VkInstance instance, const VkAllocationCallbacks* allocator) {
+ const auto destroy_instance_func = [&]() -> auto {
+ const auto context = layer_context.get_context(instance);
+ const auto lock = std::scoped_lock{layer_context.mutex};
+
+        // Erase the physical-device contexts owned by this instance from the
+        // global context map.
+ for (const auto& [key, _] : context->phys_devices) {
+ assert(layer_context.contexts.erase(key));
+ }
+
+ const auto key = layer_context.get_key(instance);
+ assert(layer_context.contexts.erase(key));
+
+        // Should be the last shared_ptr now, as in DestroyDevice.
+ assert(context.unique());
+ return context->vtable.DestroyInstance;
+ }();
+
+ destroy_instance_func(instance, allocator);
+}
+
+static VKAPI_ATTR VkResult VKAPI_CALL EnumeratePhysicalDevices(
+ VkInstance instance, std::uint32_t* count, VkPhysicalDevice* devices) {
+
+ const auto context = layer_context.get_context(instance);
+
+ if (const auto result =
+ context->vtable.EnumeratePhysicalDevices(instance, count, devices);
+ !devices || !count || result != VK_SUCCESS) {
+
+ return result;
+ }
+
const auto lock = std::scoped_lock{layer_context.mutex};
+ const auto C = *count;
+ for (auto i = std::uint32_t{0}; i < C; ++i) {
+ const auto& device = devices[i];
+
+ const auto key = layer_context.get_key(device);
+ const auto [it, inserted] =
+ layer_context.contexts.try_emplace(key, nullptr);
- const auto key = layer_context.get_key(instance);
- assert(layer_context.contexts.contains(key));
- layer_context.contexts.erase(key);
+ if (inserted) {
+ it->second =
+ std::make_shared<PhysicalDeviceContext>(*context, device);
+ }
+ }
+
+ return VK_SUCCESS;
}
static VKAPI_ATTR VkResult VKAPI_CALL CreateDevice(
@@ -114,56 +164,64 @@ static VKAPI_ATTR VkResult VKAPI_CALL CreateDevice(
const VkAllocationCallbacks* pAllocator, VkDevice* pDevice) {
const auto create_info = get_link_info<VkLayerDeviceCreateInfo>(
- pCreateInfo->pNext, VK_STRUCTURE_TYPE_LOADER_DEVICE_CREATE_INFO);
+ pCreateInfo->pNext, VK_STRUCTURE_TYPE_LOADER_DEVICE_CREATE_INFO,
+ VK_LAYER_LINK_INFO);
if (!create_info || !create_info->u.pLayerInfo) {
return VK_ERROR_INITIALIZATION_FAILED;
}
+ const auto callback_info = get_link_info<VkLayerDeviceCreateInfo>(
+ pCreateInfo->pNext, VK_STRUCTURE_TYPE_LOADER_DEVICE_CREATE_INFO,
+ VK_LOADER_DATA_CALLBACK);
+ if (!callback_info || !callback_info->u.pLayerInfo) {
+ return VK_ERROR_INITIALIZATION_FAILED;
+ }
+
+ const auto sdld = callback_info->u.pfnSetDeviceLoaderData;
const auto gipa = create_info->u.pLayerInfo->pfnNextGetInstanceProcAddr;
const auto gdpa = create_info->u.pLayerInfo->pfnNextGetDeviceProcAddr;
- if (!gipa || !gdpa) {
+ if (!sdld || !gipa || !gdpa) {
return VK_ERROR_INITIALIZATION_FAILED;
}
create_info->u.pLayerInfo = create_info->u.pLayerInfo->pNext;
- const auto lock = std::scoped_lock{layer_context.mutex};
-
- auto& context = layer_context.get_context<InstanceContext>(physical_device);
+ const auto physical_device_context =
+ layer_context.get_context(physical_device);
+ auto& instance_context = physical_device_context->instance;
const auto next_extensions =
[&]() -> std::optional<std::vector<const char*>> {
- const auto supported_extensions =
- [&]() -> std::optional<std::vector<VkExtensionProperties>> {
- const auto enumerate_device_extensions =
- reinterpret_cast<PFN_vkEnumerateDeviceExtensionProperties>(gipa(
- context.instance, "vkEnumerateDeviceExtensionProperties"));
- if (!enumerate_device_extensions) {
- return std::nullopt;
- }
+ const auto enumerate_device_extensions =
+ reinterpret_cast<PFN_vkEnumerateDeviceExtensionProperties>(
+ gipa(instance_context.instance,
+ "vkEnumerateDeviceExtensionProperties"));
+ if (!enumerate_device_extensions) {
+ return std::nullopt;
+ }
- auto count = std::uint32_t{};
- if (enumerate_device_extensions(physical_device, nullptr, &count,
- nullptr) != VK_SUCCESS) {
+ auto count = std::uint32_t{};
+ if (enumerate_device_extensions(physical_device, nullptr, &count,
+ nullptr) != VK_SUCCESS) {
- return std::nullopt;
- }
+ return std::nullopt;
+ }
- auto supported_extensions =
- std::vector<VkExtensionProperties>(count);
- if (enumerate_device_extensions(physical_device, nullptr, &count,
- std::data(supported_extensions)) !=
- VK_SUCCESS) {
+ auto supported_extensions = std::vector<VkExtensionProperties>(count);
+ if (enumerate_device_extensions(physical_device, nullptr, &count,
+ std::data(supported_extensions)) !=
+ VK_SUCCESS) {
- return std::nullopt;
- }
+ return std::nullopt;
+ }
- return supported_extensions;
- }();
+ auto next_extensions = std::vector<const char*>{};
+ if (pCreateInfo->enabledExtensionCount &&
+ pCreateInfo->ppEnabledExtensionNames) {
- auto next_extensions =
- std::vector{*pCreateInfo->ppEnabledExtensionNames,
- std::next(*pCreateInfo->ppEnabledExtensionNames +
- pCreateInfo->enabledExtensionCount)};
+ std::ranges::copy_n(pCreateInfo->ppEnabledExtensionNames,
+ pCreateInfo->enabledExtensionCount,
+ std::back_inserter(next_extensions));
+ }
const auto wanted_extensions = {
VK_KHR_SYNCHRONIZATION_2_EXTENSION_NAME,
@@ -180,12 +238,11 @@ static VKAPI_ATTR VkResult VKAPI_CALL CreateDevice(
continue; // Already included, ignore it.
}
- if (std::ranges::none_of(*supported_extensions,
- [&](const auto& supported_extension) {
- return !std::strcmp(
- supported_extension.extensionName,
- wanted);
- })) {
+ if (std::ranges::none_of(
+ supported_extensions, [&](const auto& supported_extension) {
+ return !std::strcmp(supported_extension.extensionName,
+ wanted);
+ })) {
return std::nullopt; // We don't support it, the layer can't
// work.
@@ -201,8 +258,7 @@ static VKAPI_ATTR VkResult VKAPI_CALL CreateDevice(
return VK_ERROR_INITIALIZATION_FAILED;
}
- const auto create_device = reinterpret_cast<PFN_vkCreateDevice>(
- gipa(VK_NULL_HANDLE, "vkCreateDevice"));
+ const auto create_device = instance_context.vtable.CreateDevice;
if (!create_device) {
return VK_ERROR_INITIALIZATION_FAILED;
}
@@ -221,164 +277,199 @@ static VKAPI_ATTR VkResult VKAPI_CALL CreateDevice(
return result;
}
+#define DEVICE_VTABLE_LOAD(name) \
+ .name = reinterpret_cast<PFN_vk##name>(gdpa(*pDevice, "vk" #name))
auto vtable = VkuDeviceDispatchTable{
- .GetDeviceProcAddr = reinterpret_cast<PFN_vkGetDeviceProcAddr>(
- gdpa(*pDevice, "vkGetDeviceProcAddr")),
- .DestroyDevice = reinterpret_cast<PFN_vkDestroyDevice>(
- gdpa(*pDevice, "vkDestroyDevice")),
- .GetDeviceQueue = reinterpret_cast<PFN_vkGetDeviceQueue>(
- gdpa(*pDevice, "vkGetDeviceQueue")),
- .QueueSubmit = reinterpret_cast<PFN_vkQueueSubmit>(
- gdpa(*pDevice, "vkQueueSubmit")),
- .CreateSemaphore = reinterpret_cast<PFN_vkCreateSemaphore>(
- gdpa(*pDevice, "vkCreateSemaphore")),
- .DestroySemaphore = reinterpret_cast<PFN_vkDestroySemaphore>(
- gdpa(*pDevice, "vkDestroySemaphore")),
- .CreateQueryPool = reinterpret_cast<PFN_vkCreateQueryPool>(
- gdpa(*pDevice, "vkCreateQueryPool")),
- .DestroyQueryPool = reinterpret_cast<PFN_vkDestroyQueryPool>(
- gdpa(*pDevice, "vkDestroyQueryPool")),
- .GetQueryPoolResults = reinterpret_cast<PFN_vkGetQueryPoolResults>(
- gdpa(*pDevice, "vkGetQueryPoolResults")),
- .CreateCommandPool = reinterpret_cast<PFN_vkCreateCommandPool>(
- gdpa(*pDevice, "vkCreateCommandPool")),
- .DestroyCommandPool = reinterpret_cast<PFN_vkDestroyCommandPool>(
- gdpa(*pDevice, "vkDestroyCommandPool")),
- .AllocateCommandBuffers =
- reinterpret_cast<PFN_vkAllocateCommandBuffers>(
- gdpa(*pDevice, "vkAllocateCommandBuffers")),
- .FreeCommandBuffers = reinterpret_cast<PFN_vkFreeCommandBuffers>(
- gdpa(*pDevice, "vkFreeCommandBuffers")),
- .BeginCommandBuffer = reinterpret_cast<PFN_vkBeginCommandBuffer>(
- gdpa(*pDevice, "vkBeginCommandBuffer")),
- .EndCommandBuffer = reinterpret_cast<PFN_vkEndCommandBuffer>(
- gdpa(*pDevice, "vkEndCommandBuffer")),
- .ResetCommandBuffer = reinterpret_cast<PFN_vkResetCommandBuffer>(
- gdpa(*pDevice, "vkResetCommandBuffer")),
- .CmdDraw = reinterpret_cast<PFN_vkCmdDraw>(gdpa(*pDevice, "vkCmdDraw")),
- .CmdDrawIndexed = reinterpret_cast<PFN_vkCmdDrawIndexed>(
- gdpa(*pDevice, "vkCmdDrawIndexed")),
- .CmdResetQueryPool = reinterpret_cast<PFN_vkCmdResetQueryPool>(
- gdpa(*pDevice, "vkCmdResetQueryPool")),
- .GetDeviceQueue2 = reinterpret_cast<PFN_vkGetDeviceQueue2>(
- gdpa(*pDevice, "vkGetDeviceQueue2")),
- .QueueSubmit2 = reinterpret_cast<PFN_vkQueueSubmit2>(
- gdpa(*pDevice, "vkQueueSubmit2")),
- .QueuePresentKHR = reinterpret_cast<PFN_vkQueuePresentKHR>(
- gdpa(*pDevice, "vkQueuePresentKHR")),
- .GetSemaphoreCounterValueKHR =
- reinterpret_cast<PFN_vkGetSemaphoreCounterValueKHR>(
- gdpa(*pDevice, "vkGetSemaphoreCounterValueKHR")),
- .CmdWriteTimestamp2KHR = reinterpret_cast<PFN_vkCmdWriteTimestamp2KHR>(
- gdpa(*pDevice, "vkCmdWriteTimestamp2KHR")),
- .QueueSubmit2KHR = reinterpret_cast<PFN_vkQueueSubmit2KHR>(
- gdpa(*pDevice, "vkQueueSubmit2KHR")),
+ DEVICE_VTABLE_LOAD(GetDeviceProcAddr),
+ DEVICE_VTABLE_LOAD(DestroyDevice),
+ DEVICE_VTABLE_LOAD(GetDeviceQueue),
+ DEVICE_VTABLE_LOAD(QueueSubmit),
+ DEVICE_VTABLE_LOAD(CreateSemaphore),
+ DEVICE_VTABLE_LOAD(DestroySemaphore),
+ DEVICE_VTABLE_LOAD(CreateQueryPool),
+ DEVICE_VTABLE_LOAD(DestroyQueryPool),
+ DEVICE_VTABLE_LOAD(GetQueryPoolResults),
+ DEVICE_VTABLE_LOAD(CreateCommandPool),
+ DEVICE_VTABLE_LOAD(DestroyCommandPool),
+ DEVICE_VTABLE_LOAD(AllocateCommandBuffers),
+ DEVICE_VTABLE_LOAD(FreeCommandBuffers),
+ DEVICE_VTABLE_LOAD(BeginCommandBuffer),
+ DEVICE_VTABLE_LOAD(EndCommandBuffer),
+ DEVICE_VTABLE_LOAD(ResetCommandBuffer),
+ DEVICE_VTABLE_LOAD(CmdResetQueryPool),
+ DEVICE_VTABLE_LOAD(CmdDraw),
+ DEVICE_VTABLE_LOAD(CmdDrawIndexed),
+ DEVICE_VTABLE_LOAD(GetDeviceQueue2),
+ DEVICE_VTABLE_LOAD(QueueSubmit2),
+ DEVICE_VTABLE_LOAD(AcquireNextImageKHR),
+ DEVICE_VTABLE_LOAD(QueuePresentKHR),
+ DEVICE_VTABLE_LOAD(AcquireNextImage2KHR),
+ DEVICE_VTABLE_LOAD(GetSemaphoreCounterValueKHR),
+ DEVICE_VTABLE_LOAD(CmdWriteTimestamp2KHR),
+ DEVICE_VTABLE_LOAD(QueueSubmit2KHR),
};
+#undef DEVICE_VTABLE_LOAD
const auto key = layer_context.get_key(*pDevice);
+ const auto lock = std::scoped_lock{layer_context.mutex};
assert(!layer_context.contexts.contains(key));
+
layer_context.contexts.try_emplace(
- key,
- std::make_unique<DeviceContext>(context, *pDevice, std::move(vtable)));
+ key, std::make_shared<DeviceContext>(instance_context, *pDevice, sdld,
+ std::move(vtable)));
return VK_SUCCESS;
}
static VKAPI_ATTR void VKAPI_CALL
DestroyDevice(VkDevice device, const VkAllocationCallbacks* allocator) {
- const auto lock = std::scoped_lock{layer_context.mutex};
- const auto key = layer_context.get_key(device);
- assert(layer_context.contexts.contains(key));
- layer_context.contexts.erase(key);
+
+ const auto destroy_device_func = [&]() -> auto {
+ const auto device_context = layer_context.get_context(device);
+
+ const auto func = device_context->vtable.DestroyDevice;
+ const auto lock = std::scoped_lock{layer_context.mutex};
+ // Remove all owned queues from our global context pool.
+ for (const auto& [queue, _] : device_context->queues) {
+ const auto key = layer_context.get_key(queue);
+ assert(layer_context.contexts.erase(key));
+ }
+
+ const auto key = layer_context.get_key(device);
+ assert(layer_context.contexts.erase(key));
+
+        // This should be the last shared_ptr now, so its destructor runs here.
+        // The destructor expects its owned queue contexts to be unique as well.
+ assert(device_context.unique());
+
+ return func;
+ }();
+
+ destroy_device_func(device, allocator);
}
-// Small amount of duplication, we can't assume gdq2 is available apparently.
static VKAPI_ATTR void VKAPI_CALL
GetDeviceQueue(VkDevice device, std::uint32_t queue_family_index,
std::uint32_t queue_index, VkQueue* queue) {
- const auto lock = std::scoped_lock{layer_context.mutex};
-
- auto& device_context = layer_context.get_context<DeviceContext>(device);
+ const auto device_context = layer_context.get_context(device);
- device_context.vtable.GetDeviceQueue(device, queue_family_index,
- queue_index, queue);
+ device_context->vtable.GetDeviceQueue(device, queue_family_index,
+ queue_index, queue);
if (!queue || !*queue) {
return;
}
- auto& queue_contexts = device_context.queue_contexts;
- if (!queue_contexts.contains(*queue)) {
- queue_contexts.try_emplace(
- *queue, std::make_unique<QueueContext>(device_context, *queue,
- queue_family_index));
+    // Look in our layer context, which tracks every context. If try_emplace
+    // inserted a new (null) entry, the queue was not yet tracked and we must
+    // construct a context for it.
+ const auto key = layer_context.get_key(*queue);
+ const auto lock = std::scoped_lock{layer_context.mutex};
+ const auto [it, inserted] = layer_context.contexts.try_emplace(key);
+ if (inserted) {
+ it->second = std::make_shared<QueueContext>(*device_context, *queue,
+ queue_family_index);
}
+
+    // it->second should be a QueueContext; it may already have existed in the
+    // map, which is expected.
+ const auto ptr = std::dynamic_pointer_cast<QueueContext>(it->second);
+ assert(ptr);
+ device_context->queues.emplace(*queue, ptr);
}
+// Identical logic to GetDeviceQueue, so some duplication is unavoidable: we
+// cannot assume the v1 GetDeviceQueue entry point is available to delegate to.
static VKAPI_ATTR void VKAPI_CALL GetDeviceQueue2(
VkDevice device, const VkDeviceQueueInfo2* info, VkQueue* queue) {
- const auto lock = std::scoped_lock{layer_context.mutex};
- auto& device_context = layer_context.get_context<DeviceContext>(device);
+ const auto device_context = layer_context.get_context(device);
- device_context.vtable.GetDeviceQueue2(device, info, queue);
+ device_context->vtable.GetDeviceQueue2(device, info, queue);
if (!queue || !*queue) {
return;
}
- auto& queue_contexts = device_context.queue_contexts;
- if (!queue_contexts.contains(*queue)) {
- queue_contexts.try_emplace(
- *queue, std::make_unique<QueueContext>(device_context, *queue,
- info->queueFamilyIndex));
+ const auto key = layer_context.get_key(*queue);
+ const auto lock = std::scoped_lock{layer_context.mutex};
+ const auto [it, inserted] = layer_context.contexts.try_emplace(key);
+ if (inserted) {
+ it->second = std::make_shared<QueueContext>(*device_context, *queue,
+ info->queueFamilyIndex);
+ }
+
+ const auto ptr = std::dynamic_pointer_cast<QueueContext>(it->second);
+ assert(ptr);
+ device_context->queues.emplace(*queue, ptr);
+}
+
+static VKAPI_ATTR VkResult VKAPI_CALL vkAcquireNextImageKHR(
+ VkDevice device, VkSwapchainKHR swapchain, std::uint64_t timeout,
+ VkSemaphore semaphore, VkFence fence, std::uint32_t* pImageIndex) {
+
+ const auto context = layer_context.get_context(device);
+ if (const auto result = context->vtable.AcquireNextImageKHR(
+ device, swapchain, timeout, semaphore, fence, pImageIndex);
+ result != VK_SUCCESS) {
+
+ return result;
+ }
+
+ return VK_SUCCESS;
+}
+
+static VKAPI_ATTR VkResult VKAPI_CALL vkAcquireNextImage2KHR(
+ VkDevice device, const VkAcquireNextImageInfoKHR* pAcquireInfo,
+ std::uint32_t* pImageIndex) {
+
+ const auto context = layer_context.get_context(device);
+ if (const auto result = context->vtable.AcquireNextImage2KHR(
+ device, pAcquireInfo, pImageIndex);
+ result != VK_SUCCESS) {
+
+ return result;
}
+
+ return VK_SUCCESS;
}
static VKAPI_ATTR VkResult VKAPI_CALL
vkQueueSubmit(VkQueue queue, std::uint32_t submit_count,
const VkSubmitInfo* submit_info, VkFence fence) {
- const auto lock = std::scoped_lock{layer_context.mutex};
-
- auto& queue_context = layer_context.get_context<QueueContext>(queue);
- const auto& vtable = queue_context.device_context.vtable;
+ const auto& queue_context = layer_context.get_context(queue);
+ const auto& vtable = queue_context->device_context.vtable;
if (!submit_count) { // no-op submit we shouldn't worry about
return vtable.QueueSubmit(queue, submit_count, submit_info, fence);
}
- // Create a new vector of submit infos, copy their existing ones.
+ // Create a new vector of submit infos.
auto next_submit_infos = std::vector<VkSubmitInfo>{};
- next_submit_infos.reserve(submit_count + 2);
- auto timestamp_handle = queue_context.timestamp_pool->acquire();
+ auto timestamp_handle = queue_context->timestamp_pool->acquire();
timestamp_handle->setup_command_buffers(vtable);
const auto& [head_cb, tail_cb] = timestamp_handle->command_buffers;
- // The first submit info we use will steal their wait semaphores.
- next_submit_infos.push_back(VkSubmitInfo{
- .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO,
- .pNext = submit_info->pNext,
- .waitSemaphoreCount = submit_info[0].waitSemaphoreCount,
- .pWaitSemaphores = submit_info[0].pWaitSemaphores,
- .pWaitDstStageMask = submit_info[0].pWaitDstStageMask,
- .commandBufferCount = 1,
- .pCommandBuffers = &head_cb,
- });
+ const auto next_command_buffers = [&]() -> auto {
+ auto next_command_buffers = std::vector<VkCommandBuffer>{head_cb};
+ std::ranges::copy_n(submit_info[0].pCommandBuffers,
+ submit_info[0].commandBufferCount,
+ std::back_inserter(next_command_buffers));
+ return next_command_buffers;
+ }();
- // Fill in original submit infos but erase the wait semaphores on the
- // first because we stole them earlier.
std::ranges::copy_n(submit_info, submit_count,
std::back_inserter(next_submit_infos));
- next_submit_infos[1].pWaitSemaphores = nullptr;
- next_submit_infos[1].waitSemaphoreCount = 0u;
+ next_submit_infos[0].pCommandBuffers = std::data(next_command_buffers);
+ next_submit_infos[0].commandBufferCount = std::size(next_command_buffers);
- const auto TODO_next = std::uint64_t{layer_context.current_frame + 1};
+ const auto next_signal = queue_context->semaphore_sequence + 1;
const auto tail_tssi = VkTimelineSemaphoreSubmitInfo{
.sType = VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO_KHR,
.signalSemaphoreValueCount = 1,
- .pSignalSemaphoreValues = &TODO_next,
+ .pSignalSemaphoreValues = &next_signal,
};
next_submit_infos.push_back(VkSubmitInfo{
.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO,
@@ -386,7 +477,7 @@ vkQueueSubmit(VkQueue queue, std::uint32_t submit_count,
.commandBufferCount = 1,
.pCommandBuffers = &tail_cb,
.signalSemaphoreCount = 1,
- .pSignalSemaphores = &queue_context.semaphore,
+ .pSignalSemaphores = &queue_context->semaphore,
});
if (const auto res =
@@ -397,6 +488,14 @@ vkQueueSubmit(VkQueue queue, std::uint32_t submit_count,
return res;
}
+ // Hack for now, store timestamp handles.
+ queue_context->handle_hack.push_front(std::move(timestamp_handle));
+ if (std::size(queue_context->handle_hack) > 250) {
+ queue_context->handle_hack.pop_back();
+ }
+
+ ++queue_context->semaphore_sequence;
+
return VK_SUCCESS;
}
@@ -405,55 +504,69 @@ static VKAPI_ATTR VkResult VKAPI_CALL
vkQueueSubmit2(VkQueue queue, std::uint32_t submit_count,
const VkSubmitInfo2* submit_infos, VkFence fence) {
- const auto lock = std::scoped_lock{layer_context.mutex};
- auto& queue_context = layer_context.get_context<QueueContext>(queue);
- const auto& vtable = queue_context.device_context.vtable;
+ const auto queue_context = layer_context.get_context(queue);
+ const auto& vtable = queue_context->device_context.vtable;
- if (!submit_count) { // another no-op submit
+ if (!submit_count) {
return vtable.QueueSubmit2(queue, submit_count, submit_infos, fence);
}
- auto next_submit_infos = std::vector<VkSubmitInfo2>();
- next_submit_infos.reserve(submit_count + 2);
-
- auto timestamp_handle = queue_context.timestamp_pool->acquire();
+ auto timestamp_handle = queue_context->timestamp_pool->acquire();
timestamp_handle->setup_command_buffers(vtable);
const auto& [head_cb, tail_cb] = timestamp_handle->command_buffers;
- const auto head_cb_info = VkCommandBufferSubmitInfo{
- .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_SUBMIT_INFO,
- .commandBuffer = head_cb,
- };
- next_submit_infos.push_back(VkSubmitInfo2{
- .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO_2,
- .waitSemaphoreInfoCount = submit_infos[0].waitSemaphoreInfoCount,
- .pWaitSemaphoreInfos = submit_infos[0].pWaitSemaphoreInfos,
- .commandBufferInfoCount = 1,
- .pCommandBufferInfos = &head_cb_info,
- });
+ const auto next_command_buffers = [&]() -> auto {
+ auto next_command_buffers = std::vector<VkCommandBufferSubmitInfo>{};
+ next_command_buffers.push_back(VkCommandBufferSubmitInfo{
+ .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_SUBMIT_INFO,
+ .commandBuffer = head_cb,
+ });
+ std::ranges::copy_n(submit_infos[0].pCommandBufferInfos,
+ submit_infos[0].commandBufferInfoCount,
+ std::back_inserter(next_command_buffers));
+ return next_command_buffers;
+ }();
+
+ auto next_submit_infos = std::vector<VkSubmitInfo2>();
std::ranges::copy_n(submit_infos, submit_count,
std::back_inserter(next_submit_infos));
- next_submit_infos[1].pWaitSemaphoreInfos = nullptr;
- next_submit_infos[1].waitSemaphoreInfoCount = 0;
-
- const auto tail_cb_info = VkCommandBufferSubmitInfo{
+ next_submit_infos[0].pCommandBufferInfos = std::data(next_command_buffers);
+ next_submit_infos[0].commandBufferInfoCount =
+ std::size(next_command_buffers);
+
+ const auto tail_ssi = VkSemaphoreSubmitInfo{
+ .sType = VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO,
+ .semaphore = queue_context->semaphore,
+ .value = queue_context->semaphore_sequence + 1,
+ .stageMask = VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
+ };
+ const auto tail_cbsi = VkCommandBufferSubmitInfo{
.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_SUBMIT_INFO,
.commandBuffer = tail_cb,
};
next_submit_infos.push_back(VkSubmitInfo2{
.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO_2,
- .waitSemaphoreInfoCount = submit_infos[0].waitSemaphoreInfoCount,
- .pWaitSemaphoreInfos = submit_infos[0].pWaitSemaphoreInfos,
.commandBufferInfoCount = 1,
- .pCommandBufferInfos = &tail_cb_info,
+ .pCommandBufferInfos = &tail_cbsi,
+ .signalSemaphoreInfoCount = 1,
+ .pSignalSemaphoreInfos = &tail_ssi,
});
if (const auto res =
- vtable.QueueSubmit2(queue, submit_count, submit_infos, fence);
+ vtable.QueueSubmit2(queue, std::size(next_submit_infos),
+ std::data(next_submit_infos), fence);
res != VK_SUCCESS) {
return res;
}
+ // hack
+ queue_context->handle_hack.push_front(std::move(timestamp_handle));
+ if (std::size(queue_context->handle_hack) > 250) {
+ queue_context->handle_hack.pop_back();
+ }
+
+ ++queue_context->semaphore_sequence;
+
return VK_SUCCESS;
}
@@ -467,9 +580,8 @@ vkQueueSubmit2KHR(VkQueue queue, std::uint32_t submit_count,
static VKAPI_ATTR VkResult VKAPI_CALL
vkQueuePresentKHR(VkQueue queue, const VkPresentInfoKHR* present_info) {
- const auto lock = std::scoped_lock{layer_context.mutex};
- auto& queue_context = layer_context.get_context<QueueContext>(queue);
- const auto& vtable = queue_context.device_context.vtable;
+ const auto& vtable =
+ layer_context.get_context(queue)->device_context.vtable;
if (const auto res = vtable.QueuePresentKHR(queue, present_info);
res != VK_SUCCESS) {
@@ -477,69 +589,49 @@ vkQueuePresentKHR(VkQueue queue, const VkPresentInfoKHR* present_info) {
return res;
}
- std::cout << "queuePresentKHR called for queue " << queue << '\n';
-
- // Update all of our information about this queue's timestamp pool!
- queue_context.timestamp_pool->poll();
-
- // While we might be submitting on this queue, let's see what our timeline
- // semaphore says we're at.
- uint64_t value = 0;
- if (const auto res = vtable.GetSemaphoreCounterValueKHR(
- queue_context.device_context.device, queue_context.semaphore,
- &value);
- res != VK_SUCCESS) {
-
- return res;
- }
-
- std::cout << " frame_index: " << layer_context.current_frame << '\n';
- std::cout << " semaphore: " << value << '\n';
- std::cout << " queue: " << queue << '\n';
-
- ++layer_context.current_frame;
return VK_SUCCESS;
}
} // namespace low_latency
-static const auto instance_functions =
- std::unordered_map<std::string_view, const PFN_vkVoidFunction>{
- {"vkGetInstanceProcAddr",
- reinterpret_cast<PFN_vkVoidFunction>(LowLatency_GetInstanceProcAddr)},
+using func_map_t = std::unordered_map<std::string_view, PFN_vkVoidFunction>;
+#define HOOK_ENTRY(vk_name_literal, fn_sym) \
+ {vk_name_literal, reinterpret_cast<PFN_vkVoidFunction>(fn_sym)}
+static const auto instance_functions = func_map_t{
+ HOOK_ENTRY("vkCreateDevice", low_latency::CreateDevice),
- {"vkCreateInstance",
- reinterpret_cast<PFN_vkVoidFunction>(low_latency::CreateInstance)},
- {"vkDestroyInstance",
- reinterpret_cast<PFN_vkVoidFunction>(low_latency::DestroyInstance)},
- };
+ HOOK_ENTRY("vkGetInstanceProcAddr", LowLatency_GetInstanceProcAddr),
+ HOOK_ENTRY("vkGetDeviceProcAddr", LowLatency_GetDeviceProcAddr),
-static const auto device_functions =
- std::unordered_map<std::string_view, const PFN_vkVoidFunction>{
- {"vkGetDeviceProcAddr",
- reinterpret_cast<PFN_vkVoidFunction>(LowLatency_GetDeviceProcAddr)},
+ HOOK_ENTRY("vkEnumeratePhysicalDevices",
+ low_latency::EnumeratePhysicalDevices),
- {"vkCreateDevice",
- reinterpret_cast<PFN_vkVoidFunction>(low_latency::CreateDevice)},
- {"vkDestroyDevice",
- reinterpret_cast<PFN_vkVoidFunction>(low_latency::DestroyDevice)},
+ HOOK_ENTRY("vkCreateInstance", low_latency::CreateInstance),
+ HOOK_ENTRY("vkDestroyInstance", low_latency::DestroyInstance),
+};
+static const auto device_functions = func_map_t{
+ HOOK_ENTRY("vkGetDeviceProcAddr", LowLatency_GetDeviceProcAddr),
- {"vkGetDeviceQueue",
- reinterpret_cast<PFN_vkVoidFunction>(low_latency::GetDeviceQueue)},
- {"vkGetDeviceQueue2",
- reinterpret_cast<PFN_vkVoidFunction>(low_latency::GetDeviceQueue2)},
+ HOOK_ENTRY("vkDestroyDevice", low_latency::DestroyDevice),
- {"vkQueueSubmit",
- reinterpret_cast<PFN_vkVoidFunction>(low_latency::vkQueueSubmit)},
- {"vkQueueSubmit2",
- reinterpret_cast<PFN_vkVoidFunction>(low_latency::vkQueueSubmit2)},
+ HOOK_ENTRY("vkGetDeviceQueue", low_latency::GetDeviceQueue),
+ HOOK_ENTRY("vkGetDeviceQueue2", low_latency::GetDeviceQueue2),
- {"vkQueuePresentKHR",
- reinterpret_cast<PFN_vkVoidFunction>(low_latency::vkQueuePresentKHR)},
- };
+ HOOK_ENTRY("vkQueueSubmit", low_latency::vkQueueSubmit),
+ HOOK_ENTRY("vkQueueSubmit2", low_latency::vkQueueSubmit2),
+
+ HOOK_ENTRY("vkQueuePresentKHR", low_latency::vkQueuePresentKHR),
+
+ HOOK_ENTRY("vkAcquireNextImageKHR", low_latency::vkAcquireNextImageKHR),
+ HOOK_ENTRY("vkAcquireNextImage2KHR", low_latency::vkAcquireNextImage2KHR),
+};
+#undef HOOK_ENTRY
VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL
LowLatency_GetDeviceProcAddr(VkDevice device, const char* const pName) {
+ if (!pName || !device) {
+ return nullptr;
+ }
if (const auto it = device_functions.find(pName);
it != std::end(device_functions)) {
@@ -547,26 +639,20 @@ LowLatency_GetDeviceProcAddr(VkDevice device, const char* const pName) {
return it->second;
}
- const auto lock = std::scoped_lock{low_latency::layer_context.mutex};
-
using namespace low_latency;
- const auto& context = layer_context.get_context<DeviceContext>(device);
- return context.vtable.GetDeviceProcAddr(device, pName);
+ const auto& vtable = layer_context.get_context(device)->vtable;
+ return vtable.GetDeviceProcAddr(device, pName);
}
VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL
LowLatency_GetInstanceProcAddr(VkInstance instance, const char* const pName) {
+ if (const auto it = instance_functions.find(pName);
+ it != std::end(instance_functions)) {
- for (const auto& functions : {device_functions, instance_functions}) {
-
- if (const auto it = functions.find(pName); it != std::end(functions)) {
- return it->second;
- }
+ return it->second;
}
- const auto lock = std::scoped_lock{low_latency::layer_context.mutex};
-
using namespace low_latency;
- const auto& context = layer_context.get_context<InstanceContext>(instance);
- return context.vtable.GetInstanceProcAddr(instance, pName);
-} \ No newline at end of file
+ const auto& vtable = layer_context.get_context(instance)->vtable;
+ return vtable.GetInstanceProcAddr(instance, pName);
+}
diff --git a/src/layer_context.hh b/src/layer_context.hh
index 228efa3..59861a7 100644
--- a/src/layer_context.hh
+++ b/src/layer_context.hh
@@ -2,10 +2,13 @@
#define LAYER_CONTEXT_HH_
#include <mutex>
-#include <variant>
+#include <unordered_map>
+#include <vulkan/vulkan_core.h>
+#include "context.hh"
#include "device_context.hh"
#include "instance_context.hh"
+#include "physical_device_context.hh"
#include "queue_context.hh"
// The purpose of this file is to provide a definition for the highest level
@@ -19,63 +22,58 @@
namespace low_latency {
+// All these templates do is make it so we can go from some DispatchableType
+// to their respective contexts with nice syntax.
+
template <typename T>
concept DispatchableType =
std::same_as<std::remove_cvref_t<T>, VkInstance> ||
- std::same_as<std::remove_cvref_t<T>, VkDevice> ||
std::same_as<std::remove_cvref_t<T>, VkPhysicalDevice> ||
+ std::same_as<std::remove_cvref_t<T>, VkDevice> ||
std::same_as<std::remove_cvref_t<T>, VkQueue>;
-struct LayerContext {
- public:
- using ContextVariant = std::variant<std::unique_ptr<DeviceContext>,
- std::unique_ptr<InstanceContext>>;
+template <class D> struct context_for_t;
+template <> struct context_for_t<VkInstance> {
+ using context = InstanceContext;
+};
+template <> struct context_for_t<VkPhysicalDevice> {
+ using context = PhysicalDeviceContext;
+};
+template <> struct context_for_t<VkDevice> {
+ using context = DeviceContext;
+};
+template <> struct context_for_t<VkQueue> {
+ using context = QueueContext;
+};
+template <DispatchableType D>
+using dispatch_context_t = typename context_for_t<D>::context;
+struct LayerContext final : public Context {
public:
std::mutex mutex;
- std::unordered_map<void*, ContextVariant> contexts;
- std::uint64_t current_frame = 0;
+ std::unordered_map<void*, std::shared_ptr<Context>> contexts;
public:
LayerContext();
- LayerContext(const LayerContext&) = delete;
- LayerContext(LayerContext&&) = delete;
- LayerContext operator==(const LayerContext&) = delete;
- LayerContext operator==(LayerContext&&) = delete;
- ~LayerContext();
+ virtual ~LayerContext();
public:
- template <DispatchableType T> static void* get_key(const T& dt) {
- return *reinterpret_cast<void**>(dt);
+ template <DispatchableType DT> static void* get_key(const DT& dt) {
+ return reinterpret_cast<void*>(dt);
}
- template <typename T, DispatchableType DispatchableType>
- requires(!std::same_as<T, QueueContext>)
- T& get_context(const DispatchableType& dt) {
+ template <DispatchableType DT>
+ std::shared_ptr<dispatch_context_t<DT>> get_context(const DT& dt) {
const auto key = get_key(dt);
+ const auto lock = std::scoped_lock(this->mutex);
const auto it = this->contexts.find(key);
assert(it != std::end(this->contexts));
- const auto ptr = std::get_if<std::unique_ptr<T>>(&it->second);
- assert(ptr && *ptr);
-
- return **ptr;
- }
-
- // QueueContext's are actually owned by a device so look there instead.
- template <typename T, DispatchableType DispatchableType>
- requires(std::same_as<T, QueueContext>)
- T& get_context(const DispatchableType& dt) {
-
- const auto& device_context = this->get_context<DeviceContext>(dt);
- const auto& queue_context = device_context.queue_contexts;
-
- const auto it = device_context.queue_contexts.find(dt);
- assert(it != std::end(queue_context));
-
- const auto& ptr = it->second;
- return *ptr;
+ using context_t = dispatch_context_t<DT>;
+ auto ptr = std::dynamic_pointer_cast<context_t>(it->second);
+ assert(ptr);
+ return ptr;
}
};
diff --git a/src/physical_device_context.cc b/src/physical_device_context.cc
new file mode 100644
index 0000000..105b840
--- /dev/null
+++ b/src/physical_device_context.cc
@@ -0,0 +1,11 @@
+#include "physical_device_context.hh"
+
+namespace low_latency {
+
+PhysicalDeviceContext::PhysicalDeviceContext(
+ InstanceContext& instance_context, const VkPhysicalDevice& physical_device)
+ : instance(instance_context), physical_device(physical_device) {}
+
+PhysicalDeviceContext::~PhysicalDeviceContext() {}
+
+} // namespace low_latency \ No newline at end of file
diff --git a/src/physical_device_context.hh b/src/physical_device_context.hh
new file mode 100644
index 0000000..639fa0f
--- /dev/null
+++ b/src/physical_device_context.hh
@@ -0,0 +1,26 @@
+#ifndef PHYSICAL_DEVICE_CONTEXT_HH_
+#define PHYSICAL_DEVICE_CONTEXT_HH_
+
+#include "instance_context.hh"
+
+#include <vulkan/vulkan.hpp>
+
+#include "context.hh"
+
+namespace low_latency {
+
+class PhysicalDeviceContext final : public Context {
+ public:
+ InstanceContext& instance;
+
+ const VkPhysicalDevice physical_device;
+
+ public:
+ PhysicalDeviceContext(InstanceContext& instance_context,
+ const VkPhysicalDevice& physical_device);
+ virtual ~PhysicalDeviceContext();
+};
+
+} // namespace low_latency
+
+#endif \ No newline at end of file
diff --git a/src/queue_context.cc b/src/queue_context.cc
index 8f7d571..930b0c5 100644
--- a/src/queue_context.cc
+++ b/src/queue_context.cc
@@ -51,12 +51,15 @@ QueueContext::QueueContext(DeviceContext& device_context, const VkQueue& queue,
timestamp_pool(std::make_unique<TimestampPool>(*this)) {}
QueueContext::~QueueContext() {
+
+ // Drop retained handles first: Handle::~Handle returns its query index to the pool, so the pool must still exist.
+ this->handle_hack.clear();
+
// Ugly - destructors of timestamp_pool should be called before we destroy
// our vulkan objects.
this->timestamp_pool.reset();
const auto& vtable = this->device_context.vtable;
-
vtable.DestroySemaphore(this->device_context.device, this->semaphore,
nullptr);
vtable.DestroyCommandPool(this->device_context.device, this->command_pool,
diff --git a/src/queue_context.hh b/src/queue_context.hh
index 49bfcdf..184e31d 100644
--- a/src/queue_context.hh
+++ b/src/queue_context.hh
@@ -1,37 +1,39 @@
#ifndef QUEUE_STATE_HH_
#define QUEUE_STATE_HH_
+#include "context.hh"
#include "timestamp_pool.hh"
#include <vulkan/utility/vk_dispatch_table.h>
#include <vulkan/vulkan.hpp>
#include <memory>
+#include <deque>
namespace low_latency {
-
+
class DeviceContext;
-class QueueContext final {
+class QueueContext final : public Context {
public:
DeviceContext& device_context;
const VkQueue queue;
const std::uint32_t queue_family_index;
+ // this is incremented and tied to our semaphore
+ std::uint64_t semaphore_sequence = 0;
VkSemaphore semaphore;
+
VkCommandPool command_pool;
std::unique_ptr<TimestampPool> timestamp_pool;
+ std::deque<std::unique_ptr<TimestampPool::Handle>> handle_hack;
public:
QueueContext(DeviceContext& device_context, const VkQueue& queue,
const std::uint32_t& queue_family_index);
- QueueContext(const QueueContext&) = delete;
- QueueContext(QueueContext&&) = delete;
- QueueContext operator==(const QueueContext&) = delete;
- QueueContext operator==(QueueContext&&) = delete;
- ~QueueContext();
+ virtual ~QueueContext();
};
}; // namespace low_latency
diff --git a/src/timestamp_pool.cc b/src/timestamp_pool.cc
index a70c299..e37dcd2 100644
--- a/src/timestamp_pool.cc
+++ b/src/timestamp_pool.cc
@@ -17,6 +17,7 @@ TimestampPool::Block TimestampPool::allocate() {
.queryCount = this->TIMESTAMP_QUERY_POOL_SIZE};
auto query_pool = VkQueryPool{};
+
device_context.vtable.CreateQueryPool(device_context.device, &qpci,
nullptr, &query_pool);
return query_pool;
@@ -42,6 +43,9 @@ TimestampPool::Block TimestampPool::allocate() {
};
device_context.vtable.AllocateCommandBuffers(
device_context.device, &cbai, std::data(command_buffers));
+ std::ranges::for_each(command_buffers, [&](const auto& cb) {
+ device_context.sdld(device_context.device, cb);
+ });
return std::make_unique<std::vector<VkCommandBuffer>>(command_buffers);
}();
@@ -103,7 +107,7 @@ TimestampPool::Handle::Handle(
command_buffers(command_buffers) {}
TimestampPool::Handle::~Handle() {
- this->index_origin.insert(this->query_index);
+ assert(this->index_origin.insert(this->query_index).second);
}
void TimestampPool::Handle::setup_command_buffers(
@@ -174,7 +178,6 @@ std::uint64_t TimestampPool::get_polled(const Handle& handle) {
TimestampPool::~TimestampPool() {
const auto& device = this->queue_context.device_context.device;
const auto& vtable = this->queue_context.device_context.vtable;
-
for (const auto& block : this->blocks) {
vtable.FreeCommandBuffers(device, this->queue_context.command_pool,
std::size(*block.command_buffers),
diff --git a/src/timestamp_pool.hh b/src/timestamp_pool.hh
index 82c4721..cc67b18 100644
--- a/src/timestamp_pool.hh
+++ b/src/timestamp_pool.hh
@@ -69,18 +69,18 @@ class TimestampPool final {
public:
// A handle represents two std::uint64_t blocks of timestamp memory and two
// command buffers.
- struct Handle {
+ struct Handle final {
private:
friend class TimestampPool;
private:
available_query_indicies_t& index_origin;
- std::size_t block_index;
+ const std::size_t block_index;
public:
- VkQueryPool query_pool;
- std::uint64_t query_index;
- std::array<VkCommandBuffer, 2> command_buffers;
+ const VkQueryPool query_pool;
+ const std::uint64_t query_index;
+ const std::array<VkCommandBuffer, 2> command_buffers;
public:
Handle(TimestampPool::available_query_indicies_t& index_origin,
@@ -89,8 +89,8 @@ class TimestampPool final {
const std::array<VkCommandBuffer, 2>& command_buffers);
Handle(const Handle& handle) = delete;
Handle(Handle&&) = delete;
- Handle operator==(const Handle& handle) = delete;
- Handle operator==(Handle&&) = delete;
+ Handle operator=(const Handle& handle) = delete;
+ Handle operator=(Handle&&) = delete;
~Handle(); // frees from the pool
public:
@@ -104,8 +104,8 @@ class TimestampPool final {
TimestampPool(QueueContext& queue_context);
TimestampPool(const TimestampPool&) = delete;
TimestampPool(TimestampPool&&) = delete;
- TimestampPool operator==(const TimestampPool&) = delete;
- TimestampPool operator==(TimestampPool&&) = delete;
+ TimestampPool operator=(const TimestampPool&) = delete;
+ TimestampPool operator=(TimestampPool&&) = delete;
~TimestampPool();
public: