aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorNicolas James <Eele1Ephe7uZahRie@tutanota.com>2026-02-11 23:19:15 +1100
committerNicolas James <Eele1Ephe7uZahRie@tutanota.com>2026-02-11 23:19:15 +1100
commit76f3ef1d7c2b4393a8e8b402deb924e606448d27 (patch)
treea291bec0544f007536a41ec1f590338aee1163e9
parent77e2be172718878b38999efc247ce7571435fcc8 (diff)
More cleanup, fix lifetime and mutex issues
-rw-r--r--CMakeLists.txt19
-rw-r--r--src/context.cc9
-rw-r--r--src/context.hh21
-rw-r--r--src/device_context.cc12
-rw-r--r--src/device_context.hh15
-rw-r--r--src/instance_context.cc9
-rw-r--r--src/instance_context.hh17
-rw-r--r--src/latency_controller.hh15
-rw-r--r--src/layer.cc586
-rw-r--r--src/layer_context.hh72
-rw-r--r--src/physical_device_context.cc11
-rw-r--r--src/physical_device_context.hh26
-rw-r--r--src/queue_context.cc5
-rw-r--r--src/queue_context.hh16
-rw-r--r--src/timestamp_pool.cc7
-rw-r--r--src/timestamp_pool.hh18
16 files changed, 522 insertions, 336 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 09daa4c..021f56b 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -39,3 +39,22 @@ add_custom_command(TARGET ${LIBRARY_NAME} POST_BUILD
"${CMAKE_CURRENT_SOURCE_DIR}/low_latency_layer.json"
"${OUTPUT_DIR}/"
)
+
+set(SANITIZE_FLAGS
+ -fsanitize=address,undefined,leak
+ -fno-omit-frame-pointer
+ -fno-optimize-sibling-calls
+ -fno-sanitize-recover=all
+ -fsanitize-address-use-after-scope
+)
+
+target_compile_options(${LIBRARY_NAME} PRIVATE
+ #${SANITIZE_FLAGS}
+ -g3
+ -O1
+ -D_GLIBCXX_ASSERTIONS
+)
+
+target_link_options(${LIBRARY_NAME} PRIVATE
+ #${SANITIZE_FLAGS}
+)
diff --git a/src/context.cc b/src/context.cc
new file mode 100644
index 0000000..ff93c36
--- /dev/null
+++ b/src/context.cc
@@ -0,0 +1,9 @@
+#include "context.hh"
+
+namespace low_latency {
+
+Context::Context() {}
+
+Context::~Context() {}
+
+} // namespace low_latency \ No newline at end of file
diff --git a/src/context.hh b/src/context.hh
new file mode 100644
index 0000000..5972740
--- /dev/null
+++ b/src/context.hh
@@ -0,0 +1,21 @@
+#ifndef CONTEXT_HH_
+#define CONTEXT_HH_
+
+// Base class for all Context types: non-copyable, non-movable, with a
+// virtual destructor so derived contexts can be deleted through a base pointer.
+
+namespace low_latency {
+
+class Context {
+
+public:
+ Context();
+ Context(const Context& context) = delete;
+ Context(Context&& context) = delete;
+ Context operator=(const Context& context) = delete;
+ Context operator=(Context&& context) = delete;
+ virtual ~Context();
+};
+
+} // namespace low_latency
+
+#endif \ No newline at end of file
diff --git a/src/device_context.cc b/src/device_context.cc
index 4be1872..5f5c1f7 100644
--- a/src/device_context.cc
+++ b/src/device_context.cc
@@ -7,9 +7,17 @@ namespace low_latency {
DeviceContext::DeviceContext(InstanceContext& parent_instance,
const VkDevice& device,
+ const PFN_vkSetDeviceLoaderData& sdld,
VkuDeviceDispatchTable&& vtable)
- : instance(parent_instance), device(device), vtable(std::move(vtable))
+ : instance(parent_instance), device(device), sdld(sdld),
+ vtable(std::move(vtable)) {}
-{}
+DeviceContext::~DeviceContext() {
+    // Let the destructor handle clearing; the queue contexts should be
+    // uniquely owned by now (i.e., already removed from the layer's context map).
+ for (const auto& [queue, queue_context] : this->queues) {
+ assert(queue_context.unique());
+ }
+}
} // namespace low_latency \ No newline at end of file
diff --git a/src/device_context.hh b/src/device_context.hh
index a936d6d..3406da1 100644
--- a/src/device_context.hh
+++ b/src/device_context.hh
@@ -5,29 +5,32 @@
#include <unordered_map>
#include <vulkan/utility/vk_dispatch_table.h>
+#include <vulkan/vk_layer.h>
#include <vulkan/vulkan.hpp>
+#include "context.hh"
#include "instance_context.hh"
namespace low_latency {
class QueueContext;
-struct DeviceContext {
+struct DeviceContext final : public Context {
InstanceContext& instance;
const VkDevice device;
const VkuDeviceDispatchTable vtable;
- std::unordered_map<VkQueue, std::unique_ptr<QueueContext>> queue_contexts;
+ // Do we need to use this unless we wrap dispatchable objects?
+ const PFN_vkSetDeviceLoaderData sdld;
+
+ std::unordered_map<VkQueue, std::shared_ptr<QueueContext>> queues;
public:
DeviceContext(InstanceContext& parent_instance, const VkDevice& device,
+ const PFN_vkSetDeviceLoaderData& sdld,
VkuDeviceDispatchTable&& vtable);
- DeviceContext(const DeviceContext&) = delete;
- DeviceContext(DeviceContext&&) = delete;
- DeviceContext operator==(const DeviceContext&) = delete;
- DeviceContext operator==(DeviceContext&&) = delete;
+ virtual ~DeviceContext();
};
}; // namespace low_latency
diff --git a/src/instance_context.cc b/src/instance_context.cc
index 36d2c66..d12766f 100644
--- a/src/instance_context.cc
+++ b/src/instance_context.cc
@@ -1,5 +1,6 @@
#include "instance_context.hh"
+#include <cassert>
#include <utility>
namespace low_latency {
@@ -8,6 +9,12 @@ InstanceContext::InstanceContext(const VkInstance& instance,
VkuInstanceDispatchTable&& vtable)
: instance(instance), vtable(std::move(vtable)) {}
-InstanceContext::~InstanceContext() {}
+InstanceContext::~InstanceContext() {
+    // As with devices, we should hold the only shared_ptr to each physical
+    // device context at this point, so they are destroyed here.
+ for (const auto& [device, device_context] : this->phys_devices) {
+ assert(device_context.unique());
+ }
+}
} // namespace low_latency \ No newline at end of file
diff --git a/src/instance_context.hh b/src/instance_context.hh
index 0a0b999..3b71a82 100644
--- a/src/instance_context.hh
+++ b/src/instance_context.hh
@@ -3,21 +3,26 @@
#include <vulkan/utility/vk_dispatch_table.h>
+#include <memory>
+#include <unordered_map>
+
+#include "context.hh"
+
namespace low_latency {
-struct InstanceContext {
+class PhysicalDeviceContext;
+
+struct InstanceContext final : public Context {
const VkInstance instance;
const VkuInstanceDispatchTable vtable;
+ std::unordered_map<void*, std::shared_ptr<PhysicalDeviceContext>> phys_devices;
+
public:
InstanceContext(const VkInstance& instance,
VkuInstanceDispatchTable&& vtable);
- InstanceContext(const InstanceContext&) = delete;
- InstanceContext(InstanceContext&&) = delete;
- InstanceContext operator==(const InstanceContext&) = delete;
- InstanceContext operator==(InstanceContext&&) = delete;
- ~InstanceContext();
+ virtual ~InstanceContext();
};
}; // namespace low_latency
diff --git a/src/latency_controller.hh b/src/latency_controller.hh
deleted file mode 100644
index 6672d5a..0000000
--- a/src/latency_controller.hh
+++ /dev/null
@@ -1,15 +0,0 @@
-#ifndef LATENCY_CONTROLLER_HH_
-#define LATENCY_CONTROLLER_HH_
-
-// The purpose of this file is to provide
-
-namespace low_latency {
-
-class LatencyController final {
-
-
-};
-
-};
-
-#endif \ No newline at end of file
diff --git a/src/layer.cc b/src/layer.cc
index 5e652f0..cead7cd 100644
--- a/src/layer.cc
+++ b/src/layer.cc
@@ -5,6 +5,9 @@
#include <unordered_map>
#include <utility>
+// hack
+#include <deque>
+
#include <vulkan/utility/vk_dispatch_table.h>
#include <vulkan/vk_layer.h>
#include <vulkan/vk_platform.h>
@@ -16,6 +19,7 @@
#include "instance_context.hh"
#include "layer_context.hh"
#include "queue_context.hh"
+#include "timestamp_pool.hh"
namespace low_latency {
@@ -25,8 +29,9 @@ LayerContext layer_context;
} // namespace
-template <typename T, typename sType>
-static T* get_link_info(const void* const head, const sType& stype) {
+template <typename T, typename sType, typename fType>
+static T* get_link_info(const void* const head, const sType& stype,
+ const fType& ftype) {
for (auto i = reinterpret_cast<const VkBaseInStructure*>(head); i;
i = i->pNext) {
@@ -35,7 +40,7 @@ static T* get_link_info(const void* const head, const sType& stype) {
}
const auto info = reinterpret_cast<const T*>(i);
- if (info->function != VK_LAYER_LINK_INFO) {
+ if (info->function != ftype) {
continue;
}
@@ -49,7 +54,8 @@ CreateInstance(const VkInstanceCreateInfo* pCreateInfo,
const VkAllocationCallbacks* pAllocator, VkInstance* pInstance) {
const auto link_info = get_link_info<VkLayerInstanceCreateInfo>(
- pCreateInfo->pNext, VK_STRUCTURE_TYPE_LOADER_INSTANCE_CREATE_INFO);
+ pCreateInfo->pNext, VK_STRUCTURE_TYPE_LOADER_INSTANCE_CREATE_INFO,
+ VK_LAYER_LINK_INFO);
if (!link_info || !link_info->u.pLayerInfo) {
return VK_ERROR_INITIALIZATION_FAILED;
@@ -78,23 +84,23 @@ CreateInstance(const VkInstanceCreateInfo* pCreateInfo,
}
const auto key = layer_context.get_key(*pInstance);
+
+#define INSTANCE_VTABLE_LOAD(name) \
+ .name = reinterpret_cast<PFN_vk##name>(gipa(*pInstance, "vk" #name))
auto vtable = VkuInstanceDispatchTable{
- .DestroyInstance = reinterpret_cast<PFN_vkDestroyInstance>(
- gipa(*pInstance, "vkDestroyInstance")),
- .EnumeratePhysicalDevices =
- reinterpret_cast<PFN_vkEnumeratePhysicalDevices>(
- gipa(*pInstance, "vkEnumeratePhysicalDevices")),
- .GetInstanceProcAddr = reinterpret_cast<PFN_vkGetInstanceProcAddr>(
- gipa(*pInstance, "vkGetInstanceProcAddr")),
- .EnumerateDeviceExtensionProperties =
- reinterpret_cast<PFN_vkEnumerateDeviceExtensionProperties>(
- gipa(*pInstance, "vkEnumerateDeviceExtensionProperties")),
+ INSTANCE_VTABLE_LOAD(DestroyInstance),
+ INSTANCE_VTABLE_LOAD(EnumeratePhysicalDevices),
+ INSTANCE_VTABLE_LOAD(GetInstanceProcAddr),
+ INSTANCE_VTABLE_LOAD(CreateDevice),
+ INSTANCE_VTABLE_LOAD(EnumerateDeviceExtensionProperties),
};
+#undef INSTANCE_VTABLE_LOAD
const auto lock = std::scoped_lock{layer_context.mutex};
assert(!layer_context.contexts.contains(key));
+
layer_context.contexts.try_emplace(
- key, std::make_unique<InstanceContext>(*pInstance, std::move(vtable)));
+ key, std::make_shared<InstanceContext>(*pInstance, std::move(vtable)));
return VK_SUCCESS;
}
@@ -102,11 +108,55 @@ CreateInstance(const VkInstanceCreateInfo* pCreateInfo,
static VKAPI_ATTR void VKAPI_CALL
DestroyInstance(VkInstance instance, const VkAllocationCallbacks* allocator) {
+ const auto destroy_instance_func = [&]() -> auto {
+ const auto context = layer_context.get_context(instance);
+ const auto lock = std::scoped_lock{layer_context.mutex};
+
+        // Erase the physical-device contexts owned by this instance from the
+        // global context map.
+ for (const auto& [key, _] : context->phys_devices) {
+ assert(layer_context.contexts.erase(key));
+ }
+
+ const auto key = layer_context.get_key(instance);
+ assert(layer_context.contexts.erase(key));
+
+        // Should be the last shared_ptr now, as in DestroyDevice.
+ assert(context.unique());
+ return context->vtable.DestroyInstance;
+ }();
+
+ destroy_instance_func(instance, allocator);
+}
+
+static VKAPI_ATTR VkResult VKAPI_CALL EnumeratePhysicalDevices(
+ VkInstance instance, std::uint32_t* count, VkPhysicalDevice* devices) {
+
+ const auto context = layer_context.get_context(instance);
+
+ if (const auto result =
+ context->vtable.EnumeratePhysicalDevices(instance, count, devices);
+ !devices || !count || result != VK_SUCCESS) {
+
+ return result;
+ }
+
const auto lock = std::scoped_lock{layer_context.mutex};
+ const auto C = *count;
+ for (auto i = std::uint32_t{0}; i < C; ++i) {
+ const auto& device = devices[i];
+
+ const auto key = layer_context.get_key(device);
+ const auto [it, inserted] =
+ layer_context.contexts.try_emplace(key, nullptr);
- const auto key = layer_context.get_key(instance);
- assert(layer_context.contexts.contains(key));
- layer_context.contexts.erase(key);
+ if (inserted) {
+ it->second =
+ std::make_shared<PhysicalDeviceContext>(*context, device);
+ }
+ }
+
+ return VK_SUCCESS;
}
static VKAPI_ATTR VkResult VKAPI_CALL CreateDevice(
@@ -114,56 +164,64 @@ static VKAPI_ATTR VkResult VKAPI_CALL CreateDevice(
const VkAllocationCallbacks* pAllocator, VkDevice* pDevice) {
const auto create_info = get_link_info<VkLayerDeviceCreateInfo>(
- pCreateInfo->pNext, VK_STRUCTURE_TYPE_LOADER_DEVICE_CREATE_INFO);
+ pCreateInfo->pNext, VK_STRUCTURE_TYPE_LOADER_DEVICE_CREATE_INFO,
+ VK_LAYER_LINK_INFO);
if (!create_info || !create_info->u.pLayerInfo) {
return VK_ERROR_INITIALIZATION_FAILED;
}
+ const auto callback_info = get_link_info<VkLayerDeviceCreateInfo>(
+ pCreateInfo->pNext, VK_STRUCTURE_TYPE_LOADER_DEVICE_CREATE_INFO,
+ VK_LOADER_DATA_CALLBACK);
+ if (!callback_info || !callback_info->u.pLayerInfo) {
+ return VK_ERROR_INITIALIZATION_FAILED;
+ }
+
+ const auto sdld = callback_info->u.pfnSetDeviceLoaderData;
const auto gipa = create_info->u.pLayerInfo->pfnNextGetInstanceProcAddr;
const auto gdpa = create_info->u.pLayerInfo->pfnNextGetDeviceProcAddr;
- if (!gipa || !gdpa) {
+ if (!sdld || !gipa || !gdpa) {
return VK_ERROR_INITIALIZATION_FAILED;
}
create_info->u.pLayerInfo = create_info->u.pLayerInfo->pNext;
- const auto lock = std::scoped_lock{layer_context.mutex};
-
- auto& context = layer_context.get_context<InstanceContext>(physical_device);
+ const auto physical_device_context =
+ layer_context.get_context(physical_device);
+ auto& instance_context = physical_device_context->instance;
const auto next_extensions =
[&]() -> std::optional<std::vector<const char*>> {
- const auto supported_extensions =
- [&]() -> std::optional<std::vector<VkExtensionProperties>> {
- const auto enumerate_device_extensions =
- reinterpret_cast<PFN_vkEnumerateDeviceExtensionProperties>(gipa(
- context.instance, "vkEnumerateDeviceExtensionProperties"));
- if (!enumerate_device_extensions) {
- return std::nullopt;
- }
+ const auto enumerate_device_extensions =
+ reinterpret_cast<PFN_vkEnumerateDeviceExtensionProperties>(
+ gipa(instance_context.instance,
+ "vkEnumerateDeviceExtensionProperties"));
+ if (!enumerate_device_extensions) {
+ return std::nullopt;
+ }
- auto count = std::uint32_t{};
- if (enumerate_device_extensions(physical_device, nullptr, &count,
- nullptr) != VK_SUCCESS) {
+ auto count = std::uint32_t{};
+ if (enumerate_device_extensions(physical_device, nullptr, &count,
+ nullptr) != VK_SUCCESS) {
- return std::nullopt;
- }
+ return std::nullopt;
+ }
- auto supported_extensions =
- std::vector<VkExtensionProperties>(count);
- if (enumerate_device_extensions(physical_device, nullptr, &count,
- std::data(supported_extensions)) !=
- VK_SUCCESS) {
+ auto supported_extensions = std::vector<VkExtensionProperties>(count);
+ if (enumerate_device_extensions(physical_device, nullptr, &count,
+ std::data(supported_extensions)) !=
+ VK_SUCCESS) {
- return std::nullopt;
- }
+ return std::nullopt;
+ }
- return supported_extensions;
- }();
+ auto next_extensions = std::vector<const char*>{};
+ if (pCreateInfo->enabledExtensionCount &&
+ pCreateInfo->ppEnabledExtensionNames) {
- auto next_extensions =
- std::vector{*pCreateInfo->ppEnabledExtensionNames,
- std::next(*pCreateInfo->ppEnabledExtensionNames +
- pCreateInfo->enabledExtensionCount)};
+ std::ranges::copy_n(pCreateInfo->ppEnabledExtensionNames,
+ pCreateInfo->enabledExtensionCount,
+ std::back_inserter(next_extensions));
+ }
const auto wanted_extensions = {
VK_KHR_SYNCHRONIZATION_2_EXTENSION_NAME,
@@ -180,12 +238,11 @@ static VKAPI_ATTR VkResult VKAPI_CALL CreateDevice(
continue; // Already included, ignore it.
}
- if (std::ranges::none_of(*supported_extensions,
- [&](const auto& supported_extension) {
- return !std::strcmp(
- supported_extension.extensionName,
- wanted);
- })) {
+ if (std::ranges::none_of(
+ supported_extensions, [&](const auto& supported_extension) {
+ return !std::strcmp(supported_extension.extensionName,
+ wanted);
+ })) {
return std::nullopt; // We don't support it, the layer can't
// work.
@@ -201,8 +258,7 @@ static VKAPI_ATTR VkResult VKAPI_CALL CreateDevice(
return VK_ERROR_INITIALIZATION_FAILED;
}
- const auto create_device = reinterpret_cast<PFN_vkCreateDevice>(
- gipa(VK_NULL_HANDLE, "vkCreateDevice"));
+ const auto create_device = instance_context.vtable.CreateDevice;
if (!create_device) {
return VK_ERROR_INITIALIZATION_FAILED;
}
@@ -221,164 +277,199 @@ static VKAPI_ATTR VkResult VKAPI_CALL CreateDevice(
return result;
}
+#define DEVICE_VTABLE_LOAD(name) \
+ .name = reinterpret_cast<PFN_vk##name>(gdpa(*pDevice, "vk" #name))
auto vtable = VkuDeviceDispatchTable{
- .GetDeviceProcAddr = reinterpret_cast<PFN_vkGetDeviceProcAddr>(
- gdpa(*pDevice, "vkGetDeviceProcAddr")),
- .DestroyDevice = reinterpret_cast<PFN_vkDestroyDevice>(
- gdpa(*pDevice, "vkDestroyDevice")),
- .GetDeviceQueue = reinterpret_cast<PFN_vkGetDeviceQueue>(
- gdpa(*pDevice, "vkGetDeviceQueue")),
- .QueueSubmit = reinterpret_cast<PFN_vkQueueSubmit>(
- gdpa(*pDevice, "vkQueueSubmit")),
- .CreateSemaphore = reinterpret_cast<PFN_vkCreateSemaphore>(
- gdpa(*pDevice, "vkCreateSemaphore")),
- .DestroySemaphore = reinterpret_cast<PFN_vkDestroySemaphore>(
- gdpa(*pDevice, "vkDestroySemaphore")),
- .CreateQueryPool = reinterpret_cast<PFN_vkCreateQueryPool>(
- gdpa(*pDevice, "vkCreateQueryPool")),
- .DestroyQueryPool = reinterpret_cast<PFN_vkDestroyQueryPool>(
- gdpa(*pDevice, "vkDestroyQueryPool")),
- .GetQueryPoolResults = reinterpret_cast<PFN_vkGetQueryPoolResults>(
- gdpa(*pDevice, "vkGetQueryPoolResults")),
- .CreateCommandPool = reinterpret_cast<PFN_vkCreateCommandPool>(
- gdpa(*pDevice, "vkCreateCommandPool")),
- .DestroyCommandPool = reinterpret_cast<PFN_vkDestroyCommandPool>(
- gdpa(*pDevice, "vkDestroyCommandPool")),
- .AllocateCommandBuffers =
- reinterpret_cast<PFN_vkAllocateCommandBuffers>(
- gdpa(*pDevice, "vkAllocateCommandBuffers")),
- .FreeCommandBuffers = reinterpret_cast<PFN_vkFreeCommandBuffers>(
- gdpa(*pDevice, "vkFreeCommandBuffers")),
- .BeginCommandBuffer = reinterpret_cast<PFN_vkBeginCommandBuffer>(
- gdpa(*pDevice, "vkBeginCommandBuffer")),
- .EndCommandBuffer = reinterpret_cast<PFN_vkEndCommandBuffer>(
- gdpa(*pDevice, "vkEndCommandBuffer")),
- .ResetCommandBuffer = reinterpret_cast<PFN_vkResetCommandBuffer>(
- gdpa(*pDevice, "vkResetCommandBuffer")),
- .CmdDraw = reinterpret_cast<PFN_vkCmdDraw>(gdpa(*pDevice, "vkCmdDraw")),
- .CmdDrawIndexed = reinterpret_cast<PFN_vkCmdDrawIndexed>(
- gdpa(*pDevice, "vkCmdDrawIndexed")),
- .CmdResetQueryPool = reinterpret_cast<PFN_vkCmdResetQueryPool>(
- gdpa(*pDevice, "vkCmdResetQueryPool")),
- .GetDeviceQueue2 = reinterpret_cast<PFN_vkGetDeviceQueue2>(
- gdpa(*pDevice, "vkGetDeviceQueue2")),
- .QueueSubmit2 = reinterpret_cast<PFN_vkQueueSubmit2>(
- gdpa(*pDevice, "vkQueueSubmit2")),
- .QueuePresentKHR = reinterpret_cast<PFN_vkQueuePresentKHR>(
- gdpa(*pDevice, "vkQueuePresentKHR")),
- .GetSemaphoreCounterValueKHR =
- reinterpret_cast<PFN_vkGetSemaphoreCounterValueKHR>(
- gdpa(*pDevice, "vkGetSemaphoreCounterValueKHR")),
- .CmdWriteTimestamp2KHR = reinterpret_cast<PFN_vkCmdWriteTimestamp2KHR>(
- gdpa(*pDevice, "vkCmdWriteTimestamp2KHR")),
- .QueueSubmit2KHR = reinterpret_cast<PFN_vkQueueSubmit2KHR>(
- gdpa(*pDevice, "vkQueueSubmit2KHR")),
+ DEVICE_VTABLE_LOAD(GetDeviceProcAddr),
+ DEVICE_VTABLE_LOAD(DestroyDevice),
+ DEVICE_VTABLE_LOAD(GetDeviceQueue),
+ DEVICE_VTABLE_LOAD(QueueSubmit),
+ DEVICE_VTABLE_LOAD(CreateSemaphore),
+ DEVICE_VTABLE_LOAD(DestroySemaphore),
+ DEVICE_VTABLE_LOAD(CreateQueryPool),
+ DEVICE_VTABLE_LOAD(DestroyQueryPool),
+ DEVICE_VTABLE_LOAD(GetQueryPoolResults),
+ DEVICE_VTABLE_LOAD(CreateCommandPool),
+ DEVICE_VTABLE_LOAD(DestroyCommandPool),
+ DEVICE_VTABLE_LOAD(AllocateCommandBuffers),
+ DEVICE_VTABLE_LOAD(FreeCommandBuffers),
+ DEVICE_VTABLE_LOAD(BeginCommandBuffer),
+ DEVICE_VTABLE_LOAD(EndCommandBuffer),
+ DEVICE_VTABLE_LOAD(ResetCommandBuffer),
+ DEVICE_VTABLE_LOAD(CmdResetQueryPool),
+ DEVICE_VTABLE_LOAD(CmdDraw),
+ DEVICE_VTABLE_LOAD(CmdDrawIndexed),
+ DEVICE_VTABLE_LOAD(GetDeviceQueue2),
+ DEVICE_VTABLE_LOAD(QueueSubmit2),
+ DEVICE_VTABLE_LOAD(AcquireNextImageKHR),
+ DEVICE_VTABLE_LOAD(QueuePresentKHR),
+ DEVICE_VTABLE_LOAD(AcquireNextImage2KHR),
+ DEVICE_VTABLE_LOAD(GetSemaphoreCounterValueKHR),
+ DEVICE_VTABLE_LOAD(CmdWriteTimestamp2KHR),
+ DEVICE_VTABLE_LOAD(QueueSubmit2KHR),
};
+#undef DEVICE_VTABLE_LOAD
const auto key = layer_context.get_key(*pDevice);
+ const auto lock = std::scoped_lock{layer_context.mutex};
assert(!layer_context.contexts.contains(key));
+
layer_context.contexts.try_emplace(
- key,
- std::make_unique<DeviceContext>(context, *pDevice, std::move(vtable)));
+ key, std::make_shared<DeviceContext>(instance_context, *pDevice, sdld,
+ std::move(vtable)));
return VK_SUCCESS;
}
static VKAPI_ATTR void VKAPI_CALL
DestroyDevice(VkDevice device, const VkAllocationCallbacks* allocator) {
- const auto lock = std::scoped_lock{layer_context.mutex};
- const auto key = layer_context.get_key(device);
- assert(layer_context.contexts.contains(key));
- layer_context.contexts.erase(key);
+
+ const auto destroy_device_func = [&]() -> auto {
+ const auto device_context = layer_context.get_context(device);
+
+ const auto func = device_context->vtable.DestroyDevice;
+ const auto lock = std::scoped_lock{layer_context.mutex};
+ // Remove all owned queues from our global context pool.
+ for (const auto& [queue, _] : device_context->queues) {
+ const auto key = layer_context.get_key(queue);
+ assert(layer_context.contexts.erase(key));
+ }
+
+ const auto key = layer_context.get_key(device);
+ assert(layer_context.contexts.erase(key));
+
+        // This should be the last shared_ptr now, so its destructor runs here.
+        // The destructor expects its owned queue contexts to be unique as well.
+ assert(device_context.unique());
+
+ return func;
+ }();
+
+ destroy_device_func(device, allocator);
}
-// Small amount of duplication, we can't assume gdq2 is available apparently.
static VKAPI_ATTR void VKAPI_CALL
GetDeviceQueue(VkDevice device, std::uint32_t queue_family_index,
std::uint32_t queue_index, VkQueue* queue) {
- const auto lock = std::scoped_lock{layer_context.mutex};
-
- auto& device_context = layer_context.get_context<DeviceContext>(device);
+ const auto device_context = layer_context.get_context(device);
- device_context.vtable.GetDeviceQueue(device, queue_family_index,
- queue_index, queue);
+ device_context->vtable.GetDeviceQueue(device, queue_family_index,
+ queue_index, queue);
if (!queue || !*queue) {
return;
}
- auto& queue_contexts = device_context.queue_contexts;
- if (!queue_contexts.contains(*queue)) {
- queue_contexts.try_emplace(
- *queue, std::make_unique<QueueContext>(device_context, *queue,
- queue_family_index));
+    // Look in our layer context, which tracks every context. If try_emplace
+    // inserted a new (null) entry, the queue was not yet tracked and we must
+    // construct a context for it.
+ const auto key = layer_context.get_key(*queue);
+ const auto lock = std::scoped_lock{layer_context.mutex};
+ const auto [it, inserted] = layer_context.contexts.try_emplace(key);
+ if (inserted) {
+ it->second = std::make_shared<QueueContext>(*device_context, *queue,
+ queue_family_index);
}
+
+    // it->second should be a QueueContext; it may already have existed in the
+    // map, which is expected.
+ const auto ptr = std::dynamic_pointer_cast<QueueContext>(it->second);
+ assert(ptr);
+ device_context->queues.emplace(*queue, ptr);
}
+// Identical logic to GetDeviceQueue, so some duplication is unavoidable: we
+// cannot assume the v1 GetDeviceQueue entry point is available to delegate to.
static VKAPI_ATTR void VKAPI_CALL GetDeviceQueue2(
VkDevice device, const VkDeviceQueueInfo2* info, VkQueue* queue) {
- const auto lock = std::scoped_lock{layer_context.mutex};
- auto& device_context = layer_context.get_context<DeviceContext>(device);
+ const auto device_context = layer_context.get_context(device);
- device_context.vtable.GetDeviceQueue2(device, info, queue);
+ device_context->vtable.GetDeviceQueue2(device, info, queue);
if (!queue || !*queue) {
return;
}
- auto& queue_contexts = device_context.queue_contexts;
- if (!queue_contexts.contains(*queue)) {
- queue_contexts.try_emplace(
- *queue, std::make_unique<QueueContext>(device_context, *queue,
- info->queueFamilyIndex));
+ const auto key = layer_context.get_key(*queue);
+ const auto lock = std::scoped_lock{layer_context.mutex};
+ const auto [it, inserted] = layer_context.contexts.try_emplace(key);
+ if (inserted) {
+ it->second = std::make_shared<QueueContext>(*device_context, *queue,
+ info->queueFamilyIndex);
+ }
+
+ const auto ptr = std::dynamic_pointer_cast<QueueContext>(it->second);
+ assert(ptr);
+ device_context->queues.emplace(*queue, ptr);
+}
+
+static VKAPI_ATTR VkResult VKAPI_CALL vkAcquireNextImageKHR(
+ VkDevice device, VkSwapchainKHR swapchain, std::uint64_t timeout,
+ VkSemaphore semaphore, VkFence fence, std::uint32_t* pImageIndex) {
+
+ const auto context = layer_context.get_context(device);
+ if (const auto result = context->vtable.AcquireNextImageKHR(
+ device, swapchain, timeout, semaphore, fence, pImageIndex);
+ result != VK_SUCCESS) {
+
+ return result;
+ }
+
+ return VK_SUCCESS;
+}
+
+static VKAPI_ATTR VkResult VKAPI_CALL vkAcquireNextImage2KHR(
+ VkDevice device, const VkAcquireNextImageInfoKHR* pAcquireInfo,
+ std::uint32_t* pImageIndex) {
+
+ const auto context = layer_context.get_context(device);
+ if (const auto result = context->vtable.AcquireNextImage2KHR(
+ device, pAcquireInfo, pImageIndex);
+ result != VK_SUCCESS) {
+
+ return result;
}
+
+ return VK_SUCCESS;
}
static VKAPI_ATTR VkResult VKAPI_CALL
vkQueueSubmit(VkQueue queue, std::uint32_t submit_count,
const VkSubmitInfo* submit_info, VkFence fence) {
- const auto lock = std::scoped_lock{layer_context.mutex};
-
- auto& queue_context = layer_context.get_context<QueueContext>(queue);
- const auto& vtable = queue_context.device_context.vtable;
+ const auto& queue_context = layer_context.get_context(queue);
+ const auto& vtable = queue_context->device_context.vtable;
if (!submit_count) { // no-op submit we shouldn't worry about
return vtable.QueueSubmit(queue, submit_count, submit_info, fence);
}
- // Create a new vector of submit infos, copy their existing ones.
+ // Create a new vector of submit infos.
auto next_submit_infos = std::vector<VkSubmitInfo>{};
- next_submit_infos.reserve(submit_count + 2);
- auto timestamp_handle = queue_context.timestamp_pool->acquire();
+ auto timestamp_handle = queue_context->timestamp_pool->acquire();
timestamp_handle->setup_command_buffers(vtable);
const auto& [head_cb, tail_cb] = timestamp_handle->command_buffers;
- // The first submit info we use will steal their wait semaphores.
- next_submit_infos.push_back(VkSubmitInfo{
- .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO,
- .pNext = submit_info->pNext,
- .waitSemaphoreCount = submit_info[0].waitSemaphoreCount,
- .pWaitSemaphores = submit_info[0].pWaitSemaphores,
- .pWaitDstStageMask = submit_info[0].pWaitDstStageMask,
- .commandBufferCount = 1,
- .pCommandBuffers = &head_cb,
- });
+ const auto next_command_buffers = [&]() -> auto {
+ auto next_command_buffers = std::vector<VkCommandBuffer>{head_cb};
+ std::ranges::copy_n(submit_info[0].pCommandBuffers,
+ submit_info[0].commandBufferCount,
+ std::back_inserter(next_command_buffers));
+ return next_command_buffers;
+ }();
- // Fill in original submit infos but erase the wait semaphores on the
- // first because we stole them earlier.
std::ranges::copy_n(submit_info, submit_count,
std::back_inserter(next_submit_infos));
- next_submit_infos[1].pWaitSemaphores = nullptr;
- next_submit_infos[1].waitSemaphoreCount = 0u;
+ next_submit_infos[0].pCommandBuffers = std::data(next_command_buffers);
+ next_submit_infos[0].commandBufferCount = std::size(next_command_buffers);
- const auto TODO_next = std::uint64_t{layer_context.current_frame + 1};
+ const auto next_signal = queue_context->semaphore_sequence + 1;
const auto tail_tssi = VkTimelineSemaphoreSubmitInfo{
.sType = VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO_KHR,
.signalSemaphoreValueCount = 1,
- .pSignalSemaphoreValues = &TODO_next,
+ .pSignalSemaphoreValues = &next_signal,
};
next_submit_infos.push_back(VkSubmitInfo{
.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO,
@@ -386,7 +477,7 @@ vkQueueSubmit(VkQueue queue, std::uint32_t submit_count,
.commandBufferCount = 1,
.pCommandBuffers = &tail_cb,
.signalSemaphoreCount = 1,
- .pSignalSemaphores = &queue_context.semaphore,
+ .pSignalSemaphores = &queue_context->semaphore,
});
if (const auto res =
@@ -397,6 +488,14 @@ vkQueueSubmit(VkQueue queue, std::uint32_t submit_count,
return res;
}
+ // Hack for now, store timestamp handles.
+ queue_context->handle_hack.push_front(std::move(timestamp_handle));
+ if (std::size(queue_context->handle_hack) > 250) {
+ queue_context->handle_hack.pop_back();
+ }
+
+ ++queue_context->semaphore_sequence;
+
return VK_SUCCESS;
}
@@ -405,55 +504,69 @@ static VKAPI_ATTR VkResult VKAPI_CALL
vkQueueSubmit2(VkQueue queue, std::uint32_t submit_count,
const VkSubmitInfo2* submit_infos, VkFence fence) {
- const auto lock = std::scoped_lock{layer_context.mutex};
- auto& queue_context = layer_context.get_context<QueueContext>(queue);
- const auto& vtable = queue_context.device_context.vtable;
+ const auto queue_context = layer_context.get_context(queue);
+ const auto& vtable = queue_context->device_context.vtable;
- if (!submit_count) { // another no-op submit
+ if (!submit_count) {
return vtable.QueueSubmit2(queue, submit_count, submit_infos, fence);
}
- auto next_submit_infos = std::vector<VkSubmitInfo2>();
- next_submit_infos.reserve(submit_count + 2);
-
- auto timestamp_handle = queue_context.timestamp_pool->acquire();
+ auto timestamp_handle = queue_context->timestamp_pool->acquire();
timestamp_handle->setup_command_buffers(vtable);
const auto& [head_cb, tail_cb] = timestamp_handle->command_buffers;
- const auto head_cb_info = VkCommandBufferSubmitInfo{
- .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_SUBMIT_INFO,
- .commandBuffer = head_cb,
- };
- next_submit_infos.push_back(VkSubmitInfo2{
- .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO_2,
- .waitSemaphoreInfoCount = submit_infos[0].waitSemaphoreInfoCount,
- .pWaitSemaphoreInfos = submit_infos[0].pWaitSemaphoreInfos,
- .commandBufferInfoCount = 1,
- .pCommandBufferInfos = &head_cb_info,
- });
+ const auto next_command_buffers = [&]() -> auto {
+ auto next_command_buffers = std::vector<VkCommandBufferSubmitInfo>{};
+ next_command_buffers.push_back(VkCommandBufferSubmitInfo{
+ .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_SUBMIT_INFO,
+ .commandBuffer = head_cb,
+ });
+ std::ranges::copy_n(submit_infos[0].pCommandBufferInfos,
+ submit_infos[0].commandBufferInfoCount,
+ std::back_inserter(next_command_buffers));
+ return next_command_buffers;
+ }();
+
+ auto next_submit_infos = std::vector<VkSubmitInfo2>();
std::ranges::copy_n(submit_infos, submit_count,
std::back_inserter(next_submit_infos));
- next_submit_infos[1].pWaitSemaphoreInfos = nullptr;
- next_submit_infos[1].waitSemaphoreInfoCount = 0;
-
- const auto tail_cb_info = VkCommandBufferSubmitInfo{
+ next_submit_infos[0].pCommandBufferInfos = std::data(next_command_buffers);
+ next_submit_infos[0].commandBufferInfoCount =
+ std::size(next_command_buffers);
+
+ const auto tail_ssi = VkSemaphoreSubmitInfo{
+ .sType = VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO,
+ .semaphore = queue_context->semaphore,
+ .value = queue_context->semaphore_sequence + 1,
+ .stageMask = VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
+ };
+ const auto tail_cbsi = VkCommandBufferSubmitInfo{
.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_SUBMIT_INFO,
.commandBuffer = tail_cb,
};
next_submit_infos.push_back(VkSubmitInfo2{
.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO_2,
- .waitSemaphoreInfoCount = submit_infos[0].waitSemaphoreInfoCount,
- .pWaitSemaphoreInfos = submit_infos[0].pWaitSemaphoreInfos,
.commandBufferInfoCount = 1,
- .pCommandBufferInfos = &tail_cb_info,
+ .pCommandBufferInfos = &tail_cbsi,
+ .signalSemaphoreInfoCount = 1,
+ .pSignalSemaphoreInfos = &tail_ssi,
});
if (const auto res =
- vtable.QueueSubmit2(queue, submit_count, submit_infos, fence);
+ vtable.QueueSubmit2(queue, std::size(next_submit_infos),
+ std::data(next_submit_infos), fence);
res != VK_SUCCESS) {
return res;
}
+ // hack
+ queue_context->handle_hack.push_front(std::move(timestamp_handle));
+ if (std::size(queue_context->handle_hack) > 250) {
+ queue_context->handle_hack.pop_back();
+ }
+
+ ++queue_context->semaphore_sequence;
+
return VK_SUCCESS;
}
@@ -467,9 +580,8 @@ vkQueueSubmit2KHR(VkQueue queue, std::uint32_t submit_count,
static VKAPI_ATTR VkResult VKAPI_CALL
vkQueuePresentKHR(VkQueue queue, const VkPresentInfoKHR* present_info) {
- const auto lock = std::scoped_lock{layer_context.mutex};
- auto& queue_context = layer_context.get_context<QueueContext>(queue);
- const auto& vtable = queue_context.device_context.vtable;
+ const auto& vtable =
+ layer_context.get_context(queue)->device_context.vtable;
if (const auto res = vtable.QueuePresentKHR(queue, present_info);
res != VK_SUCCESS) {
@@ -477,69 +589,49 @@ vkQueuePresentKHR(VkQueue queue, const VkPresentInfoKHR* present_info) {
return res;
}
- std::cout << "queuePresentKHR called for queue " << queue << '\n';
-
- // Update all of our information about this queue's timestamp pool!
- queue_context.timestamp_pool->poll();
-
- // While we might be submitting on this queue, let's see what our timeline
- // semaphore says we're at.
- uint64_t value = 0;
- if (const auto res = vtable.GetSemaphoreCounterValueKHR(
- queue_context.device_context.device, queue_context.semaphore,
- &value);
- res != VK_SUCCESS) {
-
- return res;
- }
-
- std::cout << " frame_index: " << layer_context.current_frame << '\n';
- std::cout << " semaphore: " << value << '\n';
- std::cout << " queue: " << queue << '\n';
-
- ++layer_context.current_frame;
return VK_SUCCESS;
}
} // namespace low_latency
-static const auto instance_functions =
- std::unordered_map<std::string_view, const PFN_vkVoidFunction>{
- {"vkGetInstanceProcAddr",
- reinterpret_cast<PFN_vkVoidFunction>(LowLatency_GetInstanceProcAddr)},
+using func_map_t = std::unordered_map<std::string_view, PFN_vkVoidFunction>;
+#define HOOK_ENTRY(vk_name_literal, fn_sym) \
+ {vk_name_literal, reinterpret_cast<PFN_vkVoidFunction>(fn_sym)}
+static const auto instance_functions = func_map_t{
+ HOOK_ENTRY("vkCreateDevice", low_latency::CreateDevice),
- {"vkCreateInstance",
- reinterpret_cast<PFN_vkVoidFunction>(low_latency::CreateInstance)},
- {"vkDestroyInstance",
- reinterpret_cast<PFN_vkVoidFunction>(low_latency::DestroyInstance)},
- };
+ HOOK_ENTRY("vkGetInstanceProcAddr", LowLatency_GetInstanceProcAddr),
+ HOOK_ENTRY("vkGetDeviceProcAddr", LowLatency_GetDeviceProcAddr),
-static const auto device_functions =
- std::unordered_map<std::string_view, const PFN_vkVoidFunction>{
- {"vkGetDeviceProcAddr",
- reinterpret_cast<PFN_vkVoidFunction>(LowLatency_GetDeviceProcAddr)},
+ HOOK_ENTRY("vkEnumeratePhysicalDevices",
+ low_latency::EnumeratePhysicalDevices),
- {"vkCreateDevice",
- reinterpret_cast<PFN_vkVoidFunction>(low_latency::CreateDevice)},
- {"vkDestroyDevice",
- reinterpret_cast<PFN_vkVoidFunction>(low_latency::DestroyDevice)},
+ HOOK_ENTRY("vkCreateInstance", low_latency::CreateInstance),
+ HOOK_ENTRY("vkDestroyInstance", low_latency::DestroyInstance),
+};
+static const auto device_functions = func_map_t{
+ HOOK_ENTRY("vkGetDeviceProcAddr", LowLatency_GetDeviceProcAddr),
- {"vkGetDeviceQueue",
- reinterpret_cast<PFN_vkVoidFunction>(low_latency::GetDeviceQueue)},
- {"vkGetDeviceQueue2",
- reinterpret_cast<PFN_vkVoidFunction>(low_latency::GetDeviceQueue2)},
+ HOOK_ENTRY("vkDestroyDevice", low_latency::DestroyDevice),
- {"vkQueueSubmit",
- reinterpret_cast<PFN_vkVoidFunction>(low_latency::vkQueueSubmit)},
- {"vkQueueSubmit2",
- reinterpret_cast<PFN_vkVoidFunction>(low_latency::vkQueueSubmit2)},
+ HOOK_ENTRY("vkGetDeviceQueue", low_latency::GetDeviceQueue),
+ HOOK_ENTRY("vkGetDeviceQueue2", low_latency::GetDeviceQueue2),
- {"vkQueuePresentKHR",
- reinterpret_cast<PFN_vkVoidFunction>(low_latency::vkQueuePresentKHR)},
- };
+ HOOK_ENTRY("vkQueueSubmit", low_latency::vkQueueSubmit),
+ HOOK_ENTRY("vkQueueSubmit2", low_latency::vkQueueSubmit2),
+
+ HOOK_ENTRY("vkQueuePresentKHR", low_latency::vkQueuePresentKHR),
+
+ HOOK_ENTRY("vkAcquireNextImageKHR", low_latency::vkAcquireNextImageKHR),
+ HOOK_ENTRY("vkAcquireNextImage2KHR", low_latency::vkAcquireNextImage2KHR),
+};
+#undef HOOK_ENTRY
VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL
LowLatency_GetDeviceProcAddr(VkDevice device, const char* const pName) {
+ if (!pName || !device) {
+ return nullptr;
+ }
if (const auto it = device_functions.find(pName);
it != std::end(device_functions)) {
@@ -547,26 +639,20 @@ LowLatency_GetDeviceProcAddr(VkDevice device, const char* const pName) {
return it->second;
}
- const auto lock = std::scoped_lock{low_latency::layer_context.mutex};
-
using namespace low_latency;
- const auto& context = layer_context.get_context<DeviceContext>(device);
- return context.vtable.GetDeviceProcAddr(device, pName);
+ const auto& vtable = layer_context.get_context(device)->vtable;
+ return vtable.GetDeviceProcAddr(device, pName);
}
VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL
LowLatency_GetInstanceProcAddr(VkInstance instance, const char* const pName) {
+ if (const auto it = instance_functions.find(pName);
+ it != std::end(instance_functions)) {
- for (const auto& functions : {device_functions, instance_functions}) {
-
- if (const auto it = functions.find(pName); it != std::end(functions)) {
- return it->second;
- }
+ return it->second;
}
- const auto lock = std::scoped_lock{low_latency::layer_context.mutex};
-
using namespace low_latency;
- const auto& context = layer_context.get_context<InstanceContext>(instance);
- return context.vtable.GetInstanceProcAddr(instance, pName);
-} \ No newline at end of file
+ const auto& vtable = layer_context.get_context(instance)->vtable;
+ return vtable.GetInstanceProcAddr(instance, pName);
+}
diff --git a/src/layer_context.hh b/src/layer_context.hh
index 228efa3..59861a7 100644
--- a/src/layer_context.hh
+++ b/src/layer_context.hh
@@ -2,10 +2,13 @@
#define LAYER_CONTEXT_HH_
#include <mutex>
-#include <variant>
+#include <unordered_map>
+#include <vulkan/vulkan_core.h>
+#include "context.hh"
#include "device_context.hh"
#include "instance_context.hh"
+#include "physical_device_context.hh"
#include "queue_context.hh"
// The purpose of this file is to provide a definition for the highest level
@@ -19,63 +22,58 @@
namespace low_latency {
+// All these templates do is make it so we can go from some DispatchableType
+// to their respective contexts with nice syntax.
+
template <typename T>
concept DispatchableType =
std::same_as<std::remove_cvref_t<T>, VkInstance> ||
- std::same_as<std::remove_cvref_t<T>, VkDevice> ||
std::same_as<std::remove_cvref_t<T>, VkPhysicalDevice> ||
+ std::same_as<std::remove_cvref_t<T>, VkDevice> ||
std::same_as<std::remove_cvref_t<T>, VkQueue>;
-struct LayerContext {
- public:
- using ContextVariant = std::variant<std::unique_ptr<DeviceContext>,
- std::unique_ptr<InstanceContext>>;
+template <class D> struct context_for_t;
+template <> struct context_for_t<VkInstance> {
+ using context = InstanceContext;
+};
+template <> struct context_for_t<VkPhysicalDevice> {
+ using context = PhysicalDeviceContext;
+};
+template <> struct context_for_t<VkDevice> {
+ using context = DeviceContext;
+};
+template <> struct context_for_t<VkQueue> {
+ using context = QueueContext;
+};
+template <DispatchableType D>
+using dispatch_context_t = typename context_for_t<D>::context;
+struct LayerContext final : public Context {
public:
std::mutex mutex;
- std::unordered_map<void*, ContextVariant> contexts;
- std::uint64_t current_frame = 0;
+ std::unordered_map<void*, std::shared_ptr<Context>> contexts;
public:
LayerContext();
- LayerContext(const LayerContext&) = delete;
- LayerContext(LayerContext&&) = delete;
- LayerContext operator==(const LayerContext&) = delete;
- LayerContext operator==(LayerContext&&) = delete;
- ~LayerContext();
+ virtual ~LayerContext();
public:
- template <DispatchableType T> static void* get_key(const T& dt) {
- return *reinterpret_cast<void**>(dt);
+ template <DispatchableType DT> static void* get_key(const DT& dt) {
+ return reinterpret_cast<void*>(dt);
}
- template <typename T, DispatchableType DispatchableType>
- requires(!std::same_as<T, QueueContext>)
- T& get_context(const DispatchableType& dt) {
+ template <DispatchableType DT>
+ std::shared_ptr<dispatch_context_t<DT>> get_context(const DT& dt) {
const auto key = get_key(dt);
+ const auto lock = std::scoped_lock(this->mutex);
const auto it = this->contexts.find(key);
assert(it != std::end(this->contexts));
- const auto ptr = std::get_if<std::unique_ptr<T>>(&it->second);
- assert(ptr && *ptr);
-
- return **ptr;
- }
-
- // QueueContext's are actually owned by a device so look there instead.
- template <typename T, DispatchableType DispatchableType>
- requires(std::same_as<T, QueueContext>)
- T& get_context(const DispatchableType& dt) {
-
- const auto& device_context = this->get_context<DeviceContext>(dt);
- const auto& queue_context = device_context.queue_contexts;
-
- const auto it = device_context.queue_contexts.find(dt);
- assert(it != std::end(queue_context));
-
- const auto& ptr = it->second;
- return *ptr;
+ using context_t = dispatch_context_t<DT>;
+ auto ptr = std::dynamic_pointer_cast<context_t>(it->second);
+ assert(ptr);
+ return ptr;
}
};
diff --git a/src/physical_device_context.cc b/src/physical_device_context.cc
new file mode 100644
index 0000000..105b840
--- /dev/null
+++ b/src/physical_device_context.cc
@@ -0,0 +1,11 @@
+#include "physical_device_context.hh"
+
+namespace low_latency {
+
+PhysicalDeviceContext::PhysicalDeviceContext(
+ InstanceContext& instance_context, const VkPhysicalDevice& physical_device)
+ : instance(instance_context), physical_device(physical_device) {}
+
+PhysicalDeviceContext::~PhysicalDeviceContext() {}
+
+} // namespace low_latency \ No newline at end of file
diff --git a/src/physical_device_context.hh b/src/physical_device_context.hh
new file mode 100644
index 0000000..639fa0f
--- /dev/null
+++ b/src/physical_device_context.hh
@@ -0,0 +1,26 @@
+#ifndef PHYSICAL_DEVICE_CONTEXT_HH_
+#define PHYSICAL_DEVICE_CONTEXT_HH_
+
+#include "instance_context.hh"
+
+#include <vulkan/vulkan.hpp>
+
+#include "context.hh"
+
+namespace low_latency {
+
+class PhysicalDeviceContext final : public Context {
+ public:
+ InstanceContext& instance;
+
+ const VkPhysicalDevice physical_device;
+
+ public:
+ PhysicalDeviceContext(InstanceContext& instance_context,
+ const VkPhysicalDevice& physical_device);
+ virtual ~PhysicalDeviceContext();
+};
+
+} // namespace low_latency
+
+#endif \ No newline at end of file
diff --git a/src/queue_context.cc b/src/queue_context.cc
index 8f7d571..930b0c5 100644
--- a/src/queue_context.cc
+++ b/src/queue_context.cc
@@ -51,12 +51,15 @@ QueueContext::QueueContext(DeviceContext& device_context, const VkQueue& queue,
timestamp_pool(std::make_unique<TimestampPool>(*this)) {}
QueueContext::~QueueContext() {
+
+ // Drop retained handles first: Handle::~Handle returns its query index to the pool, so the pool must still exist.
+ this->handle_hack.clear();
+
// Ugly - destructors of timestamp_pool should be called before we destroy
// our vulkan objects.
this->timestamp_pool.reset();
const auto& vtable = this->device_context.vtable;
-
vtable.DestroySemaphore(this->device_context.device, this->semaphore,
nullptr);
vtable.DestroyCommandPool(this->device_context.device, this->command_pool,
diff --git a/src/queue_context.hh b/src/queue_context.hh
index 49bfcdf..184e31d 100644
--- a/src/queue_context.hh
+++ b/src/queue_context.hh
@@ -1,37 +1,39 @@
#ifndef QUEUE_STATE_HH_
#define QUEUE_STATE_HH_
+#include "context.hh"
#include "timestamp_pool.hh"
#include <vulkan/utility/vk_dispatch_table.h>
#include <vulkan/vulkan.hpp>
#include <memory>
+#include <deque>
namespace low_latency {
-
+
class DeviceContext;
-class QueueContext final {
+class QueueContext final : public Context {
public:
DeviceContext& device_context;
const VkQueue queue;
const std::uint32_t queue_family_index;
+ // this is incremented and tied to our semaphore
+ std::uint64_t semaphore_sequence = 0;
VkSemaphore semaphore;
+
VkCommandPool command_pool;
std::unique_ptr<TimestampPool> timestamp_pool;
+ std::deque<std::unique_ptr<TimestampPool::Handle>> handle_hack;
public:
QueueContext(DeviceContext& device_context, const VkQueue& queue,
const std::uint32_t& queue_family_index);
- QueueContext(const QueueContext&) = delete;
- QueueContext(QueueContext&&) = delete;
- QueueContext operator==(const QueueContext&) = delete;
- QueueContext operator==(QueueContext&&) = delete;
- ~QueueContext();
+ virtual ~QueueContext();
};
}; // namespace low_latency
diff --git a/src/timestamp_pool.cc b/src/timestamp_pool.cc
index a70c299..e37dcd2 100644
--- a/src/timestamp_pool.cc
+++ b/src/timestamp_pool.cc
@@ -17,6 +17,7 @@ TimestampPool::Block TimestampPool::allocate() {
.queryCount = this->TIMESTAMP_QUERY_POOL_SIZE};
auto query_pool = VkQueryPool{};
+
device_context.vtable.CreateQueryPool(device_context.device, &qpci,
nullptr, &query_pool);
return query_pool;
@@ -42,6 +43,9 @@ TimestampPool::Block TimestampPool::allocate() {
};
device_context.vtable.AllocateCommandBuffers(
device_context.device, &cbai, std::data(command_buffers));
+ std::ranges::for_each(command_buffers, [&](const auto& cb) {
+ device_context.sdld(device_context.device, cb);
+ });
return std::make_unique<std::vector<VkCommandBuffer>>(command_buffers);
}();
@@ -103,7 +107,7 @@ TimestampPool::Handle::Handle(
command_buffers(command_buffers) {}
TimestampPool::Handle::~Handle() {
- this->index_origin.insert(this->query_index);
+ assert(this->index_origin.insert(this->query_index).second);
}
void TimestampPool::Handle::setup_command_buffers(
@@ -174,7 +178,6 @@ std::uint64_t TimestampPool::get_polled(const Handle& handle) {
TimestampPool::~TimestampPool() {
const auto& device = this->queue_context.device_context.device;
const auto& vtable = this->queue_context.device_context.vtable;
-
for (const auto& block : this->blocks) {
vtable.FreeCommandBuffers(device, this->queue_context.command_pool,
std::size(*block.command_buffers),
diff --git a/src/timestamp_pool.hh b/src/timestamp_pool.hh
index 82c4721..cc67b18 100644
--- a/src/timestamp_pool.hh
+++ b/src/timestamp_pool.hh
@@ -69,18 +69,18 @@ class TimestampPool final {
public:
// A handle represents two std::uint64_t blocks of timestamp memory and two
// command buffers.
- struct Handle {
+ struct Handle final {
private:
friend class TimestampPool;
private:
available_query_indicies_t& index_origin;
- std::size_t block_index;
+ const std::size_t block_index;
public:
- VkQueryPool query_pool;
- std::uint64_t query_index;
- std::array<VkCommandBuffer, 2> command_buffers;
+ const VkQueryPool query_pool;
+ const std::uint64_t query_index;
+ const std::array<VkCommandBuffer, 2> command_buffers;
public:
Handle(TimestampPool::available_query_indicies_t& index_origin,
@@ -89,8 +89,8 @@ class TimestampPool final {
const std::array<VkCommandBuffer, 2>& command_buffers);
Handle(const Handle& handle) = delete;
Handle(Handle&&) = delete;
- Handle operator==(const Handle& handle) = delete;
- Handle operator==(Handle&&) = delete;
+ Handle operator=(const Handle& handle) = delete;
+ Handle operator=(Handle&&) = delete;
~Handle(); // frees from the pool
public:
@@ -104,8 +104,8 @@ class TimestampPool final {
TimestampPool(QueueContext& queue_context);
TimestampPool(const TimestampPool&) = delete;
TimestampPool(TimestampPool&&) = delete;
- TimestampPool operator==(const TimestampPool&) = delete;
- TimestampPool operator==(TimestampPool&&) = delete;
+ TimestampPool operator=(const TimestampPool&) = delete;
+ TimestampPool operator=(TimestampPool&&) = delete;
~TimestampPool();
public: