aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--src/device_context.cc15
-rw-r--r--src/device_context.hh35
-rw-r--r--src/instance_context.cc13
-rw-r--r--src/instance_context.hh25
-rw-r--r--src/layer.cc320
-rw-r--r--src/layer_context.cc9
-rw-r--r--src/layer_context.hh84
-rw-r--r--src/queue_context.cc73
-rw-r--r--src/queue_context.hh23
-rw-r--r--src/timestamp_pool.cc66
-rw-r--r--src/timestamp_pool.hh23
11 files changed, 427 insertions, 259 deletions
diff --git a/src/device_context.cc b/src/device_context.cc
new file mode 100644
index 0000000..4be1872
--- /dev/null
+++ b/src/device_context.cc
@@ -0,0 +1,15 @@
+#include "device_context.hh"
+#include "queue_context.hh"
+
+#include <utility>
+
+namespace low_latency {
+
+DeviceContext::DeviceContext(InstanceContext& parent_instance,
+ const VkDevice& device,
+ VkuDeviceDispatchTable&& vtable)
+ : instance(parent_instance), device(device), vtable(std::move(vtable))
+
+{}
+
+} // namespace low_latency \ No newline at end of file
diff --git a/src/device_context.hh b/src/device_context.hh
new file mode 100644
index 0000000..a936d6d
--- /dev/null
+++ b/src/device_context.hh
@@ -0,0 +1,35 @@
+#ifndef DEVICE_CONTEXT_HH_
+#define DEVICE_CONTEXT_HH_
+
+#include <memory>
+#include <unordered_map>
+
+#include <vulkan/utility/vk_dispatch_table.h>
+#include <vulkan/vulkan.hpp>
+
+#include "instance_context.hh"
+
+namespace low_latency {
+
+class QueueContext;
+
+struct DeviceContext {
+ InstanceContext& instance;
+
+ const VkDevice device;
+ const VkuDeviceDispatchTable vtable;
+
+ std::unordered_map<VkQueue, std::unique_ptr<QueueContext>> queue_contexts;
+
+ public:
+ DeviceContext(InstanceContext& parent_instance, const VkDevice& device,
+ VkuDeviceDispatchTable&& vtable);
+ DeviceContext(const DeviceContext&) = delete;
+ DeviceContext(DeviceContext&&) = delete;
+ DeviceContext& operator=(const DeviceContext&) = delete;
+ DeviceContext& operator=(DeviceContext&&) = delete;
+};
+
+}; // namespace low_latency
+
+#endif \ No newline at end of file
diff --git a/src/instance_context.cc b/src/instance_context.cc
new file mode 100644
index 0000000..36d2c66
--- /dev/null
+++ b/src/instance_context.cc
@@ -0,0 +1,13 @@
+#include "instance_context.hh"
+
+#include <utility>
+
+namespace low_latency {
+
+InstanceContext::InstanceContext(const VkInstance& instance,
+ VkuInstanceDispatchTable&& vtable)
+ : instance(instance), vtable(std::move(vtable)) {}
+
+InstanceContext::~InstanceContext() {}
+
+} // namespace low_latency \ No newline at end of file
diff --git a/src/instance_context.hh b/src/instance_context.hh
new file mode 100644
index 0000000..0a0b999
--- /dev/null
+++ b/src/instance_context.hh
@@ -0,0 +1,25 @@
+#ifndef INSTANCE_CONTEXT_HH_
+#define INSTANCE_CONTEXT_HH_
+
+#include <vulkan/utility/vk_dispatch_table.h>
+
+namespace low_latency {
+
+struct InstanceContext {
+
+ const VkInstance instance;
+ const VkuInstanceDispatchTable vtable;
+
+ public:
+ InstanceContext(const VkInstance& instance,
+ VkuInstanceDispatchTable&& vtable);
+ InstanceContext(const InstanceContext&) = delete;
+ InstanceContext(InstanceContext&&) = delete;
+ InstanceContext& operator=(const InstanceContext&) = delete;
+ InstanceContext& operator=(InstanceContext&&) = delete;
+ ~InstanceContext();
+};
+
+}; // namespace low_latency
+
+#endif \ No newline at end of file
diff --git a/src/layer.cc b/src/layer.cc
index 94b4969..5e652f0 100644
--- a/src/layer.cc
+++ b/src/layer.cc
@@ -1,6 +1,10 @@
#include "layer.hh"
+#include <iostream>
+#include <string_view>
+#include <unordered_map>
#include <utility>
+
#include <vulkan/utility/vk_dispatch_table.h>
#include <vulkan/vk_layer.h>
#include <vulkan/vk_platform.h>
@@ -8,37 +12,18 @@
#include <vulkan/vulkan.hpp>
#include <vulkan/vulkan_core.h>
-#include <deque>
-#include <iostream>
-#include <mutex>
-#include <string_view>
-#include <unordered_map>
-#include <unordered_set>
-
+#include "device_context.hh"
+#include "instance_context.hh"
+#include "layer_context.hh"
#include "queue_context.hh"
-#include "timestamp_pool.hh"
namespace low_latency {
-// Global mutex for layer data.
-static auto mutex = std::mutex{};
-
-// Mappings for device instances.
-static std::unordered_map<VkPhysicalDevice, VkInstance> device_instances;
-static std::unordered_map<void*, VkuInstanceDispatchTable> instance_vtables;
-static std::unordered_map<void*, VkuDeviceDispatchTable> device_vtables;
+namespace {
-static std::uint64_t current_frame = 0;
-static std::unordered_map<VkQueue, QueueContext> queue_contexts;
+LayerContext layer_context;
-template <typename T>
-concept DispatchableType =
- std::same_as<std::remove_cvref_t<T>, VkInstance> ||
- std::same_as<std::remove_cvref_t<T>, VkDevice> ||
- std::same_as<std::remove_cvref_t<T>, VkPhysicalDevice>;
-template <DispatchableType T> void* get_key(const T& inst) {
- return *reinterpret_cast<void**>(inst);
-}
+} // namespace
template <typename T, typename sType>
static T* get_link_info(const void* const head, const sType& stype) {
@@ -92,23 +77,24 @@ CreateInstance(const VkInstanceCreateInfo* pCreateInfo,
return result;
}
- const auto lock = std::scoped_lock{mutex};
- instance_vtables.emplace(
- get_key(*pInstance),
- VkuInstanceDispatchTable{
- .DestroyInstance = reinterpret_cast<PFN_vkDestroyInstance>(
- gipa(*pInstance, "vkDestroyInstance")),
- .EnumeratePhysicalDevices =
- reinterpret_cast<PFN_vkEnumeratePhysicalDevices>(
- gipa(*pInstance, "vkEnumeratePhysicalDevices")),
- .GetInstanceProcAddr = reinterpret_cast<PFN_vkGetInstanceProcAddr>(
- gipa(*pInstance, "vkGetInstanceProcAddr")),
- .EnumerateDeviceExtensionProperties =
- reinterpret_cast<PFN_vkEnumerateDeviceExtensionProperties>(
- gipa(*pInstance, "vkEnumerateDeviceExtensionProperties")),
- }
+ const auto key = layer_context.get_key(*pInstance);
+ auto vtable = VkuInstanceDispatchTable{
+ .DestroyInstance = reinterpret_cast<PFN_vkDestroyInstance>(
+ gipa(*pInstance, "vkDestroyInstance")),
+ .EnumeratePhysicalDevices =
+ reinterpret_cast<PFN_vkEnumeratePhysicalDevices>(
+ gipa(*pInstance, "vkEnumeratePhysicalDevices")),
+ .GetInstanceProcAddr = reinterpret_cast<PFN_vkGetInstanceProcAddr>(
+ gipa(*pInstance, "vkGetInstanceProcAddr")),
+ .EnumerateDeviceExtensionProperties =
+ reinterpret_cast<PFN_vkEnumerateDeviceExtensionProperties>(
+ gipa(*pInstance, "vkEnumerateDeviceExtensionProperties")),
+ };
- );
+ const auto lock = std::scoped_lock{layer_context.mutex};
+ assert(!layer_context.contexts.contains(key));
+ layer_context.contexts.try_emplace(
+ key, std::make_unique<InstanceContext>(*pInstance, std::move(vtable)));
return VK_SUCCESS;
}
@@ -116,34 +102,11 @@ CreateInstance(const VkInstanceCreateInfo* pCreateInfo,
static VKAPI_ATTR void VKAPI_CALL
DestroyInstance(VkInstance instance, const VkAllocationCallbacks* allocator) {
- const auto lock = std::scoped_lock{mutex};
+ const auto lock = std::scoped_lock{layer_context.mutex};
- const auto key = get_key(instance);
- assert(instance_vtables.contains(key));
- instance_vtables.erase(key);
-}
-
-static VKAPI_ATTR VkResult VKAPI_CALL EnumeratePhysicalDevices(
- VkInstance instance, std::uint32_t* count, VkPhysicalDevice* devices) {
-
- const auto lock = std::scoped_lock{mutex};
-
- const auto it = instance_vtables.find(get_key(instance));
- assert(it != std::end(instance_vtables));
- const auto& vtable = it->second;
-
- if (const auto result =
- vtable.EnumeratePhysicalDevices(instance, count, devices);
- !devices || result != VK_SUCCESS) {
-
- return result;
- }
-
- for (auto i = std::uint32_t{0}; i < *count; ++i) {
- device_instances.emplace(devices[i], instance);
- }
-
- return VK_SUCCESS;
+ const auto key = layer_context.get_key(instance);
+ assert(layer_context.contexts.contains(key));
+ layer_context.contexts.erase(key);
}
static VKAPI_ATTR VkResult VKAPI_CALL CreateDevice(
@@ -163,16 +126,17 @@ static VKAPI_ATTR VkResult VKAPI_CALL CreateDevice(
}
create_info->u.pLayerInfo = create_info->u.pLayerInfo->pNext;
- const auto lock = std::scoped_lock{mutex};
+ const auto lock = std::scoped_lock{layer_context.mutex};
+
+ auto& context = layer_context.get_context<InstanceContext>(physical_device);
const auto next_extensions =
[&]() -> std::optional<std::vector<const char*>> {
const auto supported_extensions =
[&]() -> std::optional<std::vector<VkExtensionProperties>> {
const auto enumerate_device_extensions =
- reinterpret_cast<PFN_vkEnumerateDeviceExtensionProperties>(
- gipa(device_instances[physical_device],
- "vkEnumerateDeviceExtensionProperties"));
+ reinterpret_cast<PFN_vkEnumerateDeviceExtensionProperties>(gipa(
+ context.instance, "vkEnumerateDeviceExtensionProperties"));
if (!enumerate_device_extensions) {
return std::nullopt;
}
@@ -257,67 +221,75 @@ static VKAPI_ATTR VkResult VKAPI_CALL CreateDevice(
return result;
}
- device_vtables.emplace(
- get_key(*pDevice),
- VkuDeviceDispatchTable{
- .GetDeviceProcAddr = reinterpret_cast<PFN_vkGetDeviceProcAddr>(
- gdpa(*pDevice, "vkGetDeviceProcAddr")),
- .DestroyDevice = reinterpret_cast<PFN_vkDestroyDevice>(
- gdpa(*pDevice, "vkDestroyDevice")),
- .GetDeviceQueue = reinterpret_cast<PFN_vkGetDeviceQueue>(
- gdpa(*pDevice, "vkGetDeviceQueue")),
- .QueueSubmit = reinterpret_cast<PFN_vkQueueSubmit>(
- gdpa(*pDevice, "vkQueueSubmit")),
- .CreateSemaphore = reinterpret_cast<PFN_vkCreateSemaphore>(
- gdpa(*pDevice, "vkCreateSemaphore")),
- .CreateQueryPool = reinterpret_cast<PFN_vkCreateQueryPool>(
- gdpa(*pDevice, "vkCreateQueryPool")),
- .GetQueryPoolResults = reinterpret_cast<PFN_vkGetQueryPoolResults>(
- gdpa(*pDevice, "vkGetQueryPoolResults")),
- .CreateCommandPool = reinterpret_cast<PFN_vkCreateCommandPool>(
- gdpa(*pDevice, "vkCreateCommandPool")),
- .AllocateCommandBuffers =
- reinterpret_cast<PFN_vkAllocateCommandBuffers>(
- gdpa(*pDevice, "vkAllocateCommandBuffers")),
- .BeginCommandBuffer = reinterpret_cast<PFN_vkBeginCommandBuffer>(
- gdpa(*pDevice, "vkBeginCommandBuffer")),
- .EndCommandBuffer = reinterpret_cast<PFN_vkEndCommandBuffer>(
- gdpa(*pDevice, "vkEndCommandBuffer")),
- .ResetCommandBuffer = reinterpret_cast<PFN_vkResetCommandBuffer>(
- gdpa(*pDevice, "vkResetCommandBuffer")),
- .CmdDraw =
- reinterpret_cast<PFN_vkCmdDraw>(gdpa(*pDevice, "vkCmdDraw")),
- .CmdDrawIndexed = reinterpret_cast<PFN_vkCmdDrawIndexed>(
- gdpa(*pDevice, "vkCmdDrawIndexed")),
- .CmdResetQueryPool = reinterpret_cast<PFN_vkCmdResetQueryPool>(
- gdpa(*pDevice, "vkCmdResetQueryPool")),
- .GetDeviceQueue2 = reinterpret_cast<PFN_vkGetDeviceQueue2>(
- gdpa(*pDevice, "vkGetDeviceQueue2")),
- .QueueSubmit2 = reinterpret_cast<PFN_vkQueueSubmit2>(
- gdpa(*pDevice, "vkQueueSubmit2")),
- .QueuePresentKHR = reinterpret_cast<PFN_vkQueuePresentKHR>(
- gdpa(*pDevice, "vkQueuePresentKHR")),
- .GetSemaphoreCounterValueKHR =
- reinterpret_cast<PFN_vkGetSemaphoreCounterValueKHR>(
- gdpa(*pDevice, "vkGetSemaphoreCounterValueKHR")),
- .CmdWriteTimestamp2KHR =
- reinterpret_cast<PFN_vkCmdWriteTimestamp2KHR>(
- gdpa(*pDevice, "vkCmdWriteTimestamp2KHR")),
- .QueueSubmit2KHR = reinterpret_cast<PFN_vkQueueSubmit2KHR>(
- gdpa(*pDevice, "vkQueueSubmit2KHR")),
-
- });
+ auto vtable = VkuDeviceDispatchTable{
+ .GetDeviceProcAddr = reinterpret_cast<PFN_vkGetDeviceProcAddr>(
+ gdpa(*pDevice, "vkGetDeviceProcAddr")),
+ .DestroyDevice = reinterpret_cast<PFN_vkDestroyDevice>(
+ gdpa(*pDevice, "vkDestroyDevice")),
+ .GetDeviceQueue = reinterpret_cast<PFN_vkGetDeviceQueue>(
+ gdpa(*pDevice, "vkGetDeviceQueue")),
+ .QueueSubmit = reinterpret_cast<PFN_vkQueueSubmit>(
+ gdpa(*pDevice, "vkQueueSubmit")),
+ .CreateSemaphore = reinterpret_cast<PFN_vkCreateSemaphore>(
+ gdpa(*pDevice, "vkCreateSemaphore")),
+ .DestroySemaphore = reinterpret_cast<PFN_vkDestroySemaphore>(
+ gdpa(*pDevice, "vkDestroySemaphore")),
+ .CreateQueryPool = reinterpret_cast<PFN_vkCreateQueryPool>(
+ gdpa(*pDevice, "vkCreateQueryPool")),
+ .DestroyQueryPool = reinterpret_cast<PFN_vkDestroyQueryPool>(
+ gdpa(*pDevice, "vkDestroyQueryPool")),
+ .GetQueryPoolResults = reinterpret_cast<PFN_vkGetQueryPoolResults>(
+ gdpa(*pDevice, "vkGetQueryPoolResults")),
+ .CreateCommandPool = reinterpret_cast<PFN_vkCreateCommandPool>(
+ gdpa(*pDevice, "vkCreateCommandPool")),
+ .DestroyCommandPool = reinterpret_cast<PFN_vkDestroyCommandPool>(
+ gdpa(*pDevice, "vkDestroyCommandPool")),
+ .AllocateCommandBuffers =
+ reinterpret_cast<PFN_vkAllocateCommandBuffers>(
+ gdpa(*pDevice, "vkAllocateCommandBuffers")),
+ .FreeCommandBuffers = reinterpret_cast<PFN_vkFreeCommandBuffers>(
+ gdpa(*pDevice, "vkFreeCommandBuffers")),
+ .BeginCommandBuffer = reinterpret_cast<PFN_vkBeginCommandBuffer>(
+ gdpa(*pDevice, "vkBeginCommandBuffer")),
+ .EndCommandBuffer = reinterpret_cast<PFN_vkEndCommandBuffer>(
+ gdpa(*pDevice, "vkEndCommandBuffer")),
+ .ResetCommandBuffer = reinterpret_cast<PFN_vkResetCommandBuffer>(
+ gdpa(*pDevice, "vkResetCommandBuffer")),
+ .CmdDraw = reinterpret_cast<PFN_vkCmdDraw>(gdpa(*pDevice, "vkCmdDraw")),
+ .CmdDrawIndexed = reinterpret_cast<PFN_vkCmdDrawIndexed>(
+ gdpa(*pDevice, "vkCmdDrawIndexed")),
+ .CmdResetQueryPool = reinterpret_cast<PFN_vkCmdResetQueryPool>(
+ gdpa(*pDevice, "vkCmdResetQueryPool")),
+ .GetDeviceQueue2 = reinterpret_cast<PFN_vkGetDeviceQueue2>(
+ gdpa(*pDevice, "vkGetDeviceQueue2")),
+ .QueueSubmit2 = reinterpret_cast<PFN_vkQueueSubmit2>(
+ gdpa(*pDevice, "vkQueueSubmit2")),
+ .QueuePresentKHR = reinterpret_cast<PFN_vkQueuePresentKHR>(
+ gdpa(*pDevice, "vkQueuePresentKHR")),
+ .GetSemaphoreCounterValueKHR =
+ reinterpret_cast<PFN_vkGetSemaphoreCounterValueKHR>(
+ gdpa(*pDevice, "vkGetSemaphoreCounterValueKHR")),
+ .CmdWriteTimestamp2KHR = reinterpret_cast<PFN_vkCmdWriteTimestamp2KHR>(
+ gdpa(*pDevice, "vkCmdWriteTimestamp2KHR")),
+ .QueueSubmit2KHR = reinterpret_cast<PFN_vkQueueSubmit2KHR>(
+ gdpa(*pDevice, "vkQueueSubmit2KHR")),
+ };
+
+ const auto key = layer_context.get_key(*pDevice);
+ assert(!layer_context.contexts.contains(key));
+ layer_context.contexts.try_emplace(
+ key,
+ std::make_unique<DeviceContext>(context, *pDevice, std::move(vtable)));
return VK_SUCCESS;
}
static VKAPI_ATTR void VKAPI_CALL
DestroyDevice(VkDevice device, const VkAllocationCallbacks* allocator) {
-
- const auto lock = std::scoped_lock{mutex};
- const auto key = get_key(device);
- assert(device_vtables.contains(key));
- device_vtables.erase(key);
+ const auto lock = std::scoped_lock{layer_context.mutex};
+ const auto key = layer_context.get_key(device);
+ assert(layer_context.contexts.contains(key));
+ layer_context.contexts.erase(key);
}
// Small amount of duplication, we can't assume gdq2 is available apparently.
@@ -325,37 +297,40 @@ static VKAPI_ATTR void VKAPI_CALL
GetDeviceQueue(VkDevice device, std::uint32_t queue_family_index,
std::uint32_t queue_index, VkQueue* queue) {
- const auto lock = std::scoped_lock{mutex};
- const auto& vtable = device_vtables[get_key(device)];
+ const auto lock = std::scoped_lock{layer_context.mutex};
+
+ auto& device_context = layer_context.get_context<DeviceContext>(device);
- vtable.GetDeviceQueue(device, queue_family_index, queue_index, queue);
+ device_context.vtable.GetDeviceQueue(device, queue_family_index,
+ queue_index, queue);
if (!queue || !*queue) {
return;
}
+ auto& queue_contexts = device_context.queue_contexts;
if (!queue_contexts.contains(*queue)) {
- queue_contexts.emplace(
- std::piecewise_construct, std::forward_as_tuple(*queue),
- std::forward_as_tuple(device, *queue, queue_family_index, vtable));
+ queue_contexts.try_emplace(
+ *queue, std::make_unique<QueueContext>(device_context, *queue,
+ queue_family_index));
}
}
static VKAPI_ATTR void VKAPI_CALL GetDeviceQueue2(
VkDevice device, const VkDeviceQueueInfo2* info, VkQueue* queue) {
- const auto lock = std::scoped_lock{mutex};
- const auto& vtable = device_vtables[get_key(device)];
+ const auto lock = std::scoped_lock{layer_context.mutex};
+ auto& device_context = layer_context.get_context<DeviceContext>(device);
- vtable.GetDeviceQueue2(device, info, queue);
+ device_context.vtable.GetDeviceQueue2(device, info, queue);
if (!queue || !*queue) {
return;
}
+ auto& queue_contexts = device_context.queue_contexts;
if (!queue_contexts.contains(*queue)) {
- queue_contexts.emplace(
- std::piecewise_construct, std::forward_as_tuple(*queue),
- std::forward_as_tuple(device, *queue, info->queueFamilyIndex,
- vtable));
+ queue_contexts.try_emplace(
+ *queue, std::make_unique<QueueContext>(device_context, *queue,
+ info->queueFamilyIndex));
}
}
@@ -363,14 +338,10 @@ static VKAPI_ATTR VkResult VKAPI_CALL
vkQueueSubmit(VkQueue queue, std::uint32_t submit_count,
const VkSubmitInfo* submit_info, VkFence fence) {
- const auto lock = std::scoped_lock{mutex};
+ const auto lock = std::scoped_lock{layer_context.mutex};
- auto& queue_context = [&]() -> auto& {
- const auto& queue_context_it = queue_contexts.find(queue);
- assert(queue_context_it != std::end(queue_contexts));
- return queue_context_it->second;
- }();
- const auto& vtable = device_vtables[get_key(queue_context.device)];
+ auto& queue_context = layer_context.get_context<QueueContext>(queue);
+ const auto& vtable = queue_context.device_context.vtable;
if (!submit_count) { // no-op submit we shouldn't worry about
return vtable.QueueSubmit(queue, submit_count, submit_info, fence);
@@ -380,7 +351,7 @@ vkQueueSubmit(VkQueue queue, std::uint32_t submit_count,
auto next_submit_infos = std::vector<VkSubmitInfo>{};
next_submit_infos.reserve(submit_count + 2);
- auto timestamp_handle = queue_context.timestamp_pool.acquire();
+ auto timestamp_handle = queue_context.timestamp_pool->acquire();
timestamp_handle->setup_command_buffers(vtable);
const auto& [head_cb, tail_cb] = timestamp_handle->command_buffers;
@@ -403,7 +374,7 @@ vkQueueSubmit(VkQueue queue, std::uint32_t submit_count,
next_submit_infos[1].pWaitSemaphores = nullptr;
next_submit_infos[1].waitSemaphoreCount = 0u;
- const auto TODO_next = std::uint64_t{current_frame + 1};
+ const auto TODO_next = std::uint64_t{layer_context.current_frame + 1};
const auto tail_tssi = VkTimelineSemaphoreSubmitInfo{
.sType = VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO_KHR,
.signalSemaphoreValueCount = 1,
@@ -434,13 +405,9 @@ static VKAPI_ATTR VkResult VKAPI_CALL
vkQueueSubmit2(VkQueue queue, std::uint32_t submit_count,
const VkSubmitInfo2* submit_infos, VkFence fence) {
- const auto lock = std::scoped_lock{mutex};
- auto& queue_context = [&]() -> auto& {
- const auto& queue_context_it = queue_contexts.find(queue);
- assert(queue_context_it != std::end(queue_contexts));
- return queue_context_it->second;
- }();
- const auto& vtable = device_vtables[get_key(queue_context.device)];
+ const auto lock = std::scoped_lock{layer_context.mutex};
+ auto& queue_context = layer_context.get_context<QueueContext>(queue);
+ const auto& vtable = queue_context.device_context.vtable;
if (!submit_count) { // another no-op submit
return vtable.QueueSubmit2(queue, submit_count, submit_infos, fence);
@@ -449,7 +416,7 @@ vkQueueSubmit2(VkQueue queue, std::uint32_t submit_count,
auto next_submit_infos = std::vector<VkSubmitInfo2>();
next_submit_infos.reserve(submit_count + 2);
- auto timestamp_handle = queue_context.timestamp_pool.acquire();
+ auto timestamp_handle = queue_context.timestamp_pool->acquire();
timestamp_handle->setup_command_buffers(vtable);
const auto& [head_cb, tail_cb] = timestamp_handle->command_buffers;
@@ -500,13 +467,9 @@ vkQueueSubmit2KHR(VkQueue queue, std::uint32_t submit_count,
static VKAPI_ATTR VkResult VKAPI_CALL
vkQueuePresentKHR(VkQueue queue, const VkPresentInfoKHR* present_info) {
- const auto lock = std::scoped_lock{mutex};
- auto& queue_context = [&]() -> auto& {
- const auto& queue_context_it = queue_contexts.find(queue);
- assert(queue_context_it != std::end(queue_contexts));
- return queue_context_it->second;
- }();
- const auto& vtable = device_vtables[get_key(queue_context.device)];
+ const auto lock = std::scoped_lock{layer_context.mutex};
+ auto& queue_context = layer_context.get_context<QueueContext>(queue);
+ const auto& vtable = queue_context.device_context.vtable;
if (const auto res = vtable.QueuePresentKHR(queue, present_info);
res != VK_SUCCESS) {
@@ -517,23 +480,24 @@ vkQueuePresentKHR(VkQueue queue, const VkPresentInfoKHR* present_info) {
std::cout << "queuePresentKHR called for queue " << queue << '\n';
// Update all of our information about this queue's timestamp pool!
- queue_context.timestamp_pool.poll();
+ queue_context.timestamp_pool->poll();
// While we might be submitting on this queue, let's see what our timeline
// semaphore says we're at.
uint64_t value = 0;
if (const auto res = vtable.GetSemaphoreCounterValueKHR(
- queue_context.device, queue_context.semaphore, &value);
+ queue_context.device_context.device, queue_context.semaphore,
+ &value);
res != VK_SUCCESS) {
return res;
}
- std::cout << " frame_index: " << current_frame << '\n';
+ std::cout << " frame_index: " << layer_context.current_frame << '\n';
std::cout << " semaphore: " << value << '\n';
std::cout << " queue: " << queue << '\n';
- ++current_frame;
+ ++layer_context.current_frame;
return VK_SUCCESS;
}
@@ -548,10 +512,6 @@ static const auto instance_functions =
reinterpret_cast<PFN_vkVoidFunction>(low_latency::CreateInstance)},
{"vkDestroyInstance",
reinterpret_cast<PFN_vkVoidFunction>(low_latency::DestroyInstance)},
-
- {"vkEnumeratePhysicalDevices",
- reinterpret_cast<PFN_vkVoidFunction>(
- low_latency::EnumeratePhysicalDevices)},
};
static const auto device_functions =
@@ -587,9 +547,11 @@ LowLatency_GetDeviceProcAddr(VkDevice device, const char* const pName) {
return it->second;
}
- const auto lock = std::scoped_lock{low_latency::mutex};
- return low_latency::device_vtables[low_latency::get_key(device)]
- .GetDeviceProcAddr(device, pName);
+ const auto lock = std::scoped_lock{low_latency::layer_context.mutex};
+
+ using namespace low_latency;
+ const auto& context = layer_context.get_context<DeviceContext>(device);
+ return context.vtable.GetDeviceProcAddr(device, pName);
}
VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL
@@ -602,7 +564,9 @@ LowLatency_GetInstanceProcAddr(VkInstance instance, const char* const pName) {
}
}
- const auto lock = std::scoped_lock{low_latency::mutex};
- return low_latency::instance_vtables[low_latency::get_key(instance)]
- .GetInstanceProcAddr(instance, pName);
+ const auto lock = std::scoped_lock{low_latency::layer_context.mutex};
+
+ using namespace low_latency;
+ const auto& context = layer_context.get_context<InstanceContext>(instance);
+ return context.vtable.GetInstanceProcAddr(instance, pName);
} \ No newline at end of file
diff --git a/src/layer_context.cc b/src/layer_context.cc
new file mode 100644
index 0000000..ceb0030
--- /dev/null
+++ b/src/layer_context.cc
@@ -0,0 +1,9 @@
+#include "layer_context.hh"
+
+namespace low_latency {
+
+LayerContext::LayerContext() {}
+
+LayerContext::~LayerContext() {}
+
+} // namespace low_latency \ No newline at end of file
diff --git a/src/layer_context.hh b/src/layer_context.hh
new file mode 100644
index 0000000..228efa3
--- /dev/null
+++ b/src/layer_context.hh
@@ -0,0 +1,84 @@
+#ifndef LAYER_CONTEXT_HH_
+#define LAYER_CONTEXT_HH_
+
+#include <mutex>
+#include <variant>
+
+#include "device_context.hh"
+#include "instance_context.hh"
+#include "queue_context.hh"
+
+// The purpose of this file is to provide a definition for the highest level
+// entry point struct of our vulkan state.
+//
+// All Context structs have deleted copy/move constructors. This is because we
+// want to be extremely explicit with how/when we delete things, and this allows
+// us to use destructors for cleanup without much worry about weird copies
+// floating around. Most contexts will probably live inside std::unique_ptr's as
+// a result so they can be used in standard containers.
+
+namespace low_latency {
+
+template <typename T>
+concept DispatchableType =
+ std::same_as<std::remove_cvref_t<T>, VkInstance> ||
+ std::same_as<std::remove_cvref_t<T>, VkDevice> ||
+ std::same_as<std::remove_cvref_t<T>, VkPhysicalDevice> ||
+ std::same_as<std::remove_cvref_t<T>, VkQueue>;
+
+struct LayerContext {
+ public:
+ using ContextVariant = std::variant<std::unique_ptr<DeviceContext>,
+ std::unique_ptr<InstanceContext>>;
+
+ public:
+ std::mutex mutex;
+ std::unordered_map<void*, ContextVariant> contexts;
+ std::uint64_t current_frame = 0;
+
+ public:
+ LayerContext();
+ LayerContext(const LayerContext&) = delete;
+ LayerContext(LayerContext&&) = delete;
+ LayerContext& operator=(const LayerContext&) = delete;
+ LayerContext& operator=(LayerContext&&) = delete;
+ ~LayerContext();
+
+ public:
+ template <DispatchableType T> static void* get_key(const T& dt) {
+ return *reinterpret_cast<void**>(dt);
+ }
+
+ template <typename T, DispatchableType DispatchableType>
+ requires(!std::same_as<T, QueueContext>)
+ T& get_context(const DispatchableType& dt) {
+ const auto key = get_key(dt);
+
+ const auto it = this->contexts.find(key);
+ assert(it != std::end(this->contexts));
+
+ const auto ptr = std::get_if<std::unique_ptr<T>>(&it->second);
+ assert(ptr && *ptr);
+
+ return **ptr;
+ }
+
+ // QueueContext's are actually owned by a device so look there instead.
+ template <typename T, DispatchableType DispatchableType>
+ requires(std::same_as<T, QueueContext>)
+ T& get_context(const DispatchableType& dt) {
+
+ const auto& device_context = this->get_context<DeviceContext>(dt);
+ const auto& queue_context = device_context.queue_contexts;
+
+ const auto it = device_context.queue_contexts.find(dt);
+ assert(it != std::end(queue_context));
+
+ const auto& ptr = it->second;
+ return *ptr;
+ }
+};
+
+}; // namespace low_latency
+
+#endif \ No newline at end of file
diff --git a/src/queue_context.cc b/src/queue_context.cc
index dbae4c0..8f7d571 100644
--- a/src/queue_context.cc
+++ b/src/queue_context.cc
@@ -1,10 +1,12 @@
#include "queue_context.hh"
+#include "device_context.hh"
+#include "timestamp_pool.hh"
namespace low_latency {
-static VkCommandPool make_command_pool(const VkDevice& device,
- const std::uint32_t& queue_family_index,
- const VkuDeviceDispatchTable& vtable) {
+static VkCommandPool
+make_command_pool(const DeviceContext& device_context,
+ const std::uint32_t& queue_family_index) {
const auto cpci = VkCommandPoolCreateInfo{
.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO,
@@ -14,38 +16,51 @@ static VkCommandPool make_command_pool(const VkDevice& device,
};
auto command_pool = VkCommandPool{};
- vtable.CreateCommandPool(device, &cpci, nullptr, &command_pool);
+ device_context.vtable.CreateCommandPool(device_context.device, &cpci,
+ nullptr, &command_pool);
return command_pool;
}
-QueueContext::QueueContext(const VkDevice& device, const VkQueue queue,
- const std::uint32_t& queue_family_index,
- const VkuDeviceDispatchTable& vtable)
- : device(device), queue(queue), queue_family_index(queue_family_index),
- vtable(vtable),
- // Important we make the command pool before the timestamp pool, because it's a dependency.
- command_pool(make_command_pool(device, queue_family_index, vtable)),
- timestamp_pool(device, vtable, command_pool) {
-
- this->semaphore = [&]() -> VkSemaphore {
- const auto stci = VkSemaphoreTypeCreateInfo{
- .sType = VK_STRUCTURE_TYPE_SEMAPHORE_TYPE_CREATE_INFO,
- .semaphoreType = VK_SEMAPHORE_TYPE_TIMELINE,
- .initialValue = 0,
- };
-
- const auto sci = VkSemaphoreCreateInfo{
- .sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO,
- .pNext = &stci,
- };
-
- auto semaphore = VkSemaphore{};
- vtable.CreateSemaphore(device, &sci, nullptr, &semaphore);
- return semaphore;
- }();
+static VkSemaphore make_semaphore(const DeviceContext& device_context) {
+
+ const auto stci = VkSemaphoreTypeCreateInfo{
+ .sType = VK_STRUCTURE_TYPE_SEMAPHORE_TYPE_CREATE_INFO,
+ .semaphoreType = VK_SEMAPHORE_TYPE_TIMELINE,
+ .initialValue = 0,
+ };
+
+ const auto sci = VkSemaphoreCreateInfo{
+ .sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO,
+ .pNext = &stci,
+ };
+
+ auto semaphore = VkSemaphore{};
+ device_context.vtable.CreateSemaphore(device_context.device, &sci, nullptr,
+ &semaphore);
+ return semaphore;
}
+QueueContext::QueueContext(DeviceContext& device_context, const VkQueue& queue,
+ const std::uint32_t& queue_family_index)
+ : device_context(device_context), queue(queue),
+ queue_family_index(queue_family_index),
+ // Important we make the command pool before the timestamp pool, because
+ // it's a dependency.
+ command_pool(make_command_pool(device_context, queue_family_index)),
+ semaphore(make_semaphore(device_context)),
+ timestamp_pool(std::make_unique<TimestampPool>(*this)) {}
+
QueueContext::~QueueContext() {
+ // Ugly - destructors of timestamp_pool should be called before we destroy
+ // our vulkan objects.
+ this->timestamp_pool.reset();
+
+ const auto& vtable = this->device_context.vtable;
+
+ vtable.DestroySemaphore(this->device_context.device, this->semaphore,
+ nullptr);
+ vtable.DestroyCommandPool(this->device_context.device, this->command_pool,
+ nullptr);
}
} // namespace low_latency \ No newline at end of file
diff --git a/src/queue_context.hh b/src/queue_context.hh
index eb3f2ea..49bfcdf 100644
--- a/src/queue_context.hh
+++ b/src/queue_context.hh
@@ -6,32 +6,27 @@
#include <vulkan/utility/vk_dispatch_table.h>
#include <vulkan/vulkan.hpp>
-#include <deque>
-#include <vector>
+#include <memory>
namespace low_latency {
+
+class DeviceContext;
class QueueContext final {
public:
- VkDevice device;
- VkuDeviceDispatchTable vtable;
+ DeviceContext& device_context;
- VkQueue queue;
- std::uint32_t queue_family_index;
+ const VkQueue queue;
+ const std::uint32_t queue_family_index;
VkSemaphore semaphore;
VkCommandPool command_pool;
- TimestampPool timestamp_pool;
-
- std::deque<
- std::vector<std::pair<TimestampPool::Handle, TimestampPool::Handle>>>
- tracked_queues;
+ std::unique_ptr<TimestampPool> timestamp_pool;
public:
- QueueContext(const VkDevice& device, const VkQueue queue,
- const std::uint32_t& queue_family_index,
- const VkuDeviceDispatchTable& vtable);
+ QueueContext(DeviceContext& device_context, const VkQueue& queue,
+ const std::uint32_t& queue_family_index);
QueueContext(const QueueContext&) = delete;
QueueContext(QueueContext&&) = delete;
QueueContext operator==(const QueueContext&) = delete;
diff --git a/src/timestamp_pool.cc b/src/timestamp_pool.cc
index 1dc37b2..a70c299 100644
--- a/src/timestamp_pool.cc
+++ b/src/timestamp_pool.cc
@@ -1,11 +1,15 @@
#include "timestamp_pool.hh"
+#include "device_context.hh"
+#include "queue_context.hh"
#include <ranges>
#include <vulkan/vulkan_core.h>
namespace low_latency {
-TimestampPool::block TimestampPool::allocate() {
+TimestampPool::Block TimestampPool::allocate() {
+ const auto& device_context = this->queue_context.device_context;
+
const auto query_pool = [&]() -> VkQueryPool {
const auto qpci = VkQueryPoolCreateInfo{
.sType = VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO,
@@ -13,7 +17,8 @@ TimestampPool::block TimestampPool::allocate() {
.queryCount = this->TIMESTAMP_QUERY_POOL_SIZE};
auto query_pool = VkQueryPool{};
- vtable.CreateQueryPool(device, &qpci, nullptr, &query_pool);
+ device_context.vtable.CreateQueryPool(device_context.device, &qpci,
+ nullptr, &query_pool);
return query_pool;
}();
@@ -21,34 +26,32 @@ TimestampPool::block TimestampPool::allocate() {
std::views::iota(0u, this->TIMESTAMP_QUERY_POOL_SIZE / 2) |
std::views::transform([](const std::uint64_t& i) { return 2 * i; });
- const auto available_keys = std::make_shared<available_query_indicies_t>(
+ auto available_indices = std::make_unique<available_query_indicies_t>(
available_query_indicies_t{std::begin(key_range), std::end(key_range)});
- auto command_buffers = [this]() -> auto {
+ auto command_buffers = [&, this]() -> auto {
auto command_buffers =
std::vector<VkCommandBuffer>(this->TIMESTAMP_QUERY_POOL_SIZE);
const auto cbai = VkCommandBufferAllocateInfo{
.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO,
- .commandPool = this->command_pool,
+ .commandPool = this->queue_context.command_pool,
.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY,
.commandBufferCount =
static_cast<std::uint32_t>(std::size(command_buffers)),
};
- vtable.AllocateCommandBuffers(device, &cbai,
- std::data(command_buffers));
+ device_context.vtable.AllocateCommandBuffers(
+ device_context.device, &cbai, std::data(command_buffers));
return std::make_unique<std::vector<VkCommandBuffer>>(command_buffers);
}();
- return block{.query_pool = query_pool,
- .available_indicies = available_keys,
+ return Block{.query_pool = query_pool,
+ .available_indicies = std::move(available_indices),
.command_buffers = std::move(command_buffers)};
}
-TimestampPool::TimestampPool(const VkDevice& device,
- const VkuDeviceDispatchTable& vtable,
- const VkCommandPool& command_pool)
- : device(device), vtable(vtable), command_pool(command_pool) {
+TimestampPool::TimestampPool(QueueContext& queue_context)
+ : queue_context(queue_context) {
// Allocate one block on construction, it's likely more than enough!
this->blocks.emplace_back(this->allocate());
@@ -69,11 +72,11 @@ std::unique_ptr<TimestampPool::Handle> TimestampPool::acquire() {
}();
const auto query_pool = vacant_iter->query_pool;
- auto& available_indices = vacant_iter->available_indicies;
+ auto& available_indices = *vacant_iter->available_indicies;
// Grab any element from our set and erase it immediately after.
- const auto query_index = *std::begin(*available_indices);
- available_indices->erase(std::begin(*available_indices));
+ const auto query_index = *std::begin(available_indices);
+ available_indices.erase(std::begin(available_indices));
const auto command_buffers = [&]() -> auto {
auto command_buffers = std::array<VkCommandBuffer, 2>{};
@@ -91,8 +94,7 @@ std::unique_ptr<TimestampPool::Handle> TimestampPool::acquire() {
}
TimestampPool::Handle::Handle(
- const std::weak_ptr<TimestampPool::available_query_indicies_t>&
- index_origin,
+ TimestampPool::available_query_indicies_t& index_origin,
const std::size_t block_index, const VkQueryPool& query_pool,
const std::uint64_t query_index,
const std::array<VkCommandBuffer, 2>& command_buffers)
@@ -101,10 +103,7 @@ TimestampPool::Handle::Handle(
command_buffers(command_buffers) {}
TimestampPool::Handle::~Handle() {
- if (const auto origin = this->index_origin.lock(); origin) {
- assert(!origin->contains(this->query_index));
- origin->insert(this->query_index);
- }
+ this->index_origin.insert(this->query_index);
}
void TimestampPool::Handle::setup_command_buffers(
@@ -136,6 +135,8 @@ void TimestampPool::poll() {
this->cached_timestamps.clear();
this->cached_timestamps.reserve(std::size(this->blocks));
+ const auto& device_context = this->queue_context.device_context;
+
std::ranges::transform(
this->blocks, std::back_inserter(this->cached_timestamps),
[&, this](const auto& block) {
@@ -144,14 +145,15 @@ void TimestampPool::poll() {
auto timestamps = std::make_unique<std::vector<std::uint64_t>>(
this->TIMESTAMP_QUERY_POOL_SIZE);
- const auto result = vtable.GetQueryPoolResults(
- this->device, query_pool, 0, this->TIMESTAMP_QUERY_POOL_SIZE,
+ const auto result = device_context.vtable.GetQueryPoolResults(
+ device_context.device, query_pool, 0,
+ this->TIMESTAMP_QUERY_POOL_SIZE,
this->TIMESTAMP_QUERY_POOL_SIZE * sizeof(std::uint64_t),
std::data(*timestamps), sizeof(uint64_t),
VK_QUERY_RESULT_64_BIT);
- // Might return not ready when any of them aren't ready, which is
- // not an error for our use case.
+ // Might return not ready when any of them aren't ready, which
+ // is not an error for our use case.
assert(result == VK_SUCCESS || result == VK_NOT_READY);
return timestamps;
@@ -169,4 +171,16 @@ std::uint64_t TimestampPool::get_polled(const Handle& handle) {
return handle.query_index;
}
+TimestampPool::~TimestampPool() {
+ const auto& device = this->queue_context.device_context.device;
+ const auto& vtable = this->queue_context.device_context.vtable;
+
+ for (const auto& block : this->blocks) {
+ vtable.FreeCommandBuffers(device, this->queue_context.command_pool,
+ std::size(*block.command_buffers),
+ std::data(*block.command_buffers));
+ vtable.DestroyQueryPool(device, block.query_pool, nullptr);
+ }
+}
+
} // namespace low_latency \ No newline at end of file
diff --git a/src/timestamp_pool.hh b/src/timestamp_pool.hh
index 7efa4ee..82c4721 100644
--- a/src/timestamp_pool.hh
+++ b/src/timestamp_pool.hh
@@ -43,25 +43,25 @@
namespace low_latency {
+class QueueContext;
+
class TimestampPool final {
private:
static constexpr auto TIMESTAMP_QUERY_POOL_SIZE = 512u;
static_assert(TIMESTAMP_QUERY_POOL_SIZE % 2 == 0);
private:
- VkuDeviceDispatchTable vtable;
- VkDevice device;
- VkCommandPool command_pool;
+ QueueContext& queue_context;
// VkQueryPool with an unordered set of keys available for reading.
using available_query_indicies_t = std::unordered_set<std::uint64_t>;
- struct block {
+ struct Block {
VkQueryPool query_pool;
- std::shared_ptr<available_query_indicies_t> available_indicies;
+ std::unique_ptr<available_query_indicies_t> available_indicies;
std::unique_ptr<std::vector<VkCommandBuffer>> command_buffers;
};
- std::vector<block> blocks; // multiple blocks
+ std::vector<Block> blocks; // multiple blocks
// A snapshot of all available blocks for reading after each poll.
std::vector<std::unique_ptr<std::vector<std::uint64_t>>> cached_timestamps;
@@ -74,7 +74,7 @@ class TimestampPool final {
friend class TimestampPool;
private:
- std::weak_ptr<available_query_indicies_t> index_origin;
+ available_query_indicies_t& index_origin;
std::size_t block_index;
public:
@@ -83,8 +83,7 @@ class TimestampPool final {
std::array<VkCommandBuffer, 2> command_buffers;
public:
- Handle(const std::weak_ptr<TimestampPool::available_query_indicies_t>&
- index_origin,
+ Handle(TimestampPool::available_query_indicies_t& index_origin,
const std::size_t block_index, const VkQueryPool& query_pool,
const std::uint64_t query_index,
const std::array<VkCommandBuffer, 2>& command_buffers);
@@ -99,15 +98,15 @@ class TimestampPool final {
};
private:
- block allocate();
+ Block allocate();
public:
- TimestampPool(const VkDevice& device, const VkuDeviceDispatchTable& vtable,
- const VkCommandPool& command_pool);
+ TimestampPool(QueueContext& queue_context);
TimestampPool(const TimestampPool&) = delete;
TimestampPool(TimestampPool&&) = delete;
TimestampPool operator==(const TimestampPool&) = delete;
TimestampPool operator==(TimestampPool&&) = delete;
+ ~TimestampPool();
public:
// Hands out a Handle with a pool and index of two uint64_t's.