From 8f4501215c0dbbbde59da2d015fdec3dbe5131bc Mon Sep 17 00:00:00 2001
From: Nicolas James <Eele1Ephe7uZahRie@tutanota.com>
Date: Fri, 13 Feb 2026 18:21:04 +1100
Subject: add working frame tracking and commit before i break everything

---
 src/device_context.cc          |  61 ++++++++-
 src/device_context.hh          |  39 +++++-
 src/layer.cc                   |  58 ++++----
 src/physical_device_context.cc |  10 +-
 src/physical_device_context.hh |   2 +
 src/queue_context.cc           | 296 ++++++++++++++++++++++++++++++++++++++++-
 src/queue_context.hh           |  46 ++++++-
 src/timestamp_pool.cc          |  12 +-
 src/timestamp_pool.hh          |   6 +-
 9 files changed, 481 insertions(+), 49 deletions(-)
diff --git a/src/device_context.cc b/src/device_context.cc
index 5f5c1f7..4b39210 100644
--- a/src/device_context.cc
+++ b/src/device_context.cc
@@ -6,11 +6,12 @@
 namespace low_latency {
 
 DeviceContext::DeviceContext(InstanceContext& parent_instance,
+                             PhysicalDeviceContext& parent_physical_device,
                              const VkDevice& device,
                              const PFN_vkSetDeviceLoaderData& sdld,
                              VkuDeviceDispatchTable&& vtable)
-    : instance(parent_instance), device(device), sdld(sdld),
-      vtable(std::move(vtable)) {}
+    : instance(parent_instance), physical_device(parent_physical_device),
+      device(device), sdld(sdld), vtable(std::move(vtable)), clock(*this) {}
 
 DeviceContext::~DeviceContext() {
     // We will let the destructor handle clearing here, but they should be
@@ -20,4 +21,60 @@ DeviceContext::~DeviceContext() {
     }
 }
 
+void DeviceContext::notify_acquire(const VkSwapchainKHR& swapchain,
+                                   const std::uint32_t& image_index,
+                                   const VkSemaphore& signal_semaphore) {
+    // Look up this swapchain's per-image map, creating it on first use.
+    auto& latest_by_index =
+        this->swapchain_signals.try_emplace(swapchain).first->second;
+    // A previously recorded semaphore for this image index is replaced.
+    latest_by_index.insert_or_assign(image_index, signal_semaphore);
+}
+
+DeviceContext::Clock::Clock(const DeviceContext& context) {
+    const auto domains = std::vector<VkCalibratedTimestampInfoKHR>{
+        {VK_STRUCTURE_TYPE_CALIBRATED_TIMESTAMP_INFO_EXT, nullptr,
+         VK_TIME_DOMAIN_DEVICE_EXT},
+        {VK_STRUCTURE_TYPE_CALIBRATED_TIMESTAMP_INFO_EXT, nullptr,
+         VK_TIME_DOMAIN_CLOCK_MONOTONIC_EXT}};
+    auto samples = std::array<std::uint64_t, 2>{};
+    // Bracket the query with steady_clock reads and keep their midpoint.
+    const auto queried_from = std::chrono::steady_clock::now();
+    context.vtable.GetCalibratedTimestampsKHR(
+        context.device, 2, std::data(domains), std::data(samples),
+        &this->error_bound);
+    const auto queried_until = std::chrono::steady_clock::now();
+    const auto half_window = (queried_until - queried_from) / 2;
+    this->cpu_time = queried_from + half_window;
+    this->device_ticks = samples[0];
+    this->host_ns = samples[1];
+
+    // Might need to get physical limits again?
+    // NOTE(review): ticks_per_ns is declared std::uint64_t, so a fractional
+    // timestampPeriod would be truncated here - confirm this is intended.
+    this->ticks_per_ns =
+        context.physical_device.properties->limits.timestampPeriod;
+}
+
+DeviceContext::Clock::time_point_t
+DeviceContext::Clock::ticks_to_time(const std::uint64_t& ticks) const {
+    // Offset `ticks` from the tick count captured at calibration and apply
+    // that offset to the calibrated CPU time point. The tick difference is
+    // taken as a magnitude first because both operands are unsigned.
+    const auto calibrated = this->device_ticks;
+    const auto is_earlier = calibrated > ticks;
+
+    const auto tick_delta =
+        is_earlier ? calibrated - ticks : ticks - calibrated;
+    const auto offset =
+        std::chrono::nanoseconds(tick_delta * this->ticks_per_ns);
+
+    if (is_earlier) {
+        return this->cpu_time - offset;
+    }
+    return this->cpu_time + offset;
+}
+
+void DeviceContext::calibrate_timestamps() { clock = Clock(*this); }
+
 } // namespace low_latency
\ No newline at end of file
diff --git a/src/device_context.hh b/src/device_context.hh
index 3406da1..b55b70c 100644
--- a/src/device_context.hh
+++ b/src/device_context.hh
@@ -1,36 +1,69 @@
 #ifndef DEVICE_CONTEXT_HH_
 #define DEVICE_CONTEXT_HH_
 
+#include <chrono>
 #include <memory>
 #include <unordered_map>
 
 #include <vulkan/utility/vk_dispatch_table.h>
 #include <vulkan/vk_layer.h>
 #include <vulkan/vulkan.hpp>
+#include <vulkan/vulkan_core.h>
 
 #include "context.hh"
 #include "instance_context.hh"
+#include "physical_device_context.hh"
 
 namespace low_latency {
 
 class QueueContext;
 
 struct DeviceContext final : public Context {
+  public:
     InstanceContext& instance;
+    PhysicalDeviceContext& physical_device;
 
     const VkDevice device;
     const VkuDeviceDispatchTable vtable;
-
     // Do we need to use this unless we wrap dispatchable objects?
     const PFN_vkSetDeviceLoaderData sdld;
 
     std::unordered_map<VkQueue, std::shared_ptr<QueueContext>> queues;
 
+    // We map swapchains to image indexes and their last signalled semaphore.
+    using index_semaphores_t = std::unordered_map<std::uint32_t, VkSemaphore>;
+    std::unordered_map<VkSwapchainKHR, index_semaphores_t> swapchain_signals;
+
+    struct Clock {
+        using time_point_t = std::chrono::steady_clock::time_point;
+
+        time_point_t cpu_time;
+        std::uint64_t error_bound;
+        std::uint64_t device_ticks;
+        std::uint64_t host_ns;
+        std::uint64_t ticks_per_ns;
+
+      public:
+        Clock(const DeviceContext& device);
+        
+        time_point_t ticks_to_time(const std::uint64_t& ticks) const;
+    };
+    Clock clock;
+
   public:
-    DeviceContext(InstanceContext& parent_instance, const VkDevice& device,
-                  const PFN_vkSetDeviceLoaderData& sdld,
+    DeviceContext(InstanceContext& parent_instance,
+                  PhysicalDeviceContext& parent_physical,
+                  const VkDevice& device, const PFN_vkSetDeviceLoaderData& sdld,
                   VkuDeviceDispatchTable&& vtable);
     virtual ~DeviceContext();
+
+  public:
+    void notify_acquire(const VkSwapchainKHR& swapchain,
+                        const std::uint32_t& image_index,
+                        const VkSemaphore& signal_semaphore);
+
+  public:
+    void calibrate_timestamps();
 };
 
 }; // namespace low_latency
diff --git a/src/layer.cc b/src/layer.cc
index cead7cd..c521bb9 100644
--- a/src/layer.cc
+++ b/src/layer.cc
@@ -1,13 +1,10 @@
 #include "layer.hh"
 
-#include <iostream>
 #include <string_view>
+#include <thread>
 #include <unordered_map>
 #include <utility>
 
-// hack
-#include <deque>
-
 #include <vulkan/utility/vk_dispatch_table.h>
 #include <vulkan/vk_layer.h>
 #include <vulkan/vk_platform.h>
@@ -90,6 +87,7 @@ CreateInstance(const VkInstanceCreateInfo* pCreateInfo,
     auto vtable = VkuInstanceDispatchTable{
         INSTANCE_VTABLE_LOAD(DestroyInstance),
         INSTANCE_VTABLE_LOAD(EnumeratePhysicalDevices),
+        INSTANCE_VTABLE_LOAD(GetPhysicalDeviceProperties),
         INSTANCE_VTABLE_LOAD(GetInstanceProcAddr),
         INSTANCE_VTABLE_LOAD(CreateDevice),
         INSTANCE_VTABLE_LOAD(EnumerateDeviceExtensionProperties),
@@ -307,16 +305,20 @@ static VKAPI_ATTR VkResult VKAPI_CALL CreateDevice(
         DEVICE_VTABLE_LOAD(GetSemaphoreCounterValueKHR),
         DEVICE_VTABLE_LOAD(CmdWriteTimestamp2KHR),
         DEVICE_VTABLE_LOAD(QueueSubmit2KHR),
+        DEVICE_VTABLE_LOAD(GetCalibratedTimestampsKHR),
     };
 #undef DEVICE_VTABLE_LOAD
 
+    const auto physical_context = layer_context.get_context(physical_device);
+
     const auto key = layer_context.get_key(*pDevice);
     const auto lock = std::scoped_lock{layer_context.mutex};
     assert(!layer_context.contexts.contains(key));
 
     layer_context.contexts.try_emplace(
-        key, std::make_shared<DeviceContext>(instance_context, *pDevice, sdld,
-                                             std::move(vtable)));
+        key,
+        std::make_shared<DeviceContext>(instance_context, *physical_context,
+                                        *pDevice, sdld, std::move(vtable)));
 
     return VK_SUCCESS;
 }
@@ -415,6 +417,8 @@ static VKAPI_ATTR VkResult VKAPI_CALL vkAcquireNextImageKHR(
         return result;
     }
 
+    context->notify_acquire(swapchain, *pImageIndex, semaphore);
+
     return VK_SUCCESS;
 }
 
@@ -430,6 +434,9 @@ static VKAPI_ATTR VkResult VKAPI_CALL vkAcquireNextImage2KHR(
         return result;
     }
 
+    context->notify_acquire(pAcquireInfo->swapchain, *pImageIndex,
+                            pAcquireInfo->semaphore);
+
     return VK_SUCCESS;
 }
 
@@ -465,7 +472,7 @@ vkQueueSubmit(VkQueue queue, std::uint32_t submit_count,
     next_submit_infos[0].pCommandBuffers = std::data(next_command_buffers);
     next_submit_infos[0].commandBufferCount = std::size(next_command_buffers);
 
-    const auto next_signal = queue_context->semaphore_sequence + 1;
+    const auto next_signal = 1 + queue_context->semaphore_sequence++;
     const auto tail_tssi = VkTimelineSemaphoreSubmitInfo{
         .sType = VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO_KHR,
         .signalSemaphoreValueCount = 1,
@@ -488,13 +495,8 @@ vkQueueSubmit(VkQueue queue, std::uint32_t submit_count,
         return res;
     }
 
-    // Hack for now, store timestamp handles.
-    queue_context->handle_hack.push_front(std::move(timestamp_handle));
-    if (std::size(queue_context->handle_hack) > 250) {
-        queue_context->handle_hack.pop_back();
-    }
-
-    ++queue_context->semaphore_sequence;
+    queue_context->notify_submit(std::span{submit_info, submit_count},
+                                 next_signal, std::move(timestamp_handle));
 
     return VK_SUCCESS;
 }
@@ -534,10 +536,12 @@ vkQueueSubmit2(VkQueue queue, std::uint32_t submit_count,
     next_submit_infos[0].commandBufferInfoCount =
         std::size(next_command_buffers);
 
+    const auto target_semaphore_sequence =
+        1 + queue_context->semaphore_sequence++;
     const auto tail_ssi = VkSemaphoreSubmitInfo{
         .sType = VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO,
         .semaphore = queue_context->semaphore,
-        .value = queue_context->semaphore_sequence + 1,
+        .value = target_semaphore_sequence,
         .stageMask = VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
     };
     const auto tail_cbsi = VkCommandBufferSubmitInfo{
@@ -559,13 +563,9 @@ vkQueueSubmit2(VkQueue queue, std::uint32_t submit_count,
         return res;
     }
 
-    // hack
-    queue_context->handle_hack.push_front(std::move(timestamp_handle));
-    if (std::size(queue_context->handle_hack) > 250) {
-        queue_context->handle_hack.pop_back();
-    }
-
-    ++queue_context->semaphore_sequence;
+    queue_context->notify_submit({submit_infos, submit_count},
+                                 target_semaphore_sequence,
+                                 std::move(timestamp_handle));
 
     return VK_SUCCESS;
 }
@@ -580,8 +580,8 @@ vkQueueSubmit2KHR(VkQueue queue, std::uint32_t submit_count,
 static VKAPI_ATTR VkResult VKAPI_CALL
 vkQueuePresentKHR(VkQueue queue, const VkPresentInfoKHR* present_info) {
 
-    const auto& vtable =
-        layer_context.get_context(queue)->device_context.vtable;
+    const auto queue_context = layer_context.get_context(queue);
+    const auto& vtable = queue_context->device_context.vtable;
 
     if (const auto res = vtable.QueuePresentKHR(queue, present_info);
         res != VK_SUCCESS) {
@@ -589,6 +589,16 @@ vkQueuePresentKHR(VkQueue queue, const VkPresentInfoKHR* present_info) {
         return res;
     }
 
+    if (present_info) { // might not be needed
+        queue_context->notify_present(*present_info);
+    }
+
+    if (const auto sleep_time = queue_context->get_delay_time();
+        sleep_time.has_value()) {
+
+        std::this_thread::sleep_for(*sleep_time);
+    }
+
     return VK_SUCCESS;
 }
 
diff --git a/src/physical_device_context.cc b/src/physical_device_context.cc
index 105b840..d265c9d 100644
--- a/src/physical_device_context.cc
+++ b/src/physical_device_context.cc
@@ -1,10 +1,16 @@
 #include "physical_device_context.hh"
 
 namespace low_latency {
-
+    
 PhysicalDeviceContext::PhysicalDeviceContext(
     InstanceContext& instance_context, const VkPhysicalDevice& physical_device)
-    : instance(instance_context), physical_device(physical_device) {}
+    : instance(instance_context), physical_device(physical_device) {
+
+    auto props = VkPhysicalDeviceProperties{};
+    instance.vtable.GetPhysicalDeviceProperties(this->physical_device, &props);
+    this->properties =
+        std::make_unique<VkPhysicalDeviceProperties>(std::move(props));
+}
 
 PhysicalDeviceContext::~PhysicalDeviceContext() {}
 
diff --git a/src/physical_device_context.hh b/src/physical_device_context.hh
index 639fa0f..8eb4a1a 100644
--- a/src/physical_device_context.hh
+++ b/src/physical_device_context.hh
@@ -14,6 +14,8 @@ class PhysicalDeviceContext final : public Context {
     InstanceContext& instance;
 
     const VkPhysicalDevice physical_device;
+    
+    std::unique_ptr<VkPhysicalDeviceProperties> properties;
 
   public:
     PhysicalDeviceContext(InstanceContext& instance_context,
diff --git a/src/queue_context.cc b/src/queue_context.cc
index 930b0c5..9b46773 100644
--- a/src/queue_context.cc
+++ b/src/queue_context.cc
@@ -2,6 +2,9 @@
 #include "device_context.hh"
 #include "timestamp_pool.hh"
 
+#include <chrono>
+#include <iostream>
+
 namespace low_latency {
 
 static VkCommandPool
@@ -51,12 +54,9 @@ QueueContext::QueueContext(DeviceContext& device_context, const VkQueue& queue,
       timestamp_pool(std::make_unique<TimestampPool>(*this)) {}
 
 QueueContext::~QueueContext() {
-    
-    // nuke our handles, so we avoid segfaults for now
-    this->handle_hack.clear();
-    
-    // Ugly - destructors of timestamp_pool should be called before we destroy
-    // our vulkan objects.
+
+    this->in_flight_frames.clear();
+    this->submissions.clear();
     this->timestamp_pool.reset();
 
     const auto& vtable = this->device_context.vtable;
@@ -66,4 +66,288 @@ QueueContext::~QueueContext() {
                               nullptr);
 }
 
+void QueueContext::notify_submit(
+    std::span<const VkSubmitInfo> infos,
+    const std::uint64_t target_semaphore_sequence,
+    std::shared_ptr<TimestampPool::Handle>&& handle) {
+
+    // Known limitation: the wait/signal semaphores of every batch in this
+    // call are merged, so the whole call is tracked as one submission.
+    auto waited = std::unordered_set<VkSemaphore>{};
+    auto signalled = std::unordered_set<VkSemaphore>{};
+    for (const auto& batch : infos) {
+        waited.insert(batch.pWaitSemaphores,
+                      batch.pWaitSemaphores + batch.waitSemaphoreCount);
+        signalled.insert(batch.pSignalSemaphores,
+                         batch.pSignalSemaphores + batch.signalSemaphoreCount);
+    }
+
+    // Queue it with the timeline value and timestamp handle for this call.
+    this->submissions.emplace_back(std::make_unique<Submission>(
+        std::move(signalled), std::move(waited), target_semaphore_sequence,
+        std::move(handle)));
+
+    // TODO HACK: cap the history at the 100 most recent submissions.
+    if (std::size(this->submissions) > 100) {
+        this->submissions.pop_front();
+    }
+}
+
+void QueueContext::notify_submit(
+    std::span<const VkSubmitInfo2> infos,
+    const std::uint64_t target_semaphore_sequence,
+    std::shared_ptr<TimestampPool::Handle>&& handle) {
+
+    // Gather the semaphore handle of every wait/signal entry across all of
+    // the batches in this call.
+    auto waited = std::unordered_set<VkSemaphore>{};
+    auto signalled = std::unordered_set<VkSemaphore>{};
+    for (const auto& batch : infos) {
+        const auto signal_infos = std::span(batch.pSignalSemaphoreInfos,
+                                            batch.signalSemaphoreInfoCount);
+        for (const auto& signal_info : signal_infos) {
+            signalled.insert(signal_info.semaphore);
+        }
+        const auto wait_infos = std::span(batch.pWaitSemaphoreInfos,
+                                          batch.waitSemaphoreInfoCount);
+        for (const auto& wait_info : wait_infos) {
+            waited.insert(wait_info.semaphore);
+        }
+    }
+
+    this->submissions.emplace_back(std::make_unique<Submission>(
+        std::move(signalled), std::move(waited), target_semaphore_sequence,
+        std::move(handle)));
+
+    // TODO HACK: cap the history at the 100 most recent submissions.
+    if (std::size(this->submissions) > 100) {
+        this->submissions.pop_front();
+    }
+}
+
+void QueueContext::notify_present(const VkPresentInfoKHR& info) {
+
+    // Resolve the frame for this present; nullptr if it can't be matched.
+    auto frame = [&]() -> std::unique_ptr<Frame> {
+        // Semaphores the present itself waits on.
+        const auto wait_semaphores = std::unordered_set<VkSemaphore>{
+            info.pWaitSemaphores,
+            std::next(info.pWaitSemaphores, info.waitSemaphoreCount)};
+
+        // Semaphores recorded by DeviceContext::notify_acquire for each
+        // swapchain image being presented.
+        auto collected_semaphores = std::unordered_set<VkSemaphore>{};
+        for (auto i = std::uint32_t{0}; i < info.swapchainCount; ++i) {
+            const auto& swapchain = info.pSwapchains[i];
+            const auto& index = info.pImageIndices[i];
+
+            // Shouldn't be possible to present a swapchain image that was
+            // never acquired.
+
+            const auto& signals = this->device_context.swapchain_signals;
+            const auto swapchain_it = signals.find(swapchain);
+            assert(swapchain_it != std::end(signals));
+            const auto index_it = swapchain_it->second.find(index);
+            assert(index_it != std::end(swapchain_it->second));
+
+            collected_semaphores.emplace(index_it->second);
+        }
+
+        // Start: the newest submission waiting on one of those acquire
+        // semaphores.
+        const auto start_submission_it = std::ranges::find_if(
+            std::rbegin(this->submissions), std::rend(this->submissions),
+            [&](const auto& submission) {
+                return std::ranges::any_of(
+                    submission->waits, [&](const auto& wait) {
+                        return collected_semaphores.contains(wait);
+                    });
+            });
+
+        if (start_submission_it == std::rend(this->submissions)) {
+            std::cout << "couldn't find starting submission!\n";
+            return nullptr;
+        }
+        const auto& start_submission = *start_submission_it;
+
+        // End: the newest submission signalling one of the semaphores the
+        // present waits on.
+        const auto end_submission_it = std::ranges::find_if(
+            std::rbegin(this->submissions), std::rend(this->submissions),
+            [&](const auto& submission) {
+                return std::ranges::any_of(
+                    submission->signals, [&](const auto& signal) {
+                        return wait_semaphores.contains(signal);
+                    });
+            });
+
+        if (end_submission_it == std::rend(this->submissions)) {
+            std::cout << "couldn't find ending submission!\n";
+            return nullptr;
+        }
+        const auto& end_submission = *end_submission_it;
+
+        return std::make_unique<Frame>(Frame{
+            .start_context = *this,
+            .start = start_submission->timestamp_handle,
+            .target_start_sequence =
+                start_submission->target_semaphore_sequence,
+            .end_context = *this,
+            .end = end_submission->timestamp_handle,
+            .target_end_sequence = end_submission->target_semaphore_sequence,
+        });
+    }();
+
+    this->in_flight_frames.emplace_back(std::move(frame));
+
+    // hack
+    if (this->in_flight_frames.size() > 5) {
+        this->in_flight_frames.pop_front();
+    }
+}
+
+// Logs timing for each in-flight frame; currently always returns nullopt.
+std::optional<QueueContext::duration_t> QueueContext::get_delay_time() {
+    if (!std::size(this->in_flight_frames)) {
+        return std::nullopt;
+    }
+
+    auto seq = std::uint64_t{};
+    this->device_context.vtable.GetSemaphoreCounterValueKHR(
+        this->device_context.device, this->semaphore, &seq);
+
+    // Get semaphore first, then poll!
+    this->timestamp_pool->poll();
+
+    // idk how frequently we should call this.
+    this->device_context.calibrate_timestamps();
+    // Only referenced by the commented-out median code further down.
+    static auto gpu_frametimes = std::deque<uint64_t>{};
+    static auto cpu_frametimes = std::deque<uint64_t>{};
+
+    const auto S = std::size(this->in_flight_frames);
+
+    std::cout << "\nSTART FRAME READOUT\n";
+    std::cout << "error bound: " << this->device_context.clock.error_bound
+              << '\n';
+    std::cout << "num frames in flight: " << S << '\n';
+    std::cout << "from oldest -> newest\n";
+
+    // const auto b_seq = semaphore_from_context(*this);
+    const auto now = std::chrono::steady_clock::now();
+
+    auto i = std::size_t{0};
+    for (; i < std::size(this->in_flight_frames); ++i) {
+        const auto& frame = this->in_flight_frames[i];
+        std::cout << "    Evaluating the frame that's " << S - i - 1
+                  << " behind\n";
+        if (!frame) {
+            std::cout << "        nullptr!\n";
+            continue;
+        }
+
+        std::cout << "    target start: " << frame->target_start_sequence << '\n';
+        std::cout << "    target end: " << frame->target_end_sequence << '\n';
+        if (seq < frame->target_start_sequence) {
+            std::cout << "        frame hasn't started yet!\n";
+            continue;
+        }
+        // `seq` has reached the frame's start value, so read its start ticks.
+        const auto start_ticks =
+            frame->start_context.timestamp_pool->get_polled(*frame->start);
+        std::cout << "        START TICKS: " << start_ticks << '\n';
+        const auto& a_clock = frame->start_context.device_context.clock;
+        const auto a = a_clock.ticks_to_time(start_ticks);
+        // Log how long ago that was, split into ms / us / ns components.
+        {
+            using namespace std::chrono;
+            const auto diff = now - a;
+            const auto ms = duration_cast<milliseconds>(diff);
+            const auto us = duration_cast<microseconds>(diff - ms);
+            const auto ns = duration_cast<nanoseconds>(diff - ms - us);
+            std::cout << "        frame started: " << ms << " ms " << us
+                      << " us " << ns << " ns ago\n";
+        }
+
+        if (seq < frame->target_end_sequence) {
+            std::cout << "        frame hasn't ended yet!\n";
+            continue;
+        }
+
+        // `true` makes get_polled read the slot after the handle's query index.
+        const auto end_ticks =
+            frame->end_context.timestamp_pool->get_polled(*frame->end, true);
+        const auto& b_clock = frame->end_context.device_context.clock;
+        std::cout << "        END_TICKS: " << end_ticks << '\n';
+        const auto b = b_clock.ticks_to_time(end_ticks);
+        {
+            using namespace std::chrono;
+            if (now <= b) {
+                std::cout << "b happened before now?\n";
+            }
+            const auto diff = now - b;
+            const auto ms = duration_cast<milliseconds>(diff);
+            const auto us = duration_cast<microseconds>(diff - ms);
+            const auto ns = duration_cast<nanoseconds>(diff - ms - us);
+            std::cout << "        frame ended: " << ms << " ms " << us
+                      << " us " << ns << " ns ago\n";
+        }
+
+        const auto gpu_time = b - a;
+        {
+            using namespace std::chrono;
+            const auto diff = gpu_time;
+            const auto ms = duration_cast<milliseconds>(diff);
+            const auto us = duration_cast<microseconds>(diff - ms);
+            const auto ns = duration_cast<nanoseconds>(diff - ms - us);
+            std::cout << "        gpu_time: " << ms << " ms " << us
+                      << " us " << ns << " ns ago\n";
+        }
+
+        /*
+        cpu_frametimes.emplace_back(cpu_time);
+        gpu_frametimes.emplace_back(gpu_time);
+        */
+    }
+
+    /*
+    if (remove_index.has_value()) {
+        this->in_flight_frames.erase(std::begin(this->in_flight_frames),
+                                     std::begin(this->in_flight_frames) +
+                                         *remove_index);
+    }
+    */
+
+    /*
+    auto g_copy = gpu_frametimes;
+    auto c_copy = cpu_frametimes;
+    std::ranges::sort(g_copy);
+    std::ranges::sort(c_copy);
+
+    constexpr auto N = 49;
+    if (std::size(cpu_frametimes) < N) {
+        return std::nullopt;
+    }
+
+    const auto F = std::size(g_copy);
+    // close enough to median lol
+    const auto g = g_copy[F / 2];
+    const auto c = c_copy[F / 2];
+
+    std::cout << g << '\n';
+
+    std::cout << "    median gpu: " << (g / 1'000'000) << " ms " << g / 1'000
+              << " us " << g << " ns\n";
+    std::cout << "    median cpu: " << c / 1'000'000 << " ms " << c / 1'000
+              << " us " << c << " ns\n";
+
+    if (F > N) {
+        gpu_frametimes.pop_front();
+        cpu_frametimes.pop_front();
+    }
+    */
+    // Nothing above produces a delay yet; the frame data is only logged.
+    return std::nullopt;
+}
+
 } // namespace low_latency
\ No newline at end of file
diff --git a/src/queue_context.hh b/src/queue_context.hh
index 184e31d..a6f43e5 100644
--- a/src/queue_context.hh
+++ b/src/queue_context.hh
@@ -7,8 +7,11 @@
 #include <vulkan/utility/vk_dispatch_table.h>
 #include <vulkan/vulkan.hpp>
 
-#include <memory>
+#include <chrono>
 #include <deque>
+#include <memory>
+#include <span>
+#include <unordered_set>
 
 namespace low_latency {
 
@@ -21,19 +24,56 @@ class QueueContext final : public Context {
     const VkQueue queue;
     const std::uint32_t queue_family_index;
 
-    // this is incremented and tied to our semaphore
     std::uint64_t semaphore_sequence = 0;
     VkSemaphore semaphore;
 
     VkCommandPool command_pool;
 
     std::unique_ptr<TimestampPool> timestamp_pool;
-    std::deque<std::unique_ptr<TimestampPool::Handle>> handle_hack;
+
+    // Potentially in flight queue submissions
+    struct Submission {
+        const std::unordered_set<VkSemaphore> signals;
+        const std::unordered_set<VkSemaphore> waits;
+        const std::uint64_t target_semaphore_sequence;
+        const std::shared_ptr<TimestampPool::Handle> timestamp_handle;
+    };
+    std::deque<std::shared_ptr<Submission>> submissions;
+
+    // In flight frames!
+    // These might come from different contexts.
+    struct Frame {
+        const QueueContext& start_context;
+        const std::shared_ptr<TimestampPool::Handle> start;
+        const std::uint64_t target_start_sequence;
+
+        const QueueContext& end_context;
+        const std::shared_ptr<TimestampPool::Handle> end;
+        const std::uint64_t target_end_sequence;
+    };
+    // These can be null, which means we presented without finding the
+    // timestamps associated with the present.
+    std::deque<std::unique_ptr<Frame>> in_flight_frames;
 
   public:
     QueueContext(DeviceContext& device_context, const VkQueue& queue,
                  const std::uint32_t& queue_family_index);
     virtual ~QueueContext();
+
+  public:
+    void notify_submit(std::span<const VkSubmitInfo> infos,
+                       const std::uint64_t target_semaphore_sequence,
+                       std::shared_ptr<TimestampPool::Handle>&& handle);
+    void notify_submit(std::span<const VkSubmitInfo2> infos,
+                       const std::uint64_t target_semaphore_sequence,
+                       std::shared_ptr<TimestampPool::Handle>&& handle);
+
+    void notify_present(const VkPresentInfoKHR& info);
+
+  public:
+    // Computes the amount we should delay...
+    using duration_t = std::chrono::steady_clock::duration;
+    std::optional<duration_t> get_delay_time();
 };
 
 }; // namespace low_latency
diff --git a/src/timestamp_pool.cc b/src/timestamp_pool.cc
index e37dcd2..b4dc3c9 100644
--- a/src/timestamp_pool.cc
+++ b/src/timestamp_pool.cc
@@ -61,8 +61,8 @@ TimestampPool::TimestampPool(QueueContext& queue_context)
     this->blocks.emplace_back(this->allocate());
 }
 
-std::unique_ptr<TimestampPool::Handle> TimestampPool::acquire() {
-    const auto& vacant_iter = [this]() -> auto {
+std::shared_ptr<TimestampPool::Handle> TimestampPool::acquire() {
+    const auto vacant_iter = [this]() -> auto {
         const auto it =
             std::ranges::find_if(this->blocks, [](const auto& block) {
                 return std::size(*block.available_indicies);
@@ -93,7 +93,7 @@ std::unique_ptr<TimestampPool::Handle> TimestampPool::acquire() {
     const auto block_index = static_cast<std::size_t>(
         std::distance(std::begin(this->blocks), vacant_iter));
 
-    return std::make_unique<Handle>(available_indices, block_index, query_pool,
+    return std::make_shared<Handle>(available_indices, block_index, query_pool,
                                     query_index, command_buffers);
 }
 
@@ -164,15 +164,15 @@ void TimestampPool::poll() {
         });
 };
 
-std::uint64_t TimestampPool::get_polled(const Handle& handle) {
+std::uint64_t TimestampPool::get_polled(const Handle& handle, const bool hack) {
 
     assert(handle.block_index < std::size(this->cached_timestamps));
 
     const auto& cached_timestamp = this->cached_timestamps[handle.block_index];
     assert(cached_timestamp != nullptr);
-    assert(std::size(*cached_timestamp) < handle.query_index);
+    assert(handle.query_index + hack < std::size(*cached_timestamp)); // slot read below
 
-    return handle.query_index;
+    return (*cached_timestamp)[handle.query_index + hack]; // hack: following slot
 }
 
 TimestampPool::~TimestampPool() {
diff --git a/src/timestamp_pool.hh b/src/timestamp_pool.hh
index cc67b18..a4aa429 100644
--- a/src/timestamp_pool.hh
+++ b/src/timestamp_pool.hh
@@ -67,7 +67,7 @@ class TimestampPool final {
     std::vector<std::unique_ptr<std::vector<std::uint64_t>>> cached_timestamps;
 
   public:
-    // A handle represents two std::uint64_t blocks of timestamp memory and two
+    // A handle represents two std::uint64_t blocks of timestamp memory and two
     // command buffers.
     struct Handle final {
       private:
@@ -110,11 +110,11 @@ class TimestampPool final {
 
   public:
     // Hands out a Handle with a pool and index of two uint64_t's.
-    std::unique_ptr<Handle> acquire();
+    std::shared_ptr<Handle> acquire();
 
     void poll(); // saves the current state for future get's.
 
-    std::uint64_t get_polled(const Handle& handle);
+    std::uint64_t get_polled(const Handle& handle, const bool hack = false);
 };
 
 } // namespace low_latency
-- 
cgit v1.2.3