4 files changed, 31 insertions, 28 deletions
diff --git a/src/device_context.cc b/src/device_context.cc
index b8d3482..67af3aa 100644
--- a/src/device_context.cc
+++ b/src/device_context.cc
@@ -53,14 +53,6 @@ void DeviceContext::Clock::calibrate() {
     };
     auto calibrated_result = CalibratedResult{};
 
-    // we probably want to use this instead bc clock_gettime isn't guaranteed
-    // by steady clock afaik
-    /*
-    struct timespec tv;
-    clock_gettime(CLOCK_MONOTONIC, &tv);
-    return tv.tv_nsec + tv.tv_sec*1000000000ull;
-    */
-
     const auto steady_before = std::chrono::steady_clock::now();
     device.vtable.GetCalibratedTimestampsKHR(device.device, 2, std::data(infos),
                                              &calibrated_result.device,
diff --git a/src/layer.cc b/src/layer.cc
index 24ee72e..e2777b6 100644
--- a/src/layer.cc
+++ b/src/layer.cc
@@ -600,14 +600,6 @@ vkQueuePresentKHR(VkQueue queue, const VkPresentInfoKHR* present_info) {
         queue_context->notify_present(*present_info);
     }
 
-    const auto debug_log_time = [](const auto& diff) {
-        using namespace std::chrono;
-        const auto ms = duration_cast<milliseconds>(diff);
-        const auto us = duration_cast<microseconds>(diff - ms);
-        const auto ns = duration_cast<nanoseconds>(diff - ms - us);
-        std::cerr << ms << " " << us << " " << ns << "\n";
-    };
-
     queue_context->sleep_in_present();
 
     return VK_SUCCESS;
diff --git a/src/queue_context.cc b/src/queue_context.cc
index 342f393..4c8f776 100644
--- a/src/queue_context.cc
+++ b/src/queue_context.cc
@@ -319,14 +319,23 @@ void QueueContext::process_frames() {
 
         const auto frametime = *b - *a;
 
+        // Gap from the latest recorded timing's gpu_end to *a (zero if none).
+        const auto cpu_time =
+            [&]() -> DeviceContext::Clock::time_point_t::duration {
+            if (this->timings.empty()) {
+                return DeviceContext::Clock::time_point_t::duration::zero();
+            }
+            return *a - this->timings.back()->gpu_end;
+        }();
+
         std::cerr
             << "        calculated total time from last frame (frametime): ";
         debug_log_time(*b - *a);
 
         this->timings.emplace_back(std::make_unique<Timing>(
-            Timing{.gpu_start = *a,
-                   .gpu_end = *b,
+            Timing{.gpu_end = *b,
                    .gpu_time = frametime,
+                   .cpu_time = cpu_time,
                    .frame = std::move(this->in_flight_frames.front())}));
         this->in_flight_frames.pop_front();
     }
@@ -393,12 +402,25 @@ void QueueContext::sleep_in_present() {
         // return vect[0]->frametime;
         return vect[std::size(vect) / 2]->gpu_time;
     }();
+
+    // Median cpu_time over this->timings; assumes non-empty (TODO confirm).
+    const auto expected_cputime = [&, this]() {
+        auto vect = std::vector<Timing*>{};
+        std::ranges::transform(this->timings, std::back_inserter(vect),
+                               [](const auto& timing) { return timing.get(); });
+        std::ranges::sort(vect, [](const auto& a, const auto& b) {
+            return a->cpu_time < b->cpu_time;
+        });
+        return vect[std::size(vect) / 2]->cpu_time;
+    }();
     std::cerr << "    expected gputime: ";
     debug_log_time(expected_gputime);
+    std::cerr << "    expected cputime: ";
+    debug_log_time(expected_cputime);
 
-    //                                                 PRESENT CALL
-    // |-------------------------|---------------------------|
-    // ^a                        ^swap_acquire               ^b
+    //                               PRESENT CALL
+    // |--------------|-------------------|----------------|
+    // a        swap_acquire              b                c
     //
     // Us, the CPU on the host, is approximately at 'b'.
     // We have a good guess for the distance between
@@ -432,7 +454,7 @@ void QueueContext::sleep_in_present() {
 
     const auto now = std::chrono::steady_clock::now();
     const auto dist = now - a;
-    const auto expected = expected_gputime - dist;
+    const auto expected = expected_gputime - dist - expected_cputime;
 
     const auto swi = VkSemaphoreWaitInfo{
         .sType = VK_STRUCTURE_TYPE_SEMAPHORE_WAIT_INFO,
@@ -440,7 +462,8 @@ void QueueContext::sleep_in_present() {
         .pSemaphores = &this->semaphore,
         .pValues = &frame->end.sequence,
     };
-    vtable.WaitSemaphoresKHR(device.device, &swi, std::max(expected.count(), 0l));
+    vtable.WaitSemaphoresKHR(device.device, &swi,
+                             std::max(expected.count(), 0l));
 
     /*
 
diff --git a/src/queue_context.hh b/src/queue_context.hh
index 356de33..172683d 100644
--- a/src/queue_context.hh
+++ b/src/queue_context.hh
@@ -22,8 +22,6 @@ class QueueContext final : public Context {
     const VkQueue queue;
     const std::uint32_t queue_family_index;
 
-    // I used to use these to signal when we could read timestamps until
-    // I realised you could use hostQueryReset.
     std::uint64_t semaphore_sequence = 0;
     VkSemaphore semaphore;
 
@@ -64,11 +62,9 @@ class QueueContext final : public Context {
     std::deque<std::unique_ptr<Frame>> in_flight_frames;
 
     struct Timing {
-        DeviceContext::Clock::time_point_t gpu_start;
         DeviceContext::Clock::time_point_t gpu_end;
 
-        DeviceContext::Clock::time_point_t::duration cpu_time;
-        DeviceContext::Clock::time_point_t::duration gpu_time;
+        DeviceContext::Clock::time_point_t::duration gpu_time, cpu_time;
         
         std::unique_ptr<Frame> frame;
     };