1 files changed, 31 insertions, 65 deletions
diff --git a/src/queue_context.hh b/src/queue_context.hh
index 221626f..48500e1 100644
--- a/src/queue_context.hh
+++ b/src/queue_context.hh
@@ -2,33 +2,23 @@
 #define QUEUE_STATE_HH_
 
 #include "context.hh"
-#include "device_context.hh"
+#include "device_clock.hh"
 #include "timestamp_pool.hh"
 
 #include <vulkan/utility/vk_dispatch_table.h>
 #include <vulkan/vulkan.hpp>
 
-#include <chrono>
 #include <deque>
 #include <memory>
-#include <unordered_set>
+#include <unordered_map>
 
 namespace low_latency {
 
 class QueueContext final : public Context {
   private:
-    // The amount of finished frame timing data we keep before eviction.
-    // For now, this value is also the number of data points used in the
-    // calculation of gpu timing information.
-    static constexpr auto MAX_TRACKED_TIMINGS = 50u;
     // The amount of queue submissions we allow tracked per queue before
-    // we give up tracking them. For a queue that is presented to,
-    // these submissions will be constantly moved to Frame structs so
-    // it's not an issue that we only track so many - unless it just
-    // happens that an application makes an unexpectedly large
-    // amount of vkQueueSubmit's per frame. For queues which don't
-    // present, this limit stops them from growing limitlessly in memory
-    // as we may not necessarily manually evict them yet.
+    // we give up tracking them. This is necessary for queues which do not
+    // present anything.
     static constexpr auto MAX_TRACKED_SUBMISSIONS = 50u;
 
   public:
@@ -59,55 +49,35 @@ class QueueContext final : public Context {
 
     // NVIDIA's extension lets the application explicitly state that this queue
     // does not contribute to the frame. AMD's extension has no such mechanism -
-    // so this will always be false.
+    // so this will always be false when using VK_AMD_anti_lag.
     bool should_ignore_latency = false;
 
   public:
-    // Potentially in flight queue submissions that come from this queue.
+    // I want our queue bookkeeping to be fairly simple and do one thing - track
+    // submissions that have yet to be presented to a swapchain. General
+    // idea:
+    //
+    // For each vkQueueSubmit (specifically for each pSubmitInfo in that
+    // hook) grab the VK_EXT_present_id value provided by the application for
+    // that submission. Once we add our timing objects as part of the hook, we
+    // then take those timing objects, bundle them into a Submission struct, and
+    // append it to the (potentially currently nonexistent) mapping of
+    // present_ids to deque<Submission>s. Now we cleanly track which queue
+    // submissions refer to which present_id.
+    //
+    // When our hook sees a vkQueuePresentKHR, we take the provided present_id
+    // and notify our device that it needs to watch for when this completes.
+    // We give it our submission. Now, it's out of our hands. We remove the
+    // present_id_t mapping when doing so.
     struct Submission {
-        const std::unordered_set<VkSemaphore> signals;
-        const std::unordered_set<VkSemaphore> waits;
-
-        const std::shared_ptr<TimestampPool::Handle> start_handle;
-        const std::shared_ptr<TimestampPool::Handle> end_handle;
-
-        const DeviceContext::Clock::time_point_t enqueued_time;
-    };
-    using submission_ptr_t = std::shared_ptr<Submission>;
-    std::deque<submission_ptr_t> submissions;
-
-    // In flight frame submissions grouped together.
-    // The first element in the deque refers to the first submission that
-    // contributed to that frame. The last element is the last submission before
-    // present was called.
-    // std::size(submissions) >= 1 btw
-    struct Frame {
-        std::deque<submission_ptr_t> submissions;
-
-        // the point that control flow was returned from VkQueuePresentKHR back
-        // to the application.
-        DeviceContext::Clock::time_point_t cpu_post_present_time;
-    };
-    std::deque<Frame> in_flight_frames;
-
-    // Completed frames.
-    struct Timing {
-        DeviceContext::Clock::time_point_t::duration gputime, cputime;
-
-        Frame frame;
+        std::shared_ptr<TimestampPool::Handle> head_handle, tail_handle;
+        DeviceClock::time_point_t cpu_present_time;
     };
-    std::deque<std::unique_ptr<Timing>> timings;
 
-  private:
-    // Drains submissions and promotes them into a single frame object.
-    void drain_submissions_to_frame();
-
-    // Drains in flight frames and promotes them into a Timing object if they
-    // have completed.
-    void drain_frames_to_timings();
-
-    // Antilag 1 equivalent where we sleep after present to reduce queueing.
-    void sleep_in_present();
+    using submissions_t =
+        std::shared_ptr<std::deque<std::unique_ptr<Submission>>>;
+    using present_id_t = std::uint64_t;
+    std::unordered_map<present_id_t, submissions_t> unpresented_submissions;
 
   public:
     QueueContext(DeviceContext& device_context, const VkQueue& queue,
@@ -115,17 +85,13 @@ class QueueContext final : public Context {
     virtual ~QueueContext();
 
   public:
-    void notify_submit(const VkSubmitInfo& info,
-                       const std::shared_ptr<TimestampPool::Handle> head_handle,
-                       const std::shared_ptr<TimestampPool::Handle> tail_handle,
-                       const DeviceContext::Clock::time_point_t& now);
-
-    void notify_submit(const VkSubmitInfo2& info,
+    void notify_submit(const present_id_t& present_id,
                        const std::shared_ptr<TimestampPool::Handle> head_handle,
                        const std::shared_ptr<TimestampPool::Handle> tail_handle,
-                       const DeviceContext::Clock::time_point_t& now);
+                       const DeviceClock::time_point_t& now);
 
-    void notify_present(const VkPresentInfoKHR& info);
+    void notify_present(const VkSwapchainKHR& swapchain,
+                        const std::uint64_t& present_id);
 
   public:
     bool should_inject_timestamps() const;