aboutsummaryrefslogtreecommitdiff
path: root/src/strategies
diff options
context:
space:
mode:
authorNicolas James <nj3ahxac@gmail.com>2026-04-08 00:56:40 +1000
committerNicolas James <nj3ahxac@gmail.com>2026-04-08 00:56:40 +1000
commiteb9719cc8b9a308654ccd2c3bce8a7047b6e2a1a (patch)
tree5e72b419d3dc900a35921be5e551b17552251769 /src/strategies
parent69764a869d99e9abd0fbe10c2773d3556d7f35e8 (diff)
Refactor storing submissions into FrameSpan class, reduce AntiLag thread contention
Diffstat (limited to 'src/strategies')
-rw-r--r--src/strategies/anti_lag/device_strategy.cc26
-rw-r--r--src/strategies/anti_lag/queue_strategy.cc33
-rw-r--r--src/strategies/anti_lag/queue_strategy.hh21
-rw-r--r--src/strategies/low_latency2/device_strategy.cc10
-rw-r--r--src/strategies/low_latency2/queue_strategy.cc44
-rw-r--r--src/strategies/low_latency2/queue_strategy.hh21
-rw-r--r--src/strategies/low_latency2/swapchain_monitor.cc33
-rw-r--r--src/strategies/low_latency2/swapchain_monitor.hh16
-rw-r--r--src/strategies/queue_strategy.hh11
9 files changed, 107 insertions, 108 deletions
diff --git a/src/strategies/anti_lag/device_strategy.cc b/src/strategies/anti_lag/device_strategy.cc
index b391371..2d1d9ad 100644
--- a/src/strategies/anti_lag/device_strategy.cc
+++ b/src/strategies/anti_lag/device_strategy.cc
@@ -15,7 +15,7 @@ AntiLagDeviceStrategy::AntiLagDeviceStrategy(DeviceContext& device)
AntiLagDeviceStrategy::~AntiLagDeviceStrategy() {}
void AntiLagDeviceStrategy::notify_update(const VkAntiLagDataAMD& data) {
- const auto lock = std::scoped_lock{this->mutex};
+ auto lock = std::unique_lock{this->mutex};
this->is_enabled = !(data.mode == VK_ANTI_LAG_MODE_OFF_AMD);
@@ -37,11 +37,14 @@ void AntiLagDeviceStrategy::notify_update(const VkAntiLagDataAMD& data) {
this->frame_index.reset();
return;
}
-
// If we're at the input stage, start marking submissions as relevant.
this->frame_index.emplace(data.pPresentationInfo->frameIndex);
- { // Input stage needs to wait for all queue submissions to complete.
+ lock.unlock();
+ // We need to collect all queue submissions and wait on them in this thread.
+ // Input stage needs to wait for all queue submissions to complete.
+ const auto queue_frame_spans = [&]() -> auto {
+ auto queue_frame_spans = std::vector<std::unique_ptr<FrameSpan>>{};
const auto device_lock = std::shared_lock{this->device.mutex};
for (const auto& iter : this->device.queues) {
const auto& queue = iter.second;
@@ -50,15 +53,30 @@ void AntiLagDeviceStrategy::notify_update(const VkAntiLagDataAMD& data) {
dynamic_cast<AntiLagQueueStrategy*>(queue->strategy.get());
assert(strategy);
- strategy->await_complete();
+ // Grab it from the queue, don't hold the lock.
+ const auto queue_lock = std::scoped_lock{strategy->mutex};
+ queue_frame_spans.emplace_back(std::move(strategy->frame_span));
+ strategy->frame_span.reset();
+ }
+ return queue_frame_spans;
+ }();
+
+ // Wait on them and relock the mutex.
+ for (const auto& frame_span : queue_frame_spans) {
+ if (frame_span) { // Can still be null here.
+ frame_span->await_completed();
}
}
+ lock.lock();
+
// We might need to wait a little more time to meet our frame limit.
using namespace std::chrono;
if (this->delay != 0us && this->previous_input_release.has_value()) {
+ lock.unlock();
std::this_thread::sleep_until(*this->previous_input_release +
this->delay);
+ lock.lock();
}
this->previous_input_release = steady_clock::now();
diff --git a/src/strategies/anti_lag/queue_strategy.cc b/src/strategies/anti_lag/queue_strategy.cc
index 27a9337..9c49e7c 100644
--- a/src/strategies/anti_lag/queue_strategy.cc
+++ b/src/strategies/anti_lag/queue_strategy.cc
@@ -12,7 +12,7 @@ AntiLagQueueStrategy::~AntiLagQueueStrategy() {}
void AntiLagQueueStrategy::notify_submit(
[[maybe_unused]] const VkSubmitInfo& submit,
- std::unique_ptr<Submission> submission) {
+ std::shared_ptr<TimestampPool::Handle> handle) {
const auto strategy =
dynamic_cast<AntiLagDeviceStrategy*>(this->queue.device.strategy.get());
@@ -22,12 +22,16 @@ void AntiLagQueueStrategy::notify_submit(
}
const auto lock = std::scoped_lock(this->mutex);
- this->pending_submissions.push_back(std::move(submission));
+ if (this->frame_span) {
+ this->frame_span->update(std::move(handle));
+ } else {
+ this->frame_span = std::make_unique<FrameSpan>(std::move(handle));
+ }
}
void AntiLagQueueStrategy::notify_submit(
[[maybe_unused]] const VkSubmitInfo2& submit,
- std::unique_ptr<Submission> submission) {
+ std::shared_ptr<TimestampPool::Handle> handle) {
const auto strategy =
dynamic_cast<AntiLagDeviceStrategy*>(this->queue.device.strategy.get());
@@ -37,26 +41,11 @@ void AntiLagQueueStrategy::notify_submit(
}
const auto lock = std::scoped_lock(this->mutex);
- this->pending_submissions.push_back(std::move(submission));
-}
-
-void AntiLagQueueStrategy::await_complete() {
-
- // Grab submissions while under a lock.
- const auto submissions = [&]() -> std::deque<std::unique_ptr<Submission>> {
- const auto lock = std::scoped_lock{this->mutex};
-
- auto submissions = std::move(this->pending_submissions);
- this->pending_submissions.clear();
- return submissions;
- }();
-
- // Wait for completion on the last submission.
- if (submissions.empty()) {
- return;
+ if (this->frame_span) {
+ this->frame_span->update(std::move(handle));
+ } else {
+ this->frame_span = std::make_unique<FrameSpan>(std::move(handle));
}
- const auto& last = submissions.back();
- last->handle->await_end_time();
}
// Stub - AntiLag doesn't care about presents.
diff --git a/src/strategies/anti_lag/queue_strategy.hh b/src/strategies/anti_lag/queue_strategy.hh
index 37c44a5..b1ae3e6 100644
--- a/src/strategies/anti_lag/queue_strategy.hh
+++ b/src/strategies/anti_lag/queue_strategy.hh
@@ -3,7 +3,7 @@
#include "strategies/queue_strategy.hh"
-#include <deque>
+#include "frame_span.hh"
#include <memory>
#include <mutex>
@@ -12,25 +12,22 @@ namespace low_latency {
class QueueContext;
class AntiLagQueueStrategy final : public QueueStrategy {
- private:
+ public:
std::mutex mutex;
- std::deque<std::unique_ptr<Submission>> pending_submissions;
+ std::unique_ptr<FrameSpan> frame_span; // Null represents no work.
public:
AntiLagQueueStrategy(QueueContext& queue);
virtual ~AntiLagQueueStrategy();
public:
- virtual void notify_submit(const VkSubmitInfo& submit,
- std::unique_ptr<Submission> submission) override;
- virtual void notify_submit(const VkSubmitInfo2& submit,
- std::unique_ptr<Submission> submission) override;
+ virtual void
+ notify_submit(const VkSubmitInfo& submit,
+ std::shared_ptr<TimestampPool::Handle> handle) override;
+ virtual void
+ notify_submit(const VkSubmitInfo2& submit,
+ std::shared_ptr<TimestampPool::Handle> handle) override;
virtual void notify_present(const VkPresentInfoKHR& present) override;
-
- public:
- // Wait for all pending submissions to complete. Resets pending submissions
- // once done.
- void await_complete();
};
} // namespace low_latency
diff --git a/src/strategies/low_latency2/device_strategy.cc b/src/strategies/low_latency2/device_strategy.cc
index 32ff981..227c385 100644
--- a/src/strategies/low_latency2/device_strategy.cc
+++ b/src/strategies/low_latency2/device_strategy.cc
@@ -68,8 +68,8 @@ void LowLatency2DeviceStrategy::submit_swapchain_present_id(
// Iterate through all queues and grab any work that's associated with this
// present_id. Turn it into a vector of work that we give to our swapchain
// monitor.
- auto work = [&]() -> std::vector<std::deque<std::unique_ptr<Submission>>> {
- auto work = std::vector<std::deque<std::unique_ptr<Submission>>>{};
+ auto work = [&]() -> std::vector<std::unique_ptr<FrameSpan>> {
+ auto work = std::vector<std::unique_ptr<FrameSpan>>{};
const auto lock = std::scoped_lock{this->device.mutex};
for (const auto& queue_iter : this->device.queues) {
const auto& queue = queue_iter.second;
@@ -84,14 +84,14 @@ void LowLatency2DeviceStrategy::submit_swapchain_present_id(
// Need the lock now - we're modifying it.
const auto strategy_lock = std::unique_lock{strategy->mutex};
- const auto iter = strategy->present_id_submissions.find(present_id);
- if (iter == std::end(strategy->present_id_submissions)) {
+ const auto iter = strategy->frame_spans.find(present_id);
+ if (iter == std::end(strategy->frame_spans)) {
continue;
}
// Make sure we clean it up from the present as well.
work.push_back(std::move(iter->second));
- strategy->present_id_submissions.erase(iter);
+ strategy->frame_spans.erase(iter);
}
return work;
}();
diff --git a/src/strategies/low_latency2/queue_strategy.cc b/src/strategies/low_latency2/queue_strategy.cc
index 9a68b78..a020c0d 100644
--- a/src/strategies/low_latency2/queue_strategy.cc
+++ b/src/strategies/low_latency2/queue_strategy.cc
@@ -16,50 +16,48 @@ LowLatency2QueueStrategy::~LowLatency2QueueStrategy() {}
template <typename T>
static void notify_submit_impl(LowLatency2QueueStrategy& strategy,
const T& submit,
- std::unique_ptr<Submission> submission) {
+ std::shared_ptr<TimestampPool::Handle> handle) {
// It's actually not a requirement that we have this present id.
- const auto lspi = find_next<VkLatencySubmissionPresentIdNV>(
- &submit, VK_STRUCTURE_TYPE_LATENCY_SUBMISSION_PRESENT_ID_NV);
- const auto present_id = lspi ? lspi->presentID : 0;
+ const auto present_id = [&]() -> std::uint64_t {
+ const auto lspi = find_next<VkLatencySubmissionPresentIdNV>(
+ &submit, VK_STRUCTURE_TYPE_LATENCY_SUBMISSION_PRESENT_ID_NV);
+ return lspi ? lspi->presentID : 0;
+ }();
const auto lock = std::scoped_lock{strategy.mutex};
- const auto [iter, inserted] =
- strategy.present_id_submissions.try_emplace(present_id);
- iter->second.push_back(std::move(submission));
-
- // Remove stale submissions if we're presenting a lot to the same
- // present_id. This doesn't affect anything because we're waiting on the
- // last. It begs the question: should we should just store the last only?
- if (std::size(iter->second) >=
- LowLatency2QueueStrategy::MAX_TRACKED_OBJECTS) {
-
- iter->second.pop_front();
+ const auto [iter, inserted] = strategy.frame_spans.try_emplace(present_id);
+ if (inserted) {
+ iter->second = std::make_unique<FrameSpan>(std::move(handle));
+ } else {
+ iter->second->update(std::move(handle));
}
// Add our present_id to our ring tracking if it's non-zero.
if (inserted && present_id) {
- strategy.present_id_ring.push_back(present_id);
+ strategy.stale_present_ids.push_back(present_id);
}
// Remove stale present_id's if they weren't presented to.
- if (std::size(strategy.present_id_ring) >
- LowLatency2QueueStrategy::MAX_TRACKED_OBJECTS) {
+ if (std::size(strategy.stale_present_ids) >
+ LowLatency2QueueStrategy::MAX_TRACKED_PRESENTS) {
- const auto to_remove = strategy.present_id_ring.front();
- strategy.present_id_ring.pop_front();
- strategy.present_id_submissions.erase(to_remove);
+ const auto stale_present_id = strategy.stale_present_ids.front();
+ strategy.stale_present_ids.pop_front();
+ strategy.frame_spans.erase(stale_present_id);
}
}
void LowLatency2QueueStrategy::notify_submit(
- const VkSubmitInfo& submit, std::unique_ptr<Submission> submission) {
+ const VkSubmitInfo& submit,
+ std::shared_ptr<TimestampPool::Handle> submission) {
notify_submit_impl(*this, submit, std::move(submission));
}
void LowLatency2QueueStrategy::notify_submit(
- const VkSubmitInfo2& submit, std::unique_ptr<Submission> submission) {
+ const VkSubmitInfo2& submit,
+ std::shared_ptr<TimestampPool::Handle> submission) {
notify_submit_impl(*this, submit, std::move(submission));
}
diff --git a/src/strategies/low_latency2/queue_strategy.hh b/src/strategies/low_latency2/queue_strategy.hh
index a090e1b..6d41027 100644
--- a/src/strategies/low_latency2/queue_strategy.hh
+++ b/src/strategies/low_latency2/queue_strategy.hh
@@ -1,8 +1,8 @@
#ifndef STRATEGIES_LOW_LATENCY2_QUEUE_STRATEGY_HH_
#define STRATEGIES_LOW_LATENCY2_QUEUE_STRATEGY_HH_
+#include "frame_span.hh"
#include "strategies/queue_strategy.hh"
-#include "submission.hh"
#include <atomic>
#include <deque>
@@ -16,26 +16,27 @@ class QueueContext;
class LowLatency2QueueStrategy final : public QueueStrategy {
public:
- static constexpr auto MAX_TRACKED_OBJECTS = 50;
+ static constexpr auto MAX_TRACKED_PRESENTS = 50;
// Mapping of present_id's to submissions. Grabbed later by the device
// strategy when we present and actually can associate them to some
// vkSwapchainKHR.
std::mutex mutex{};
- std::unordered_map<std::uint64_t, std::deque<std::unique_ptr<Submission>>>
- present_id_submissions{};
- std::deque<std::uint64_t> present_id_ring{};
- std::atomic<bool> is_out_of_band{}; // atomic so we don't need a lock check
+ std::unordered_map<std::uint64_t, std::unique_ptr<FrameSpan>> frame_spans{};
+ std::deque<std::uint64_t> stale_present_ids{};
+ std::atomic<bool> is_out_of_band{}; // atomic to avoid lock
public:
LowLatency2QueueStrategy(QueueContext& queue);
virtual ~LowLatency2QueueStrategy();
public:
- virtual void notify_submit(const VkSubmitInfo& submit,
- std::unique_ptr<Submission> submission) override;
- virtual void notify_submit(const VkSubmitInfo2& submit,
- std::unique_ptr<Submission> submission) override;
+ virtual void
+ notify_submit(const VkSubmitInfo& submit,
+ std::shared_ptr<TimestampPool::Handle> handle) override;
+ virtual void
+ notify_submit(const VkSubmitInfo2& submit,
+ std::shared_ptr<TimestampPool::Handle> handle) override;
virtual void notify_present(const VkPresentInfoKHR& present) override;
public:
diff --git a/src/strategies/low_latency2/swapchain_monitor.cc b/src/strategies/low_latency2/swapchain_monitor.cc
index b6d4dd0..a70fa6c 100644
--- a/src/strategies/low_latency2/swapchain_monitor.cc
+++ b/src/strategies/low_latency2/swapchain_monitor.cc
@@ -35,32 +35,29 @@ void SwapchainMonitor::do_monitor(const std::stop_token stoken) {
for (;;) {
auto lock = std::unique_lock{this->mutex};
this->cv.wait(lock, stoken,
- [&]() { return this->semaphore_submission.has_value(); });
+ [&]() { return this->semaphore_spans.has_value(); });
// Stop only if we're stopped and we have nothing to signal.
- if (stoken.stop_requested() &&
- !this->semaphore_submission.has_value()) {
+ if (stoken.stop_requested() && !this->semaphore_spans.has_value()) {
break;
}
// Grab the most recent semaphore. When work completes, signal it.
- const auto semaphore_submission =
- std::move(*this->semaphore_submission);
- this->semaphore_submission.reset();
+ const auto semaphore_span = std::move(*this->semaphore_spans);
+ this->semaphore_spans.reset();
// If we're stopping, signal the semaphore and don't worry about work
// actually completing.
if (stoken.stop_requested()) {
- semaphore_submission.wakeup_semaphore.signal(this->device);
+ semaphore_span.wakeup_semaphore.signal(this->device);
break;
}
// Unlock, wait for work to finish, lock again.
lock.unlock();
- for (const auto& submission : semaphore_submission.submissions) {
- if (!submission.empty()) {
- const auto& last = submission.back();
- last->handle->await_end_time();
+ for (const auto& frame_span : semaphore_span.frame_spans) {
+ if (frame_span) {
+ frame_span->await_completed();
}
}
@@ -78,7 +75,7 @@ void SwapchainMonitor::do_monitor(const std::stop_token stoken) {
}
lock.unlock();
- semaphore_submission.wakeup_semaphore.signal(this->device);
+ semaphore_span.wakeup_semaphore.signal(this->device);
}
}
@@ -97,29 +94,29 @@ void SwapchainMonitor::notify_semaphore(const VkSemaphore& timeline_semaphore,
}
// Signal immediately if we have no outstanding work.
- if (this->pending_submissions.empty()) {
+ if (this->pending_frame_spans.empty()) {
wakeup_semaphore.signal(this->device);
return;
}
- this->semaphore_submission.emplace(SemaphoreSubmissions{
+ this->semaphore_spans.emplace(SemaphoreSpans{
.wakeup_semaphore = wakeup_semaphore,
- .submissions = std::move(this->pending_submissions),
+ .frame_spans = std::move(this->pending_frame_spans),
});
- this->pending_submissions.clear();
+ this->pending_frame_spans.clear();
lock.unlock();
this->cv.notify_one();
}
void SwapchainMonitor::attach_work(
- std::vector<std::deque<std::unique_ptr<Submission>>> submissions) {
+ std::vector<std::unique_ptr<FrameSpan>> frame_spans) {
const auto lock = std::scoped_lock{this->mutex};
if (!this->was_low_latency_requested) {
return;
}
- this->pending_submissions = std::move(submissions);
+ this->pending_frame_spans = std::move(frame_spans);
}
} // namespace low_latency \ No newline at end of file
diff --git a/src/strategies/low_latency2/swapchain_monitor.hh b/src/strategies/low_latency2/swapchain_monitor.hh
index 47c3a75..837f8e4 100644
--- a/src/strategies/low_latency2/swapchain_monitor.hh
+++ b/src/strategies/low_latency2/swapchain_monitor.hh
@@ -2,17 +2,16 @@
#ifndef SWAPCHAIN_MONITOR_HH_
#define SWAPCHAIN_MONITOR_HH_
+#include "frame_span.hh"
+
#include <vulkan/vulkan.h>
#include <chrono>
#include <condition_variable>
-#include <deque>
#include <memory>
#include <mutex>
#include <thread>
-#include "submission.hh"
-
namespace low_latency {
class DeviceContext;
@@ -28,15 +27,15 @@ class SwapchainMonitor final {
};
// An empty vector here represents our 'no work' state.
- std::vector<std::deque<std::unique_ptr<Submission>>> pending_submissions{};
+ std::vector<std::unique_ptr<FrameSpan>> pending_frame_spans{};
// A pairing of semaphore -> submissions.
// If the Submissions completes then signal the bundled semaphore.
- struct SemaphoreSubmissions {
+ struct SemaphoreSpans {
WakeupSemaphore wakeup_semaphore{};
- std::vector<std::deque<std::unique_ptr<Submission>>> submissions{};
+ std::vector<std::unique_ptr<FrameSpan>> frame_spans{};
};
- std::optional<SemaphoreSubmissions> semaphore_submission{};
+ std::optional<SemaphoreSpans> semaphore_spans{};
protected:
const DeviceContext& device;
@@ -67,8 +66,7 @@ class SwapchainMonitor final {
void notify_semaphore(const VkSemaphore& timeline_semaphore,
const std::uint64_t& value);
- void attach_work(
- std::vector<std::deque<std::unique_ptr<Submission>>> submissions);
+ void attach_work(std::vector<std::unique_ptr<FrameSpan>> submissions);
};
} // namespace low_latency
diff --git a/src/strategies/queue_strategy.hh b/src/strategies/queue_strategy.hh
index b4fbcb9..0bc0dbb 100644
--- a/src/strategies/queue_strategy.hh
+++ b/src/strategies/queue_strategy.hh
@@ -1,7 +1,6 @@
#ifndef STRATEGIES_QUEUE_STRATEGY_HH_
#define STRATEGIES_QUEUE_STRATEGY_HH_
-#include "submission.hh"
#include "timestamp_pool.hh"
#include <vulkan/vulkan.h>
@@ -19,10 +18,12 @@ class QueueStrategy {
virtual ~QueueStrategy();
public:
- virtual void notify_submit(const VkSubmitInfo& submit,
- std::unique_ptr<Submission> submission) = 0;
- virtual void notify_submit(const VkSubmitInfo2& submit,
- std::unique_ptr<Submission> submission) = 0;
+ virtual void
+ notify_submit(const VkSubmitInfo& submit,
+ std::shared_ptr<TimestampPool::Handle> handle) = 0;
+ virtual void
+ notify_submit(const VkSubmitInfo2& submit,
+ std::shared_ptr<TimestampPool::Handle> handle) = 0;
virtual void notify_present(const VkPresentInfoKHR& present) = 0;
};