aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorNicolas James <Eele1Ephe7uZahRie@tutanota.com>2026-03-31 16:15:00 +1100
committerNicolas James <Eele1Ephe7uZahRie@tutanota.com>2026-03-31 16:15:00 +1100
commit5e3837cadac73ba5b7d4085cddc48b0e816d826a (patch)
treeced4c413506409f17806a5c6ea4864b4026ca924
parentdf2933fd9c0ea2a99e89a6837123dfdf8b549d4a (diff)
Fix vram leak in Cyberpunk 2077, reduce duplicated logic + general cleanup
-rw-r--r--src/device_context.cc4
-rw-r--r--src/device_context.hh2
-rw-r--r--src/layer.cc6
-rw-r--r--src/layer_context.hh4
-rw-r--r--src/queue_context.cc56
-rw-r--r--src/queue_context.hh42
-rw-r--r--src/swapchain_monitor.cc48
-rw-r--r--src/swapchain_monitor.hh16
-rw-r--r--src/timestamp_pool.cc2
9 files changed, 122 insertions, 58 deletions
diff --git a/src/device_context.cc b/src/device_context.cc
index b52fec4..33f2aa4 100644
--- a/src/device_context.cc
+++ b/src/device_context.cc
@@ -34,7 +34,7 @@ void DeviceContext::update_params(
const bool was_low_latency_requested) {
// If we don't have a target (AMD's anti_lag doesn't differentiate between
- // swapchains), just write it to everything.
+ // swapchains) just write it to everything.
if (!target.has_value()) {
for (auto& iter : this->swapchain_monitors) {
iter.second->update_params(was_low_latency_requested, present_delay);
@@ -49,7 +49,7 @@ void DeviceContext::update_params(
void DeviceContext::notify_present(
const VkSwapchainKHR& swapchain,
- const QueueContext::submissions_t& submissions) {
+ const QueueContext::submissions_ptr_t& submissions) {
const auto iter = this->swapchain_monitors.find(swapchain);
assert(iter != std::end(this->swapchain_monitors));
diff --git a/src/device_context.hh b/src/device_context.hh
index a46f479..ed2991b 100644
--- a/src/device_context.hh
+++ b/src/device_context.hh
@@ -53,7 +53,7 @@ class DeviceContext final : public Context {
const bool was_low_latency_requested);
void notify_present(const VkSwapchainKHR& swapchain,
- const QueueContext::submissions_t& submissions);
+ const QueueContext::submissions_ptr_t& submissions);
};
}; // namespace low_latency
diff --git a/src/layer.cc b/src/layer.cc
index cf9f56e..2743030 100644
--- a/src/layer.cc
+++ b/src/layer.cc
@@ -756,11 +756,11 @@ static VKAPI_ATTR VkResult VKAPI_CALL CreateSwapchainKHR(
}
auto insertion = [&]() -> std::unique_ptr<SwapchainMonitor> {
- if (!layer_context.should_expose_reflex) {
- return std::make_unique<AntiLagSwapchainMonitor>(
+ if (layer_context.should_expose_reflex) {
+ return std::make_unique<ReflexSwapchainMonitor>(
*context, was_low_latency_requested);
}
- return std::make_unique<ReflexSwapchainMonitor>(
+ return std::make_unique<AntiLagSwapchainMonitor>(
*context, was_low_latency_requested);
}();
const auto did_emplace = context->swapchain_monitors
diff --git a/src/layer_context.hh b/src/layer_context.hh
index 4979379..049684d 100644
--- a/src/layer_context.hh
+++ b/src/layer_context.hh
@@ -52,8 +52,8 @@ class LayerContext final : public Context {
// provided instead of VK_AMD_anti_lag.
static constexpr auto EXPOSE_REFLEX_ENV = "LOW_LATENCY_LAYER_EXPOSE_REFLEX";
- // If this is not null and set to 1 then the card's vendor and id will be
- // spoofed to appear as a NVIDIA card.
+ // If this is not null and set to 1 then the card's vendor, id, and device
+  // name will be modified to appear as an NVIDIA card.
static constexpr auto SPOOF_NVIDIA_ENV = "LOW_LATENCY_LAYER_SPOOF_NVIDIA";
public:
diff --git a/src/queue_context.cc b/src/queue_context.cc
index 84b06fe..e9f9c3c 100644
--- a/src/queue_context.cc
+++ b/src/queue_context.cc
@@ -47,6 +47,45 @@ QueueContext::~QueueContext() {
this->timestamp_pool.reset();
}
+QueueContext::Submissions::Submissions() {}
+
+QueueContext::Submissions::~Submissions() {}
+
+void QueueContext::Submissions::add_submission(
+ const std::shared_ptr<TimestampPool::Handle> head,
+ const std::shared_ptr<TimestampPool::Handle> tail,
+ const DeviceClock::time_point_t& now) {
+
+ this->submissions.emplace_back(std::make_unique<Submission>(Submission{
+ .head_handle = head,
+ .tail_handle = tail,
+ .cpu_present_time = now,
+ }));
+
+ // Manual eviction of likely irrelevant timing information.
+ if (std::size(this->submissions) > this->MAX_TRACKED_SUBMISSIONS) {
+ this->submissions.pop_front();
+ }
+}
+
+bool QueueContext::Submissions::has_completed() const {
+ if (this->submissions.empty()) {
+ return true;
+ }
+
+ const auto& last_submission = this->submissions.back();
+ return last_submission->tail_handle->get_time().has_value();
+}
+
+void QueueContext::Submissions::await_completed() const {
+ if (this->submissions.empty()) {
+ return;
+ }
+
+ const auto& last_submission = this->submissions.back();
+ last_submission->tail_handle->await_time();
+}
+
void QueueContext::notify_submit(
const present_id_t& present_id,
const std::shared_ptr<TimestampPool::Handle> head_handle,
@@ -57,23 +96,13 @@ void QueueContext::notify_submit(
// mapping (might be empty, but handled with operator[]).
auto& submissions = this->unpresented_submissions[present_id];
if (submissions == nullptr) {
- submissions =
- std::make_shared<std::deque<std::unique_ptr<Submission>>>();
-
+ submissions = std::make_shared<Submissions>();
if (present_id) {
this->present_id_ring.emplace_back(present_id);
}
}
- submissions->push_back(
- std::make_unique<Submission>(Submission{.head_handle = head_handle,
- .tail_handle = tail_handle,
- .cpu_present_time = now}));
-
- // This is probably hit if our queue never actually presents to anything.
- if (std::size(*submissions) > this->MAX_TRACKED_SUBMISSIONS) {
- submissions->pop_front();
- }
+ submissions->add_submission(head_handle, tail_handle, now);
if (std::size(this->present_id_ring) > MAX_TRACKED_PRESENT_IDS) {
const auto evicted_present_id = this->present_id_ring.front();
@@ -90,8 +119,7 @@ void QueueContext::notify_present(const VkSwapchainKHR& swapchain,
// We're avoiding a double hash here - don't use operator[] and erase.
auto iter = this->unpresented_submissions.try_emplace(present_id).first;
if (iter->second == nullptr) {
- iter->second =
- std::make_shared<std::deque<std::unique_ptr<Submission>>>();
+ iter->second = std::make_shared<Submissions>();
}
this->device.notify_present(swapchain, iter->second);
diff --git a/src/queue_context.hh b/src/queue_context.hh
index be73adc..2abd44c 100644
--- a/src/queue_context.hh
+++ b/src/queue_context.hh
@@ -16,10 +16,6 @@ namespace low_latency {
class QueueContext final : public Context {
private:
- // The amount of queue submissions we allow tracked per queue before
- // we give up tracking them. This is neccessary for queues which do not
- // present anything.
- static constexpr auto MAX_TRACKED_SUBMISSIONS = 50u;
static constexpr auto MAX_TRACKED_PRESENT_IDS = 50u;
public:
@@ -70,15 +66,41 @@ class QueueContext final : public Context {
// and notify our device that it needs to watch for when this completes.
// We give it our submissions. Now, it's out of our hands. We remove the
// present_id_t mapping when doing so.
- struct Submission {
- std::shared_ptr<TimestampPool::Handle> head_handle, tail_handle;
- DeviceClock::time_point_t cpu_present_time;
+
+ class Submissions final {
+ // The amount of queue submissions we allow tracked per queue before
+    // we give up tracking them. This is necessary for queues which do not
+ // present anything.
+ static constexpr auto MAX_TRACKED_SUBMISSIONS = 50u;
+
+ struct Submission final {
+ std::shared_ptr<TimestampPool::Handle> head_handle, tail_handle;
+ DeviceClock::time_point_t cpu_present_time;
+ };
+ std::deque<std::unique_ptr<Submission>> submissions;
+
+ public:
+ Submissions();
+ Submissions(const Submissions&) = delete;
+ Submissions(Submissions&&) = delete;
+ Submissions operator=(const Submissions&) = delete;
+ Submissions operator=(Submissions&&) = delete;
+ ~Submissions();
+
+ public:
+ void add_submission(const std::shared_ptr<TimestampPool::Handle> head,
+ const std::shared_ptr<TimestampPool::Handle> tail,
+ const DeviceClock::time_point_t& now);
+
+    // Non-blocking - true if the last tracked submission has completed on
+    // the GPU (also true when no submissions are tracked).
+ bool has_completed() const;
+ // Blocking wait until the last submission has completed.
+ void await_completed() const;
};
- using submissions_t =
- std::shared_ptr<std::deque<std::unique_ptr<Submission>>>;
using present_id_t = std::uint64_t;
- std::unordered_map<present_id_t, submissions_t> unpresented_submissions;
+ using submissions_ptr_t = std::shared_ptr<Submissions>;
+ std::unordered_map<present_id_t, submissions_ptr_t> unpresented_submissions;
// We might be tracking present_ids which aren't presented to - and as a
// result we don't ever clear those Submissions. So manually evict them by
diff --git a/src/swapchain_monitor.cc b/src/swapchain_monitor.cc
index f12bafa..bc4fc9b 100644
--- a/src/swapchain_monitor.cc
+++ b/src/swapchain_monitor.cc
@@ -23,6 +23,18 @@ void SwapchainMonitor::update_params(
this->present_delay = present_delay;
}
+void SwapchainMonitor::prune_submissions() {
+ // If our submissions grow too large, we should delete them from our
+ // tracking. It would be nice if this was handled elegantly by some custom
+ // container and we didn't have to call this manually each time we insert.
+  // Also this exact logic is repeated in QueueContext's Submissions.
+ if (std::size(this->in_flight_submissions) >
+ this->MAX_TRACKED_IN_FLIGHT_SUBMISSIONS) {
+
+ this->in_flight_submissions.pop_front();
+ }
+}
+
ReflexSwapchainMonitor::ReflexSwapchainMonitor(
const DeviceContext& device, const bool was_low_latency_requested)
: SwapchainMonitor(device, was_low_latency_requested),
@@ -55,12 +67,10 @@ void ReflexSwapchainMonitor::do_monitor(const std::stop_token stoken) {
// Look for the latest submission and make sure it's completed.
if (!this->in_flight_submissions.empty()) {
- const auto submission = this->in_flight_submissions.back();
+ const auto last_submission = this->in_flight_submissions.back();
this->in_flight_submissions.clear();
- if (!submission->empty()) {
- submission->back()->tail_handle->await_time();
- }
+ last_submission->await_completed();
}
// We might want to signal them all? In theory it's the same timeline
@@ -80,7 +90,6 @@ void ReflexSwapchainMonitor::notify_semaphore(
const auto wakeup_semaphore = WakeupSemaphore{
.timeline_semaphore = timeline_semaphore, .value = value};
-
// Signal immediately if low_latency isn't requested or if we have no
// outstanding work.
if (!this->was_low_latency_requested ||
@@ -95,7 +104,7 @@ void ReflexSwapchainMonitor::notify_semaphore(
}
void ReflexSwapchainMonitor::notify_present(
- const QueueContext::submissions_t& submissions) {
+ const QueueContext::submissions_ptr_t& submissions) {
const auto lock = std::scoped_lock{this->mutex};
@@ -104,17 +113,17 @@ void ReflexSwapchainMonitor::notify_present(
}
// Fast path where this work has already completed.
- if (!this->wakeup_semaphores.empty() && !submissions->empty()) {
-
- const auto& finished = submissions->back()->tail_handle->get_time();
- if (finished.has_value()) {
- this->wakeup_semaphores.back().signal(this->device);
- this->wakeup_semaphores.clear();
- return;
- }
+ // In this case, don't wake up the thread. We can just signal
+ // what we have immediately on this thread.
+ if (!this->wakeup_semaphores.empty() && submissions->has_completed()) {
+ this->wakeup_semaphores.back().signal(this->device);
+ this->wakeup_semaphores.clear();
+ return;
}
this->in_flight_submissions.emplace_back(submissions);
+ this->prune_submissions();
+
this->cv.notify_one();
}
@@ -123,15 +132,15 @@ AntiLagSwapchainMonitor::AntiLagSwapchainMonitor(
: SwapchainMonitor(device, was_low_latency_requested) {}
AntiLagSwapchainMonitor::~AntiLagSwapchainMonitor() {}
-
void AntiLagSwapchainMonitor::notify_present(
- const QueueContext::submissions_t& submissions) {
+ const QueueContext::submissions_ptr_t& submissions) {
if (!this->was_low_latency_requested) {
return;
}
this->in_flight_submissions.emplace_back(submissions);
+ this->prune_submissions();
}
void AntiLagSwapchainMonitor::await_submissions() {
@@ -139,13 +148,10 @@ void AntiLagSwapchainMonitor::await_submissions() {
return;
}
- const auto last_submissions = this->in_flight_submissions.back();
+ const auto last_submission = this->in_flight_submissions.back();
this->in_flight_submissions.clear();
- if (last_submissions->empty()) {
- return;
- }
- last_submissions->back()->tail_handle->await_time();
+ last_submission->await_completed();
}
} // namespace low_latency \ No newline at end of file
diff --git a/src/swapchain_monitor.hh b/src/swapchain_monitor.hh
index b993b83..295ac5b 100644
--- a/src/swapchain_monitor.hh
+++ b/src/swapchain_monitor.hh
@@ -21,6 +21,9 @@ class DeviceContext;
// currently have an option to frame pace, to disable low_latency mode
// (become a no-op), and must track in_flight_submissions to function.
class SwapchainMonitor {
+ private:
+ static constexpr auto MAX_TRACKED_IN_FLIGHT_SUBMISSIONS = 50u;
+
protected:
const DeviceContext& device;
@@ -28,7 +31,12 @@ class SwapchainMonitor {
std::chrono::milliseconds present_delay = std::chrono::milliseconds{0};
bool was_low_latency_requested = false;
- std::deque<QueueContext::submissions_t> in_flight_submissions;
+ std::deque<QueueContext::submissions_ptr_t> in_flight_submissions;
+
+ protected:
+ // Small fix to avoid submissions growing limitlessly in size if this
+ // swapchain is never presented to.
+ void prune_submissions();
public:
SwapchainMonitor(const DeviceContext& device,
@@ -45,7 +53,7 @@ class SwapchainMonitor {
public:
virtual void
- notify_present(const QueueContext::submissions_t& submissions) = 0;
+ notify_present(const QueueContext::submissions_ptr_t& submissions) = 0;
};
// Provides asynchronous monitoring of submissions and signalling of some
@@ -79,7 +87,7 @@ class ReflexSwapchainMonitor final : public SwapchainMonitor {
public:
virtual void
- notify_present(const QueueContext::submissions_t& submissions) override;
+ notify_present(const QueueContext::submissions_ptr_t& submissions) override;
};
// Much simpler synchronous waiting with no thread requirement.
@@ -95,7 +103,7 @@ class AntiLagSwapchainMonitor final : public SwapchainMonitor {
public:
virtual void
- notify_present(const QueueContext::submissions_t& submissions) override;
+ notify_present(const QueueContext::submissions_ptr_t& submissions) override;
};
} // namespace low_latency
diff --git a/src/timestamp_pool.cc b/src/timestamp_pool.cc
index a618cfb..2d43c58 100644
--- a/src/timestamp_pool.cc
+++ b/src/timestamp_pool.cc
@@ -105,7 +105,7 @@ std::shared_ptr<TimestampPool::Handle> TimestampPool::acquire() {
const auto query_index = *std::begin(query_chunk.free_indices);
query_chunk.free_indices.erase(query_index);
- // Custom deleter function that puts the handle on our async deleter queue.
+ // Custom deleter function that puts the handle on our async reaper queue.
const auto reaper_deleter = [this](Handle* const handle) {
if (!handle) {
return;