aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorNicolas James <Eele1Ephe7uZahRie@tutanota.com>2026-03-31 16:15:00 +1100
committerNicolas James <Eele1Ephe7uZahRie@tutanota.com>2026-03-31 16:15:00 +1100
commit5e3837cadac73ba5b7d4085cddc48b0e816d826a (patch)
treeced4c413506409f17806a5c6ea4864b4026ca924
parentdf2933fd9c0ea2a99e89a6837123dfdf8b549d4a (diff)
Fix vram leak in Cyberpunk 2077, reduce duplicated logic + general cleanup
-rw-r--r--src/device_context.cc4
-rw-r--r--src/device_context.hh2
-rw-r--r--src/layer.cc6
-rw-r--r--src/layer_context.hh4
-rw-r--r--src/queue_context.cc56
-rw-r--r--src/queue_context.hh42
-rw-r--r--src/swapchain_monitor.cc48
-rw-r--r--src/swapchain_monitor.hh16
-rw-r--r--src/timestamp_pool.cc2
9 files changed, 122 insertions, 58 deletions
diff --git a/src/device_context.cc b/src/device_context.cc
index b52fec4..33f2aa4 100644
--- a/src/device_context.cc
+++ b/src/device_context.cc
@@ -34,7 +34,7 @@ void DeviceContext::update_params(
const bool was_low_latency_requested) {
// If we don't have a target (AMD's anti_lag doesn't differentiate between
- // swapchains), just write it to everything.
+ // swapchains) just write it to everything.
if (!target.has_value()) {
for (auto& iter : this->swapchain_monitors) {
iter.second->update_params(was_low_latency_requested, present_delay);
@@ -49,7 +49,7 @@ void DeviceContext::update_params(
void DeviceContext::notify_present(
const VkSwapchainKHR& swapchain,
- const QueueContext::submissions_t& submissions) {
+ const QueueContext::submissions_ptr_t& submissions) {
const auto iter = this->swapchain_monitors.find(swapchain);
assert(iter != std::end(this->swapchain_monitors));
diff --git a/src/device_context.hh b/src/device_context.hh
index a46f479..ed2991b 100644
--- a/src/device_context.hh
+++ b/src/device_context.hh
@@ -53,7 +53,7 @@ class DeviceContext final : public Context {
const bool was_low_latency_requested);
void notify_present(const VkSwapchainKHR& swapchain,
- const QueueContext::submissions_t& submissions);
+ const QueueContext::submissions_ptr_t& submissions);
};
}; // namespace low_latency
diff --git a/src/layer.cc b/src/layer.cc
index cf9f56e..2743030 100644
--- a/src/layer.cc
+++ b/src/layer.cc
@@ -756,11 +756,11 @@ static VKAPI_ATTR VkResult VKAPI_CALL CreateSwapchainKHR(
}
auto insertion = [&]() -> std::unique_ptr<SwapchainMonitor> {
- if (!layer_context.should_expose_reflex) {
- return std::make_unique<AntiLagSwapchainMonitor>(
+ if (layer_context.should_expose_reflex) {
+ return std::make_unique<ReflexSwapchainMonitor>(
*context, was_low_latency_requested);
}
- return std::make_unique<ReflexSwapchainMonitor>(
+ return std::make_unique<AntiLagSwapchainMonitor>(
*context, was_low_latency_requested);
}();
const auto did_emplace = context->swapchain_monitors
diff --git a/src/layer_context.hh b/src/layer_context.hh
index 4979379..049684d 100644
--- a/src/layer_context.hh
+++ b/src/layer_context.hh
@@ -52,8 +52,8 @@ class LayerContext final : public Context {
// provided instead of VK_AMD_anti_lag.
static constexpr auto EXPOSE_REFLEX_ENV = "LOW_LATENCY_LAYER_EXPOSE_REFLEX";
- // If this is not null and set to 1 then the card's vendor and id will be
- // spoofed to appear as a NVIDIA card.
+ // If this is not null and set to 1 then the card's vendor, id, and device
+  // name will be modified to appear as an NVIDIA card.
static constexpr auto SPOOF_NVIDIA_ENV = "LOW_LATENCY_LAYER_SPOOF_NVIDIA";
public:
diff --git a/src/queue_context.cc b/src/queue_context.cc
index 84b06fe..e9f9c3c 100644
--- a/src/queue_context.cc
+++ b/src/queue_context.cc
@@ -47,6 +47,45 @@ QueueContext::~QueueContext() {
this->timestamp_pool.reset();
}
+QueueContext::Submissions::Submissions() {}
+
+QueueContext::Submissions::~Submissions() {}
+
+void QueueContext::Submissions::add_submission(
+ const std::shared_ptr<TimestampPool::Handle> head,
+ const std::shared_ptr<TimestampPool::Handle> tail,
+ const DeviceClock::time_point_t& now) {
+
+ this->submissions.emplace_back(std::make_unique<Submission>(Submission{
+ .head_handle = head,
+ .tail_handle = tail,
+ .cpu_present_time = now,
+ }));
+
+ // Manual eviction of likely irrelevant timing information.
+ if (std::size(this->submissions) > this->MAX_TRACKED_SUBMISSIONS) {
+ this->submissions.pop_front();
+ }
+}
+
+bool QueueContext::Submissions::has_completed() const {
+ if (this->submissions.empty()) {
+ return true;
+ }
+
+ const auto& last_submission = this->submissions.back();
+ return last_submission->tail_handle->get_time().has_value();
+}
+
+void QueueContext::Submissions::await_completed() const {
+ if (this->submissions.empty()) {
+ return;
+ }
+
+ const auto& last_submission = this->submissions.back();
+ last_submission->tail_handle->await_time();
+}
+
void QueueContext::notify_submit(
const present_id_t& present_id,
const std::shared_ptr<TimestampPool::Handle> head_handle,
@@ -57,23 +96,13 @@ void QueueContext::notify_submit(
// mapping (might be empty, but handled with operator[]).
auto& submissions = this->unpresented_submissions[present_id];
if (submissions == nullptr) {
- submissions =
- std::make_shared<std::deque<std::unique_ptr<Submission>>>();
-
+ submissions = std::make_shared<Submissions>();
if (present_id) {
this->present_id_ring.emplace_back(present_id);
}
}
- submissions->push_back(
- std::make_unique<Submission>(Submission{.head_handle = head_handle,
- .tail_handle = tail_handle,
- .cpu_present_time = now}));
-
- // This is probably hit if our queue never actually presents to anything.
- if (std::size(*submissions) > this->MAX_TRACKED_SUBMISSIONS) {
- submissions->pop_front();
- }
+ submissions->add_submission(head_handle, tail_handle, now);
if (std::size(this->present_id_ring) > MAX_TRACKED_PRESENT_IDS) {
const auto evicted_present_id = this->present_id_ring.front();
@@ -90,8 +119,7 @@ void QueueContext::notify_present(const VkSwapchainKHR& swapchain,
// We're avoiding a double hash here - don't use operator[] and erase.
auto iter = this->unpresented_submissions.try_emplace(present_id).first;
if (iter->second == nullptr) {
- iter->second =
- std::make_shared<std::deque<std::unique_ptr<Submission>>>();
+ iter->second = std::make_shared<Submissions>();
}
this->device.notify_present(swapchain, iter->second);
diff --git a/src/queue_context.hh b/src/queue_context.hh
index be73adc..2abd44c 100644
--- a/src/queue_context.hh
+++ b/src/queue_context.hh
@@ -16,10 +16,6 @@ namespace low_latency {
class QueueContext final : public Context {
private:
- // The amount of queue submissions we allow tracked per queue before
- // we give up tracking them. This is neccessary for queues which do not
- // present anything.
- static constexpr auto MAX_TRACKED_SUBMISSIONS = 50u;
static constexpr auto MAX_TRACKED_PRESENT_IDS = 50u;
public:
@@ -70,15 +66,41 @@ class QueueContext final : public Context {
// and notify our device that it needs to watch for when this completes.
// We give it our submissions. Now, it's out of our hands. We remove the
// present_id_t mapping when doing so.
- struct Submission {
- std::shared_ptr<TimestampPool::Handle> head_handle, tail_handle;
- DeviceClock::time_point_t cpu_present_time;
+
+ class Submissions final {
+ // The amount of queue submissions we allow tracked per queue before
+    // we give up tracking them. This is necessary for queues which do not
+ // present anything.
+ static constexpr auto MAX_TRACKED_SUBMISSIONS = 50u;
+
+ struct Submission final {
+ std::shared_ptr<TimestampPool::Handle> head_handle, tail_handle;
+ DeviceClock::time_point_t cpu_present_time;
+ };
+ std::deque<std::unique_ptr<Submission>> submissions;
+
+ public:
+ Submissions();
+ Submissions(const Submissions&) = delete;
+ Submissions(Submissions&&) = delete;
+ Submissions operator=(const Submissions&) = delete;
+ Submissions operator=(Submissions&&) = delete;
+ ~Submissions();
+
+ public:
+ void add_submission(const std::shared_ptr<TimestampPool::Handle> head,
+ const std::shared_ptr<TimestampPool::Handle> tail,
+ const DeviceClock::time_point_t& now);
+
+    // Non-blocking - true if the last tracked submission has completed on
+    // the GPU (also true when no submissions are tracked).
+ bool has_completed() const;
+ // Blocking wait until the last submission has completed.
+ void await_completed() const;
};
- using submissions_t =
- std::shared_ptr<std::deque<std::unique_ptr<Submission>>>;
using present_id_t = std::uint64_t;
- std::unordered_map<present_id_t, submissions_t> unpresented_submissions;
+ using submissions_ptr_t = std::shared_ptr<Submissions>;
+ std::unordered_map<present_id_t, submissions_ptr_t> unpresented_submissions;
// We might be tracking present_ids which aren't presented to - and as a
// result we don't ever clear those Submissions. So manually evict them by
diff --git a/src/swapchain_monitor.cc b/src/swapchain_monitor.cc
index f12bafa..bc4fc9b 100644
--- a/src/swapchain_monitor.cc
+++ b/src/swapchain_monitor.cc
@@ -23,6 +23,18 @@ void SwapchainMonitor::update_params(
this->present_delay = present_delay;
}
+void SwapchainMonitor::prune_submissions() {
+ // If our submissions grow too large, we should delete them from our
+ // tracking. It would be nice if this was handled elegantly by some custom
+ // container and we didn't have to call this manually each time we insert.
+  // Also this exact logic is repeated in QueueContext's Submissions.
+ if (std::size(this->in_flight_submissions) >
+ this->MAX_TRACKED_IN_FLIGHT_SUBMISSIONS) {
+
+ this->in_flight_submissions.pop_front();
+ }
+}
+
ReflexSwapchainMonitor::ReflexSwapchainMonitor(
const DeviceContext& device, const bool was_low_latency_requested)
: SwapchainMonitor(device, was_low_latency_requested),
@@ -55,12 +67,10 @@ void ReflexSwapchainMonitor::do_monitor(const std::stop_token stoken) {
// Look for the latest submission and make sure it's completed.
if (!this->in_flight_submissions.empty()) {
- const auto submission = this->in_flight_submissions.back();
+ const auto last_submission = this->in_flight_submissions.back();
this->in_flight_submissions.clear();
- if (!submission->empty()) {
- submission->back()->tail_handle->await_time();
- }
+ last_submission->await_completed();
}
// We might want to signal them all? In theory it's the same timeline
@@ -80,7 +90,6 @@ void ReflexSwapchainMonitor::notify_semaphore(
const auto wakeup_semaphore = WakeupSemaphore{
.timeline_semaphore = timeline_semaphore, .value = value};
-
// Signal immediately if low_latency isn't requested or if we have no
// outstanding work.
if (!this->was_low_latency_requested ||
@@ -95,7 +104,7 @@ void ReflexSwapchainMonitor::notify_semaphore(
}
void ReflexSwapchainMonitor::notify_present(
- const QueueContext::submissions_t& submissions) {
+ const QueueContext::submissions_ptr_t& submissions) {
const auto lock = std::scoped_lock{this->mutex};
@@ -104,17 +113,17 @@ void ReflexSwapchainMonitor::notify_present(
}
// Fast path where this work has already completed.
- if (!this->wakeup_semaphores.empty() && !submissions->empty()) {
-
- const auto& finished = submissions->back()->tail_handle->get_time();
- if (finished.has_value()) {
- this->wakeup_semaphores.back().signal(this->device);
- this->wakeup_semaphores.clear();
- return;
- }
+ // In this case, don't wake up the thread. We can just signal
+ // what we have immediately on this thread.
+ if (!this->wakeup_semaphores.empty() && submissions->has_completed()) {
+ this->wakeup_semaphores.back().signal(this->device);
+ this->wakeup_semaphores.clear();
+ return;
}
this->in_flight_submissions.emplace_back(submissions);
+ this->prune_submissions();
+
this->cv.notify_one();
}
@@ -123,15 +132,15 @@ AntiLagSwapchainMonitor::AntiLagSwapchainMonitor(
: SwapchainMonitor(device, was_low_latency_requested) {}
AntiLagSwapchainMonitor::~AntiLagSwapchainMonitor() {}
-
void AntiLagSwapchainMonitor::notify_present(
- const QueueContext::submissions_t& submissions) {
+ const QueueContext::submissions_ptr_t& submissions) {
if (!this->was_low_latency_requested) {
return;
}
this->in_flight_submissions.emplace_back(submissions);
+ this->prune_submissions();
}
void AntiLagSwapchainMonitor::await_submissions() {
@@ -139,13 +148,10 @@ void AntiLagSwapchainMonitor::await_submissions() {
return;
}
- const auto last_submissions = this->in_flight_submissions.back();
+ const auto last_submission = this->in_flight_submissions.back();
this->in_flight_submissions.clear();
- if (last_submissions->empty()) {
- return;
- }
- last_submissions->back()->tail_handle->await_time();
+ last_submission->await_completed();
}
} // namespace low_latency \ No newline at end of file
diff --git a/src/swapchain_monitor.hh b/src/swapchain_monitor.hh
index b993b83..295ac5b 100644
--- a/src/swapchain_monitor.hh
+++ b/src/swapchain_monitor.hh
@@ -21,6 +21,9 @@ class DeviceContext;
// currently have an option to frame pace, to disable low_latency mode
// (become a no-op), and must track in_flight_submissions to function.
class SwapchainMonitor {
+ private:
+ static constexpr auto MAX_TRACKED_IN_FLIGHT_SUBMISSIONS = 50u;
+
protected:
const DeviceContext& device;
@@ -28,7 +31,12 @@ class SwapchainMonitor {
std::chrono::milliseconds present_delay = std::chrono::milliseconds{0};
bool was_low_latency_requested = false;
- std::deque<QueueContext::submissions_t> in_flight_submissions;
+ std::deque<QueueContext::submissions_ptr_t> in_flight_submissions;
+
+ protected:
+ // Small fix to avoid submissions growing limitlessly in size if this
+ // swapchain is never presented to.
+ void prune_submissions();
public:
SwapchainMonitor(const DeviceContext& device,
@@ -45,7 +53,7 @@ class SwapchainMonitor {
public:
virtual void
- notify_present(const QueueContext::submissions_t& submissions) = 0;
+ notify_present(const QueueContext::submissions_ptr_t& submissions) = 0;
};
// Provides asynchronous monitoring of submissions and signalling of some
@@ -79,7 +87,7 @@ class ReflexSwapchainMonitor final : public SwapchainMonitor {
public:
virtual void
- notify_present(const QueueContext::submissions_t& submissions) override;
+ notify_present(const QueueContext::submissions_ptr_t& submissions) override;
};
// Much simpler synchronous waiting with no thread requirement.
@@ -95,7 +103,7 @@ class AntiLagSwapchainMonitor final : public SwapchainMonitor {
public:
virtual void
- notify_present(const QueueContext::submissions_t& submissions) override;
+ notify_present(const QueueContext::submissions_ptr_t& submissions) override;
};
} // namespace low_latency
diff --git a/src/timestamp_pool.cc b/src/timestamp_pool.cc
index a618cfb..2d43c58 100644
--- a/src/timestamp_pool.cc
+++ b/src/timestamp_pool.cc
@@ -105,7 +105,7 @@ std::shared_ptr<TimestampPool::Handle> TimestampPool::acquire() {
const auto query_index = *std::begin(query_chunk.free_indices);
query_chunk.free_indices.erase(query_index);
- // Custom deleter function that puts the handle on our async deleter queue.
+ // Custom deleter function that puts the handle on our async reaper queue.
const auto reaper_deleter = [this](Handle* const handle) {
if (!handle) {
return;