From 312d8736ae0df55c9f33e4eb5c00e4cd77e1c33f Mon Sep 17 00:00:00 2001 From: Nicolas James Date: Mon, 6 Apr 2026 17:03:35 +1000 Subject: Add refactored VK_NV_low_latency2 impl, (fixes many threading issues) --- src/strategies/low_latency2/device_strategy.cc | 83 ++++++++++++++++++++++- src/strategies/low_latency2/device_strategy.hh | 11 +++ src/strategies/low_latency2/queue_strategy.cc | 86 ++++++++++++++++++------ src/strategies/low_latency2/queue_strategy.hh | 22 ++++++ src/strategies/low_latency2/swapchain_monitor.cc | 40 ++++++++--- src/strategies/low_latency2/swapchain_monitor.hh | 10 ++- 6 files changed, 219 insertions(+), 33 deletions(-) (limited to 'src/strategies') diff --git a/src/strategies/low_latency2/device_strategy.cc b/src/strategies/low_latency2/device_strategy.cc index 3a970a2..18ea21f 100644 --- a/src/strategies/low_latency2/device_strategy.cc +++ b/src/strategies/low_latency2/device_strategy.cc @@ -1,4 +1,6 @@ #include "device_strategy.hh" +#include "device_context.hh" +#include "queue_strategy.hh" #include "helper.hh" #include @@ -15,7 +17,10 @@ void LowLatency2DeviceStrategy::notify_create_swapchain( // VK_NV_low_latency2 allows a swapchain to be created with the low latency // mode already on via VkSwapchainLatencyCreateInfoNV. - auto was_low_latency_requested = bool{false}; + // Default to enabled - if the app is using VK_NV_low_latency2 at all it + // wants pacing. VkSwapchainLatencyCreateInfoNV can override this, but + // apps like CS2 recreate swapchains without it (apparent app bug). 
+ auto was_low_latency_requested = bool{true}; if (const auto slci = find_next( &info, VK_STRUCTURE_TYPE_SWAPCHAIN_LATENCY_CREATE_INFO_NV); slci) { @@ -38,4 +43,80 @@ void LowLatency2DeviceStrategy::notify_destroy_swapchain( this->swapchain_monitors.erase(swapchain); } +void LowLatency2DeviceStrategy::notify_latency_sleep_mode( + const VkSwapchainKHR& swapchain, + const VkLatencySleepModeInfoNV* const info) { + + const auto lock = std::shared_lock{this->mutex}; + + const auto iter = this->swapchain_monitors.find(swapchain); + assert(iter != std::end(this->swapchain_monitors)); + + using namespace std::chrono; + if (info) { + iter->second.update_params(info->lowLatencyMode, + microseconds{info->minimumIntervalUs}); + } else { + iter->second.update_params(false, 0us); + } +} + +void LowLatency2DeviceStrategy::submit_swapchain_present_id( + const VkSwapchainKHR& swapchain, const std::uint64_t& present_id) { + + // Iterate through all queues and grab any work that's associated with this + // present_id. Turn it into a vector of work that we give to our swapchain + // monitor. + auto work = [&]() -> std::vector>> { + auto work = std::vector>>{}; + const auto lock = std::scoped_lock{this->device.mutex}; + for (const auto& queue_iter : this->device.queues) { + const auto& queue = queue_iter.second; + + const auto strategy = + dynamic_cast(queue->strategy.get()); + assert(strategy); + + if (strategy->is_out_of_band.load(std::memory_order::relaxed)) { + continue; + } + + // Need the lock now - we're modifying it. + const auto strategy_lock = std::unique_lock{strategy->mutex}; + const auto iter = strategy->present_id_submissions.find(present_id); + if (iter == std::end(strategy->present_id_submissions)) { + continue; + } + + // Make sure we clean it up from the present as well. 
+ work.push_back(std::move(iter->second)); + strategy->present_id_submissions.erase(iter); + } + return work; + }(); + + const auto lock = std::scoped_lock{this->mutex}; + + // Fail hard here, the swapchain must exist or something has gone wrong with + // Vulkan bookkeeping. + const auto iter = this->swapchain_monitors.find(swapchain); + assert(iter != std::end(this->swapchain_monitors)); + + // Notify our monitor that this work has to be completed before they signal + // whatever semaphore is currently sitting in it. + iter->second.attach_work(std::move(work)); +} + +void LowLatency2DeviceStrategy::notify_latency_sleep_nv( + const VkSwapchainKHR& swapchain, const VkLatencySleepInfoNV& info) { + + const auto lock = std::scoped_lock{this->mutex}; + + // Again, fail hard here - something has gone terribly wrong. + const auto iter = this->swapchain_monitors.find(swapchain); + assert(iter != std::end(this->swapchain_monitors)); + + iter->second.notify_semaphore(info.signalSemaphore, info.value); +} + } // namespace low_latency diff --git a/src/strategies/low_latency2/device_strategy.hh b/src/strategies/low_latency2/device_strategy.hh index af1b471..499b2aa 100644 --- a/src/strategies/low_latency2/device_strategy.hh +++ b/src/strategies/low_latency2/device_strategy.hh @@ -6,6 +6,7 @@ #include #include +#include namespace low_latency { @@ -27,6 +28,16 @@ class LowLatency2DeviceStrategy final : public DeviceStrategy { const VkSwapchainCreateInfoKHR& info) override; virtual void notify_destroy_swapchain(const VkSwapchainKHR& swapchain) override; + + public: + void submit_swapchain_present_id(const VkSwapchainKHR& swapchain, + const std::uint64_t& present_id); + + void notify_latency_sleep_mode(const VkSwapchainKHR& swapchain, + const VkLatencySleepModeInfoNV* const info); + + void notify_latency_sleep_nv(const VkSwapchainKHR& swapchain, + const VkLatencySleepInfoNV& info); }; } // namespace low_latency diff --git a/src/strategies/low_latency2/queue_strategy.cc 
b/src/strategies/low_latency2/queue_strategy.cc index 855ff5d..9a68b78 100644 --- a/src/strategies/low_latency2/queue_strategy.cc +++ b/src/strategies/low_latency2/queue_strategy.cc @@ -1,8 +1,10 @@ #include "queue_strategy.hh" +#include "device_context.hh" +#include "device_strategy.hh" #include "helper.hh" +#include "queue_context.hh" -#include -#include +#include namespace low_latency { @@ -11,34 +13,80 @@ LowLatency2QueueStrategy::LowLatency2QueueStrategy(QueueContext& queue) LowLatency2QueueStrategy::~LowLatency2QueueStrategy() {} +template +static void notify_submit_impl(LowLatency2QueueStrategy& strategy, + const T& submit, + std::unique_ptr submission) { + + // It's actually not a requirement that we have this present id. + const auto lspi = find_next( + &submit, VK_STRUCTURE_TYPE_LATENCY_SUBMISSION_PRESENT_ID_NV); + const auto present_id = lspi ? lspi->presentID : 0; + + const auto lock = std::scoped_lock{strategy.mutex}; + const auto [iter, inserted] = + strategy.present_id_submissions.try_emplace(present_id); + iter->second.push_back(std::move(submission)); + + // Remove stale submissions if we're presenting a lot to the same + // present_id. This doesn't affect anything because we're waiting on the + // last. It raises the question: should we just store only the last? + if (std::size(iter->second) >= + LowLatency2QueueStrategy::MAX_TRACKED_OBJECTS) { + + iter->second.pop_front(); + } + + // Add our present_id to our ring tracking if it's non-zero. + if (inserted && present_id) { + strategy.present_id_ring.push_back(present_id); + } + + // Remove stale present_id's if they weren't presented to. 
+ if (std::size(strategy.present_id_ring) > + LowLatency2QueueStrategy::MAX_TRACKED_OBJECTS) { + + const auto to_remove = strategy.present_id_ring.front(); + strategy.present_id_ring.pop_front(); + strategy.present_id_submissions.erase(to_remove); + } +} + void LowLatency2QueueStrategy::notify_submit( - [[maybe_unused]] const VkSubmitInfo& submit, - [[maybe_unused]] std::unique_ptr submission) {} + const VkSubmitInfo& submit, std::unique_ptr submission) { + + notify_submit_impl(*this, submit, std::move(submission)); +} void LowLatency2QueueStrategy::notify_submit( - [[maybe_unused]] const VkSubmitInfo2& submit, - [[maybe_unused]] std::unique_ptr submission) {} + const VkSubmitInfo2& submit, std::unique_ptr submission) { + + notify_submit_impl(*this, submit, std::move(submission)); +} void LowLatency2QueueStrategy::notify_present(const VkPresentInfoKHR& present) { const auto pid = find_next(&present, VK_STRUCTURE_TYPE_PRESENT_ID_KHR); - // All submissions should be tagged with a present_id. If it isn't, I'm not - // going to fail hard here - we will just ignore it. 
- if (!pid) { - return; - } - - const auto swapchains = - std::span{present.pSwapchains, present.swapchainCount}; - const auto present_ids = - std::span{pid->pPresentIds, present.swapchainCount}; - for (const auto& [swapchain, present_id] : - std::views::zip(swapchains, present_ids)) { + const auto device_strategy = dynamic_cast( + this->queue.device.strategy.get()); + assert(device_strategy); - // TODO + for (auto i = std::uint32_t{0}; i < present.swapchainCount; ++i) { + const auto& swapchain = present.pSwapchains[i]; + const auto present_id = [&]() -> std::uint64_t { + if (pid && pid->pPresentIds) { + return pid->pPresentIds[i]; + } + return 0; + }(); + device_strategy->submit_swapchain_present_id(swapchain, present_id); } } +void LowLatency2QueueStrategy::notify_out_of_band() { + this->is_out_of_band.store(true, std::memory_order_relaxed); +} + } // namespace low_latency diff --git a/src/strategies/low_latency2/queue_strategy.hh b/src/strategies/low_latency2/queue_strategy.hh index 223f559..a090e1b 100644 --- a/src/strategies/low_latency2/queue_strategy.hh +++ b/src/strategies/low_latency2/queue_strategy.hh @@ -2,12 +2,31 @@ #define STRATEGIES_LOW_LATENCY2_QUEUE_STRATEGY_HH_ #include "strategies/queue_strategy.hh" +#include "submission.hh" + +#include +#include +#include +#include +#include namespace low_latency { class QueueContext; class LowLatency2QueueStrategy final : public QueueStrategy { + public: + static constexpr auto MAX_TRACKED_OBJECTS = 50; + + // Mapping of present_id's to submissions. Grabbed later by the device + // strategy when we present and actually can associate them to some + // vkSwapchainKHR. 
+ std::mutex mutex{}; + std::unordered_map>> + present_id_submissions{}; + std::deque present_id_ring{}; + std::atomic is_out_of_band{}; // atomic so we don't need a lock check + public: LowLatency2QueueStrategy(QueueContext& queue); virtual ~LowLatency2QueueStrategy(); @@ -18,6 +37,9 @@ class LowLatency2QueueStrategy final : public QueueStrategy { virtual void notify_submit(const VkSubmitInfo2& submit, std::unique_ptr submission) override; virtual void notify_present(const VkPresentInfoKHR& present) override; + + public: + void notify_out_of_band(); }; } // namespace low_latency diff --git a/src/strategies/low_latency2/swapchain_monitor.cc b/src/strategies/low_latency2/swapchain_monitor.cc index 3c9b5e7..4c19251 100644 --- a/src/strategies/low_latency2/swapchain_monitor.cc +++ b/src/strategies/low_latency2/swapchain_monitor.cc @@ -2,8 +2,16 @@ #include "device_context.hh" #include "helper.hh" +#include + namespace low_latency { +SwapchainMonitor::SwapchainMonitor(const DeviceContext& device) + : device(device), + monitor_worker(std::bind_front(&SwapchainMonitor::do_monitor, this)) {} + +SwapchainMonitor::~SwapchainMonitor() {} + void SwapchainMonitor::WakeupSemaphore::signal( const DeviceContext& device) const { @@ -47,14 +55,28 @@ void SwapchainMonitor::do_monitor(const std::stop_token stoken) { break; } - // Unlock, wait for work to finish, signal semaphore. + // Unlock, wait for work to finish, lock again. lock.unlock(); - // Ugly and duplicated - will fix this soon. 
- if (!semaphore_submission.submissions->empty()) { - semaphore_submission.submissions->back().end->await_time(); + for (const auto& submission : semaphore_submission.submissions) { + if (!submission.empty()) { + submission.back()->end->await_time(); + } } - // TODO add wait for frame pacing + lock.lock(); + using namespace std::chrono; + if (this->present_delay != 0us) { + const auto last_time = this->last_signal_time; + const auto delay = this->present_delay; + if (last_time.has_value()) { + lock.unlock(); + std::this_thread::sleep_until(*last_time + delay); + lock.lock(); + } + this->last_signal_time.emplace(steady_clock::now()); + } + lock.unlock(); + semaphore_submission.wakeup_semaphore.signal(this->device); } } @@ -74,8 +96,7 @@ void SwapchainMonitor::notify_semaphore(const VkSemaphore& timeline_semaphore, } // Signal immediately if we have no outstanding work. - if (!this->pending_submissions) { - this->pending_submissions.reset(); + if (this->pending_submissions.empty()) { wakeup_semaphore.signal(this->device); return; } @@ -84,20 +105,19 @@ void SwapchainMonitor::notify_semaphore(const VkSemaphore& timeline_semaphore, .wakeup_semaphore = wakeup_semaphore, .submissions = std::move(this->pending_submissions), }); - this->pending_submissions.reset(); + this->pending_submissions.clear(); lock.unlock(); this->cv.notify_one(); } void SwapchainMonitor::attach_work( - std::unique_ptr> submissions) { + std::vector>> submissions) { const auto lock = std::scoped_lock{this->mutex}; if (!this->was_low_latency_requested) { return; } - this->pending_submissions = std::move(submissions); } diff --git a/src/strategies/low_latency2/swapchain_monitor.hh b/src/strategies/low_latency2/swapchain_monitor.hh index 9031bbb..47c3a75 100644 --- a/src/strategies/low_latency2/swapchain_monitor.hh +++ b/src/strategies/low_latency2/swapchain_monitor.hh @@ -27,13 +27,14 @@ class SwapchainMonitor final { void signal(const DeviceContext& device) const; }; - std::unique_ptr> 
pending_submissions{}; + // An empty vector here represents our 'no work' state. + std::vector>> pending_submissions{}; // A pairing of semaphore -> submissions. // If the Submissions completes then signal the bundled semaphore. struct SemaphoreSubmissions { WakeupSemaphore wakeup_semaphore{}; - std::unique_ptr> submissions{}; + std::vector>> submissions{}; }; std::optional semaphore_submission{}; @@ -47,6 +48,8 @@ class SwapchainMonitor final { std::condition_variable_any cv{}; std::jthread monitor_worker{}; + std::optional last_signal_time; + void do_monitor(const std::stop_token stoken); public: @@ -64,7 +67,8 @@ class SwapchainMonitor final { void notify_semaphore(const VkSemaphore& timeline_semaphore, const std::uint64_t& value); - void attach_work(std::unique_ptr> submissions); + void attach_work( + std::vector>> submissions); }; } // namespace low_latency -- cgit v1.2.3