diff options
| author | Nicolas James <nj3ahxac@gmail.com> | 2026-04-06 17:03:35 +1000 |
|---|---|---|
| committer | Nicolas James <nj3ahxac@gmail.com> | 2026-04-06 17:03:35 +1000 |
| commit | 312d8736ae0df55c9f33e4eb5c00e4cd77e1c33f (patch) | |
| tree | 170816e372c43ddc6522e059f7d6fdc8757330f2 /src | |
| parent | a9a083ea5c649498d2f12e611dbc7c767d152130 (diff) | |
Add refactored VK_NV_low_latency2 impl (fixes many threading issues)
Diffstat (limited to 'src')
| -rw-r--r-- | src/layer.cc | 31 | ||||
| -rw-r--r-- | src/physical_device_context.hh | 3 | ||||
| -rw-r--r-- | src/strategies/low_latency2/device_strategy.cc | 83 | ||||
| -rw-r--r-- | src/strategies/low_latency2/device_strategy.hh | 11 | ||||
| -rw-r--r-- | src/strategies/low_latency2/queue_strategy.cc | 86 | ||||
| -rw-r--r-- | src/strategies/low_latency2/queue_strategy.hh | 22 | ||||
| -rw-r--r-- | src/strategies/low_latency2/swapchain_monitor.cc | 40 | ||||
| -rw-r--r-- | src/strategies/low_latency2/swapchain_monitor.hh | 10 |
8 files changed, 246 insertions, 40 deletions
diff --git a/src/layer.cc b/src/layer.cc index 335ebf3..471d2fb 100644 --- a/src/layer.cc +++ b/src/layer.cc @@ -21,6 +21,8 @@ #include "layer_context.hh" #include "queue_context.hh" #include "strategies/anti_lag/device_strategy.hh" +#include "strategies/low_latency2/device_strategy.hh" +#include "strategies/low_latency2/queue_strategy.hh" #include "timestamp_pool.hh" namespace low_latency { @@ -775,8 +777,10 @@ static VKAPI_ATTR VkResult VKAPI_CALL CreateSwapchainKHR( return result; } - assert(pCreateInfo); - context->strategy->notify_create_swapchain(*pSwapchain, *pCreateInfo); + if (context->was_capability_requested) { + assert(pCreateInfo); + context->strategy->notify_create_swapchain(*pSwapchain, *pCreateInfo); + } return VK_SUCCESS; } @@ -788,7 +792,9 @@ DestroySwapchainKHR(VkDevice device, VkSwapchainKHR swapchain, context->vtable.DestroySwapchainKHR(device, swapchain, pAllocator); - context->strategy->notify_destroy_swapchain(swapchain); + if (context->was_capability_requested) { + context->strategy->notify_destroy_swapchain(swapchain); + } } static VKAPI_ATTR void VKAPI_CALL @@ -809,6 +815,12 @@ VkResult LatencySleepNV(VkDevice device, const auto context = layer_context.get_context(device); assert(pSleepInfo); + // call device strategy notify semaphore, no problem :) + const auto strategy = + dynamic_cast<LowLatency2DeviceStrategy*>(context->strategy.get()); + assert(strategy); + strategy->notify_latency_sleep_nv(swapchain, *pSleepInfo); + return VK_SUCCESS; } @@ -817,9 +829,12 @@ void QueueNotifyOutOfBandNV( [[maybe_unused]] const VkOutOfBandQueueTypeInfoNV* pQueueTypeInfo) { // Kind of interesting how you can't turn it back on once it's turned off. - // Also I really have no idea why pQueueTypeInfo's VkOutOfBandQueueTypeNV - // enum even exists (I guess we will find out later when nothing works). 
const auto context = layer_context.get_context(queue); + + const auto strategy = + dynamic_cast<LowLatency2QueueStrategy*>(context->strategy.get()); + assert(strategy); + strategy->notify_out_of_band(); } VkResult SetLatencySleepModeNV( @@ -827,6 +842,12 @@ VkResult SetLatencySleepModeNV( [[maybe_unused]] const VkLatencySleepModeInfoNV* pSleepModeInfo) { const auto context = layer_context.get_context(device); + const auto strategy = + dynamic_cast<LowLatency2DeviceStrategy*>(context->strategy.get()); + assert(strategy); + + strategy->notify_latency_sleep_mode(swapchain, pSleepModeInfo); + return VK_SUCCESS; } diff --git a/src/physical_device_context.hh b/src/physical_device_context.hh index d2e094e..1feed6a 100644 --- a/src/physical_device_context.hh +++ b/src/physical_device_context.hh @@ -18,8 +18,7 @@ class PhysicalDeviceContext final : public Context { static constexpr auto required_extensions = { VK_KHR_SYNCHRONIZATION_2_EXTENSION_NAME, VK_KHR_CALIBRATED_TIMESTAMPS_EXTENSION_NAME, - VK_EXT_HOST_QUERY_RESET_EXTENSION_NAME, - VK_KHR_PRESENT_ID_EXTENSION_NAME}; + VK_EXT_HOST_QUERY_RESET_EXTENSION_NAME}; public: InstanceContext& instance; diff --git a/src/strategies/low_latency2/device_strategy.cc b/src/strategies/low_latency2/device_strategy.cc index 3a970a2..18ea21f 100644 --- a/src/strategies/low_latency2/device_strategy.cc +++ b/src/strategies/low_latency2/device_strategy.cc @@ -1,4 +1,6 @@ #include "device_strategy.hh" +#include "device_context.hh" +#include "queue_strategy.hh" #include "helper.hh" #include <mutex> @@ -15,7 +17,10 @@ void LowLatency2DeviceStrategy::notify_create_swapchain( // VK_NV_low_latency2 allows a swapchain to be created with the low latency // mode already on via VkSwapchainLatencyCreateInfoNV. - auto was_low_latency_requested = bool{false}; + // Default to enabled - if the app is using VK_NV_low_latency2 at all it + // wants pacing. 
VkSwapchainLatencyCreateInfoNV can override this, but + // apps like CS2 recreate swapchains without it (apparent app bug). + auto was_low_latency_requested = bool{true}; if (const auto slci = find_next<VkSwapchainLatencyCreateInfoNV>( &info, VK_STRUCTURE_TYPE_SWAPCHAIN_LATENCY_CREATE_INFO_NV); slci) { @@ -38,4 +43,80 @@ void LowLatency2DeviceStrategy::notify_destroy_swapchain( this->swapchain_monitors.erase(swapchain); } +void LowLatency2DeviceStrategy::notify_latency_sleep_mode( + const VkSwapchainKHR& swapchain, + const VkLatencySleepModeInfoNV* const info) { + + const auto lock = std::shared_lock{this->mutex}; + + const auto iter = this->swapchain_monitors.find(swapchain); + assert(iter != std::end(this->swapchain_monitors)); + + using namespace std::chrono; + if (info) { + iter->second.update_params(info->lowLatencyMode, + microseconds{info->minimumIntervalUs}); + } else { + iter->second.update_params(false, 0us); + } +} + +void LowLatency2DeviceStrategy::submit_swapchain_present_id( + const VkSwapchainKHR& swapchain, const std::uint64_t& present_id) { + + // Iterate through all queues and grab any work that's associated with this + // present_id. Turn it into a vector of work that we give to our swapchain + // monitor. + auto work = [&]() -> std::vector<std::deque<std::unique_ptr<Submission>>> { + auto work = std::vector<std::deque<std::unique_ptr<Submission>>>{}; + const auto lock = std::scoped_lock{this->device.mutex}; + for (const auto& queue_iter : this->device.queues) { + const auto& queue = queue_iter.second; + + const auto strategy = + dynamic_cast<LowLatency2QueueStrategy*>(queue->strategy.get()); + assert(strategy); + + if (strategy->is_out_of_band.load(std::memory_order::relaxed)) { + continue; + } + + // Need the lock now - we're modifying it. 
+ const auto strategy_lock = std::unique_lock{strategy->mutex}; + const auto iter = strategy->present_id_submissions.find(present_id); + if (iter == std::end(strategy->present_id_submissions)) { + continue; + } + + // Make sure we clean it up from the present as well. + work.push_back(std::move(iter->second)); + strategy->present_id_submissions.erase(iter); + } + return work; + }(); + + const auto lock = std::scoped_lock{this->mutex}; + + // Fail hard here, the swapchain must exist or something has gone wrong with + // Vulkan bookkeeping. + const auto iter = this->swapchain_monitors.find(swapchain); + assert(iter != std::end(this->swapchain_monitors)); + + // Notify our monitor that this work has to be completed before they signal + // whatever semaphore is currently sitting in it. + iter->second.attach_work(std::move(work)); +} + +void LowLatency2DeviceStrategy::notify_latency_sleep_nv( + const VkSwapchainKHR& swapchain, const VkLatencySleepInfoNV& info) { + + const auto lock = std::scoped_lock{this->mutex}; + + // Again, fail hard here - something has gone terribly wrong. 
+ const auto iter = this->swapchain_monitors.find(swapchain); + assert(iter != std::end(this->swapchain_monitors)); + + iter->second.notify_semaphore(info.signalSemaphore, info.value); +} + } // namespace low_latency diff --git a/src/strategies/low_latency2/device_strategy.hh b/src/strategies/low_latency2/device_strategy.hh index af1b471..499b2aa 100644 --- a/src/strategies/low_latency2/device_strategy.hh +++ b/src/strategies/low_latency2/device_strategy.hh @@ -6,6 +6,7 @@ #include <shared_mutex> #include <unordered_map> +#include <vulkan/vulkan_core.h> namespace low_latency { @@ -27,6 +28,16 @@ class LowLatency2DeviceStrategy final : public DeviceStrategy { const VkSwapchainCreateInfoKHR& info) override; virtual void notify_destroy_swapchain(const VkSwapchainKHR& swapchain) override; + + public: + void submit_swapchain_present_id(const VkSwapchainKHR& swapchain, + const std::uint64_t& present_id); + + void notify_latency_sleep_mode(const VkSwapchainKHR& swapchain, + const VkLatencySleepModeInfoNV* const info); + + void notify_latency_sleep_nv(const VkSwapchainKHR& swapchain, + const VkLatencySleepInfoNV& info); }; } // namespace low_latency diff --git a/src/strategies/low_latency2/queue_strategy.cc b/src/strategies/low_latency2/queue_strategy.cc index 855ff5d..9a68b78 100644 --- a/src/strategies/low_latency2/queue_strategy.cc +++ b/src/strategies/low_latency2/queue_strategy.cc @@ -1,8 +1,10 @@ #include "queue_strategy.hh" +#include "device_context.hh" +#include "device_strategy.hh" #include "helper.hh" +#include "queue_context.hh" -#include <ranges> -#include <span> +#include <vulkan/vulkan_core.h> namespace low_latency { @@ -11,34 +13,80 @@ LowLatency2QueueStrategy::LowLatency2QueueStrategy(QueueContext& queue) LowLatency2QueueStrategy::~LowLatency2QueueStrategy() {} +template <typename T> +static void notify_submit_impl(LowLatency2QueueStrategy& strategy, + const T& submit, + std::unique_ptr<Submission> submission) { + + // It's actually not a requirement that 
we have this present id. + const auto lspi = find_next<VkLatencySubmissionPresentIdNV>( + &submit, VK_STRUCTURE_TYPE_LATENCY_SUBMISSION_PRESENT_ID_NV); + const auto present_id = lspi ? lspi->presentID : 0; + + const auto lock = std::scoped_lock{strategy.mutex}; + const auto [iter, inserted] = + strategy.present_id_submissions.try_emplace(present_id); + iter->second.push_back(std::move(submission)); + + // Remove stale submissions if we're presenting a lot to the same + // present_id. This doesn't affect anything because we're waiting on the + // last. It begs the question: should we should just store the last only? + if (std::size(iter->second) >= + LowLatency2QueueStrategy::MAX_TRACKED_OBJECTS) { + + iter->second.pop_front(); + } + + // Add our present_id to our ring tracking if it's non-zero. + if (inserted && present_id) { + strategy.present_id_ring.push_back(present_id); + } + + // Remove stale present_id's if they weren't presented to. + if (std::size(strategy.present_id_ring) > + LowLatency2QueueStrategy::MAX_TRACKED_OBJECTS) { + + const auto to_remove = strategy.present_id_ring.front(); + strategy.present_id_ring.pop_front(); + strategy.present_id_submissions.erase(to_remove); + } +} + void LowLatency2QueueStrategy::notify_submit( - [[maybe_unused]] const VkSubmitInfo& submit, - [[maybe_unused]] std::unique_ptr<Submission> submission) {} + const VkSubmitInfo& submit, std::unique_ptr<Submission> submission) { + + notify_submit_impl(*this, submit, std::move(submission)); +} void LowLatency2QueueStrategy::notify_submit( - [[maybe_unused]] const VkSubmitInfo2& submit, - [[maybe_unused]] std::unique_ptr<Submission> submission) {} + const VkSubmitInfo2& submit, std::unique_ptr<Submission> submission) { + + notify_submit_impl(*this, submit, std::move(submission)); +} void LowLatency2QueueStrategy::notify_present(const VkPresentInfoKHR& present) { const auto pid = find_next<VkPresentIdKHR>(&present, VK_STRUCTURE_TYPE_PRESENT_ID_KHR); - // All submissions should be 
tagged with a present_id. If it isn't, I'm not - // going to fail hard here - we will just ignore it. - if (!pid) { - return; - } - - const auto swapchains = - std::span{present.pSwapchains, present.swapchainCount}; - const auto present_ids = - std::span{pid->pPresentIds, present.swapchainCount}; - for (const auto& [swapchain, present_id] : - std::views::zip(swapchains, present_ids)) { + const auto device_strategy = dynamic_cast<LowLatency2DeviceStrategy*>( + this->queue.device.strategy.get()); + assert(device_strategy); - // TODO + for (auto i = std::uint32_t{0}; i < present.swapchainCount; ++i) { + const auto& swapchain = present.pSwapchains[i]; + const auto present_id = [&]() -> std::uint64_t { + if (pid && pid->pPresentIds) { + return pid->pPresentIds[i]; + } + return 0; + }(); + device_strategy->submit_swapchain_present_id(swapchain, present_id); } } +void LowLatency2QueueStrategy::notify_out_of_band() { + this->is_out_of_band.store(true, std::memory_order_relaxed); +} + } // namespace low_latency diff --git a/src/strategies/low_latency2/queue_strategy.hh b/src/strategies/low_latency2/queue_strategy.hh index 223f559..a090e1b 100644 --- a/src/strategies/low_latency2/queue_strategy.hh +++ b/src/strategies/low_latency2/queue_strategy.hh @@ -2,6 +2,13 @@ #define STRATEGIES_LOW_LATENCY2_QUEUE_STRATEGY_HH_ #include "strategies/queue_strategy.hh" +#include "submission.hh" + +#include <atomic> +#include <deque> +#include <memory> +#include <mutex> +#include <unordered_map> namespace low_latency { @@ -9,6 +16,18 @@ class QueueContext; class LowLatency2QueueStrategy final : public QueueStrategy { public: + static constexpr auto MAX_TRACKED_OBJECTS = 50; + + // Mapping of present_id's to submissions. Grabbed later by the device + // strategy when we present and actually can associate them to some + // vkSwapchainKHR. 
+ std::mutex mutex{}; + std::unordered_map<std::uint64_t, std::deque<std::unique_ptr<Submission>>> + present_id_submissions{}; + std::deque<std::uint64_t> present_id_ring{}; + std::atomic<bool> is_out_of_band{}; // atomic so we don't need a lock check + + public: LowLatency2QueueStrategy(QueueContext& queue); virtual ~LowLatency2QueueStrategy(); @@ -18,6 +37,9 @@ class LowLatency2QueueStrategy final : public QueueStrategy { virtual void notify_submit(const VkSubmitInfo2& submit, std::unique_ptr<Submission> submission) override; virtual void notify_present(const VkPresentInfoKHR& present) override; + + public: + void notify_out_of_band(); }; } // namespace low_latency diff --git a/src/strategies/low_latency2/swapchain_monitor.cc b/src/strategies/low_latency2/swapchain_monitor.cc index 3c9b5e7..4c19251 100644 --- a/src/strategies/low_latency2/swapchain_monitor.cc +++ b/src/strategies/low_latency2/swapchain_monitor.cc @@ -2,8 +2,16 @@ #include "device_context.hh" #include "helper.hh" +#include <functional> + namespace low_latency { +SwapchainMonitor::SwapchainMonitor(const DeviceContext& device) + : device(device), + monitor_worker(std::bind_front(&SwapchainMonitor::do_monitor, this)) {} + +SwapchainMonitor::~SwapchainMonitor() {} + void SwapchainMonitor::WakeupSemaphore::signal( const DeviceContext& device) const { @@ -47,14 +55,28 @@ void SwapchainMonitor::do_monitor(const std::stop_token stoken) { break; } - // Unlock, wait for work to finish, signal semaphore. + // Unlock, wait for work to finish, lock again. lock.unlock(); - // Ugly and duplicated - will fix this soon. 
- if (!semaphore_submission.submissions->empty()) { - semaphore_submission.submissions->back().end->await_time(); + for (const auto& submission : semaphore_submission.submissions) { + if (!submission.empty()) { + submission.back()->end->await_time(); + } } - // TODO add wait for frame pacing + lock.lock(); + using namespace std::chrono; + if (this->present_delay != 0us) { + const auto last_time = this->last_signal_time; + const auto delay = this->present_delay; + if (last_time.has_value()) { + lock.unlock(); + std::this_thread::sleep_until(*last_time + delay); + lock.lock(); + } + this->last_signal_time.emplace(steady_clock::now()); + } + lock.unlock(); + semaphore_submission.wakeup_semaphore.signal(this->device); } } @@ -74,8 +96,7 @@ void SwapchainMonitor::notify_semaphore(const VkSemaphore& timeline_semaphore, } // Signal immediately if we have no outstanding work. - if (!this->pending_submissions) { - this->pending_submissions.reset(); + if (this->pending_submissions.empty()) { wakeup_semaphore.signal(this->device); return; } @@ -84,20 +105,19 @@ void SwapchainMonitor::notify_semaphore(const VkSemaphore& timeline_semaphore, .wakeup_semaphore = wakeup_semaphore, .submissions = std::move(this->pending_submissions), }); - this->pending_submissions.reset(); + this->pending_submissions.clear(); lock.unlock(); this->cv.notify_one(); } void SwapchainMonitor::attach_work( - std::unique_ptr<std::deque<Submission>> submissions) { + std::vector<std::deque<std::unique_ptr<Submission>>> submissions) { const auto lock = std::scoped_lock{this->mutex}; if (!this->was_low_latency_requested) { return; } - this->pending_submissions = std::move(submissions); } diff --git a/src/strategies/low_latency2/swapchain_monitor.hh b/src/strategies/low_latency2/swapchain_monitor.hh index 9031bbb..47c3a75 100644 --- a/src/strategies/low_latency2/swapchain_monitor.hh +++ b/src/strategies/low_latency2/swapchain_monitor.hh @@ -27,13 +27,14 @@ class SwapchainMonitor final { void signal(const 
DeviceContext& device) const; }; - std::unique_ptr<std::deque<Submission>> pending_submissions{}; + // An empty vector here represents our 'no work' state. + std::vector<std::deque<std::unique_ptr<Submission>>> pending_submissions{}; // A pairing of semaphore -> submissions. // If the Submissions completes then signal the bundled semaphore. struct SemaphoreSubmissions { WakeupSemaphore wakeup_semaphore{}; - std::unique_ptr<std::deque<Submission>> submissions{}; + std::vector<std::deque<std::unique_ptr<Submission>>> submissions{}; }; std::optional<SemaphoreSubmissions> semaphore_submission{}; @@ -47,6 +48,8 @@ class SwapchainMonitor final { std::condition_variable_any cv{}; std::jthread monitor_worker{}; + std::optional<std::chrono::steady_clock::time_point> last_signal_time; + void do_monitor(const std::stop_token stoken); public: @@ -64,7 +67,8 @@ class SwapchainMonitor final { void notify_semaphore(const VkSemaphore& timeline_semaphore, const std::uint64_t& value); - void attach_work(std::unique_ptr<std::deque<Submission>> submissions); + void attach_work( + std::vector<std::deque<std::unique_ptr<Submission>>> submissions); }; } // namespace low_latency |
