From a9a083ea5c649498d2f12e611dbc7c767d152130 Mon Sep 17 00:00:00 2001 From: Nicolas James Date: Mon, 6 Apr 2026 12:18:10 +1000 Subject: Add WIP refactored reflex impl --- src/layer.cc | 28 ++---- src/layer_context.hh | 6 +- src/strategies/anti_lag/device_strategy.cc | 10 ++- src/strategies/anti_lag/device_strategy.hh | 9 +- src/strategies/anti_lag/queue_strategy.cc | 3 + src/strategies/anti_lag/queue_strategy.hh | 1 + src/strategies/device_strategy.hh | 8 +- src/strategies/low_latency2/device_strategy.cc | 31 +++++++ src/strategies/low_latency2/device_strategy.hh | 16 ++++ src/strategies/low_latency2/queue_strategy.cc | 26 ++++++ src/strategies/low_latency2/queue_strategy.hh | 1 + src/strategies/low_latency2/swapchain_monitor.cc | 104 +++++++++++++++++++++++ src/strategies/low_latency2/swapchain_monitor.hh | 72 ++++++++++++++++ src/strategies/queue_strategy.hh | 1 + src/submission.cc | 4 +- src/submission.hh | 3 +- 16 files changed, 293 insertions(+), 30 deletions(-) create mode 100644 src/strategies/low_latency2/swapchain_monitor.cc create mode 100644 src/strategies/low_latency2/swapchain_monitor.hh (limited to 'src') diff --git a/src/layer.cc b/src/layer.cc index 1a91dd0..335ebf3 100644 --- a/src/layer.cc +++ b/src/layer.cc @@ -527,16 +527,8 @@ vkQueuePresentKHR(VkQueue queue, const VkPresentInfoKHR* present_info) { return result; } - const auto pid = find_next( - present_info, VK_STRUCTURE_TYPE_PRESENT_ID_KHR); - - for (auto i = std::uint32_t{0}; i < present_info->swapchainCount; ++i) { - [[maybe_unused]] const auto& swapchain = present_info->pSwapchains[i]; - - // For VK_AMD_anti_lag, providing a pPresentId isn't part of the spec. - // So we just set it to 0 if it isn't provided. - [[maybe_unused]] const auto present_id = pid ? pid->pPresentIds[i] : 0; - } + assert(present_info); + context->strategy->notify_present(*present_info); return result; } @@ -559,7 +551,7 @@ static VKAPI_ATTR VkResult VKAPI_CALL EnumerateDeviceExtensionProperties( physical_device, pLayerName, pPropertyCount, pProperties); } - // If we're exposing reflex we want to provide their extension instead. + // If we're exposing reflex we want to provide that extension instead. const auto extension_properties = [&]() -> VkExtensionProperties { if (context->instance.layer.should_expose_reflex) { return {.extensionName = VK_NV_LOW_LATENCY_2_EXTENSION_NAME, @@ -783,16 +775,8 @@ static VKAPI_ATTR VkResult VKAPI_CALL CreateSwapchainKHR( return result; } - // VK_NV_low_latency2 allows a swapchain to be created with the low latency - // mode already on via VkSwapchainLatencyCreateInfoNV. - [[maybe_unused]] auto was_low_latency_requested = - true; // enable by default? - if (const auto slci = find_next( - pCreateInfo, VK_STRUCTURE_TYPE_SWAPCHAIN_LATENCY_CREATE_INFO_NV); - slci) { - - was_low_latency_requested = slci->latencyModeEnable; - } + assert(pCreateInfo); + context->strategy->notify_create_swapchain(*pSwapchain, *pCreateInfo); return VK_SUCCESS; } @@ -803,6 +787,8 @@ DestroySwapchainKHR(VkDevice device, VkSwapchainKHR swapchain, const auto context = layer_context.get_context(device); context->vtable.DestroySwapchainKHR(device, swapchain, pAllocator); + + context->strategy->notify_destroy_swapchain(swapchain); } static VKAPI_ATTR void VKAPI_CALL diff --git a/src/layer_context.hh b/src/layer_context.hh index 5c16926..e7b24b3 100644 --- a/src/layer_context.hh +++ b/src/layer_context.hh @@ -1,7 +1,7 @@ #ifndef LAYER_CONTEXT_HH_ #define LAYER_CONTEXT_HH_ -#include +#include #include #include @@ -63,7 +63,7 @@ class LayerContext final : public Context { static constexpr auto NVIDIA_DEVICE_NAME = "NVIDIA GeForce RTX 5090"; public: - std::mutex mutex; + std::shared_mutex mutex; std::unordered_map> contexts; bool should_expose_reflex = false; @@ -82,7 +82,7 @@ class LayerContext final : public Context { std::shared_ptr> get_context(const DT& dt) { const auto key = get_key(dt); - const auto lock = std::scoped_lock{this->mutex}; + const auto lock = std::shared_lock{this->mutex}; const auto it = this->contexts.find(key); assert(it != std::end(this->contexts)); diff --git a/src/strategies/anti_lag/device_strategy.cc b/src/strategies/anti_lag/device_strategy.cc index df11dca..b391371 100644 --- a/src/strategies/anti_lag/device_strategy.cc +++ b/src/strategies/anti_lag/device_strategy.cc @@ -54,8 +54,7 @@ void AntiLagDeviceStrategy::notify_update(const VkAntiLagDataAMD& data) { } } - // We might need to wait a little more time to meet our frame limit, if - // necessary. + // We might need to wait a little more time to meet our frame limit. using namespace std::chrono; if (this->delay != 0us && this->previous_input_release.has_value()) { std::this_thread::sleep_until(*this->previous_input_release + @@ -80,4 +79,11 @@ bool AntiLagDeviceStrategy::should_track_submissions() { return true; } +// Stub - anti_lag doesn't differentiate between swapchains. +void AntiLagDeviceStrategy::notify_create_swapchain( + const VkSwapchainKHR&, const VkSwapchainCreateInfoKHR&) {} + +// Stub - again, AL doesn't care about swapchains. +void AntiLagDeviceStrategy::notify_destroy_swapchain(const VkSwapchainKHR&) {} + } // namespace low_latency \ No newline at end of file diff --git a/src/strategies/anti_lag/device_strategy.hh b/src/strategies/anti_lag/device_strategy.hh index 836e748..3533647 100644 --- a/src/strategies/anti_lag/device_strategy.hh +++ b/src/strategies/anti_lag/device_strategy.hh @@ -18,7 +18,7 @@ class AntiLagDeviceStrategy final : public DeviceStrategy { std::shared_mutex mutex{}; // If this is nullopt don't track the submission. std::optional frame_index{}; - std::optional previous_input_release; + std::optional previous_input_release{}; std::chrono::microseconds delay{}; bool is_enabled{}; @@ -26,6 +26,13 @@ class AntiLagDeviceStrategy final : public DeviceStrategy { AntiLagDeviceStrategy(DeviceContext& device); virtual ~AntiLagDeviceStrategy(); + public: + virtual void + notify_create_swapchain(const VkSwapchainKHR& swapchain, + const VkSwapchainCreateInfoKHR& info) override; + virtual void + notify_destroy_swapchain(const VkSwapchainKHR& swapchain) override; + public: void notify_update(const VkAntiLagDataAMD& data); diff --git a/src/strategies/anti_lag/queue_strategy.cc b/src/strategies/anti_lag/queue_strategy.cc index 9dbe127..0834a96 100644 --- a/src/strategies/anti_lag/queue_strategy.cc +++ b/src/strategies/anti_lag/queue_strategy.cc @@ -59,4 +59,7 @@ void AntiLagQueueStrategy::await_complete() { last->end->await_time(); } +// Stub - AntiLag doesn't care about presents. +void AntiLagQueueStrategy::notify_present(const VkPresentInfoKHR&) {} + } // namespace low_latency diff --git a/src/strategies/anti_lag/queue_strategy.hh b/src/strategies/anti_lag/queue_strategy.hh index 3887474..37c44a5 100644 --- a/src/strategies/anti_lag/queue_strategy.hh +++ b/src/strategies/anti_lag/queue_strategy.hh @@ -25,6 +25,7 @@ class AntiLagQueueStrategy final : public QueueStrategy { std::unique_ptr submission) override; virtual void notify_submit(const VkSubmitInfo2& submit, std::unique_ptr submission) override; + virtual void notify_present(const VkPresentInfoKHR& present) override; public: // Wait for all pending submissions to complete. Resets pending submissions diff --git a/src/strategies/device_strategy.hh b/src/strategies/device_strategy.hh index 7b7bacd..0fd7acd 100644 --- a/src/strategies/device_strategy.hh +++ b/src/strategies/device_strategy.hh @@ -1,6 +1,9 @@ #ifndef STRATEGIES_DEVICE_STRATEGY_HH_ #define STRATEGIES_DEVICE_STRATEGY_HH_ +#include +#include + namespace low_latency { class DeviceContext; @@ -14,7 +17,10 @@ class DeviceStrategy { virtual ~DeviceStrategy(); public: - + virtual void + notify_create_swapchain(const VkSwapchainKHR& swapchain, + const VkSwapchainCreateInfoKHR& info) = 0; + virtual void notify_destroy_swapchain(const VkSwapchainKHR& swapchain) = 0; }; } // namespace low_latency diff --git a/src/strategies/low_latency2/device_strategy.cc b/src/strategies/low_latency2/device_strategy.cc index 7c10088..3a970a2 100644 --- a/src/strategies/low_latency2/device_strategy.cc +++ b/src/strategies/low_latency2/device_strategy.cc @@ -1,5 +1,8 @@ #include "device_strategy.hh" +#include "helper.hh" +#include + namespace low_latency { LowLatency2DeviceStrategy::LowLatency2DeviceStrategy(DeviceContext& device) @@ -7,4 +10,32 @@ LowLatency2DeviceStrategy::LowLatency2DeviceStrategy(DeviceContext& device) LowLatency2DeviceStrategy::~LowLatency2DeviceStrategy() {} +void LowLatency2DeviceStrategy::notify_create_swapchain( + const VkSwapchainKHR& swapchain, const VkSwapchainCreateInfoKHR& info) { + + // VK_NV_low_latency2 allows a swapchain to be created with the low latency + // mode already on via VkSwapchainLatencyCreateInfoNV. + auto was_low_latency_requested = bool{false}; + if (const auto slci = find_next( + &info, VK_STRUCTURE_TYPE_SWAPCHAIN_LATENCY_CREATE_INFO_NV); + slci) { + + was_low_latency_requested = slci->latencyModeEnable; + } + + const auto lock = std::scoped_lock{this->mutex}; + const auto iter = this->swapchain_monitors.emplace(swapchain, this->device); + assert(iter.second); + iter.first->second.update_params(was_low_latency_requested, + std::chrono::microseconds{0}); +} + +void LowLatency2DeviceStrategy::notify_destroy_swapchain( + const VkSwapchainKHR& swapchain) { + + const auto lock = std::scoped_lock{this->mutex}; + + this->swapchain_monitors.erase(swapchain); +} + } // namespace low_latency diff --git a/src/strategies/low_latency2/device_strategy.hh b/src/strategies/low_latency2/device_strategy.hh index 18f8bd9..af1b471 100644 --- a/src/strategies/low_latency2/device_strategy.hh +++ b/src/strategies/low_latency2/device_strategy.hh @@ -2,15 +2,31 @@ #define STRATEGIES_LOW_LATENCY2_DEVICE_STRATEGY_HH_ #include "strategies/device_strategy.hh" +#include "swapchain_monitor.hh" + +#include +#include namespace low_latency { class DeviceContext; class LowLatency2DeviceStrategy final : public DeviceStrategy { + private: + std::shared_mutex mutex; + // swapchain -> swapchain monitor + std::unordered_map swapchain_monitors; + public: LowLatency2DeviceStrategy(DeviceContext& device); virtual ~LowLatency2DeviceStrategy(); + + public: + virtual void + notify_create_swapchain(const VkSwapchainKHR& swapchain, + const VkSwapchainCreateInfoKHR& info) override; + virtual void + notify_destroy_swapchain(const VkSwapchainKHR& swapchain) override; }; } // namespace low_latency diff --git a/src/strategies/low_latency2/queue_strategy.cc b/src/strategies/low_latency2/queue_strategy.cc index e67d279..855ff5d 100644 --- a/src/strategies/low_latency2/queue_strategy.cc +++ b/src/strategies/low_latency2/queue_strategy.cc @@ -1,4 +1,8 @@ #include "queue_strategy.hh" +#include "helper.hh" + +#include +#include namespace low_latency { @@ -15,4 +19,26 @@ void LowLatency2QueueStrategy::notify_submit( [[maybe_unused]] const VkSubmitInfo2& submit, [[maybe_unused]] std::unique_ptr submission) {} +void LowLatency2QueueStrategy::notify_present(const VkPresentInfoKHR& present) { + + const auto pid = + find_next(&present, VK_STRUCTURE_TYPE_PRESENT_ID_KHR); + + // All submissions should be tagged with a present_id. If it isn't, I'm not + // going to fail hard here - we will just ignore it. + if (!pid) { + return; + } + + const auto swapchains = + std::span{present.pSwapchains, present.swapchainCount}; + const auto present_ids = + std::span{pid->pPresentIds, present.swapchainCount}; + for (const auto& [swapchain, present_id] : + std::views::zip(swapchains, present_ids)) { + + // TODO + } +} + } // namespace low_latency diff --git a/src/strategies/low_latency2/queue_strategy.hh b/src/strategies/low_latency2/queue_strategy.hh index ad31df4..223f559 100644 --- a/src/strategies/low_latency2/queue_strategy.hh +++ b/src/strategies/low_latency2/queue_strategy.hh @@ -17,6 +17,7 @@ class LowLatency2QueueStrategy final : public QueueStrategy { std::unique_ptr submission) override; virtual void notify_submit(const VkSubmitInfo2& submit, std::unique_ptr submission) override; + virtual void notify_present(const VkPresentInfoKHR& present) override; }; } // namespace low_latency diff --git a/src/strategies/low_latency2/swapchain_monitor.cc b/src/strategies/low_latency2/swapchain_monitor.cc new file mode 100644 index 0000000..3c9b5e7 --- /dev/null +++ b/src/strategies/low_latency2/swapchain_monitor.cc @@ -0,0 +1,104 @@ +#include "swapchain_monitor.hh" +#include "device_context.hh" +#include "helper.hh" + +namespace low_latency { + +void SwapchainMonitor::WakeupSemaphore::signal( + const DeviceContext& device) const { + + const auto ssi = + VkSemaphoreSignalInfo{.sType = VK_STRUCTURE_TYPE_SEMAPHORE_SIGNAL_INFO, + .semaphore = this->timeline_semaphore, + .value = this->value}; + THROW_NOT_VKSUCCESS(device.vtable.SignalSemaphore(device.device, &ssi)); +} + +void SwapchainMonitor::update_params(const bool was_low_latency_requested, + const std::chrono::microseconds delay) { + + const auto lock = std::scoped_lock{this->mutex}; + + this->was_low_latency_requested = was_low_latency_requested; + this->present_delay = delay; +} + +void SwapchainMonitor::do_monitor(const std::stop_token stoken) { + for (;;) { + auto lock = std::unique_lock{this->mutex}; + this->cv.wait(lock, stoken, + [&]() { return this->semaphore_submission.has_value(); }); + + // Stop only if we're stopped and we have nothing to signal. + if (stoken.stop_requested() && + !this->semaphore_submission.has_value()) { + break; + } + + // Grab the most recent semaphore. When work completes, signal it. + const auto semaphore_submission = + std::move(*this->semaphore_submission); + this->semaphore_submission.reset(); + + // If we're stopping, signal the semaphore and don't worry about work + // actually completing. + if (stoken.stop_requested()) { + semaphore_submission.wakeup_semaphore.signal(this->device); + break; + } + + // Unlock, wait for work to finish, signal semaphore. + lock.unlock(); + // Ugly and duplicated - will fix this soon. + if (!semaphore_submission.submissions->empty()) { + semaphore_submission.submissions->back().end->await_time(); + } + + // TODO add wait for frame pacing + semaphore_submission.wakeup_semaphore.signal(this->device); + } +} + +void SwapchainMonitor::notify_semaphore(const VkSemaphore& timeline_semaphore, + const std::uint64_t& value) { + + auto lock = std::unique_lock{this->mutex}; + + const auto wakeup_semaphore = WakeupSemaphore{ + .timeline_semaphore = timeline_semaphore, .value = value}; + + // Signal immediately if reflex is off or it's a no-op submit. + if (!this->was_low_latency_requested) { + wakeup_semaphore.signal(this->device); + return; + } + + // Signal immediately if we have no outstanding work. + if (!this->pending_submissions) { + this->pending_submissions.reset(); + wakeup_semaphore.signal(this->device); + return; + } + + this->semaphore_submission.emplace(SemaphoreSubmissions{ + .wakeup_semaphore = wakeup_semaphore, + .submissions = std::move(this->pending_submissions), + }); + this->pending_submissions.reset(); + + lock.unlock(); + this->cv.notify_one(); +} + +void SwapchainMonitor::attach_work( + std::unique_ptr> submissions) { + + const auto lock = std::scoped_lock{this->mutex}; + if (!this->was_low_latency_requested) { + return; + } + + this->pending_submissions = std::move(submissions); +} + +} // namespace low_latency \ No newline at end of file diff --git a/src/strategies/low_latency2/swapchain_monitor.hh b/src/strategies/low_latency2/swapchain_monitor.hh new file mode 100644 index 0000000..9031bbb --- /dev/null +++ b/src/strategies/low_latency2/swapchain_monitor.hh @@ -0,0 +1,72 @@ + +#ifndef SWAPCHAIN_MONITOR_HH_ +#define SWAPCHAIN_MONITOR_HH_ + +#include + +#include +#include +#include +#include +#include +#include + +#include "submission.hh" + +namespace low_latency { + +class DeviceContext; + +class SwapchainMonitor final { + private: + struct WakeupSemaphore { + VkSemaphore timeline_semaphore{}; + std::uint64_t value{}; + + public: + void signal(const DeviceContext& device) const; + }; + + std::unique_ptr> pending_submissions{}; + + // A pairing of semaphore -> submissions. + // If the Submissions completes then signal the bundled semaphore. + struct SemaphoreSubmissions { + WakeupSemaphore wakeup_semaphore{}; + std::unique_ptr> submissions{}; + }; + std::optional semaphore_submission{}; + + protected: + const DeviceContext& device; + + std::mutex mutex{}; + std::chrono::microseconds present_delay{}; + bool was_low_latency_requested{}; + + std::condition_variable_any cv{}; + std::jthread monitor_worker{}; + + void do_monitor(const std::stop_token stoken); + + public: + SwapchainMonitor(const DeviceContext& device); + SwapchainMonitor(const SwapchainMonitor&) = delete; + SwapchainMonitor(SwapchainMonitor&&) = delete; + SwapchainMonitor operator=(const SwapchainMonitor&) = delete; + SwapchainMonitor operator=(SwapchainMonitor&&) = delete; + ~SwapchainMonitor(); + + public: + void update_params(const bool was_low_latency_requested, + const std::chrono::microseconds delay); + + void notify_semaphore(const VkSemaphore& timeline_semaphore, + const std::uint64_t& value); + + void attach_work(std::unique_ptr> submissions); +}; + +} // namespace low_latency + +#endif \ No newline at end of file diff --git a/src/strategies/queue_strategy.hh b/src/strategies/queue_strategy.hh index 37bad02..b4fbcb9 100644 --- a/src/strategies/queue_strategy.hh +++ b/src/strategies/queue_strategy.hh @@ -23,6 +23,7 @@ class QueueStrategy { std::unique_ptr submission) = 0; virtual void notify_submit(const VkSubmitInfo2& submit, std::unique_ptr submission) = 0; + virtual void notify_present(const VkPresentInfoKHR& present) = 0; }; } // namespace low_latency diff --git a/src/submission.cc b/src/submission.cc index 1bf61cd..1a6cb72 100644 --- a/src/submission.cc +++ b/src/submission.cc @@ -1,3 +1,5 @@ #include "submission.hh" -namespace low_latency {} \ No newline at end of file +namespace low_latency { + +} // namespace low_latency \ No newline at end of file diff --git a/src/submission.hh b/src/submission.hh index 69ec576..e14f3f0 100644 --- a/src/submission.hh +++ b/src/submission.hh @@ -6,7 +6,8 @@ namespace low_latency { -struct Submission { +class Submission { + public: std::shared_ptr start, end; DeviceClock::time_point_t time; }; -- cgit v1.2.3