From cf0bced6cd86782e9706acda1b3b6ce6b4e98481 Mon Sep 17 00:00:00 2001 From: Nicolas James Date: Sun, 5 Apr 2026 22:01:11 +1000 Subject: Implement refactored AL2, todo frame limit --- src/device_context.hh | 5 +-- src/layer.cc | 45 ++++++++++---------- src/strategies/anti_lag/device_strategy.cc | 59 +++++++++++++++++++++++++++ src/strategies/anti_lag/device_strategy.hh | 17 ++++++++ src/strategies/anti_lag/queue_strategy.cc | 52 +++++++++++++++++++++++ src/strategies/anti_lag/queue_strategy.hh | 19 +++++++++ src/strategies/device_strategy.hh | 4 ++ src/strategies/low_latency2/queue_strategy.cc | 8 ++++ src/strategies/low_latency2/queue_strategy.hh | 6 +++ src/strategies/queue_strategy.hh | 12 ++++++ src/submission.cc | 3 ++ src/submission.hh | 16 ++++++++ src/submissions.cc | 0 src/submissions.hh | 12 ------ 14 files changed, 221 insertions(+), 37 deletions(-) create mode 100644 src/submission.cc create mode 100644 src/submission.hh delete mode 100644 src/submissions.cc delete mode 100644 src/submissions.hh diff --git a/src/device_context.hh b/src/device_context.hh index 975d67c..950d132 100644 --- a/src/device_context.hh +++ b/src/device_context.hh @@ -2,6 +2,7 @@ #define DEVICE_CONTEXT_HH_ #include +#include #include #include @@ -22,17 +23,15 @@ class DeviceContext final : public Context { public: InstanceContext& instance; PhysicalDeviceContext& physical_device; - // Whether or not we were asked to do NV_VK_LowLatency2 or VK_AMD_anti_lag // at the device level. const bool was_capability_requested; - const VkDevice device; const VkuDeviceDispatchTable vtable; + std::shared_mutex mutex; std::unique_ptr clock; std::unordered_map> queues; - std::unique_ptr strategy; public: diff --git a/src/layer.cc b/src/layer.cc index 516f9d3..627fee2 100644 --- a/src/layer.cc +++ b/src/layer.cc @@ -20,6 +20,7 @@ #include "instance_context.hh" #include "layer_context.hh" #include "queue_context.hh" +#include "strategies/anti_lag/device_strategy.hh" #include "timestamp_pool.hh" namespace low_latency { @@ -307,7 +308,7 @@ GetDeviceQueue(VkDevice device, std::uint32_t queue_family_index, // insert a nullptr key, then it didn't already exist so we should // construct a new one. const auto key = layer_context.get_key(*queue); - const auto lock = std::scoped_lock{layer_context.mutex}; + const auto layer_lock = std::scoped_lock{layer_context.mutex}; const auto [it, inserted] = layer_context.contexts.try_emplace(key); if (inserted) { it->second = std::make_shared(*context, *queue, @@ -317,6 +318,7 @@ GetDeviceQueue(VkDevice device, std::uint32_t queue_family_index, // it->second should be QueueContext, also it might already be there. const auto ptr = std::dynamic_pointer_cast(it->second); assert(ptr); + const auto device_lock = std::scoped_lock{context->mutex}; context->queues.emplace(*queue, ptr); } @@ -341,6 +343,7 @@ static VKAPI_ATTR void VKAPI_CALL GetDeviceQueue2( const auto ptr = std::dynamic_pointer_cast(it->second); assert(ptr); + const auto device_lock = std::scoped_lock{context->mutex}; context->queues.emplace(*queue, ptr); } @@ -400,10 +403,16 @@ vkQueueSubmit(VkQueue queue, std::uint32_t submit_count, [&](const auto& submit) { const auto head_handle = context->timestamp_pool->acquire(); head_handle->write_command(VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT); - const auto tail_handle = context->timestamp_pool->acquire(); tail_handle->write_command(VK_PIPELINE_STAGE_2_BOTTOM_OF_PIPE_BIT); + context->strategy->notify_submit( + submit, std::make_unique(Submission{ + .start = head_handle, + .end = tail_handle, + .time = now, + })); + handles.emplace_back(head_handle); handles.emplace_back(tail_handle); next_cbs.emplace_back([&]() -> auto { @@ -445,7 +454,7 @@ vkQueueSubmit2(VkQueue queue, std::uint32_t submit_count, auto next_cbs = std::vector>{}; auto handles = std::vector>{}; - [[maybe_unused]] const auto now = DeviceClock::now(); + const auto now = DeviceClock::now(); std::ranges::transform( std::span{submit_infos, submit_count}, std::back_inserter(next_submits), @@ -455,6 +464,13 @@ vkQueueSubmit2(VkQueue queue, std::uint32_t submit_count, const auto tail_handle = context->timestamp_pool->acquire(); tail_handle->write_command(VK_PIPELINE_STAGE_2_BOTTOM_OF_PIPE_BIT); + context->strategy->notify_submit( + submit, std::make_unique(Submission{ + .start = head_handle, + .end = tail_handle, + .time = now, + })); + handles.emplace_back(head_handle); handles.emplace_back(tail_handle); next_cbs.emplace_back([&]() -> auto { @@ -786,25 +802,10 @@ AntiLagUpdateAMD(VkDevice device, const VkAntiLagDataAMD* pData) { const auto context = layer_context.get_context(device); assert(pData); - // AL2 is a synchronous while NVIDIA's low_latencty2 is asynchronous. - // It's difficult to model an asynchronous impl inside a synchronous impl, - // but it's easy to do the inverse. AMD's extension piggybacks on NVIDIA's - // more complicated implementation. - - [[maybe_unused]] const auto present_delay = - [&]() -> std::chrono::milliseconds { - using namespace std::chrono; - if (!pData->maxFPS) { - return 0ms; - } - return duration_cast(1s / pData->maxFPS); - }(); - - if (!pData->pPresentationInfo || - pData->pPresentationInfo->stage != VK_ANTI_LAG_STAGE_INPUT_AMD) { - - return; - } + const auto strategy = + dynamic_cast(context->strategy.get()); + assert(strategy); + strategy->notify_update(*pData); } VkResult LatencySleepNV(VkDevice device, diff --git a/src/strategies/anti_lag/device_strategy.cc b/src/strategies/anti_lag/device_strategy.cc index 5032c97..8a32daa 100644 --- a/src/strategies/anti_lag/device_strategy.cc +++ b/src/strategies/anti_lag/device_strategy.cc @@ -1,4 +1,9 @@ #include "device_strategy.hh" +#include "device_context.hh" + +#include "queue_strategy.hh" + +#include namespace low_latency { @@ -7,4 +12,58 @@ AntiLagDeviceStrategy::AntiLagDeviceStrategy(DeviceContext& device) AntiLagDeviceStrategy::~AntiLagDeviceStrategy() {} +void AntiLagDeviceStrategy::notify_update(const VkAntiLagDataAMD& data) { + const auto lock = std::scoped_lock{this->mutex}; + + this->is_enabled = !(data.mode == VK_ANTI_LAG_MODE_OFF_AMD); + + this->delay = [&]() -> std::chrono::microseconds { + using namespace std::chrono; + if (!data.maxFPS) { + return 0us; + } + return duration_cast(1s / data.maxFPS); + }(); + + if (!data.pPresentationInfo) { + return; + } + + // If we're at the input stage, start marking submissions as relevant. + // If we're at the present stage, stop collecting submissions by making + // our frame_index nullopt. + if (data.pPresentationInfo->stage == VK_ANTI_LAG_STAGE_PRESENT_AMD) { + this->frame_index.reset(); + return; + } + this->frame_index.emplace(data.pPresentationInfo->frameIndex); + + // We're in input now. Wait for all queue submissions to complete. + const auto device_lock = std::shared_lock{this->device.mutex}; + for (const auto& iter : this->device.queues) { + const auto& queue = iter.second; + + const auto strategy = + dynamic_cast(queue->strategy.get()); + assert(strategy); + + strategy->await_complete(); + } +} + +bool AntiLagDeviceStrategy::should_track_submissions() { + const auto lock = std::shared_lock{this->mutex}; + + if (!this->is_enabled) { + return false; + } + + // Don't track submissions if our frame index is nullopt! + if (!this->frame_index.has_value()) { + return false; + } + + return true; +} + } // namespace low_latency \ No newline at end of file diff --git a/src/strategies/anti_lag/device_strategy.hh b/src/strategies/anti_lag/device_strategy.hh index 8a9afee..46197b0 100644 --- a/src/strategies/anti_lag/device_strategy.hh +++ b/src/strategies/anti_lag/device_strategy.hh @@ -3,14 +3,31 @@ #include "strategies/device_strategy.hh" +#include + +#include +#include + namespace low_latency { class DeviceContext; class AntiLagDeviceStrategy final : public DeviceStrategy { + private: + std::shared_mutex mutex{}; + // If this is nullopt don't track the submission. + std::optional frame_index{}; + std::chrono::microseconds delay{}; + bool is_enabled{}; + public: AntiLagDeviceStrategy(DeviceContext& device); virtual ~AntiLagDeviceStrategy(); + + public: + void notify_update(const VkAntiLagDataAMD& data); + + bool should_track_submissions(); }; } // namespace low_latency diff --git a/src/strategies/anti_lag/queue_strategy.cc b/src/strategies/anti_lag/queue_strategy.cc index ba60535..9dbe127 100644 --- a/src/strategies/anti_lag/queue_strategy.cc +++ b/src/strategies/anti_lag/queue_strategy.cc @@ -1,4 +1,7 @@ #include "queue_strategy.hh" +#include "device_context.hh" +#include "device_strategy.hh" +#include "queue_context.hh" namespace low_latency { @@ -7,4 +10,53 @@ AntiLagQueueStrategy::AntiLagQueueStrategy(QueueContext& queue) AntiLagQueueStrategy::~AntiLagQueueStrategy() {} +void AntiLagQueueStrategy::notify_submit( + [[maybe_unused]] const VkSubmitInfo& submit, + std::unique_ptr submission) { + + const auto strategy = + dynamic_cast(this->queue.device.strategy.get()); + assert(strategy); + if (!strategy->should_track_submissions()) { + return; + } + + const auto lock = std::scoped_lock(this->mutex); + this->pending_submissions.push_back(std::move(submission)); +} + +void AntiLagQueueStrategy::notify_submit( + [[maybe_unused]] const VkSubmitInfo2& submit, + std::unique_ptr submission) { + + const auto strategy = + dynamic_cast(this->queue.device.strategy.get()); + assert(strategy); + if (!strategy->should_track_submissions()) { + return; + } + + const auto lock = std::scoped_lock(this->mutex); + this->pending_submissions.push_back(std::move(submission)); +} + +void AntiLagQueueStrategy::await_complete() { + + // Grab submissions while under a lock. + const auto submissions = [&]() -> std::deque> { + const auto lock = std::scoped_lock{this->mutex}; + + auto submissions = std::move(this->pending_submissions); + this->pending_submissions.clear(); + return submissions; + }(); + + // Wait for completion on the last submission. + if (submissions.empty()) { + return; + } + const auto& last = submissions.back(); + last->end->await_time(); +} + } // namespace low_latency diff --git a/src/strategies/anti_lag/queue_strategy.hh b/src/strategies/anti_lag/queue_strategy.hh index 81ae653..3887474 100644 --- a/src/strategies/anti_lag/queue_strategy.hh +++ b/src/strategies/anti_lag/queue_strategy.hh @@ -3,14 +3,33 @@ #include "strategies/queue_strategy.hh" +#include +#include +#include + namespace low_latency { class QueueContext; class AntiLagQueueStrategy final : public QueueStrategy { + private: + std::mutex mutex; + std::deque> pending_submissions; + public: AntiLagQueueStrategy(QueueContext& queue); virtual ~AntiLagQueueStrategy(); + + public: + virtual void notify_submit(const VkSubmitInfo& submit, + std::unique_ptr submission) override; + virtual void notify_submit(const VkSubmitInfo2& submit, + std::unique_ptr submission) override; + + public: + // Wait for all pending submissions to complete. Resets pending submissions + // once done. + void await_complete(); }; } // namespace low_latency diff --git a/src/strategies/device_strategy.hh b/src/strategies/device_strategy.hh index 1b95e11..7b7bacd 100644 --- a/src/strategies/device_strategy.hh +++ b/src/strategies/device_strategy.hh @@ -6,11 +6,15 @@ namespace low_latency { class DeviceContext; class DeviceStrategy { + protected: DeviceContext& device; public: DeviceStrategy(DeviceContext& device); virtual ~DeviceStrategy(); + + public: + }; } // namespace low_latency diff --git a/src/strategies/low_latency2/queue_strategy.cc b/src/strategies/low_latency2/queue_strategy.cc index 85e1aae..e67d279 100644 --- a/src/strategies/low_latency2/queue_strategy.cc +++ b/src/strategies/low_latency2/queue_strategy.cc @@ -7,4 +7,12 @@ LowLatency2QueueStrategy::LowLatency2QueueStrategy(QueueContext& queue) LowLatency2QueueStrategy::~LowLatency2QueueStrategy() {} +void LowLatency2QueueStrategy::notify_submit( + [[maybe_unused]] const VkSubmitInfo& submit, + [[maybe_unused]] std::unique_ptr submission) {} + +void LowLatency2QueueStrategy::notify_submit( + [[maybe_unused]] const VkSubmitInfo2& submit, + [[maybe_unused]] std::unique_ptr submission) {} + } // namespace low_latency diff --git a/src/strategies/low_latency2/queue_strategy.hh b/src/strategies/low_latency2/queue_strategy.hh index 9688cf4..ad31df4 100644 --- a/src/strategies/low_latency2/queue_strategy.hh +++ b/src/strategies/low_latency2/queue_strategy.hh @@ -11,6 +11,12 @@ class LowLatency2QueueStrategy final : public QueueStrategy { public: LowLatency2QueueStrategy(QueueContext& queue); virtual ~LowLatency2QueueStrategy(); + + public: + virtual void notify_submit(const VkSubmitInfo& submit, + std::unique_ptr submission) override; + virtual void notify_submit(const VkSubmitInfo2& submit, + std::unique_ptr submission) override; }; } // namespace low_latency diff --git a/src/strategies/queue_strategy.hh b/src/strategies/queue_strategy.hh index 0b9edc8..37bad02 100644 --- a/src/strategies/queue_strategy.hh +++ b/src/strategies/queue_strategy.hh @@ -1,16 +1,28 @@ #ifndef STRATEGIES_QUEUE_STRATEGY_HH_ #define STRATEGIES_QUEUE_STRATEGY_HH_ +#include "submission.hh" +#include "timestamp_pool.hh" + +#include + namespace low_latency { class QueueContext; class QueueStrategy { + protected: QueueContext& queue; public: QueueStrategy(QueueContext& queue); virtual ~QueueStrategy(); + + public: + virtual void notify_submit(const VkSubmitInfo& submit, + std::unique_ptr submission) = 0; + virtual void notify_submit(const VkSubmitInfo2& submit, + std::unique_ptr submission) = 0; }; } // namespace low_latency diff --git a/src/submission.cc b/src/submission.cc new file mode 100644 index 0000000..1bf61cd --- /dev/null +++ b/src/submission.cc @@ -0,0 +1,3 @@ +#include "submission.hh" + +namespace low_latency {} \ No newline at end of file diff --git a/src/submission.hh b/src/submission.hh new file mode 100644 index 0000000..69ec576 --- /dev/null +++ b/src/submission.hh @@ -0,0 +1,16 @@ +#ifndef SUBMISSIONS_HH_ +#define SUBMISSIONS_HH_ + +#include "device_clock.hh" +#include "timestamp_pool.hh" + +namespace low_latency { + +struct Submission { + std::shared_ptr start, end; + DeviceClock::time_point_t time; +}; + +} // namespace low_latency + +#endif \ No newline at end of file diff --git a/src/submissions.cc b/src/submissions.cc deleted file mode 100644 index e69de29..0000000 diff --git a/src/submissions.hh b/src/submissions.hh deleted file mode 100644 index 41c963f..0000000 --- a/src/submissions.hh +++ /dev/null @@ -1,12 +0,0 @@ -#ifndef SUBMISSIONS_HH_ -#define SUBMISSIONS_HH_ - -// - -class Submissions { - - - -}; - -#endif \ No newline at end of file -- cgit v1.2.3