diff options
Diffstat (limited to 'src')
| -rw-r--r-- | src/device_context.cc | 4 | ||||
| -rw-r--r-- | src/device_context.hh | 2 | ||||
| -rw-r--r-- | src/layer.cc | 6 | ||||
| -rw-r--r-- | src/layer_context.hh | 4 | ||||
| -rw-r--r-- | src/queue_context.cc | 56 | ||||
| -rw-r--r-- | src/queue_context.hh | 42 | ||||
| -rw-r--r-- | src/swapchain_monitor.cc | 48 | ||||
| -rw-r--r-- | src/swapchain_monitor.hh | 16 | ||||
| -rw-r--r-- | src/timestamp_pool.cc | 2 |
9 files changed, 122 insertions, 58 deletions
diff --git a/src/device_context.cc b/src/device_context.cc index b52fec4..33f2aa4 100644 --- a/src/device_context.cc +++ b/src/device_context.cc @@ -34,7 +34,7 @@ void DeviceContext::update_params( const bool was_low_latency_requested) { // If we don't have a target (AMD's anti_lag doesn't differentiate between - // swapchains), just write it to everything. + // swapchains) just write it to everything. if (!target.has_value()) { for (auto& iter : this->swapchain_monitors) { iter.second->update_params(was_low_latency_requested, present_delay); @@ -49,7 +49,7 @@ void DeviceContext::update_params( void DeviceContext::notify_present( const VkSwapchainKHR& swapchain, - const QueueContext::submissions_t& submissions) { + const QueueContext::submissions_ptr_t& submissions) { const auto iter = this->swapchain_monitors.find(swapchain); assert(iter != std::end(this->swapchain_monitors)); diff --git a/src/device_context.hh b/src/device_context.hh index a46f479..ed2991b 100644 --- a/src/device_context.hh +++ b/src/device_context.hh @@ -53,7 +53,7 @@ class DeviceContext final : public Context { const bool was_low_latency_requested); void notify_present(const VkSwapchainKHR& swapchain, - const QueueContext::submissions_t& submissions); + const QueueContext::submissions_ptr_t& submissions); }; }; // namespace low_latency diff --git a/src/layer.cc b/src/layer.cc index cf9f56e..2743030 100644 --- a/src/layer.cc +++ b/src/layer.cc @@ -756,11 +756,11 @@ static VKAPI_ATTR VkResult VKAPI_CALL CreateSwapchainKHR( } auto insertion = [&]() -> std::unique_ptr<SwapchainMonitor> { - if (!layer_context.should_expose_reflex) { - return std::make_unique<AntiLagSwapchainMonitor>( + if (layer_context.should_expose_reflex) { + return std::make_unique<ReflexSwapchainMonitor>( *context, was_low_latency_requested); } - return std::make_unique<ReflexSwapchainMonitor>( + return std::make_unique<AntiLagSwapchainMonitor>( *context, was_low_latency_requested); }(); const auto did_emplace = 
context->swapchain_monitors diff --git a/src/layer_context.hh b/src/layer_context.hh index 4979379..049684d 100644 --- a/src/layer_context.hh +++ b/src/layer_context.hh @@ -52,8 +52,8 @@ class LayerContext final : public Context { // provided instead of VK_AMD_anti_lag. static constexpr auto EXPOSE_REFLEX_ENV = "LOW_LATENCY_LAYER_EXPOSE_REFLEX"; - // If this is not null and set to 1 then the card's vendor and id will be - // spoofed to appear as a NVIDIA card. + // If this is not null and set to 1 then the card's vendor, id, and device + // name will be modified to appear as a NVIDIA card. static constexpr auto SPOOF_NVIDIA_ENV = "LOW_LATENCY_LAYER_SPOOF_NVIDIA"; public: diff --git a/src/queue_context.cc b/src/queue_context.cc index 84b06fe..e9f9c3c 100644 --- a/src/queue_context.cc +++ b/src/queue_context.cc @@ -47,6 +47,45 @@ QueueContext::~QueueContext() { this->timestamp_pool.reset(); } +QueueContext::Submissions::Submissions() {} + +QueueContext::Submissions::~Submissions() {} + +void QueueContext::Submissions::add_submission( + const std::shared_ptr<TimestampPool::Handle> head, + const std::shared_ptr<TimestampPool::Handle> tail, + const DeviceClock::time_point_t& now) { + + this->submissions.emplace_back(std::make_unique<Submission>(Submission{ + .head_handle = head, + .tail_handle = tail, + .cpu_present_time = now, + })); + + // Manual eviction of likely irrelevant timing information. 
+ if (std::size(this->submissions) > this->MAX_TRACKED_SUBMISSIONS) { + this->submissions.pop_front(); + } +} + +bool QueueContext::Submissions::has_completed() const { + if (this->submissions.empty()) { + return true; + } + + const auto& last_submission = this->submissions.back(); + return last_submission->tail_handle->get_time().has_value(); +} + +void QueueContext::Submissions::await_completed() const { + if (this->submissions.empty()) { + return; + } + + const auto& last_submission = this->submissions.back(); + last_submission->tail_handle->await_time(); +} + void QueueContext::notify_submit( const present_id_t& present_id, const std::shared_ptr<TimestampPool::Handle> head_handle, @@ -57,23 +96,13 @@ void QueueContext::notify_submit( // mapping (might be empty, but handled with operator[]). auto& submissions = this->unpresented_submissions[present_id]; if (submissions == nullptr) { - submissions = - std::make_shared<std::deque<std::unique_ptr<Submission>>>(); - + submissions = std::make_shared<Submissions>(); if (present_id) { this->present_id_ring.emplace_back(present_id); } } - submissions->push_back( - std::make_unique<Submission>(Submission{.head_handle = head_handle, - .tail_handle = tail_handle, - .cpu_present_time = now})); - - // This is probably hit if our queue never actually presents to anything. - if (std::size(*submissions) > this->MAX_TRACKED_SUBMISSIONS) { - submissions->pop_front(); - } + submissions->add_submission(head_handle, tail_handle, now); if (std::size(this->present_id_ring) > MAX_TRACKED_PRESENT_IDS) { const auto evicted_present_id = this->present_id_ring.front(); @@ -90,8 +119,7 @@ void QueueContext::notify_present(const VkSwapchainKHR& swapchain, // We're avoiding a double hash here - don't use operator[] and erase. 
auto iter = this->unpresented_submissions.try_emplace(present_id).first; if (iter->second == nullptr) { - iter->second = - std::make_shared<std::deque<std::unique_ptr<Submission>>>(); + iter->second = std::make_shared<Submissions>(); } this->device.notify_present(swapchain, iter->second); diff --git a/src/queue_context.hh b/src/queue_context.hh index be73adc..2abd44c 100644 --- a/src/queue_context.hh +++ b/src/queue_context.hh @@ -16,10 +16,6 @@ namespace low_latency { class QueueContext final : public Context { private: - // The amount of queue submissions we allow tracked per queue before - // we give up tracking them. This is neccessary for queues which do not - // present anything. - static constexpr auto MAX_TRACKED_SUBMISSIONS = 50u; static constexpr auto MAX_TRACKED_PRESENT_IDS = 50u; public: @@ -70,15 +66,41 @@ class QueueContext final : public Context { // and notify our device that it needs to watch for when this completes. // We give it our submissions. Now, it's out of our hands. We remove the // present_id_t mapping when doing so. - struct Submission { - std::shared_ptr<TimestampPool::Handle> head_handle, tail_handle; - DeviceClock::time_point_t cpu_present_time; + + class Submissions final { + // The amount of queue submissions we allow tracked per queue before + // we give up tracking them. This is necessary for queues which do not + // present anything. 
+ static constexpr auto MAX_TRACKED_SUBMISSIONS = 50u; + + struct Submission final { + std::shared_ptr<TimestampPool::Handle> head_handle, tail_handle; + DeviceClock::time_point_t cpu_present_time; + }; + std::deque<std::unique_ptr<Submission>> submissions; + + public: + Submissions(); + Submissions(const Submissions&) = delete; + Submissions(Submissions&&) = delete; + Submissions operator=(const Submissions&) = delete; + Submissions operator=(Submissions&&) = delete; + ~Submissions(); + + public: + void add_submission(const std::shared_ptr<TimestampPool::Handle> head, + const std::shared_ptr<TimestampPool::Handle> tail, + const DeviceClock::time_point_t& now); + + // Non-blocking - true if this submission has completed on the GPU. + bool has_completed() const; + // Blocking wait until the last submission has completed. + void await_completed() const; }; - using submissions_t = - std::shared_ptr<std::deque<std::unique_ptr<Submission>>>; using present_id_t = std::uint64_t; - std::unordered_map<present_id_t, submissions_t> unpresented_submissions; + using submissions_ptr_t = std::shared_ptr<Submissions>; + std::unordered_map<present_id_t, submissions_ptr_t> unpresented_submissions; // We might be tracking present_ids which aren't presented to - and as a // result we don't ever clear those Submissions. So manually evict them by diff --git a/src/swapchain_monitor.cc b/src/swapchain_monitor.cc index f12bafa..bc4fc9b 100644 --- a/src/swapchain_monitor.cc +++ b/src/swapchain_monitor.cc @@ -23,6 +23,18 @@ void SwapchainMonitor::update_params( this->present_delay = present_delay; } +void SwapchainMonitor::prune_submissions() { + // If our submissions grow too large, we should delete them from our + // tracking. It would be nice if this was handled elegantly by some custom + // container and we didn't have to call this manually each time we insert. + // Also this exact logic is repeated in QueueContext's Submissions. 
+ if (std::size(this->in_flight_submissions) > + this->MAX_TRACKED_IN_FLIGHT_SUBMISSIONS) { + + this->in_flight_submissions.pop_front(); + } +} + ReflexSwapchainMonitor::ReflexSwapchainMonitor( const DeviceContext& device, const bool was_low_latency_requested) : SwapchainMonitor(device, was_low_latency_requested), @@ -55,12 +67,10 @@ void ReflexSwapchainMonitor::do_monitor(const std::stop_token stoken) { // Look for the latest submission and make sure it's completed. if (!this->in_flight_submissions.empty()) { - const auto submission = this->in_flight_submissions.back(); + const auto last_submission = this->in_flight_submissions.back(); this->in_flight_submissions.clear(); - if (!submission->empty()) { - submission->back()->tail_handle->await_time(); - } + last_submission->await_completed(); } // We might want to signal them all? In theory it's the same timeline @@ -80,7 +90,6 @@ void ReflexSwapchainMonitor::notify_semaphore( const auto wakeup_semaphore = WakeupSemaphore{ .timeline_semaphore = timeline_semaphore, .value = value}; - // Signal immediately if low_latency isn't requested or if we have no // outstanding work. if (!this->was_low_latency_requested || @@ -95,7 +104,7 @@ void ReflexSwapchainMonitor::notify_semaphore( } void ReflexSwapchainMonitor::notify_present( - const QueueContext::submissions_t& submissions) { + const QueueContext::submissions_ptr_t& submissions) { const auto lock = std::scoped_lock{this->mutex}; @@ -104,17 +113,17 @@ void ReflexSwapchainMonitor::notify_present( } // Fast path where this work has already completed. - if (!this->wakeup_semaphores.empty() && !submissions->empty()) { - - const auto& finished = submissions->back()->tail_handle->get_time(); - if (finished.has_value()) { - this->wakeup_semaphores.back().signal(this->device); - this->wakeup_semaphores.clear(); - return; - } + // In this case, don't wake up the thread. We can just signal + // what we have immediately on this thread. 
+ if (!this->wakeup_semaphores.empty() && submissions->has_completed()) { + this->wakeup_semaphores.back().signal(this->device); + this->wakeup_semaphores.clear(); + return; } this->in_flight_submissions.emplace_back(submissions); + this->prune_submissions(); + this->cv.notify_one(); } @@ -123,15 +132,15 @@ AntiLagSwapchainMonitor::AntiLagSwapchainMonitor( : SwapchainMonitor(device, was_low_latency_requested) {} AntiLagSwapchainMonitor::~AntiLagSwapchainMonitor() {} - void AntiLagSwapchainMonitor::notify_present( - const QueueContext::submissions_t& submissions) { + const QueueContext::submissions_ptr_t& submissions) { if (!this->was_low_latency_requested) { return; } this->in_flight_submissions.emplace_back(submissions); + this->prune_submissions(); } void AntiLagSwapchainMonitor::await_submissions() { @@ -139,13 +148,10 @@ void AntiLagSwapchainMonitor::await_submissions() { return; } - const auto last_submissions = this->in_flight_submissions.back(); + const auto last_submission = this->in_flight_submissions.back(); this->in_flight_submissions.clear(); - if (last_submissions->empty()) { - return; - } - last_submissions->back()->tail_handle->await_time(); + last_submission->await_completed(); } } // namespace low_latency
\ No newline at end of file diff --git a/src/swapchain_monitor.hh b/src/swapchain_monitor.hh index b993b83..295ac5b 100644 --- a/src/swapchain_monitor.hh +++ b/src/swapchain_monitor.hh @@ -21,6 +21,9 @@ class DeviceContext; // currently have an option to frame pace, to disable low_latency mode // (become a no-op), and must track in_flight_submissions to function. class SwapchainMonitor { + private: + static constexpr auto MAX_TRACKED_IN_FLIGHT_SUBMISSIONS = 50u; + protected: const DeviceContext& device; @@ -28,7 +31,12 @@ class SwapchainMonitor { std::chrono::milliseconds present_delay = std::chrono::milliseconds{0}; bool was_low_latency_requested = false; - std::deque<QueueContext::submissions_t> in_flight_submissions; + std::deque<QueueContext::submissions_ptr_t> in_flight_submissions; + + protected: + // Small fix to avoid submissions growing limitlessly in size if this + // swapchain is never presented to. + void prune_submissions(); public: SwapchainMonitor(const DeviceContext& device, @@ -45,7 +53,7 @@ class SwapchainMonitor { public: virtual void - notify_present(const QueueContext::submissions_t& submissions) = 0; + notify_present(const QueueContext::submissions_ptr_t& submissions) = 0; }; // Provides asynchronous monitoring of submissions and signalling of some @@ -79,7 +87,7 @@ class ReflexSwapchainMonitor final : public SwapchainMonitor { public: virtual void - notify_present(const QueueContext::submissions_t& submissions) override; + notify_present(const QueueContext::submissions_ptr_t& submissions) override; }; // Much simpler synchronous waiting with no thread requirement. 
@@ -95,7 +103,7 @@ class AntiLagSwapchainMonitor final : public SwapchainMonitor { public: virtual void - notify_present(const QueueContext::submissions_t& submissions) override; + notify_present(const QueueContext::submissions_ptr_t& submissions) override; }; } // namespace low_latency diff --git a/src/timestamp_pool.cc b/src/timestamp_pool.cc index a618cfb..2d43c58 100644 --- a/src/timestamp_pool.cc +++ b/src/timestamp_pool.cc @@ -105,7 +105,7 @@ std::shared_ptr<TimestampPool::Handle> TimestampPool::acquire() { const auto query_index = *std::begin(query_chunk.free_indices); query_chunk.free_indices.erase(query_index); - // Custom deleter function that puts the handle on our async deleter queue. + // Custom deleter function that puts the handle on our async reaper queue. const auto reaper_deleter = [this](Handle* const handle) { if (!handle) { return; |
