diff options
| author | Nicolas James <nj3ahxac@gmail.com> | 2026-04-08 12:55:29 +1000 |
|---|---|---|
| committer | Nicolas James <nj3ahxac@gmail.com> | 2026-04-08 12:55:29 +1000 |
| commit | 453d5b0052bd17ed74d47570ffff403ffcd9ebb3 (patch) | |
| tree | 14ab71a5ff27d5485faea061f6e8fe26c0e19640 /src | |
| parent | eb9719cc8b9a308654ccd2c3bce8a7047b6e2a1a (diff) | |
Fix refactor latency regression for VK_NV_LowLatency2 by checking if work has already completed
Diffstat (limited to 'src')
| -rw-r--r-- | src/frame_span.cc | 7 | ||||
| -rw-r--r-- | src/frame_span.hh | 2 | ||||
| -rw-r--r-- | src/strategies/low_latency2/queue_strategy.cc | 9 | ||||
| -rw-r--r-- | src/strategies/low_latency2/queue_strategy.hh | 3 | ||||
| -rw-r--r-- | src/strategies/low_latency2/swapchain_monitor.cc | 27 | ||||
| -rw-r--r-- | src/strategies/low_latency2/swapchain_monitor.hh | 7 | ||||
| -rw-r--r-- | src/timestamp_pool.cc | 33 | ||||
| -rw-r--r-- | src/timestamp_pool.hh | 10 |
8 files changed, 78 insertions, 20 deletions
diff --git a/src/frame_span.cc b/src/frame_span.cc index 732d6f3..8764aa1 100644 --- a/src/frame_span.cc +++ b/src/frame_span.cc @@ -21,4 +21,11 @@ void FrameSpan::await_completed() const { this->head_handle->await_end(); } +bool FrameSpan::has_completed() const { + if (this->tail_handle) { + return this->tail_handle->has_end(); + } + return this->head_handle->has_end(); +} + } // namespace low_latency
\ No newline at end of file diff --git a/src/frame_span.hh b/src/frame_span.hh index 5220702..638554b 100644 --- a/src/frame_span.hh +++ b/src/frame_span.hh @@ -28,6 +28,8 @@ class FrameSpan { void update(std::shared_ptr<TimestampPool::Handle> handle); public: + // Check if GPU work has completed without hanging. + bool has_completed() const; // Wait for for GPU work to complete. void await_completed() const; }; diff --git a/src/strategies/low_latency2/queue_strategy.cc b/src/strategies/low_latency2/queue_strategy.cc index a020c0d..e3ed808 100644 --- a/src/strategies/low_latency2/queue_strategy.cc +++ b/src/strategies/low_latency2/queue_strategy.cc @@ -29,15 +29,14 @@ static void notify_submit_impl(LowLatency2QueueStrategy& strategy, const auto [iter, inserted] = strategy.frame_spans.try_emplace(present_id); if (inserted) { iter->second = std::make_unique<FrameSpan>(std::move(handle)); + // Add our present_id to our ring tracking if it's non-zero. + if (present_id) { + strategy.stale_present_ids.push_back(present_id); + } } else { iter->second->update(std::move(handle)); } - // Add our present_id to our ring tracking if it's non-zero. - if (inserted && present_id) { - strategy.stale_present_ids.push_back(present_id); - } - // Remove stale present_id's if they weren't presented to. if (std::size(strategy.stale_present_ids) > LowLatency2QueueStrategy::MAX_TRACKED_PRESENTS) { diff --git a/src/strategies/low_latency2/queue_strategy.hh b/src/strategies/low_latency2/queue_strategy.hh index 6d41027..2a03c91 100644 --- a/src/strategies/low_latency2/queue_strategy.hh +++ b/src/strategies/low_latency2/queue_strategy.hh @@ -16,6 +16,9 @@ class QueueContext; class LowLatency2QueueStrategy final : public QueueStrategy { public: + // It's possible that our tracking for present_ids grows without a limit if + // present isn't called. To guard against this, we store the last unique + // MAX_TRACKED_PRESENTS and use it to evict stale submissions. static constexpr auto MAX_TRACKED_PRESENTS = 50; // Mapping of present_id's to submissions. Grabbed later by the device diff --git a/src/strategies/low_latency2/swapchain_monitor.cc b/src/strategies/low_latency2/swapchain_monitor.cc index a70fa6c..7442eec 100644 --- a/src/strategies/low_latency2/swapchain_monitor.cc +++ b/src/strategies/low_latency2/swapchain_monitor.cc @@ -35,33 +35,33 @@ void SwapchainMonitor::do_monitor(const std::stop_token stoken) { for (;;) { auto lock = std::unique_lock{this->mutex}; this->cv.wait(lock, stoken, - [&]() { return this->semaphore_spans.has_value(); }); + [&]() { return !this->pending_signals.empty(); }); // Stop only if we're stopped and we have nothing to signal. - if (stoken.stop_requested() && !this->semaphore_spans.has_value()) { + if (stoken.stop_requested() && this->pending_signals.empty()) { break; } // Grab the most recent semaphore. When work completes, signal it. - const auto semaphore_span = std::move(*this->semaphore_spans); - this->semaphore_spans.reset(); + const auto pending_signal = std::move(this->pending_signals.front()); + this->pending_signals.pop_front(); // If we're stopping, signal the semaphore and don't worry about work // actually completing. if (stoken.stop_requested()) { - semaphore_span.wakeup_semaphore.signal(this->device); + pending_signal.wakeup_semaphore.signal(this->device); break; } // Unlock, wait for work to finish, lock again. lock.unlock(); - for (const auto& frame_span : semaphore_span.frame_spans) { + for (const auto& frame_span : pending_signal.frame_spans) { if (frame_span) { frame_span->await_completed(); } } - lock.lock(); + using namespace std::chrono; if (this->present_delay != 0us) { const auto last_time = this->last_signal_time; @@ -75,7 +75,7 @@ void SwapchainMonitor::do_monitor(const std::stop_token stoken) { } lock.unlock(); - semaphore_span.wakeup_semaphore.signal(this->device); + pending_signal.wakeup_semaphore.signal(this->device); } } @@ -94,12 +94,19 @@ void SwapchainMonitor::notify_semaphore(const VkSemaphore& timeline_semaphore, } // Signal immediately if we have no outstanding work. - if (this->pending_frame_spans.empty()) { + if (std::ranges::all_of(this->pending_frame_spans, + [](const auto& frame_span) { + if (!frame_span) { + return true; + } + return frame_span->has_completed(); + })) { wakeup_semaphore.signal(this->device); + this->pending_signals.clear(); return; } - this->semaphore_spans.emplace(SemaphoreSpans{ + this->pending_signals.emplace_back(PendingSignal{ .wakeup_semaphore = wakeup_semaphore, .frame_spans = std::move(this->pending_frame_spans), }); diff --git a/src/strategies/low_latency2/swapchain_monitor.hh b/src/strategies/low_latency2/swapchain_monitor.hh index 837f8e4..a5f8362 100644 --- a/src/strategies/low_latency2/swapchain_monitor.hh +++ b/src/strategies/low_latency2/swapchain_monitor.hh @@ -26,16 +26,13 @@ class SwapchainMonitor final { void signal(const DeviceContext& device) const; }; - // An empty vector here represents our 'no work' state. std::vector<std::unique_ptr<FrameSpan>> pending_frame_spans{}; - // A pairing of semaphore -> submissions. - // If the Submissions completes then signal the bundled semaphore. - struct SemaphoreSpans { + struct PendingSignal { WakeupSemaphore wakeup_semaphore{}; std::vector<std::unique_ptr<FrameSpan>> frame_spans{}; }; - std::optional<SemaphoreSpans> semaphore_spans{}; + std::deque<PendingSignal> pending_signals{}; protected: const DeviceContext& device; diff --git a/src/timestamp_pool.cc b/src/timestamp_pool.cc index afb12f7..d84169d 100644 --- a/src/timestamp_pool.cc +++ b/src/timestamp_pool.cc @@ -207,6 +207,39 @@ TimestampPool::Handle::await_time_impl(const std::uint32_t offset) const { void TimestampPool::Handle::await_start() const { this->await_time_impl(0); } void TimestampPool::Handle::await_end() const { this->await_time_impl(1); } +std::optional<std::uint64_t> +TimestampPool::Handle::has_time_impl(const std::uint32_t offset) const { + + const auto& context = this->timestamp_pool.queue_context.device; + const auto& vtable = context.vtable; + const auto& query_pool = *this->query_chunk.query_pool; + + auto query_result = std::array<std::uint64_t, 2>{}; + + const auto result = vtable.GetQueryPoolResults( + context.device, query_pool, this->query_index + offset, 1, + sizeof(query_result), &query_result, sizeof(query_result), + VK_QUERY_RESULT_64_BIT | VK_QUERY_RESULT_WITH_AVAILABILITY_BIT); + + if (result != VK_NOT_READY && result != VK_SUCCESS) { + throw result; + } + + if (!query_result[1]) { + return std::nullopt; + } + return query_result[0]; +} + +// Checks if the time is available - doesn't block. +bool TimestampPool::Handle::has_start() const { + return this->has_time_impl(0).has_value(); +} + +bool TimestampPool::Handle::has_end() const { + return this->has_time_impl(1).has_value(); +} + TimestampPool::~TimestampPool() {} } // namespace low_latency
\ No newline at end of file diff --git a/src/timestamp_pool.hh b/src/timestamp_pool.hh index 809c6a4..767455a 100644 --- a/src/timestamp_pool.hh +++ b/src/timestamp_pool.hh @@ -10,6 +10,7 @@ #include <deque> #include <memory> #include <mutex> +#include <optional> #include <thread> #include <unordered_set> #include <vector> @@ -130,6 +131,15 @@ class TimestampPool final { // Blocks until the time is available. void await_start() const; void await_end() const; + + private: + std::optional<std::uint64_t> + has_time_impl(const std::uint32_t offset) const; + + public: + // Checks if the time is available - doesn't block. + bool has_start() const; + bool has_end() const; }; private: |
