aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorNicolas James <nj3ahxac@gmail.com>2026-04-08 12:55:29 +1000
committerNicolas James <nj3ahxac@gmail.com>2026-04-08 12:55:29 +1000
commit453d5b0052bd17ed74d47570ffff403ffcd9ebb3 (patch)
tree14ab71a5ff27d5485faea061f6e8fe26c0e19640 /src
parenteb9719cc8b9a308654ccd2c3bce8a7047b6e2a1a (diff)
Fix refactor latency regression for VK_NV_LowLatency2 by checking if work has already completed
Diffstat (limited to 'src')
-rw-r--r--src/frame_span.cc7
-rw-r--r--src/frame_span.hh2
-rw-r--r--src/strategies/low_latency2/queue_strategy.cc9
-rw-r--r--src/strategies/low_latency2/queue_strategy.hh3
-rw-r--r--src/strategies/low_latency2/swapchain_monitor.cc27
-rw-r--r--src/strategies/low_latency2/swapchain_monitor.hh7
-rw-r--r--src/timestamp_pool.cc33
-rw-r--r--src/timestamp_pool.hh10
8 files changed, 78 insertions, 20 deletions
diff --git a/src/frame_span.cc b/src/frame_span.cc
index 732d6f3..8764aa1 100644
--- a/src/frame_span.cc
+++ b/src/frame_span.cc
@@ -21,4 +21,11 @@ void FrameSpan::await_completed() const {
this->head_handle->await_end();
}
+// Non-blocking check for completion: queries has_end() on the tail handle
+// when one is set, and on the head handle otherwise.
+bool FrameSpan::has_completed() const {
+    // No tail handle - the head handle is the only one to ask.
+    if (!this->tail_handle) {
+        return this->head_handle->has_end();
+    }
+    return this->tail_handle->has_end();
+}
+
} // namespace low_latency \ No newline at end of file
diff --git a/src/frame_span.hh b/src/frame_span.hh
index 5220702..638554b 100644
--- a/src/frame_span.hh
+++ b/src/frame_span.hh
@@ -28,6 +28,8 @@ class FrameSpan {
void update(std::shared_ptr<TimestampPool::Handle> handle);
public:
+ // Check if GPU work has completed without hanging.
+ bool has_completed() const;
// Wait for GPU work to complete.
void await_completed() const;
};
diff --git a/src/strategies/low_latency2/queue_strategy.cc b/src/strategies/low_latency2/queue_strategy.cc
index a020c0d..e3ed808 100644
--- a/src/strategies/low_latency2/queue_strategy.cc
+++ b/src/strategies/low_latency2/queue_strategy.cc
@@ -29,15 +29,14 @@ static void notify_submit_impl(LowLatency2QueueStrategy& strategy,
const auto [iter, inserted] = strategy.frame_spans.try_emplace(present_id);
if (inserted) {
iter->second = std::make_unique<FrameSpan>(std::move(handle));
+ // Add our present_id to our ring tracking if it's non-zero.
+ if (present_id) {
+ strategy.stale_present_ids.push_back(present_id);
+ }
} else {
iter->second->update(std::move(handle));
}
- // Add our present_id to our ring tracking if it's non-zero.
- if (inserted && present_id) {
- strategy.stale_present_ids.push_back(present_id);
- }
-
// Remove stale present_id's if they weren't presented to.
if (std::size(strategy.stale_present_ids) >
LowLatency2QueueStrategy::MAX_TRACKED_PRESENTS) {
diff --git a/src/strategies/low_latency2/queue_strategy.hh b/src/strategies/low_latency2/queue_strategy.hh
index 6d41027..2a03c91 100644
--- a/src/strategies/low_latency2/queue_strategy.hh
+++ b/src/strategies/low_latency2/queue_strategy.hh
@@ -16,6 +16,9 @@ class QueueContext;
class LowLatency2QueueStrategy final : public QueueStrategy {
public:
+ // It's possible that our tracking for present_ids grows without a limit if
+ // present isn't called. To guard against this, we store the last
+ // MAX_TRACKED_PRESENTS unique present_ids and use them to evict stale
+ // submissions.
static constexpr auto MAX_TRACKED_PRESENTS = 50;
// Mapping of present_id's to submissions. Grabbed later by the device
diff --git a/src/strategies/low_latency2/swapchain_monitor.cc b/src/strategies/low_latency2/swapchain_monitor.cc
index a70fa6c..7442eec 100644
--- a/src/strategies/low_latency2/swapchain_monitor.cc
+++ b/src/strategies/low_latency2/swapchain_monitor.cc
@@ -35,33 +35,33 @@ void SwapchainMonitor::do_monitor(const std::stop_token stoken) {
for (;;) {
auto lock = std::unique_lock{this->mutex};
this->cv.wait(lock, stoken,
- [&]() { return this->semaphore_spans.has_value(); });
+ [&]() { return !this->pending_signals.empty(); });
// Stop only if we're stopped and we have nothing to signal.
- if (stoken.stop_requested() && !this->semaphore_spans.has_value()) {
+ if (stoken.stop_requested() && this->pending_signals.empty()) {
break;
}
// Grab the oldest pending signal. When its work completes, signal it.
- const auto semaphore_span = std::move(*this->semaphore_spans);
- this->semaphore_spans.reset();
+ const auto pending_signal = std::move(this->pending_signals.front());
+ this->pending_signals.pop_front();
// If we're stopping, signal the semaphore and don't worry about work
// actually completing.
if (stoken.stop_requested()) {
- semaphore_span.wakeup_semaphore.signal(this->device);
+ pending_signal.wakeup_semaphore.signal(this->device);
break;
}
// Unlock, wait for work to finish, lock again.
lock.unlock();
- for (const auto& frame_span : semaphore_span.frame_spans) {
+ for (const auto& frame_span : pending_signal.frame_spans) {
if (frame_span) {
frame_span->await_completed();
}
}
-
lock.lock();
+
using namespace std::chrono;
if (this->present_delay != 0us) {
const auto last_time = this->last_signal_time;
@@ -75,7 +75,7 @@ void SwapchainMonitor::do_monitor(const std::stop_token stoken) {
}
lock.unlock();
- semaphore_span.wakeup_semaphore.signal(this->device);
+ pending_signal.wakeup_semaphore.signal(this->device);
}
}
@@ -94,12 +94,19 @@ void SwapchainMonitor::notify_semaphore(const VkSemaphore& timeline_semaphore,
}
// Signal immediately if we have no outstanding work.
- if (this->pending_frame_spans.empty()) {
+ if (std::ranges::all_of(this->pending_frame_spans,
+ [](const auto& frame_span) {
+ if (!frame_span) {
+ return true;
+ }
+ return frame_span->has_completed();
+ })) {
wakeup_semaphore.signal(this->device);
+ this->pending_signals.clear();
return;
}
- this->semaphore_spans.emplace(SemaphoreSpans{
+ this->pending_signals.emplace_back(PendingSignal{
.wakeup_semaphore = wakeup_semaphore,
.frame_spans = std::move(this->pending_frame_spans),
});
diff --git a/src/strategies/low_latency2/swapchain_monitor.hh b/src/strategies/low_latency2/swapchain_monitor.hh
index 837f8e4..a5f8362 100644
--- a/src/strategies/low_latency2/swapchain_monitor.hh
+++ b/src/strategies/low_latency2/swapchain_monitor.hh
@@ -26,16 +26,13 @@ class SwapchainMonitor final {
void signal(const DeviceContext& device) const;
};
- // An empty vector here represents our 'no work' state.
std::vector<std::unique_ptr<FrameSpan>> pending_frame_spans{};
- // A pairing of semaphore -> submissions.
- // If the Submissions completes then signal the bundled semaphore.
- struct SemaphoreSpans {
+ struct PendingSignal {
WakeupSemaphore wakeup_semaphore{};
std::vector<std::unique_ptr<FrameSpan>> frame_spans{};
};
- std::optional<SemaphoreSpans> semaphore_spans{};
+ std::deque<PendingSignal> pending_signals{};
protected:
const DeviceContext& device;
diff --git a/src/timestamp_pool.cc b/src/timestamp_pool.cc
index afb12f7..d84169d 100644
--- a/src/timestamp_pool.cc
+++ b/src/timestamp_pool.cc
@@ -207,6 +207,39 @@ TimestampPool::Handle::await_time_impl(const std::uint32_t offset) const {
void TimestampPool::Handle::await_start() const { this->await_time_impl(0); }
void TimestampPool::Handle::await_end() const { this->await_time_impl(1); }
+// Polls the query pool for the timestamp at `query_index + offset`.
+// Returns the 64-bit query value when the availability word is non-zero and
+// std::nullopt otherwise. Throws the VkResult if the call returns anything
+// other than VK_SUCCESS or VK_NOT_READY.
+std::optional<std::uint64_t>
+TimestampPool::Handle::has_time_impl(const std::uint32_t offset) const {
+
+    const auto& device_context = this->timestamp_pool.queue_context.device;
+    const auto& pool = *this->query_chunk.query_pool;
+
+    // Slot 0 is read back as the query value, slot 1 as its availability.
+    auto slots = std::array<std::uint64_t, 2>{};
+
+    const auto status = device_context.vtable.GetQueryPoolResults(
+        device_context.device, pool, this->query_index + offset, 1,
+        sizeof(slots), slots.data(), sizeof(slots),
+        VK_QUERY_RESULT_64_BIT | VK_QUERY_RESULT_WITH_AVAILABILITY_BIT);
+
+    // VK_NOT_READY is an expected outcome here; anything else is an error.
+    const bool acceptable = status == VK_SUCCESS || status == VK_NOT_READY;
+    if (!acceptable) {
+        throw status;
+    }
+
+    const auto [value, available] = slots;
+    if (available) {
+        return value;
+    }
+    return std::nullopt;
+}
+
+// Checks if the start time (query offset 0) is available - doesn't block.
+bool TimestampPool::Handle::has_start() const {
+    const auto start_time = this->has_time_impl(0);
+    return start_time.has_value();
+}
+
+// Checks if the end time (query offset 1) is available - doesn't block.
+bool TimestampPool::Handle::has_end() const {
+    const auto end_time = this->has_time_impl(1);
+    return end_time.has_value();
+}
+
TimestampPool::~TimestampPool() {}
} // namespace low_latency \ No newline at end of file
diff --git a/src/timestamp_pool.hh b/src/timestamp_pool.hh
index 809c6a4..767455a 100644
--- a/src/timestamp_pool.hh
+++ b/src/timestamp_pool.hh
@@ -10,6 +10,7 @@
#include <deque>
#include <memory>
#include <mutex>
+#include <optional>
#include <thread>
#include <unordered_set>
#include <vector>
@@ -130,6 +131,15 @@ class TimestampPool final {
// Blocks until the time is available.
void await_start() const;
void await_end() const;
+
+ private:
+ std::optional<std::uint64_t>
+ has_time_impl(const std::uint32_t offset) const;
+
+ public:
+ // Checks if the time is available - doesn't block.
+ bool has_start() const;
+ bool has_end() const;
};
private: