aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
author Nicolas James <nj3ahxac@gmail.com> 2026-04-06 17:03:35 +1000
committer Nicolas James <nj3ahxac@gmail.com> 2026-04-06 17:03:35 +1000
commit 312d8736ae0df55c9f33e4eb5c00e4cd77e1c33f (patch)
tree 170816e372c43ddc6522e059f7d6fdc8757330f2 /src
parent a9a083ea5c649498d2f12e611dbc7c767d152130 (diff)
Add refactored VK_NV_low_latency2 impl, (fixes many threading issues)
Diffstat (limited to 'src')
-rw-r--r-- src/layer.cc 31
-rw-r--r-- src/physical_device_context.hh 3
-rw-r--r-- src/strategies/low_latency2/device_strategy.cc 83
-rw-r--r-- src/strategies/low_latency2/device_strategy.hh 11
-rw-r--r-- src/strategies/low_latency2/queue_strategy.cc 86
-rw-r--r-- src/strategies/low_latency2/queue_strategy.hh 22
-rw-r--r-- src/strategies/low_latency2/swapchain_monitor.cc 40
-rw-r--r-- src/strategies/low_latency2/swapchain_monitor.hh 10
8 files changed, 246 insertions, 40 deletions
diff --git a/src/layer.cc b/src/layer.cc
index 335ebf3..471d2fb 100644
--- a/src/layer.cc
+++ b/src/layer.cc
@@ -21,6 +21,8 @@
#include "layer_context.hh"
#include "queue_context.hh"
#include "strategies/anti_lag/device_strategy.hh"
+#include "strategies/low_latency2/device_strategy.hh"
+#include "strategies/low_latency2/queue_strategy.hh"
#include "timestamp_pool.hh"
namespace low_latency {
@@ -775,8 +777,10 @@ static VKAPI_ATTR VkResult VKAPI_CALL CreateSwapchainKHR(
return result;
}
- assert(pCreateInfo);
- context->strategy->notify_create_swapchain(*pSwapchain, *pCreateInfo);
+ if (context->was_capability_requested) {
+ assert(pCreateInfo);
+ context->strategy->notify_create_swapchain(*pSwapchain, *pCreateInfo);
+ }
return VK_SUCCESS;
}
@@ -788,7 +792,9 @@ DestroySwapchainKHR(VkDevice device, VkSwapchainKHR swapchain,
context->vtable.DestroySwapchainKHR(device, swapchain, pAllocator);
- context->strategy->notify_destroy_swapchain(swapchain);
+ if (context->was_capability_requested) {
+ context->strategy->notify_destroy_swapchain(swapchain);
+ }
}
static VKAPI_ATTR void VKAPI_CALL
@@ -809,6 +815,12 @@ VkResult LatencySleepNV(VkDevice device,
const auto context = layer_context.get_context(device);
assert(pSleepInfo);
+ // call device strategy notify semaphore, no problem :)
+ const auto strategy =
+ dynamic_cast<LowLatency2DeviceStrategy*>(context->strategy.get());
+ assert(strategy);
+ strategy->notify_latency_sleep_nv(swapchain, *pSleepInfo);
+
return VK_SUCCESS;
}
@@ -817,9 +829,12 @@ void QueueNotifyOutOfBandNV(
[[maybe_unused]] const VkOutOfBandQueueTypeInfoNV* pQueueTypeInfo) {
// Kind of interesting how you can't turn it back on once it's turned off.
- // Also I really have no idea why pQueueTypeInfo's VkOutOfBandQueueTypeNV
- // enum even exists (I guess we will find out later when nothing works).
const auto context = layer_context.get_context(queue);
+
+ const auto strategy =
+ dynamic_cast<LowLatency2QueueStrategy*>(context->strategy.get());
+ assert(strategy);
+ strategy->notify_out_of_band();
}
VkResult SetLatencySleepModeNV(
@@ -827,6 +842,12 @@ VkResult SetLatencySleepModeNV(
[[maybe_unused]] const VkLatencySleepModeInfoNV* pSleepModeInfo) {
const auto context = layer_context.get_context(device);
+ const auto strategy =
+ dynamic_cast<LowLatency2DeviceStrategy*>(context->strategy.get());
+ assert(strategy);
+
+ strategy->notify_latency_sleep_mode(swapchain, pSleepModeInfo);
+
return VK_SUCCESS;
}
diff --git a/src/physical_device_context.hh b/src/physical_device_context.hh
index d2e094e..1feed6a 100644
--- a/src/physical_device_context.hh
+++ b/src/physical_device_context.hh
@@ -18,8 +18,7 @@ class PhysicalDeviceContext final : public Context {
static constexpr auto required_extensions = {
VK_KHR_SYNCHRONIZATION_2_EXTENSION_NAME,
VK_KHR_CALIBRATED_TIMESTAMPS_EXTENSION_NAME,
- VK_EXT_HOST_QUERY_RESET_EXTENSION_NAME,
- VK_KHR_PRESENT_ID_EXTENSION_NAME};
+ VK_EXT_HOST_QUERY_RESET_EXTENSION_NAME};
public:
InstanceContext& instance;
diff --git a/src/strategies/low_latency2/device_strategy.cc b/src/strategies/low_latency2/device_strategy.cc
index 3a970a2..18ea21f 100644
--- a/src/strategies/low_latency2/device_strategy.cc
+++ b/src/strategies/low_latency2/device_strategy.cc
@@ -1,4 +1,6 @@
#include "device_strategy.hh"
+#include "device_context.hh"
+#include "queue_strategy.hh"
#include "helper.hh"
#include <mutex>
@@ -15,7 +17,10 @@ void LowLatency2DeviceStrategy::notify_create_swapchain(
// VK_NV_low_latency2 allows a swapchain to be created with the low latency
// mode already on via VkSwapchainLatencyCreateInfoNV.
- auto was_low_latency_requested = bool{false};
+ // Default to enabled - if the app is using VK_NV_low_latency2 at all it
+ // wants pacing. VkSwapchainLatencyCreateInfoNV can override this, but
+ // apps like CS2 recreate swapchains without it (apparent app bug).
+ auto was_low_latency_requested = bool{true};
if (const auto slci = find_next<VkSwapchainLatencyCreateInfoNV>(
&info, VK_STRUCTURE_TYPE_SWAPCHAIN_LATENCY_CREATE_INFO_NV);
slci) {
@@ -38,4 +43,80 @@ void LowLatency2DeviceStrategy::notify_destroy_swapchain(
this->swapchain_monitors.erase(swapchain);
}
+// Forwards new sleep-mode parameters to the monitor tracking `swapchain`.
+//
+// `info` may be null; in that case the monitor is updated with low latency
+// mode off and a zero minimum interval. Otherwise its `lowLatencyMode` and
+// `minimumIntervalUs` fields are passed through.
+//
+// The swapchain is expected to be tracked already (asserted).
+void LowLatency2DeviceStrategy::notify_latency_sleep_mode(
+    const VkSwapchainKHR& swapchain,
+    const VkLatencySleepModeInfoNV* const info) {
+
+    using namespace std::chrono;
+
+    // The monitor map is only looked up here, never modified, so a shared
+    // lock is taken.
+    const auto guard = std::shared_lock{this->mutex};
+
+    const auto monitor = this->swapchain_monitors.find(swapchain);
+    assert(monitor != std::end(this->swapchain_monitors));
+
+    if (!info) {
+        monitor->second.update_params(false, 0us);
+        return;
+    }
+
+    monitor->second.update_params(info->lowLatencyMode,
+                                  microseconds{info->minimumIntervalUs});
+}
+
+// Gathers every submission recorded against `present_id` on this device's
+// queues and hands the lot to the monitor tracking `swapchain`.
+//
+// Runs in two phases so that the device lock and this strategy's lock are
+// never held at the same time:
+//   1. Under the device lock, visit each queue and move its submissions for
+//      `present_id` (if any) out of the queue strategy.
+//   2. Under this strategy's lock, attach the collected work to the monitor.
+void LowLatency2DeviceStrategy::submit_swapchain_present_id(
+    const VkSwapchainKHR& swapchain, const std::uint64_t& present_id) {
+
+    using SubmissionQueue = std::deque<std::unique_ptr<Submission>>;
+
+    // Phase 1: one entry per queue that had work for this present_id.
+    auto collected = std::vector<SubmissionQueue>{};
+    {
+        const auto device_guard = std::scoped_lock{this->device.mutex};
+        for (const auto& entry : this->device.queues) {
+            const auto queue_strategy =
+                dynamic_cast<LowLatency2QueueStrategy*>(
+                    entry.second->strategy.get());
+            assert(queue_strategy);
+
+            // Queues flagged out-of-band contribute no work.
+            if (queue_strategy->is_out_of_band.load(
+                    std::memory_order::relaxed)) {
+                continue;
+            }
+
+            // The queue's bookkeeping is about to be modified, so take its
+            // lock.
+            const auto queue_guard = std::unique_lock{queue_strategy->mutex};
+            auto& pending = queue_strategy->present_id_submissions;
+            const auto found = pending.find(present_id);
+            if (found != std::end(pending)) {
+                // Take ownership of the submissions and drop the now-empty
+                // slot from the queue's map.
+                collected.push_back(std::move(found->second));
+                pending.erase(found);
+            }
+        }
+    }
+
+    // Phase 2: the device lock has been released by this point.
+    const auto guard = std::scoped_lock{this->mutex};
+
+    // The swapchain has to be tracked; anything else means the Vulkan
+    // bookkeeping has gone wrong.
+    const auto monitor = this->swapchain_monitors.find(swapchain);
+    assert(monitor != std::end(this->swapchain_monitors));
+
+    // The monitor must see this work complete before it signals whatever
+    // semaphore it is currently holding.
+    monitor->second.attach_work(std::move(collected));
+}
+
+// Passes the semaphore and value from `info` to the monitor tracking
+// `swapchain`, which decides when to signal it.
+//
+// The swapchain is expected to be tracked already (asserted).
+void LowLatency2DeviceStrategy::notify_latency_sleep_nv(
+    const VkSwapchainKHR& swapchain, const VkLatencySleepInfoNV& info) {
+
+    const auto guard = std::scoped_lock{this->mutex};
+
+    // An untracked swapchain here means something has gone badly wrong.
+    const auto monitor = this->swapchain_monitors.find(swapchain);
+    assert(monitor != std::end(this->swapchain_monitors));
+
+    auto& swapchain_monitor = monitor->second;
+    swapchain_monitor.notify_semaphore(info.signalSemaphore, info.value);
+}
+
} // namespace low_latency
diff --git a/src/strategies/low_latency2/device_strategy.hh b/src/strategies/low_latency2/device_strategy.hh
index af1b471..499b2aa 100644
--- a/src/strategies/low_latency2/device_strategy.hh
+++ b/src/strategies/low_latency2/device_strategy.hh
@@ -6,6 +6,7 @@
#include <shared_mutex>
#include <unordered_map>
+#include <vulkan/vulkan_core.h>
namespace low_latency {
@@ -27,6 +28,16 @@ class LowLatency2DeviceStrategy final : public DeviceStrategy {
const VkSwapchainCreateInfoKHR& info) override;
virtual void
notify_destroy_swapchain(const VkSwapchainKHR& swapchain) override;
+
+ public:
+ void submit_swapchain_present_id(const VkSwapchainKHR& swapchain,
+ const std::uint64_t& present_id);
+
+ void notify_latency_sleep_mode(const VkSwapchainKHR& swapchain,
+ const VkLatencySleepModeInfoNV* const info);
+
+ void notify_latency_sleep_nv(const VkSwapchainKHR& swapchain,
+ const VkLatencySleepInfoNV& info);
};
} // namespace low_latency
diff --git a/src/strategies/low_latency2/queue_strategy.cc b/src/strategies/low_latency2/queue_strategy.cc
index 855ff5d..9a68b78 100644
--- a/src/strategies/low_latency2/queue_strategy.cc
+++ b/src/strategies/low_latency2/queue_strategy.cc
@@ -1,8 +1,10 @@
#include "queue_strategy.hh"
+#include "device_context.hh"
+#include "device_strategy.hh"
#include "helper.hh"
+#include "queue_context.hh"
-#include <ranges>
-#include <span>
+#include <vulkan/vulkan_core.h>
namespace low_latency {
@@ -11,34 +13,80 @@ LowLatency2QueueStrategy::LowLatency2QueueStrategy(QueueContext& queue)
LowLatency2QueueStrategy::~LowLatency2QueueStrategy() {}
+// Shared implementation behind both notify_submit overloads.
+//
+// Files `submission` under the present id found in `submit`'s pNext chain
+// (VkLatencySubmissionPresentIdNV), or under id 0 when the struct is absent.
+// Two bounds keep the bookkeeping from growing without limit:
+//   - each present id keeps at most MAX_TRACKED_OBJECTS submissions, the
+//     oldest being dropped first;
+//   - at most MAX_TRACKED_OBJECTS non-zero present ids are remembered in
+//     present_id_ring; when the ring overflows, the oldest id and its map
+//     entry are dropped.
+//
+// T is VkSubmitInfo or VkSubmitInfo2.
+template <typename T>
+static void notify_submit_impl(LowLatency2QueueStrategy& strategy,
+                               const T& submit,
+                               std::unique_ptr<Submission> submission) {
+
+    // It's actually not a requirement that we have this present id.
+    const auto lspi = find_next<VkLatencySubmissionPresentIdNV>(
+        &submit, VK_STRUCTURE_TYPE_LATENCY_SUBMISSION_PRESENT_ID_NV);
+    const auto present_id = lspi ? lspi->presentID : std::uint64_t{0};
+
+    const auto lock = std::scoped_lock{strategy.mutex};
+    const auto [iter, inserted] =
+        strategy.present_id_submissions.try_emplace(present_id);
+    auto& submissions = iter->second;
+    submissions.push_back(std::move(submission));
+
+    // Remove stale submissions if we're submitting a lot to the same
+    // present_id. This doesn't affect anything because we're waiting on the
+    // last. It raises the question: should we just store the last only?
+    //
+    // Strictly greater-than, matching the ring check below, so that exactly
+    // MAX_TRACKED_OBJECTS entries are retained.
+    if (std::size(submissions) >
+        LowLatency2QueueStrategy::MAX_TRACKED_OBJECTS) {
+
+        submissions.pop_front();
+    }
+
+    // Add our present_id to our ring tracking if it's non-zero.
+    if (inserted && present_id) {
+        strategy.present_id_ring.push_back(present_id);
+    }
+
+    // Remove stale present_ids if they were never presented.
+    if (std::size(strategy.present_id_ring) >
+        LowLatency2QueueStrategy::MAX_TRACKED_OBJECTS) {
+
+        const auto to_remove = strategy.present_id_ring.front();
+        strategy.present_id_ring.pop_front();
+        strategy.present_id_submissions.erase(to_remove);
+    }
+}
+
void LowLatency2QueueStrategy::notify_submit(
- [[maybe_unused]] const VkSubmitInfo& submit,
- [[maybe_unused]] std::unique_ptr<Submission> submission) {}
+ const VkSubmitInfo& submit, std::unique_ptr<Submission> submission) {
+
+ notify_submit_impl(*this, submit, std::move(submission));
+}
void LowLatency2QueueStrategy::notify_submit(
- [[maybe_unused]] const VkSubmitInfo2& submit,
- [[maybe_unused]] std::unique_ptr<Submission> submission) {}
+ const VkSubmitInfo2& submit, std::unique_ptr<Submission> submission) {
+
+ notify_submit_impl(*this, submit, std::move(submission));
+}
void LowLatency2QueueStrategy::notify_present(const VkPresentInfoKHR& present) {
const auto pid =
find_next<VkPresentIdKHR>(&present, VK_STRUCTURE_TYPE_PRESENT_ID_KHR);
- // All submissions should be tagged with a present_id. If it isn't, I'm not
- // going to fail hard here - we will just ignore it.
- if (!pid) {
- return;
- }
-
- const auto swapchains =
- std::span{present.pSwapchains, present.swapchainCount};
- const auto present_ids =
- std::span{pid->pPresentIds, present.swapchainCount};
- for (const auto& [swapchain, present_id] :
- std::views::zip(swapchains, present_ids)) {
+ const auto device_strategy = dynamic_cast<LowLatency2DeviceStrategy*>(
+ this->queue.device.strategy.get());
+ assert(device_strategy);
- // TODO
+ for (auto i = std::uint32_t{0}; i < present.swapchainCount; ++i) {
+ const auto& swapchain = present.pSwapchains[i];
+ const auto present_id = [&]() -> std::uint64_t {
+ if (pid && pid->pPresentIds) {
+ return pid->pPresentIds[i];
+ }
+ return 0;
+ }();
+ device_strategy->submit_swapchain_present_id(swapchain, present_id);
}
}
+// Flags this queue as out-of-band. The flag is only ever set here, never
+// cleared.
+void LowLatency2QueueStrategy::notify_out_of_band() {
+    // Relaxed ordering: the store is a standalone flag write and publishes no
+    // other data.
+    this->is_out_of_band.store(true, std::memory_order::relaxed);
+}
+
} // namespace low_latency
diff --git a/src/strategies/low_latency2/queue_strategy.hh b/src/strategies/low_latency2/queue_strategy.hh
index 223f559..a090e1b 100644
--- a/src/strategies/low_latency2/queue_strategy.hh
+++ b/src/strategies/low_latency2/queue_strategy.hh
@@ -2,6 +2,13 @@
#define STRATEGIES_LOW_LATENCY2_QUEUE_STRATEGY_HH_
#include "strategies/queue_strategy.hh"
+#include "submission.hh"
+
+#include <atomic>
+#include <deque>
+#include <memory>
+#include <mutex>
+#include <unordered_map>
namespace low_latency {
@@ -9,6 +16,18 @@ class QueueContext;
class LowLatency2QueueStrategy final : public QueueStrategy {
public:
+ static constexpr auto MAX_TRACKED_OBJECTS = 50;
+
+ // Mapping of present_id's to submissions. Grabbed later by the device
+ // strategy when we present and actually can associate them to some
+ // vkSwapchainKHR.
+ std::mutex mutex{};
+ std::unordered_map<std::uint64_t, std::deque<std::unique_ptr<Submission>>>
+ present_id_submissions{};
+ std::deque<std::uint64_t> present_id_ring{};
+ std::atomic<bool> is_out_of_band{}; // atomic so we don't need a lock check
+
+ public:
LowLatency2QueueStrategy(QueueContext& queue);
virtual ~LowLatency2QueueStrategy();
@@ -18,6 +37,9 @@ class LowLatency2QueueStrategy final : public QueueStrategy {
virtual void notify_submit(const VkSubmitInfo2& submit,
std::unique_ptr<Submission> submission) override;
virtual void notify_present(const VkPresentInfoKHR& present) override;
+
+ public:
+ void notify_out_of_band();
};
} // namespace low_latency
diff --git a/src/strategies/low_latency2/swapchain_monitor.cc b/src/strategies/low_latency2/swapchain_monitor.cc
index 3c9b5e7..4c19251 100644
--- a/src/strategies/low_latency2/swapchain_monitor.cc
+++ b/src/strategies/low_latency2/swapchain_monitor.cc
@@ -2,8 +2,16 @@
#include "device_context.hh"
#include "helper.hh"
+#include <functional>
+
namespace low_latency {
+// Builds the monitor and starts its worker thread running do_monitor().
+//
+// The thread is started in the constructor body rather than the member
+// initialiser list. Members declared after monitor_worker (such as
+// last_signal_time) are constructed after it, and the worker must not be
+// able to observe them before that has happened.
+SwapchainMonitor::SwapchainMonitor(const DeviceContext& device)
+    : device(device) {
+    this->monitor_worker =
+        std::jthread{std::bind_front(&SwapchainMonitor::do_monitor, this)};
+}
+
+// Stops and joins the worker before any member is destroyed.
+//
+// Relying on monitor_worker's own destructor would join only after members
+// declared later than it have already been destroyed, while the worker may
+// still be touching them.
+SwapchainMonitor::~SwapchainMonitor() {
+    this->monitor_worker.request_stop();
+    if (this->monitor_worker.joinable()) {
+        this->monitor_worker.join();
+    }
+}
+
void SwapchainMonitor::WakeupSemaphore::signal(
const DeviceContext& device) const {
@@ -47,14 +55,28 @@ void SwapchainMonitor::do_monitor(const std::stop_token stoken) {
break;
}
- // Unlock, wait for work to finish, signal semaphore.
+ // Unlock, wait for work to finish, lock again.
lock.unlock();
- // Ugly and duplicated - will fix this soon.
- if (!semaphore_submission.submissions->empty()) {
- semaphore_submission.submissions->back().end->await_time();
+ for (const auto& submission : semaphore_submission.submissions) {
+ if (!submission.empty()) {
+ submission.back()->end->await_time();
+ }
}
- // TODO add wait for frame pacing
+ lock.lock();
+ using namespace std::chrono;
+ if (this->present_delay != 0us) {
+ const auto last_time = this->last_signal_time;
+ const auto delay = this->present_delay;
+ if (last_time.has_value()) {
+ lock.unlock();
+ std::this_thread::sleep_until(*last_time + delay);
+ lock.lock();
+ }
+ this->last_signal_time.emplace(steady_clock::now());
+ }
+ lock.unlock();
+
semaphore_submission.wakeup_semaphore.signal(this->device);
}
}
@@ -74,8 +96,7 @@ void SwapchainMonitor::notify_semaphore(const VkSemaphore& timeline_semaphore,
}
// Signal immediately if we have no outstanding work.
- if (!this->pending_submissions) {
- this->pending_submissions.reset();
+ if (this->pending_submissions.empty()) {
wakeup_semaphore.signal(this->device);
return;
}
@@ -84,20 +105,19 @@ void SwapchainMonitor::notify_semaphore(const VkSemaphore& timeline_semaphore,
.wakeup_semaphore = wakeup_semaphore,
.submissions = std::move(this->pending_submissions),
});
- this->pending_submissions.reset();
+ this->pending_submissions.clear();
lock.unlock();
this->cv.notify_one();
}
void SwapchainMonitor::attach_work(
- std::unique_ptr<std::deque<Submission>> submissions) {
+ std::vector<std::deque<std::unique_ptr<Submission>>> submissions) {
const auto lock = std::scoped_lock{this->mutex};
if (!this->was_low_latency_requested) {
return;
}
-
this->pending_submissions = std::move(submissions);
}
diff --git a/src/strategies/low_latency2/swapchain_monitor.hh b/src/strategies/low_latency2/swapchain_monitor.hh
index 9031bbb..47c3a75 100644
--- a/src/strategies/low_latency2/swapchain_monitor.hh
+++ b/src/strategies/low_latency2/swapchain_monitor.hh
@@ -27,13 +27,14 @@ class SwapchainMonitor final {
void signal(const DeviceContext& device) const;
};
- std::unique_ptr<std::deque<Submission>> pending_submissions{};
+ // An empty vector here represents our 'no work' state.
+ std::vector<std::deque<std::unique_ptr<Submission>>> pending_submissions{};
// A pairing of semaphore -> submissions.
// If the Submissions completes then signal the bundled semaphore.
struct SemaphoreSubmissions {
WakeupSemaphore wakeup_semaphore{};
- std::unique_ptr<std::deque<Submission>> submissions{};
+ std::vector<std::deque<std::unique_ptr<Submission>>> submissions{};
};
std::optional<SemaphoreSubmissions> semaphore_submission{};
@@ -47,6 +48,8 @@ class SwapchainMonitor final {
std::condition_variable_any cv{};
std::jthread monitor_worker{};
+ std::optional<std::chrono::steady_clock::time_point> last_signal_time;
+
void do_monitor(const std::stop_token stoken);
public:
@@ -64,7 +67,8 @@ class SwapchainMonitor final {
void notify_semaphore(const VkSemaphore& timeline_semaphore,
const std::uint64_t& value);
- void attach_work(std::unique_ptr<std::deque<Submission>> submissions);
+ void attach_work(
+ std::vector<std::deque<std::unique_ptr<Submission>>> submissions);
};
} // namespace low_latency