aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorNicolas James <Eele1Ephe7uZahRie@tutanota.com>2026-03-30 22:47:12 +1100
committerNicolas James <Eele1Ephe7uZahRie@tutanota.com>2026-03-30 22:47:12 +1100
commit108801fe96d855c5ccf532639a6db8ff0065310e (patch)
tree24f551fbffad9ec4fd842f56dd530d65a1150723
parent7b17b60786d00c592f0ef18c8481148143baacbd (diff)
Move timestamp pool reacquisition to an asynchronous worker queue, fix device_context race during destruction
-rw-r--r--src/device_context.hh4
-rw-r--r--src/queue_context.cc15
-rw-r--r--src/queue_context.hh8
-rw-r--r--src/swapchain_monitor.cc4
-rw-r--r--src/timestamp_pool.cc93
-rw-r--r--src/timestamp_pool.hh71
6 files changed, 125 insertions, 70 deletions
diff --git a/src/device_context.hh b/src/device_context.hh
index 0e0a4eb..53970e5 100644
--- a/src/device_context.hh
+++ b/src/device_context.hh
@@ -31,10 +31,10 @@ class DeviceContext final : public Context {
const VkDevice device;
const VkuDeviceDispatchTable vtable;
- std::unordered_map<VkQueue, std::shared_ptr<QueueContext>> queues;
-
std::unique_ptr<DeviceClock> clock;
+ std::unordered_map<VkQueue, std::shared_ptr<QueueContext>> queues;
+
std::unordered_map<VkSwapchainKHR, SwapchainMonitor> swapchain_monitors;
public:
diff --git a/src/queue_context.cc b/src/queue_context.cc
index 437a183..0356c5c 100644
--- a/src/queue_context.cc
+++ b/src/queue_context.cc
@@ -59,6 +59,10 @@ void QueueContext::notify_submit(
if (submissions == nullptr) {
submissions =
std::make_shared<std::deque<std::unique_ptr<Submission>>>();
+
+ if (present_id) {
+ this->present_id_ring.emplace_back(present_id);
+ }
}
submissions->push_back(
@@ -66,12 +70,17 @@ void QueueContext::notify_submit(
.tail_handle = tail_handle,
.cpu_present_time = now}));
- // This is probably hit if our queue never actually presents to anything,
- // because the only time we manually evict our unpresent_submissions is
- // when we present to something.
+ // This is probably hit if our queue never actually presents to anything.
if (std::size(*submissions) > this->MAX_TRACKED_SUBMISSIONS) {
submissions->pop_front();
}
+
+ if (std::size(this->present_id_ring) > MAX_TRACKED_PRESENT_IDS) {
+ const auto evicted_present_id = this->present_id_ring.front();
+ this->present_id_ring.pop_front();
+
+ this->unpresented_submissions.erase(evicted_present_id);
+ }
}
void QueueContext::notify_present(const VkSwapchainKHR& swapchain,
diff --git a/src/queue_context.hh b/src/queue_context.hh
index a52e718..873a85a 100644
--- a/src/queue_context.hh
+++ b/src/queue_context.hh
@@ -20,6 +20,7 @@ class QueueContext final : public Context {
// we give up tracking them. This is necessary for queues which do not
// present anything.
static constexpr auto MAX_TRACKED_SUBMISSIONS = 50u;
+ static constexpr auto MAX_TRACKED_PRESENT_IDS = 50u;
public:
DeviceContext& device;
@@ -79,6 +80,13 @@ class QueueContext final : public Context {
using present_id_t = std::uint64_t;
std::unordered_map<present_id_t, submissions_t> unpresented_submissions;
+ // We might be tracking present_ids which aren't presented to - and as a
+ // result we don't ever clear those Submissions. So manually evict them by
+ // removing the n'th oldest. This is elegant because even if our
+ // SwapchainMonitor has these stored (unlikely) they won't be destructed as
+ // it just decrements their std::shared_ptr use count.
+ std::deque<present_id_t> present_id_ring;
+
public:
QueueContext(DeviceContext& device_context, const VkQueue& queue,
const std::uint32_t& queue_family_index);
diff --git a/src/swapchain_monitor.cc b/src/swapchain_monitor.cc
index adeb315..bcf89e1 100644
--- a/src/swapchain_monitor.cc
+++ b/src/swapchain_monitor.cc
@@ -95,6 +95,10 @@ void SwapchainMonitor::notify_present(
const auto lock = std::scoped_lock{this->mutex};
+ if (!this->was_low_latency_requested) {
+ return;
+ }
+
// Fast path where this work has already completed.
if (!this->wakeup_semaphores.empty() && !submissions->empty()) {
diff --git a/src/timestamp_pool.cc b/src/timestamp_pool.cc
index 4bb236b..19a9560 100644
--- a/src/timestamp_pool.cc
+++ b/src/timestamp_pool.cc
@@ -3,6 +3,7 @@
#include "helper.hh"
#include "queue_context.hh"
+#include <functional>
#include <mutex>
#include <ranges>
#include <span>
@@ -36,8 +37,8 @@ TimestampPool::QueryChunk::QueryChunk(const QueueContext& queue_context)
command_buffers(std::make_unique<CommandBuffersOwner>(queue_context)) {
this->free_indices = []() {
- constexpr auto KEYS = std::views::iota(0u, QueryChunk::CHUNK_SIZE);
- return std::make_unique<free_indices_t>(std::from_range, KEYS);
+ constexpr auto keys = std::views::iota(0u, QueryChunk::CHUNK_SIZE);
+ return std::unordered_set<std::uint64_t>(std::from_range, keys);
}();
}
@@ -76,56 +77,80 @@ VkCommandBuffer TimestampPool::QueryChunk::CommandBuffersOwner::operator[](
TimestampPool::QueryChunk::~QueryChunk() {}
TimestampPool::TimestampPool(QueueContext& queue_context)
- : queue_context(queue_context) {
-
- // Allocate one block on construction, it's likely more than enough.
- auto query_chunk = std::make_shared<QueryChunk>(this->queue_context);
- this->query_chunks.emplace(std::move(query_chunk));
-}
+ : queue_context(queue_context),
+ reaper_worker(std::bind_front(&TimestampPool::do_reaper, this)) {}
std::shared_ptr<TimestampPool::Handle> TimestampPool::acquire() {
const auto lock = std::scoped_lock{this->mutex};
// Gets the empty one, or inserts a new one and returns it.
- const auto not_empty_iter = [this]() -> auto {
+ auto& query_chunk = [this]() -> auto& {
const auto not_empty_iter =
std::ranges::find_if(this->query_chunks, [](const auto& qc) {
assert(qc);
- return std::size(*qc->free_indices);
+ return std::size(qc->free_indices);
});
if (not_empty_iter != std::end(this->query_chunks)) {
- return not_empty_iter;
+ return **not_empty_iter;
}
- const auto insert = std::make_shared<QueryChunk>(this->queue_context);
- const auto [iter, did_insert] = this->query_chunks.emplace(insert);
+ const auto [iter, did_insert] = this->query_chunks.emplace(
+ std::make_unique<QueryChunk>(this->queue_context));
assert(did_insert);
- return iter;
+ return **iter;
}();
- // Grab any element from our set and erase it immediately after.
- auto& indices = *(*not_empty_iter)->free_indices;
- const auto query_index = *std::begin(indices);
- indices.erase(query_index);
+ // Pull any element from our set to use as our query_index here.
+ const auto query_index = *std::begin(query_chunk.free_indices);
+ query_chunk.free_indices.erase(query_index);
+
+ // Custom deleter function that puts the handle on our async deleter queue.
+ const auto reaper_deleter = [this](Handle* const handle) {
+ if (!handle) {
+ return;
+ }
+
+ const auto lock = std::scoped_lock{this->mutex};
+ this->expiring_handles.push_back(handle);
+ this->cv.notify_one();
+ };
- return std::make_shared<Handle>(*this, *not_empty_iter, query_index);
+ return std::shared_ptr<Handle>(new Handle(*this, query_chunk, query_index),
+ reaper_deleter);
}
TimestampPool::Handle::Handle(TimestampPool& timestamp_pool,
- const std::shared_ptr<QueryChunk>& origin_chunk,
+ QueryChunk& query_chunk,
const std::uint64_t& query_index)
- : timestamp_pool(timestamp_pool), origin_chunk(origin_chunk),
- query_pool(*origin_chunk->query_pool), query_index(query_index),
- command_buffer((*origin_chunk->command_buffers)[query_index]) {}
+ : timestamp_pool(timestamp_pool), query_chunk(query_chunk),
+ query_pool(*query_chunk.query_pool), query_index(query_index),
+ command_buffer((*query_chunk.command_buffers)[query_index]) {}
-TimestampPool::Handle::~Handle() {
- const auto lock = std::scoped_lock{this->timestamp_pool.mutex};
+TimestampPool::Handle::~Handle() {}
- // Parent destructing shouldn't mean we should have a bunch of
- // insertions for zero reason.
- if (const auto ptr = this->origin_chunk.lock(); ptr) {
- ptr->free_indices->insert(this->query_index);
+void TimestampPool::do_reaper(const std::stop_token stoken) {
+ for (;;) {
+ auto lock = std::unique_lock{this->mutex};
+ this->cv.wait(lock, stoken,
+ [&]() { return !this->expiring_handles.empty(); });
+
+ // Keep going and free everything before destructing.
+ if (stoken.stop_requested() && this->expiring_handles.empty()) {
+ break;
+ }
+
+ const auto handle_ptr = this->expiring_handles.front();
+ this->expiring_handles.pop_front();
+
+ // Allow more to go on the queue while we wait for it to finish.
+ lock.unlock();
+ handle_ptr->await_time();
+
+ // Lock our mutex, allow the queue to use it again and delete it.
+ lock.lock();
+ handle_ptr->query_chunk.free_indices.insert(handle_ptr->query_index);
+ delete handle_ptr;
}
}
@@ -175,7 +200,7 @@ std::optional<DeviceClock::time_point_t> TimestampPool::Handle::get_time() {
auto query_result = QueryResult{};
const auto result = vtable.GetQueryPoolResults(
- context.device, query_pool,
+ context.device, this->query_pool,
static_cast<std::uint32_t>(this->query_index), 1, sizeof(query_result),
&query_result, sizeof(query_result),
VK_QUERY_RESULT_64_BIT | VK_QUERY_RESULT_WITH_AVAILABILITY_BIT);
@@ -202,7 +227,7 @@ DeviceClock::time_point_t TimestampPool::Handle::await_time() {
auto query_result = QueryResult{};
THROW_NOT_VKSUCCESS(vtable.GetQueryPoolResults(
- context.device, query_pool,
+ context.device, this->query_pool,
static_cast<std::uint32_t>(this->query_index), 1, sizeof(query_result),
&query_result, sizeof(query_result),
VK_QUERY_RESULT_64_BIT | VK_QUERY_RESULT_WITH_AVAILABILITY_BIT |
@@ -212,12 +237,6 @@ DeviceClock::time_point_t TimestampPool::Handle::await_time() {
return context.clock->ticks_to_time(query_result.value);
}
-DeviceClock::time_point_t TimestampPool::Handle::get_time_required() {
- const auto time = this->get_time();
- assert(time.has_value());
- return *time;
-}
-
TimestampPool::~TimestampPool() {}
} // namespace low_latency \ No newline at end of file
diff --git a/src/timestamp_pool.hh b/src/timestamp_pool.hh
index d8ee359..bf3335d 100644
--- a/src/timestamp_pool.hh
+++ b/src/timestamp_pool.hh
@@ -2,20 +2,15 @@
#define TIMESTAMP_POOL_HH_
// The purpose of this file is to provide the definition of a 'timestamp pool'.
-// It manages blocks of timestamp query pools, hands them out when requested,
-// and allocates more when (if) we run out. It _should_ be thread safe.
-// Usage:
-// 1. Get handle with .acquire().
-// 2. Write start/end timestamp operations with the handle's pool and index
-// into the provided command buffer. Will return nullopt if they're
-// not yet available.
-// 3. Destruct the handle to return the key to the pool.
#include <vulkan/utility/vk_dispatch_table.h>
#include <vulkan/vulkan.hpp>
+#include <condition_variable>
+#include <deque>
#include <memory>
#include <mutex>
+#include <thread>
#include <unordered_set>
#include <vector>
@@ -26,19 +21,32 @@ namespace low_latency {
class QueueContext;
class DeviceContext;
+// A timestamp pool manages blocks of timestamp query pools, hands them out when
+// requested, and allocates more when (if) we run out. It _should_ be thread
+// safe.
+// Usage:
+// 1. Get handle with .acquire().
+// 2. Write start/end timestamp operations with the handle's pool and index
+// into the provided command buffer.
+// 3. Grab the time, or wait until it's ready, using get_time or await_time
+// respectively.
+// 4. Destruct the handle to return the key to the pool. The pool handles,
+// via an async reaper thread, when the actual handle's contents can be
+// reused as they must be alive until vulkan is done with them.
class TimestampPool final {
private:
QueueContext& queue_context;
- std::mutex mutex;
// A chunk of data which is useful for making timestamp queries.
// Allows association of an index to a query pool and command buffer.
// We reuse these when they're released.
- struct QueryChunk final {
+ class QueryChunk final {
+ friend class TimestampPool;
+
private:
static constexpr auto CHUNK_SIZE = 512u;
- public:
+ private:
struct QueryPoolOwner final {
private:
const QueueContext& queue_context;
@@ -55,10 +63,6 @@ class TimestampPool final {
public:
operator const VkQueryPool&() const { return this->query_pool; }
};
- std::unique_ptr<QueryPoolOwner> query_pool;
-
- using free_indices_t = std::unordered_set<std::uint64_t>;
- std::unique_ptr<free_indices_t> free_indices;
struct CommandBuffersOwner final {
private:
@@ -76,7 +80,11 @@ class TimestampPool final {
public:
VkCommandBuffer operator[](const std::size_t& i);
};
+
+ std::unique_ptr<QueryPoolOwner> query_pool;
std::unique_ptr<CommandBuffersOwner> command_buffers;
+ // A set of indices which are currently available in this chunk.
+ std::unordered_set<std::uint64_t> free_indices;
public:
QueryChunk(const QueueContext& queue_context);
@@ -86,19 +94,19 @@ class TimestampPool final {
QueryChunk operator=(QueryChunk&&) = delete;
~QueryChunk();
};
- std::unordered_set<std::shared_ptr<QueryChunk>> query_chunks;
public:
- // A handle represents a VkCommandBuffer and a query index.
- // Once the Handle goes out of scope, the query index will be returned
- // to the parent pool.
+ // A handle represents a VkCommandBuffer and a query index. It can be used
+ // to attach timing information to submissions. Once the Handle destructs
+ // the query index will be returned to the parent pool - but crucially only
+ // when Vulkan is done with it.
struct Handle final {
private:
friend class TimestampPool;
private:
TimestampPool& timestamp_pool;
- const std::weak_ptr<QueryChunk> origin_chunk;
+ QueryChunk& query_chunk;
public:
const VkQueryPool query_pool;
@@ -106,13 +114,12 @@ class TimestampPool final {
const VkCommandBuffer command_buffer;
public:
- Handle(TimestampPool& timestamp_pool,
- const std::shared_ptr<QueryChunk>& origin_chunk,
+ Handle(TimestampPool& timestamp_pool, QueryChunk& query_chunk,
const std::uint64_t& query_index);
Handle(const Handle& handle) = delete;
- Handle(Handle&&) = delete;
Handle operator=(const Handle& handle) = delete;
- Handle operator=(Handle&&) = delete;
+ Handle(Handle&&) = delete;
+ Handle& operator=(Handle&&) = delete;
~Handle();
public:
@@ -125,11 +132,20 @@ class TimestampPool final {
// Waits until the time is available and returns it.
DeviceClock::time_point_t await_time();
-
- // Calls get_time with the assumption it's already available.
- DeviceClock::time_point_t get_time_required();
};
+ private:
+ void do_reaper(const std::stop_token stoken);
+
+ private:
+ std::deque<Handle*> expiring_handles;
+ std::unordered_set<std::unique_ptr<QueryChunk>> query_chunks;
+
+ std::mutex mutex;
+ std::condition_variable_any cv;
+
+ std::jthread reaper_worker;
+
public:
TimestampPool(QueueContext& queue_context);
TimestampPool(const TimestampPool&) = delete;
@@ -139,7 +155,6 @@ class TimestampPool final {
~TimestampPool();
public:
- // Hands out a Handle!
std::shared_ptr<Handle> acquire();
};