diff options
| -rw-r--r-- | src/device_context.hh | 4 | ||||
| -rw-r--r-- | src/queue_context.cc | 15 | ||||
| -rw-r--r-- | src/queue_context.hh | 8 | ||||
| -rw-r--r-- | src/swapchain_monitor.cc | 4 | ||||
| -rw-r--r-- | src/timestamp_pool.cc | 93 | ||||
| -rw-r--r-- | src/timestamp_pool.hh | 71 |
6 files changed, 125 insertions, 70 deletions
diff --git a/src/device_context.hh b/src/device_context.hh index 0e0a4eb..53970e5 100644 --- a/src/device_context.hh +++ b/src/device_context.hh @@ -31,10 +31,10 @@ class DeviceContext final : public Context { const VkDevice device; const VkuDeviceDispatchTable vtable; - std::unordered_map<VkQueue, std::shared_ptr<QueueContext>> queues; - std::unique_ptr<DeviceClock> clock; + std::unordered_map<VkQueue, std::shared_ptr<QueueContext>> queues; + std::unordered_map<VkSwapchainKHR, SwapchainMonitor> swapchain_monitors; public: diff --git a/src/queue_context.cc b/src/queue_context.cc index 437a183..0356c5c 100644 --- a/src/queue_context.cc +++ b/src/queue_context.cc @@ -59,6 +59,10 @@ void QueueContext::notify_submit( if (submissions == nullptr) { submissions = std::make_shared<std::deque<std::unique_ptr<Submission>>>(); + + if (present_id) { + this->present_id_ring.emplace_back(present_id); + } } submissions->push_back( @@ -66,12 +70,17 @@ void QueueContext::notify_submit( .tail_handle = tail_handle, .cpu_present_time = now})); - // This is probably hit if our queue never actually presents to anything, - // because the only time we manually evict our unpresent_submissions is - // when we present to something. + // This is probably hit if our queue never actually presents to anything. if (std::size(*submissions) > this->MAX_TRACKED_SUBMISSIONS) { submissions->pop_front(); } + + if (std::size(this->present_id_ring) > MAX_TRACKED_PRESENT_IDS) { + const auto evicted_present_id = this->present_id_ring.front(); + this->present_id_ring.pop_front(); + + this->unpresented_submissions.erase(evicted_present_id); + } } void QueueContext::notify_present(const VkSwapchainKHR& swapchain, diff --git a/src/queue_context.hh b/src/queue_context.hh index a52e718..873a85a 100644 --- a/src/queue_context.hh +++ b/src/queue_context.hh @@ -20,6 +20,7 @@ class QueueContext final : public Context { // we give up tracking them. This is neccessary for queues which do not // present anything. static constexpr auto MAX_TRACKED_SUBMISSIONS = 50u; + static constexpr auto MAX_TRACKED_PRESENT_IDS = 50u; public: DeviceContext& device; @@ -79,6 +80,13 @@ class QueueContext final : public Context { using present_id_t = std::uint64_t; std::unordered_map<present_id_t, submissions_t> unpresented_submissions; + // We might be tracking present_ids which aren't presented to - and as a + // result we don't ever clear those Submissions. So manually evict them by + // removing the n'th oldest. This is elegant because even if our + // SwapchainMonitor has these stored (unlikely) they won't be destructed as + // it just decrements their std::shared_ptr use count. + std::deque<present_id_t> present_id_ring; + public: QueueContext(DeviceContext& device_context, const VkQueue& queue, const std::uint32_t& queue_family_index); diff --git a/src/swapchain_monitor.cc b/src/swapchain_monitor.cc index adeb315..bcf89e1 100644 --- a/src/swapchain_monitor.cc +++ b/src/swapchain_monitor.cc @@ -95,6 +95,10 @@ void SwapchainMonitor::notify_present( const auto lock = std::scoped_lock{this->mutex}; + if (!this->was_low_latency_requested) { + return; + } + // Fast path where this work has already completed. if (!this->wakeup_semaphores.empty() && !submissions->empty()) { diff --git a/src/timestamp_pool.cc b/src/timestamp_pool.cc index 4bb236b..19a9560 100644 --- a/src/timestamp_pool.cc +++ b/src/timestamp_pool.cc @@ -3,6 +3,7 @@ #include "helper.hh" #include "queue_context.hh" +#include <functional> #include <mutex> #include <ranges> #include <span> @@ -36,8 +37,8 @@ TimestampPool::QueryChunk::QueryChunk(const QueueContext& queue_context) command_buffers(std::make_unique<CommandBuffersOwner>(queue_context)) { this->free_indices = []() { - constexpr auto KEYS = std::views::iota(0u, QueryChunk::CHUNK_SIZE); - return std::make_unique<free_indices_t>(std::from_range, KEYS); + constexpr auto keys = std::views::iota(0u, QueryChunk::CHUNK_SIZE); + return std::unordered_set<std::uint64_t>(std::from_range, keys); }(); } @@ -76,56 +77,80 @@ VkCommandBuffer TimestampPool::QueryChunk::CommandBuffersOwner::operator[]( TimestampPool::QueryChunk::~QueryChunk() {} TimestampPool::TimestampPool(QueueContext& queue_context) - : queue_context(queue_context) { - - // Allocate one block on construction, it's likely more than enough. - auto query_chunk = std::make_shared<QueryChunk>(this->queue_context); - this->query_chunks.emplace(std::move(query_chunk)); -} + : queue_context(queue_context), + reaper_worker(std::bind_front(&TimestampPool::do_reaper, this)) {} std::shared_ptr<TimestampPool::Handle> TimestampPool::acquire() { const auto lock = std::scoped_lock{this->mutex}; // Gets the empty one, or inserts a new one and returns it. - const auto not_empty_iter = [this]() -> auto { + auto& query_chunk = [this]() -> auto& { const auto not_empty_iter = std::ranges::find_if(this->query_chunks, [](const auto& qc) { assert(qc); - return std::size(*qc->free_indices); + return std::size(qc->free_indices); }); if (not_empty_iter != std::end(this->query_chunks)) { - return not_empty_iter; + return **not_empty_iter; } - const auto insert = std::make_shared<QueryChunk>(this->queue_context); - const auto [iter, did_insert] = this->query_chunks.emplace(insert); + const auto [iter, did_insert] = this->query_chunks.emplace( + std::make_unique<QueryChunk>(this->queue_context)); assert(did_insert); - return iter; + return **iter; }(); - // Grab any element from our set and erase it immediately after. - auto& indices = *(*not_empty_iter)->free_indices; - const auto query_index = *std::begin(indices); - indices.erase(query_index); + // Pull any element from our set to use as our query_index here. + const auto query_index = *std::begin(query_chunk.free_indices); + query_chunk.free_indices.erase(query_index); + + // Custom deleter function that puts the handle on our async deleter queue. + const auto reaper_deleter = [this](Handle* const handle) { + if (!handle) { + return; + } + + const auto lock = std::scoped_lock{this->mutex}; + this->expiring_handles.push_back(handle); + this->cv.notify_one(); + }; - return std::make_shared<Handle>(*this, *not_empty_iter, query_index); + return std::shared_ptr<Handle>(new Handle(*this, query_chunk, query_index), + reaper_deleter); } TimestampPool::Handle::Handle(TimestampPool& timestamp_pool, - const std::shared_ptr<QueryChunk>& origin_chunk, + QueryChunk& query_chunk, const std::uint64_t& query_index) - : timestamp_pool(timestamp_pool), origin_chunk(origin_chunk), - query_pool(*origin_chunk->query_pool), query_index(query_index), - command_buffer((*origin_chunk->command_buffers)[query_index]) {} + : timestamp_pool(timestamp_pool), query_chunk(query_chunk), + query_pool(*query_chunk.query_pool), query_index(query_index), + command_buffer((*query_chunk.command_buffers)[query_index]) {} -TimestampPool::Handle::~Handle() { - const auto lock = std::scoped_lock{this->timestamp_pool.mutex}; +TimestampPool::Handle::~Handle() {} - // Parent destructing shouldn't mean we should have a bunch of - // insertions for zero reason. - if (const auto ptr = this->origin_chunk.lock(); ptr) { - ptr->free_indices->insert(this->query_index); +void TimestampPool::do_reaper(const std::stop_token stoken) { + for (;;) { + auto lock = std::unique_lock{this->mutex}; + this->cv.wait(lock, stoken, + [&]() { return !this->expiring_handles.empty(); }); + + // Keep going and free everything before destructing. + if (stoken.stop_requested() && this->expiring_handles.empty()) { + break; + } + + const auto handle_ptr = this->expiring_handles.front(); + this->expiring_handles.pop_front(); + + // Allow more to go on the queue while we wait for it to finish. + lock.unlock(); + handle_ptr->await_time(); + + // Lock our mutex, allow the queue to use it again and delete it. + lock.lock(); + handle_ptr->query_chunk.free_indices.insert(handle_ptr->query_index); + delete handle_ptr; } } @@ -175,7 +200,7 @@ std::optional<DeviceClock::time_point_t> TimestampPool::Handle::get_time() { auto query_result = QueryResult{}; const auto result = vtable.GetQueryPoolResults( - context.device, query_pool, + context.device, this->query_pool, static_cast<std::uint32_t>(this->query_index), 1, sizeof(query_result), &query_result, sizeof(query_result), VK_QUERY_RESULT_64_BIT | VK_QUERY_RESULT_WITH_AVAILABILITY_BIT); @@ -202,7 +227,7 @@ DeviceClock::time_point_t TimestampPool::Handle::await_time() { auto query_result = QueryResult{}; THROW_NOT_VKSUCCESS(vtable.GetQueryPoolResults( - context.device, query_pool, + context.device, this->query_pool, static_cast<std::uint32_t>(this->query_index), 1, sizeof(query_result), &query_result, sizeof(query_result), VK_QUERY_RESULT_64_BIT | VK_QUERY_RESULT_WITH_AVAILABILITY_BIT | @@ -212,12 +237,6 @@ DeviceClock::time_point_t TimestampPool::Handle::await_time() { return context.clock->ticks_to_time(query_result.value); } -DeviceClock::time_point_t TimestampPool::Handle::get_time_required() { - const auto time = this->get_time(); - assert(time.has_value()); - return *time; -} - TimestampPool::~TimestampPool() {} } // namespace low_latency
\ No newline at end of file diff --git a/src/timestamp_pool.hh b/src/timestamp_pool.hh index d8ee359..bf3335d 100644 --- a/src/timestamp_pool.hh +++ b/src/timestamp_pool.hh @@ -2,20 +2,15 @@ #define TIMESTAMP_POOL_HH_ // The purpose of this file is to provide the definition of a 'timestamp pool'. -// It manages blocks of timestamp query pools, hands them out when requested, -// and allocates more when (if) we run out. It _should_ be thread safe. -// Usage: -// 1. Get handle with .acquire(). -// 2. Write start/end timestamp operations with the handle's pool and index -// into the provided command buffer. Will return nullopt if they're -// not yet available. -// 3. Destruct the handle to return the key to the pool. #include <vulkan/utility/vk_dispatch_table.h> #include <vulkan/vulkan.hpp> +#include <condition_variable> +#include <deque> #include <memory> #include <mutex> +#include <thread> #include <unordered_set> #include <vector> @@ -26,19 +21,32 @@ namespace low_latency { class QueueContext; class DeviceContext; +// A timestamp pool manages blocks of timestamp query pools, hands them out when +// requested, and allocates more when (if) we run out. It _should_ be thread +// safe. +// Usage: +// 1. Get handle with .acquire(). +// 2. Write start/end timestamp operations with the handle's pool and index +// into the provided command buffer. +// 3. Grab the time, or wait until it's ready, using get_time or await_time +// respectively. +// 4. Destruct the handle to return the key to the pool. The pool handles, +// via an async reaper thread, when the actual handle's contents can be +// reused as they must be alive until vulkan is done with them. class TimestampPool final { private: QueueContext& queue_context; - std::mutex mutex; // A chunk of data which is useful for making timestamp queries. // Allows association of an index to a query pool and command buffer. // We reuse these when they're released. - struct QueryChunk final { + class QueryChunk final { + friend class TimestampPool; + private: static constexpr auto CHUNK_SIZE = 512u; - public: + private: struct QueryPoolOwner final { private: const QueueContext& queue_context; @@ -55,10 +63,6 @@ class TimestampPool final { public: operator const VkQueryPool&() const { return this->query_pool; } }; - std::unique_ptr<QueryPoolOwner> query_pool; - - using free_indices_t = std::unordered_set<std::uint64_t>; - std::unique_ptr<free_indices_t> free_indices; struct CommandBuffersOwner final { private: @@ -76,7 +80,11 @@ class TimestampPool final { public: VkCommandBuffer operator[](const std::size_t& i); }; + + std::unique_ptr<QueryPoolOwner> query_pool; std::unique_ptr<CommandBuffersOwner> command_buffers; + // A set of indices which are currently availabe in this chunk. + std::unordered_set<std::uint64_t> free_indices; public: QueryChunk(const QueueContext& queue_context); @@ -86,19 +94,19 @@ class TimestampPool final { QueryChunk operator=(QueryChunk&&) = delete; ~QueryChunk(); }; - std::unordered_set<std::shared_ptr<QueryChunk>> query_chunks; public: - // A handle represents a VkCommandBuffer and a query index. - // Once the Handle goes out of scope, the query index will be returned - // to the parent pool. + // A handle represents a VkCommandBuffer and a query index. It can be used + // to attach timing information to submissions. Once the Handle destructs + // the query index will be returned to the parent pool - but crucially only + // when Vulkan is done with it. struct Handle final { private: friend class TimestampPool; private: TimestampPool& timestamp_pool; - const std::weak_ptr<QueryChunk> origin_chunk; + QueryChunk& query_chunk; public: const VkQueryPool query_pool; @@ -106,13 +114,12 @@ class TimestampPool final { const VkCommandBuffer command_buffer; public: - Handle(TimestampPool& timestamp_pool, - const std::shared_ptr<QueryChunk>& origin_chunk, + Handle(TimestampPool& timestamp_pool, QueryChunk& query_chunk, const std::uint64_t& query_index); Handle(const Handle& handle) = delete; - Handle(Handle&&) = delete; Handle operator=(const Handle& handle) = delete; - Handle operator=(Handle&&) = delete; + Handle(Handle&&) = delete; + Handle& operator=(Handle&&) = delete; ~Handle(); public: @@ -125,11 +132,20 @@ class TimestampPool final { // Waits until the time is available and returns it. DeviceClock::time_point_t await_time(); - - // Calls get_time with the assumption it's already available. - DeviceClock::time_point_t get_time_required(); }; + private: + void do_reaper(const std::stop_token stoken); + + private: + std::deque<Handle*> expiring_handles; + std::unordered_set<std::unique_ptr<QueryChunk>> query_chunks; + + std::mutex mutex; + std::condition_variable_any cv; + + std::jthread reaper_worker; + public: TimestampPool(QueueContext& queue_context); TimestampPool(const TimestampPool&) = delete; @@ -139,7 +155,6 @@ class TimestampPool final { ~TimestampPool(); public: - // Hands out a Handle! std::shared_ptr<Handle> acquire(); }; |
