From 341b9b65a57dee1d4d16ec0994fe9a414b542ba5 Mon Sep 17 00:00:00 2001 From: Nicolas James Date: Tue, 7 Apr 2026 01:43:56 +1000 Subject: Fix rare race when timestamps are returned to the pool and reused before their work completes --- src/layer.cc | 24 ++--- src/strategies/anti_lag/queue_strategy.cc | 2 +- src/strategies/low_latency2/swapchain_monitor.cc | 3 +- src/submission.hh | 2 +- src/timestamp_pool.cc | 111 +++++++++++------------ src/timestamp_pool.hh | 38 ++++---- 6 files changed, 84 insertions(+), 96 deletions(-) (limited to 'src') diff --git a/src/layer.cc b/src/layer.cc index b8a2bd0..a4759fa 100644 --- a/src/layer.cc +++ b/src/layer.cc @@ -399,24 +399,20 @@ vkQueueSubmit(VkQueue queue, std::uint32_t submit_count, std::ranges::transform( submit_span, std::back_inserter(next_submits), [&](const auto& submit) { - const auto head_handle = context->timestamp_pool->acquire(); - head_handle->write_command(VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT); - const auto tail_handle = context->timestamp_pool->acquire(); - tail_handle->write_command(VK_PIPELINE_STAGE_2_BOTTOM_OF_PIPE_BIT); + const auto handle = context->timestamp_pool->acquire(); submissions.push_back(std::make_unique(Submission{ - .start = head_handle, - .end = tail_handle, + .handle = handle, .time = now, })); next_cbs.emplace_back([&]() -> auto { auto cbs = std::make_unique(); - cbs->push_back(head_handle->command_buffer); + cbs->push_back(handle->get_start_buffer()); std::ranges::copy(std::span{submit.pCommandBuffers, submit.commandBufferCount}, std::back_inserter(*cbs)); - cbs->push_back(tail_handle->command_buffer); + cbs->push_back(handle->get_end_buffer()); return cbs; }()); @@ -464,14 +460,10 @@ vkQueueSubmit2(VkQueue queue, std::uint32_t submit_count, std::ranges::transform( submit_span, std::back_inserter(next_submits), [&](const auto& submit) { - const auto head_handle = context->timestamp_pool->acquire(); - head_handle->write_command(VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT); - const auto tail_handle = context->timestamp_pool->acquire(); - tail_handle->write_command(VK_PIPELINE_STAGE_2_BOTTOM_OF_PIPE_BIT); + const auto handle = context->timestamp_pool->acquire(); submissions.push_back(std::make_unique(Submission{ - .start = head_handle, - .end = tail_handle, + .handle = handle, .time = now, })); @@ -479,14 +471,14 @@ vkQueueSubmit2(VkQueue queue, std::uint32_t submit_count, auto cbs = std::make_unique(); cbs->push_back(VkCommandBufferSubmitInfo{ .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_SUBMIT_INFO, - .commandBuffer = head_handle->command_buffer, + .commandBuffer = handle->get_start_buffer(), }); std::ranges::copy(std::span{submit.pCommandBufferInfos, submit.commandBufferInfoCount}, std::back_inserter(*cbs)); cbs->push_back(VkCommandBufferSubmitInfo{ .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_SUBMIT_INFO, - .commandBuffer = tail_handle->command_buffer, + .commandBuffer = handle->get_end_buffer(), }); return cbs; }()); diff --git a/src/strategies/anti_lag/queue_strategy.cc b/src/strategies/anti_lag/queue_strategy.cc index 0834a96..27a9337 100644 --- a/src/strategies/anti_lag/queue_strategy.cc +++ b/src/strategies/anti_lag/queue_strategy.cc @@ -56,7 +56,7 @@ void AntiLagQueueStrategy::await_complete() { return; } const auto& last = submissions.back(); - last->end->await_time(); + last->handle->await_end_time(); } // Stub - AntiLag doesn't care about presents. diff --git a/src/strategies/low_latency2/swapchain_monitor.cc b/src/strategies/low_latency2/swapchain_monitor.cc index 4c19251..b6d4dd0 100644 --- a/src/strategies/low_latency2/swapchain_monitor.cc +++ b/src/strategies/low_latency2/swapchain_monitor.cc @@ -59,7 +59,8 @@ void SwapchainMonitor::do_monitor(const std::stop_token stoken) { lock.unlock(); for (const auto& submission : semaphore_submission.submissions) { if (!submission.empty()) { - submission.back()->end->await_time(); + const auto& last = submission.back(); + last->handle->await_end_time(); } } diff --git a/src/submission.hh b/src/submission.hh index e14f3f0..ba721c2 100644 --- a/src/submission.hh +++ b/src/submission.hh @@ -8,7 +8,7 @@ namespace low_latency { class Submission { public: - std::shared_ptr start, end; + std::shared_ptr handle; DeviceClock::time_point_t time; }; diff --git a/src/timestamp_pool.cc b/src/timestamp_pool.cc index b7fe06b..cfe2528 100644 --- a/src/timestamp_pool.cc +++ b/src/timestamp_pool.cc @@ -37,8 +37,9 @@ TimestampPool::QueryChunk::QueryChunk(const QueueContext& queue_context) command_buffers(std::make_unique(queue_context)) { this->free_indices = []() { - constexpr auto keys = std::views::iota(0u, QueryChunk::CHUNK_SIZE); - return std::unordered_set(std::from_range, keys); + constexpr auto keys = std::views::iota(0u, QueryChunk::CHUNK_SIZE) | + std::views::stride(2u); + return std::unordered_set(std::from_range, keys); }(); } @@ -67,13 +68,6 @@ TimestampPool::QueryChunk::CommandBuffersOwner::~CommandBuffersOwner() { std::data(this->command_buffers)); } -VkCommandBuffer TimestampPool::QueryChunk::CommandBuffersOwner::operator[]( - const std::size_t& i) { - - assert(i < CHUNK_SIZE); - return this->command_buffers[i]; -} - TimestampPool::QueryChunk::~QueryChunk() {} TimestampPool::TimestampPool(QueueContext& queue_context) @@ -122,10 +116,35 @@ std::shared_ptr TimestampPool::acquire() { TimestampPool::Handle::Handle(TimestampPool& timestamp_pool, QueryChunk& query_chunk, - const std::uint64_t& query_index) + const std::uint32_t query_index) : timestamp_pool(timestamp_pool), query_chunk(query_chunk), - query_pool(*query_chunk.query_pool), query_index(query_index), - command_buffer((*query_chunk.command_buffers)[query_index]) {} + query_index(query_index) { + + const auto cbbi = VkCommandBufferBeginInfo{ + .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO, + }; + + const auto& device_context = this->timestamp_pool.queue_context.device; + const auto& vtable = device_context.vtable; + const auto command_buffers = + std::data(this->query_chunk.command_buffers->command_buffers); + + const auto rewrite_cmd = [&](const std::uint32_t offset, + const VkPipelineStageFlagBits2 bit) { + const auto& command_buffer = command_buffers[query_index + offset]; + const auto& query_pool = *this->query_chunk.query_pool; + const auto index = + static_cast(this->query_index) + offset; + vtable.ResetQueryPoolEXT(device_context.device, query_pool, index, 1); + THROW_NOT_VKSUCCESS(vtable.ResetCommandBuffer(command_buffer, 0)); + THROW_NOT_VKSUCCESS(vtable.BeginCommandBuffer(command_buffer, &cbbi)); + vtable.CmdWriteTimestamp2KHR(command_buffer, bit, query_pool, index); + THROW_NOT_VKSUCCESS(vtable.EndCommandBuffer(command_buffer)); + }; + + rewrite_cmd(0, VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT); + rewrite_cmd(1, VK_PIPELINE_STAGE_2_BOTTOM_OF_PIPE_BIT); +} TimestampPool::Handle::~Handle() {} @@ -145,7 +164,7 @@ void TimestampPool::do_reaper(const std::stop_token stoken) { // Allow more to go on the queue while we wait for it to finish. lock.unlock(); - handle_ptr->await_time(); + handle_ptr->await_end_time(); // Lock our mutex, allow the queue to use it again and delete it. lock.lock(); @@ -154,61 +173,30 @@ void TimestampPool::do_reaper(const std::stop_token stoken) { } } -void TimestampPool::Handle::write_command( - const VkPipelineStageFlagBits2 bit) const { - - const auto cbbi = VkCommandBufferBeginInfo{ - .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO, - }; - - const auto& device_context = this->timestamp_pool.queue_context.device; - const auto& vtable = device_context.vtable; - - vtable.ResetQueryPoolEXT(device_context.device, this->query_pool, - static_cast(this->query_index), 1); - - THROW_NOT_VKSUCCESS(vtable.ResetCommandBuffer(this->command_buffer, 0)); - THROW_NOT_VKSUCCESS(vtable.BeginCommandBuffer(this->command_buffer, &cbbi)); - - vtable.CmdWriteTimestamp2KHR(this->command_buffer, bit, this->query_pool, - static_cast(this->query_index)); - - THROW_NOT_VKSUCCESS(vtable.EndCommandBuffer(this->command_buffer)); +const VkCommandBuffer& TimestampPool::Handle::get_start_buffer() const { + const auto command_buffers = + std::data(this->query_chunk.command_buffers->command_buffers); + return command_buffers[this->query_index]; } -std::optional TimestampPool::Handle::get_time() { - const auto& context = this->timestamp_pool.queue_context.device; - const auto& vtable = context.vtable; - - auto query_result = std::array{}; - - const auto result = vtable.GetQueryPoolResults( - context.device, this->query_pool, - static_cast(this->query_index), 1, sizeof(query_result), - &query_result, sizeof(query_result), - VK_QUERY_RESULT_64_BIT | VK_QUERY_RESULT_WITH_AVAILABILITY_BIT); - - if (result != VK_SUCCESS && result != VK_NOT_READY) { - throw result; - } - - if (!query_result[1]) { - return std::nullopt; - } - - return context.clock->ticks_to_time(query_result[0]); +const VkCommandBuffer& TimestampPool::Handle::get_end_buffer() const { + const auto command_buffers = + std::data(this->query_chunk.command_buffers->command_buffers); + return command_buffers[this->query_index + 1]; } -DeviceClock::time_point_t TimestampPool::Handle::await_time() { +DeviceClock::time_point_t +TimestampPool::Handle::await_time_impl(const std::uint32_t offset) const { + const auto& context = this->timestamp_pool.queue_context.device; const auto& vtable = context.vtable; + const auto& query_pool = *this->query_chunk.query_pool; auto query_result = std::array{}; THROW_NOT_VKSUCCESS(vtable.GetQueryPoolResults( - context.device, this->query_pool, - static_cast(this->query_index), 1, sizeof(query_result), - &query_result, sizeof(query_result), + context.device, query_pool, this->query_index + offset, 1, + sizeof(query_result), &query_result, sizeof(query_result), VK_QUERY_RESULT_64_BIT | VK_QUERY_RESULT_WITH_AVAILABILITY_BIT | VK_QUERY_RESULT_WAIT_BIT)); assert(query_result[1]); @@ -216,6 +204,13 @@ DeviceClock::time_point_t TimestampPool::Handle::await_time() { return context.clock->ticks_to_time(query_result[0]); } +DeviceClock::time_point_t TimestampPool::Handle::await_start_time() const { + return this->await_time_impl(0); +} +DeviceClock::time_point_t TimestampPool::Handle::await_end_time() const { + return this->await_time_impl(1); +} + TimestampPool::~TimestampPool() {} } // namespace low_latency \ No newline at end of file diff --git a/src/timestamp_pool.hh b/src/timestamp_pool.hh index 038e9a4..9044864 100644 --- a/src/timestamp_pool.hh +++ b/src/timestamp_pool.hh @@ -45,6 +45,8 @@ class TimestampPool final { private: static constexpr auto CHUNK_SIZE = 512u; + // Should be even because we take two each time in our handles. + static_assert(CHUNK_SIZE % 2 == 0); private: struct QueryPoolOwner final { @@ -65,7 +67,7 @@ class TimestampPool final { }; struct CommandBuffersOwner final { - private: + public: const QueueContext& queue_context; std::vector command_buffers; @@ -76,15 +78,12 @@ class TimestampPool final { CommandBuffersOwner operator=(const CommandBuffersOwner&) = delete; CommandBuffersOwner operator=(CommandBuffersOwner&&) = delete; ~CommandBuffersOwner(); - - public: - VkCommandBuffer operator[](const std::size_t& i); }; std::unique_ptr query_pool; std::unique_ptr command_buffers; // A set of indices which are currently availabe in this chunk. - std::unordered_set free_indices; + std::unordered_set free_indices; public: QueryChunk(const QueueContext& queue_context); @@ -96,10 +95,11 @@ class TimestampPool final { }; public: - // A handle represents a VkCommandBuffer and a query index. It can be used - // to attach timing information to submissions. Once the Handle destructs - // the query index will be returned to the parent pool - but crucially only - // when Vulkan is done with it. + // A handle represents a VkCommandBuffer and a query index. + // It represents represents and provides both a start and end command + // buffer, which can attach start/end timing information to submissions. + // Once the Handle destructs the query index will be returned to the parent + // pool - but crucially only when Vulkan is done with it. struct Handle final { private: friend class TimestampPool; @@ -109,13 +109,11 @@ class TimestampPool final { QueryChunk& query_chunk; public: - const VkQueryPool query_pool; - const std::uint64_t query_index; - const VkCommandBuffer command_buffer; + const std::uint32_t query_index; public: Handle(TimestampPool& timestamp_pool, QueryChunk& query_chunk, - const std::uint64_t& query_index); + const std::uint32_t query_index); Handle(const Handle& handle) = delete; Handle operator=(const Handle& handle) = delete; Handle(Handle&&) = delete; @@ -123,15 +121,17 @@ class TimestampPool final { ~Handle(); public: - // Performs the Vulkan that sets up this command buffer for submission. - void write_command(const VkPipelineStageFlagBits2 bit) const; + const VkCommandBuffer& get_start_buffer() const; + const VkCommandBuffer& get_end_buffer() const; - public: - // Attempts to get the time - optional if it's not available yet. - std::optional get_time(); + private: + DeviceClock::time_point_t + await_time_impl(const std::uint32_t offset) const; + public: // Waits until the time is available and returns it. - DeviceClock::time_point_t await_time(); + DeviceClock::time_point_t await_start_time() const; + DeviceClock::time_point_t await_end_time() const; }; private: -- cgit v1.2.3