diff options
| author | Nicolas James <nj3ahxac@gmail.com> | 2026-02-18 14:51:21 +1100 |
|---|---|---|
| committer | Nicolas James <nj3ahxac@gmail.com> | 2026-02-18 14:51:21 +1100 |
| commit | bb7c9b56e1710a7f2a3f4bb57f181e4fa196aba2 (patch) | |
| tree | e9de8b88afcb339af9a7c87156511f6cc238f829 | |
| parent | b7414a760c183d4987090c110ddacca277983e17 (diff) | |
Fix missing cpu time between gpu presents, simplify timestamp->get_time()
| -rw-r--r-- | src/queue_context.cc | 78 | ||||
| -rw-r--r-- | src/queue_context.hh | 4 | ||||
| -rw-r--r-- | src/timestamp_pool.cc | 45 | ||||
| -rw-r--r-- | src/timestamp_pool.hh | 8 |
4 files changed, 77 insertions, 58 deletions
diff --git a/src/queue_context.cc b/src/queue_context.cc index 7121898..363149b 100644 --- a/src/queue_context.cc +++ b/src/queue_context.cc @@ -152,7 +152,6 @@ void QueueContext::notify_present(const VkPresentInfoKHR& info) { // completely redundant, in all cases it was exactly what we have here. But // I could be wrong. - // that wa const auto start_iter = std::begin(this->submissions); // no op submit? if (start_iter == std::end(this->submissions)) { @@ -164,13 +163,31 @@ void QueueContext::notify_present(const VkPresentInfoKHR& info) { (*start_iter)->debug += "first_during_present "; (*last_iter)->debug += "last_during_present "; - // TODO We probably want to walk through other graphics queues and get - // the time that they submitted at some stage. Somewhat difficult to do, - // we probably need to create a graph of dependencies but it seems like - // overengineering. I will just get this working well before I deal - // with complicated edge cases. + // The last submission is either in flight, already processed, or we + // just happen to be the first frame and we can just set it to our start + // with little conseuqence. + const auto prev_frame_last_submit = [&]() -> auto { + if (const auto iter = std::rbegin(this->in_flight_frames); + iter != std::rend(this->in_flight_frames)) { + + assert(!iter->submissions.empty()); + return iter->submissions.back(); + } + + if (const auto iter = std::rbegin(this->timings); + iter != std::rend(this->timings)) { + + const auto& submissions = (*iter)->frame.submissions; + assert(!submissions.empty()); + + return submissions.back(); + } + + return *start_iter; + }(); this->in_flight_frames.emplace_back(Frame{ + .prev_frame_last_submit = prev_frame_last_submit, .submissions = std::move(this->submissions), .sequence = (*last_iter)->sequence, }); @@ -240,9 +257,7 @@ void QueueContext::process_frames() { frame.submissions, std::back_inserter(intervals), [&, this](const auto& submission) { const auto get_time = [&, this](const auto& handle) { - const auto t = handle->get_ticks(*this->timestamp_pool); - assert(t.has_value()); // guaranteed from seq. - return clock.ticks_to_time(*t); + return handle->get_time(); }; return Interval{ @@ -280,10 +295,15 @@ void QueueContext::process_frames() { return gputime + (end - start); }); - const auto start = merged.front().start; + // The start should be the previous frame's last submission, NOT when we + // start here. Otherwise, we won't account for the time between the + // previous frame's last submission and our first submission, which + // could genuinely be a large period of time. So look for it in timings, + // because it's guaranteed to be there at this stage. Either that, or + // we're the first frame, and it doesn't really matter. + const auto start = frame.prev_frame_last_submit->end_handle->get_time(); const auto end = merged.back().end; - const auto not_gputime = - (merged.back().end - merged.front().start) - gputime; + const auto not_gputime = (end - start) - gputime; auto timing = Timing{ .gputime = gputime, @@ -304,7 +324,6 @@ void QueueContext::process_frames() { } } -using opt_time_point_t = std::optional<DeviceContext::Clock::time_point_t>; void QueueContext::sleep_in_present() { const auto& device = this->device_context; const auto& vtable = device.vtable; @@ -350,27 +369,20 @@ void QueueContext::sleep_in_present() { return; } - const auto expected_gputime = [&, this]() { + const auto calc_median = [&, this](const auto& getter) { auto vect = std::vector<Timing*>{}; std::ranges::transform(this->timings, std::back_inserter(vect), [](const auto& timing) { return timing.get(); }); - std::ranges::sort(vect, [](const auto& a, const auto& b) { - return a->gputime < b->gputime; + std::ranges::sort(vect, [&](const auto& a, const auto& b) { + return getter(a) < getter(b); }); - // return vect[0]->frametime; - return vect[std::size(vect) / 2]->gputime; - }(); + return getter(vect[std::size(vect) / 2]); + }; - const auto expected_not_gputime = [&, this]() { - auto vect = std::vector<Timing*>{}; - std::ranges::transform(this->timings, std::back_inserter(vect), - [](const auto& timing) { return timing.get(); }); - std::ranges::sort(vect, [](const auto& a, const auto& b) { - return a->not_gputime < b->not_gputime; - }); - // return vect[0]->frametime; - return vect[std::size(vect) / 2]->not_gputime; - }(); + const auto expected_gputime = + calc_median([](const auto& timing) { return timing->gputime; }); + const auto expected_not_gputime = + calc_median([](const auto& timing) { return timing->not_gputime; }); std::cerr << " expected gputime: "; debug_log_time(expected_gputime); @@ -413,13 +425,7 @@ void QueueContext::sleep_in_present() { // We now know that A is available because its semaphore has been // signalled. - - const auto a = [&, this]() { - const auto& handle = frame.submissions.front()->start_handle; - const auto ticks = handle->get_ticks(*this->timestamp_pool); - assert(ticks.has_value()); - return device_context.clock.ticks_to_time(*ticks); - }(); + const auto a = frame.prev_frame_last_submit->end_handle->get_time(); const auto now = std::chrono::steady_clock::now(); const auto dist = now - a; diff --git a/src/queue_context.hh b/src/queue_context.hh index ed13465..219e6fb 100644 --- a/src/queue_context.hh +++ b/src/queue_context.hh @@ -48,10 +48,12 @@ class QueueContext final : public Context { std::deque<submission_ptr_t> submissions; // In flight frame submissions grouped together. - // The first element in the vector refers to the first submission that + // The first element in the deque refers to the first submission that // contributed to that frame. The last element is the last submission before // present was called. + // std::size(submissions) >= 1 btw struct Frame { + submission_ptr_t prev_frame_last_submit; std::deque<submission_ptr_t> submissions; std::uint64_t sequence; }; diff --git a/src/timestamp_pool.cc b/src/timestamp_pool.cc index 01afe07..854fae1 100644 --- a/src/timestamp_pool.cc +++ b/src/timestamp_pool.cc @@ -76,18 +76,19 @@ std::shared_ptr<TimestampPool::Handle> TimestampPool::acquire() { const auto query_index = *std::begin(indices); assert(indices.erase(query_index)); - return std::make_shared<Handle>(*not_empty_iter, query_index); + return std::make_shared<Handle>(*this, *not_empty_iter, query_index); } -TimestampPool::Handle::Handle(const std::shared_ptr<QueryChunk>& origin_chunk, +TimestampPool::Handle::Handle(const TimestampPool& timestamp_pool, + const std::shared_ptr<QueryChunk>& origin_chunk, const std::uint64_t& query_index) - : query_pool(origin_chunk->query_pool), query_index(query_index), - origin_chunk(origin_chunk), + : timestamp_pool(timestamp_pool), query_pool(origin_chunk->query_pool), + query_index(query_index), origin_chunk(origin_chunk), command_buffer((*origin_chunk->command_buffers)[query_index]) {} TimestampPool::Handle::~Handle() { - // Parent destructing shouldn't mean we should have a bunch of insertions - // for zero reason. + // Parent destructing shouldn't mean we should have a bunch of + // insertions for zero reason. if (const auto ptr = this->origin_chunk.lock(); ptr) { assert(ptr->free_indices->insert(this->query_index).second); } @@ -122,29 +123,35 @@ void TimestampPool::Handle::setup_command_buffers( vtable.EndCommandBuffer(tail.command_buffer); } -std::optional<std::uint64_t> -TimestampPool::Handle::get_ticks(const TimestampPool& pool) { - - const auto& device_context = pool.queue_context.device_context; - const auto& vtable = device_context.vtable; +DeviceContext::Clock::time_point_t TimestampPool::Handle::get_time() { + const auto& device_ctx = this->timestamp_pool.queue_context.device_context; + const auto& vtable = device_ctx.vtable; + // For debug builds, we're going to query the availability bit so we can + // assert that after the semaphore has flagged it as naturally available. struct QueryResult { std::uint64_t value; +#ifndef NDEBUG std::uint64_t available; +#endif }; auto query_result = QueryResult{}; + constexpr auto query_flags = []() -> auto { + auto flag = VkQueryResultFlags{VK_QUERY_RESULT_64_BIT}; +#ifndef NDEBUG + flag |= VK_QUERY_RESULT_WITH_AVAILABILITY_BIT; +#endif + return flag; + }(); + const auto r = vtable.GetQueryPoolResults( - device_context.device, query_pool, this->query_index, 1, - sizeof(query_result), &query_result, sizeof(query_result), - VK_QUERY_RESULT_64_BIT | VK_QUERY_RESULT_WITH_AVAILABILITY_BIT); + device_ctx.device, query_pool, this->query_index, 1, + sizeof(query_result), &query_result, sizeof(query_result), query_flags); - assert(r == VK_SUCCESS || r == VK_NOT_READY); + assert(r == VK_SUCCESS && query_result.available); - if (!query_result.available) { - return std::nullopt; - } - return query_result.value; + return device_ctx.clock.ticks_to_time(query_result.value); } TimestampPool::~TimestampPool() { diff --git a/src/timestamp_pool.hh b/src/timestamp_pool.hh index 681c9e2..b7aa54e 100644 --- a/src/timestamp_pool.hh +++ b/src/timestamp_pool.hh @@ -18,6 +18,8 @@ #include <unordered_set> #include <vector> +#include "device_context.hh" + namespace low_latency { class QueueContext; @@ -58,6 +60,7 @@ class TimestampPool final { friend class TimestampPool; private: + const TimestampPool& timestamp_pool; const std::weak_ptr<QueryChunk> origin_chunk; public: @@ -66,7 +69,8 @@ class TimestampPool final { const VkCommandBuffer command_buffer; public: - Handle(const std::shared_ptr<QueryChunk>& origin_chunk, + Handle(const TimestampPool& timestamp_pool, + const std::shared_ptr<QueryChunk>& origin_chunk, const std::uint64_t& query_index); Handle(const Handle& handle) = delete; Handle(Handle&&) = delete; @@ -78,7 +82,7 @@ class TimestampPool final { void setup_command_buffers(const Handle& tail, const QueueContext& queue_context) const; - std::optional<std::uint64_t> get_ticks(const TimestampPool& pool); + DeviceContext::Clock::time_point_t get_time(); }; public: |
