diff options
| author | Nicolas James <nj3ahxac@gmail.com> | 2026-03-29 20:44:23 +1100 |
|---|---|---|
| committer | Nicolas James <nj3ahxac@gmail.com> | 2026-03-29 20:44:23 +1100 |
| commit | 681bd5096ee416b50dd7338de30af7b3db385a36 (patch) | |
| tree | 358b6bc6f9a3af66729b8ac3b15dd38cc0f4bd2a /src/queue_context.cc | |
| parent | d5ef2dbbd77c69dd93e92d5b7046a65c2361b59b (diff) | |
Implement Reflex - break AntiLag in the process. Remove AntiLag1. WIP
Diffstat (limited to 'src/queue_context.cc')
| -rw-r--r-- | src/queue_context.cc | 352 |
1 files changed, 33 insertions, 319 deletions
diff --git a/src/queue_context.cc b/src/queue_context.cc index d12f03d..30e73c1 100644 --- a/src/queue_context.cc +++ b/src/queue_context.cc @@ -3,11 +3,6 @@ #include "layer_context.hh" #include "timestamp_pool.hh" -#include <algorithm> -#include <chrono> -#include <fstream> -#include <iostream> -#include <ranges> #include <span> #include <vulkan/vulkan_core.h> @@ -49,333 +44,52 @@ QueueContext::QueueContext(DeviceContext& device_context, const VkQueue& queue, } QueueContext::~QueueContext() { - this->in_flight_frames.clear(); - this->submissions.clear(); + this->unpresented_submissions.clear(); this->timestamp_pool.reset(); } void QueueContext::notify_submit( - const VkSubmitInfo& info, + const present_id_t& present_id, const std::shared_ptr<TimestampPool::Handle> head_handle, const std::shared_ptr<TimestampPool::Handle> tail_handle, - const DeviceContext::Clock::time_point_t& now) { - - auto signals = std::unordered_set<VkSemaphore>{}; - auto waits = std::unordered_set<VkSemaphore>{}; - std::ranges::copy(std::span{info.pWaitSemaphores, info.waitSemaphoreCount}, - std::inserter(waits, std::end(waits))); - std::ranges::copy( - std::span{info.pSignalSemaphores, info.signalSemaphoreCount}, - std::inserter(signals, std::end(signals))); - - this->submissions.emplace_back(std::make_unique<Submission>( - std::move(signals), std::move(waits), head_handle, tail_handle, now)); - - if (std::size(this->submissions) > this->MAX_TRACKED_SUBMISSIONS) { - this->submissions.pop_front(); - } -} - -// Identical to notify_submit, but we use VkSubmitInfo2. -void QueueContext::notify_submit( - const VkSubmitInfo2& info, - const std::shared_ptr<TimestampPool::Handle> head_handle, - const std::shared_ptr<TimestampPool::Handle> tail_handle, - const DeviceContext::Clock::time_point_t& now) { - - auto signals = std::unordered_set<VkSemaphore>{}; - auto waits = std::unordered_set<VkSemaphore>{}; - - std::ranges::transform( - std::span{info.pWaitSemaphoreInfos, info.waitSemaphoreInfoCount}, - std::inserter(waits, std::end(waits)), - [](const auto& info) -> auto { return info.semaphore; }); - - std::ranges::transform( - std::span{info.pSignalSemaphoreInfos, info.signalSemaphoreInfoCount}, - std::inserter(signals, std::end(signals)), - [](const auto& info) -> auto { return info.semaphore; }); - - this->submissions.emplace_back(std::make_unique<Submission>( - std::move(signals), std::move(waits), head_handle, tail_handle, now)); - - if (std::size(this->submissions) > this->MAX_TRACKED_SUBMISSIONS) { - this->submissions.pop_front(); - } -} - -void QueueContext::drain_submissions_to_frame() { - - // We are going to assume that all queue submissions before and on the same - // queue contribute to the frame. - - // This used to be more complicated where we found the first submission that - // was signalled by acquire, then we walked forwards until we found the - // submission before it that marked the end of frame (which was the last - // submission in the previous frame that called notify submit). This seemed - // completely redundant, in all cases it was exactly what we have here. But - // I could be wrong. - - const auto start_iter = std::begin(this->submissions); - // no op submit? - if (start_iter == std::end(this->submissions)) { - return; + const DeviceClock::time_point_t& now) { + + // Push this submission onto our unpresented_submissions at our present_id + // mapping (might be empty, but handled with operator[]). + auto& submissions = this->unpresented_submissions[present_id]; + if (submissions == nullptr) { + submissions = + std::make_shared<std::deque<std::unique_ptr<Submission>>>(); } - // The last submission is either in flight, already processed, or we - // just happen to be the first frame and we can just set it to our start - // with little consequence. - const auto prev_frame_last_submit = [&]() -> auto { - if (const auto iter = std::rbegin(this->in_flight_frames); - iter != std::rend(this->in_flight_frames)) { - - assert(!iter->submissions.empty()); - return iter->submissions.back(); - } - - if (const auto iter = std::rbegin(this->timings); - iter != std::rend(this->timings)) { + submissions->push_back( + std::make_unique<Submission>(Submission{.head_handle = head_handle, + .tail_handle = tail_handle, + .cpu_present_time = now})); - const auto& submissions = (*iter)->frame.submissions; - assert(!submissions.empty()); - - return submissions.back(); - } - - return *start_iter; - }(); - - this->in_flight_frames.emplace_back( - Frame{.submissions = std::move(this->submissions), - .cpu_post_present_time = DeviceContext::Clock::now()}); - assert(std::size(this->in_flight_frames.back().submissions)); - // *valid but unspecified state after move, so clear!* - this->submissions.clear(); -} - -void QueueContext::notify_present(const VkPresentInfoKHR& info) { - this->drain_submissions_to_frame(); - this->drain_frames_to_timings(); - - // We should only sleep in present if two conditions are met: - // 1. Our antilag_mode isn't set to on, because otherwise the sleep will - // be done in input and with far better results. - // 2. The 'is_antilag_1_enabled' flag, which exists at the layer's - // context, is set. - // - /* - * WIP REFLEX - if (this->device_context.antilag_mode != VK_ANTI_LAG_MODE_ON_AMD && - this->device_context.instance.layer.is_antilag_1_enabled) { - - this->sleep_in_present(); + // This is probably hit if our queue never actually presents to anything, + // because the only time we manually evict our unpresent_submissions is + // when we present to something. + if (std::size(*submissions) > this->MAX_TRACKED_SUBMISSIONS) { + submissions->pop_front(); } - */ } -const auto debug_log_time2 = [](auto& stream, const auto& diff) { - using namespace std::chrono; - const auto ms = duration_cast<milliseconds>(diff); - const auto us = duration_cast<microseconds>(diff - ms); - const auto ns = duration_cast<nanoseconds>(diff - ms - us); - stream << ms << " " << us << " " << ns << " ago\n"; -}; +void QueueContext::notify_present(const VkSwapchainKHR& swapchain, + const present_id_t& present_id) { -void QueueContext::drain_frames_to_timings() { - if (!std::size(this->in_flight_frames)) { - return; + // Notify the device that this swapchain was just presented to. + // We're avoiding a double hash here - don't use operator[] and erase. + auto iter = this->unpresented_submissions.try_emplace(present_id).first; + if (iter->second == nullptr) { + iter->second = + std::make_shared<std::deque<std::unique_ptr<Submission>>>(); } - // Only need to calibrate this device, we don't support multi device anti - // lag. - this->device_context.clock->calibrate(); - - while (std::size(this->in_flight_frames)) { - const auto& frame = this->in_flight_frames.front(); - - assert(std::size(frame.submissions)); - - const auto& last_submission = frame.submissions.back(); - - // Not completed (so future frames definitely aren't) - stop early. - if (!last_submission->end_handle->get_time().has_value()) { - break; - } - - // We are committed to removing the frame at this stage and - // promoting it to a 'timing' struct because it's completed. - // We can guarantee that we can extract timing information from - // all start/end handles now. - - // Using leetcode merge intervals in the wild lol - struct Interval { - DeviceContext::Clock::time_point_t start, end; - }; - - const auto sorted_intervals = [&]() -> auto { - auto intervals = std::vector<Interval>{}; - std::ranges::transform( - frame.submissions, std::back_inserter(intervals), - [&](const auto& submission) { - return Interval{ - .start = submission->start_handle->get_time_required(), - .end = submission->end_handle->get_time_required(), - }; - }); - - std::ranges::sort(intervals, [](const auto& a, const auto& b) { - return a.start < b.start; - }); - return intervals; - }(); - - const auto merged = [&]() -> auto { - auto merged = std::vector<Interval>{}; - auto last = sorted_intervals[0]; - - for (const auto& [s, e] : sorted_intervals | std::views::drop(1)) { - if (s <= last.end) { - last.end = std::max(last.end, e); - } else { - merged.push_back(last); - last = {s, e}; - } - } - merged.push_back(last); - return merged; - }(); - - // It's important to note that gputime starts from a point which isn't - // equal to the below 'start' var. It looks something like this, where a - // '-' represents CPU time only and '=' represents CPU + GPU. - // - // |---------------------|=========|--------|====|-----------------| - // ^ last_present ^ merged.front().start present ^ - // merged.back().end ^ - // - // I would imagine there would be more GPU than cpu to reach the anti - // lag codepath than is depicted here. We can track the total time - // between vkPresent calls as future_submit - last_submit. The total - // time the GPU spent engaged is the sum of all intervals. So we can - // get a meaningful 'not_gputime' as total - gpu_time. - - const auto gputime = std::ranges::fold_left( - merged, DeviceContext::Clock::time_point_t::duration{}, - [](auto gputime, const auto& interval) { - const auto& [start, end] = interval; - return gputime + (end - start); - }); - - // Our cpu_start value here refers to the time when the CPU was allowed - // to move past the present call and, in theory, begin cpu work on the - // next frame. - const auto cpu_start = [&]() -> auto { - if (const auto it = std::rbegin(this->timings); - it != std::rend(this->timings)) { - - return (*it)->frame.cpu_post_present_time; - } - // This will happen once, only for the first frame. We don't - // have a way of knowing when the CPU first started work here. - // Just return our first submit's start for this edge case. - return frame.submissions.front()->start_handle->get_time_required(); - }(); - - const auto cputime = - frame.submissions.front()->enqueued_time - cpu_start; - - this->timings.emplace_back(std::make_unique<Timing>(Timing{ - .gputime = gputime, - .cputime = cputime, - .frame = frame, - })); - - this->in_flight_frames.pop_front(); - } - - if (const auto T = std::size(this->timings); - T > this->MAX_TRACKED_TIMINGS) { - - const auto erase_to_iter = - std::next(std::begin(this->timings), - static_cast<long>(T - MAX_TRACKED_TIMINGS)); - this->timings.erase(std::begin(this->timings), erase_to_iter); - } -} - -void QueueContext::sleep_in_present() { - // After calling this, any remaining frames are truly in flight. - this->drain_frames_to_timings(); - if (!std::size(this->in_flight_frames)) { - return; - } - - // This is getting the most recent frame and waiting until its start has - // begun. This means that, in the case of >1 frame in flight, it's draining - // all of them before we're allowed to move forward. - const auto first_gpu_work = [&]() -> auto { - const auto& most_recent_frame = this->in_flight_frames.back(); - const auto& first_submission = most_recent_frame.submissions.front(); - return first_submission->start_handle->get_time_spinlock(); - }(); - - // Drain frames again because as stated above, we might have multiple frames - // now completed after our wait spinlock. - this->drain_frames_to_timings(); - - // Check the size again because the frame we want to target may have already - // completed when we called process_frames(). - if (!std::size(this->in_flight_frames)) { - return; - } - assert(std::size(this->in_flight_frames) == 1); - - // Not enough data yet to apply any delays. - if (std::size(this->timings) < this->MAX_TRACKED_TIMINGS) { - return; - } - - const auto calc_median = [&, this](const auto& getter) { - auto vect = std::vector<Timing*>{}; - std::ranges::transform(this->timings, std::back_inserter(vect), - [](const auto& timing) { return timing.get(); }); - std::ranges::sort(vect, [&](const auto& a, const auto& b) { - return getter(a) < getter(b); - }); - return getter(vect[std::size(vect) / 2]); - }; - - const auto expected_gputime = - calc_median([](const auto& timing) { return timing->gputime; }); - const auto expected_cputime = - calc_median([](const auto& timing) { return timing->cputime; }); - - // Should look like this: - // total_length = expected_gputime - // |------------------------x------------------------------| - // ^ first_gpu_work now last_gpu_work ^ - - const auto now = DeviceContext::Clock::now(); - const auto dist = now - first_gpu_work; - const auto expected_dist_to_last = expected_gputime - dist; - - const auto wait_time = expected_dist_to_last - expected_cputime; - - auto& frame = this->in_flight_frames.back(); - const auto& last_gpu_work = frame.submissions.back()->end_handle; - last_gpu_work->get_time_spinlock(now + wait_time); - - frame.cpu_post_present_time = std::chrono::steady_clock::now(); + this->device_context.notify_present(swapchain, iter->second); - std::ofstream f("/tmp/times.txt", std::ios::trunc); - f << " expected gputime: "; - debug_log_time2(f, expected_gputime); - f << " expected cputime: "; - debug_log_time2(f, expected_cputime); - f << " requestd sleep: "; - debug_log_time2(f, wait_time); - f << " observed sleep: "; - debug_log_time2(f, frame.cpu_post_present_time - now); + // Important, we nuke the submission because now it's presented. + this->unpresented_submissions.erase(iter); } bool QueueContext::should_inject_timestamps() const { @@ -385,9 +99,9 @@ bool QueueContext::should_inject_timestamps() const { return false; } - // Don't bother injecting timestamps during queue submission if both AL1 and - // AL2 are disabled. - if (!this->device_context.was_antilag_requested && + // Don't bother injecting timestamps during queue submission if we + // aren't planning on doing anything anyway. + if (!this->device_context.was_capability_requested && !physical_device.instance.layer.is_antilag_1_enabled) { return false; |
