diff options
Diffstat (limited to 'src/queue_context.cc')
| -rw-r--r-- | src/queue_context.cc | 296 |
1 files changed, 290 insertions, 6 deletions
diff --git a/src/queue_context.cc b/src/queue_context.cc index 930b0c5..9b46773 100644 --- a/src/queue_context.cc +++ b/src/queue_context.cc @@ -2,6 +2,9 @@ #include "device_context.hh" #include "timestamp_pool.hh" +#include <chrono> +#include <iostream> + namespace low_latency { static VkCommandPool @@ -51,12 +54,9 @@ QueueContext::QueueContext(DeviceContext& device_context, const VkQueue& queue, timestamp_pool(std::make_unique<TimestampPool>(*this)) {} QueueContext::~QueueContext() { - - // nuke our handles, so we avoid segfaults for now - this->handle_hack.clear(); - - // Ugly - destructors of timestamp_pool should be called before we destroy - // our vulkan objects. + + this->in_flight_frames.clear(); + this->submissions.clear(); this->timestamp_pool.reset(); const auto& vtable = this->device_context.vtable; @@ -66,4 +66,288 @@ QueueContext::~QueueContext() { nullptr); } +void QueueContext::notify_submit( + std::span<const VkSubmitInfo> infos, + const std::uint64_t target_semaphore_sequence, + std::shared_ptr<TimestampPool::Handle>&& handle) { + + // This has an issue where we're collecting all signals and waits and + // treating a single submit call as finishing + + auto signals = std::unordered_set<VkSemaphore>{}; + auto waits = std::unordered_set<VkSemaphore>{}; + for (const auto& info : infos) { + std::ranges::copy_n(info.pWaitSemaphores, info.waitSemaphoreCount, + std::inserter(waits, std::end(waits))); + std::ranges::copy_n(info.pSignalSemaphores, info.signalSemaphoreCount, + std::inserter(signals, std::end(signals))); + } + + this->submissions.emplace_back(std::make_unique<Submission>( + std::move(signals), std::move(waits), target_semaphore_sequence, + std::move(handle))); + + // TODO HACK + if (std::size(this->submissions) > 100) { + this->submissions.pop_front(); + } +} + +void QueueContext::notify_submit( + std::span<const VkSubmitInfo2> infos, + const std::uint64_t target_semaphore_sequence, + std::shared_ptr<TimestampPool::Handle>&& handle) { + + auto signals = std::unordered_set<VkSemaphore>{}; + auto waits = std::unordered_set<VkSemaphore>{}; + for (const auto& info : infos) { + constexpr auto get_semaphore = [](const auto& semaphore_info) { + return semaphore_info.semaphore; + }; + std::ranges::transform(info.pSignalSemaphoreInfos, + std::next(info.pSignalSemaphoreInfos, + info.signalSemaphoreInfoCount), + std::inserter(signals, std::end(signals)), + get_semaphore); + std::ranges::transform( + info.pWaitSemaphoreInfos, + std::next(info.pWaitSemaphoreInfos, info.waitSemaphoreInfoCount), + std::inserter(waits, std::end(waits)), get_semaphore); + } + + this->submissions.emplace_back(std::make_unique<Submission>( + std::move(signals), std::move(waits), target_semaphore_sequence, + std::move(handle))); + + // TODO HACK + if (std::size(this->submissions) > 100) { + this->submissions.pop_front(); + } +} + +void QueueContext::notify_present(const VkPresentInfoKHR& info) { + + auto frame = [&]() -> std::unique_ptr<Frame> { + const auto waits = [&]() { + auto waits = std::unordered_set<VkSemaphore>{}; + std::ranges::copy_n(info.pWaitSemaphores, info.waitSemaphoreCount, + std::inserter(waits, std::end(waits))); + return waits; + }(); + + const auto wait_semaphores = std::unordered_set<VkSemaphore>{ + info.pWaitSemaphores, + std::next(info.pWaitSemaphores, info.waitSemaphoreCount)}; + + auto collected_semaphores = std::unordered_set<VkSemaphore>{}; + for (auto i = std::uint32_t{0}; i < info.swapchainCount; ++i) { + const auto& swapchain = info.pSwapchains[i]; + const auto& index = info.pImageIndices[i]; + + // Shouldn't be possible to present to a swapchain that wasn't + // waited in + + const auto& signals = this->device_context.swapchain_signals; + const auto swapchain_it = signals.find(swapchain); + assert(swapchain_it != std::end(signals)); + const auto index_it = swapchain_it->second.find(index); + assert(index_it != std::end(swapchain_it->second)); + + const auto semaphore = index_it->second; + collected_semaphores.emplace(index_it->second); + } + + const auto start_submission_it = std::ranges::find_if( + std::rbegin(this->submissions), std::rend(this->submissions), + [&](const auto& submission) { + return std::ranges::any_of( + submission->waits, [&](const auto& wait) { + return collected_semaphores.contains(wait); + }); + }); + + if (start_submission_it == std::rend(this->submissions)) { + std::cout << "couldn't find starting submission!\n"; + return nullptr; + } + const auto& start_submission = *start_submission_it; + + const auto end_submission_it = std::ranges::find_if( + std::rbegin(this->submissions), std::rend(this->submissions), + [&](const auto& submission) { + return std::ranges::any_of( + submission->signals, [&](const auto& signal) { + return wait_semaphores.contains(signal); + }); + }); + + if (end_submission_it == std::rend(this->submissions)) { + std::cout << "couldn't find ending submission!\n"; + return nullptr; + } + const auto& end_submission = *end_submission_it; + + return std::make_unique<Frame>(Frame{ + .start_context = *this, + .start = start_submission->timestamp_handle, + .target_start_sequence = + start_submission->target_semaphore_sequence, + .end_context = *this, + .end = start_submission->timestamp_handle, + .target_end_sequence = start_submission->target_semaphore_sequence, + }); + }(); + + this->in_flight_frames.emplace_back(std::move(frame)); + + // hack + if (this->in_flight_frames.size() > 5) { + this->in_flight_frames.pop_front(); + } +} + +// now it's all coming together +std::optional<QueueContext::duration_t> QueueContext::get_delay_time() { + if (!std::size(this->in_flight_frames)) { + return std::nullopt; + } + + auto seq = std::uint64_t{}; + this->device_context.vtable.GetSemaphoreCounterValueKHR( + this->device_context.device, this->semaphore, &seq); + + // Get semaphore first, then poll! + this->timestamp_pool->poll(); + + // idk how frequently we should call this. + this->device_context.calibrate_timestamps(); + + static auto gpu_frametimes = std::deque<uint64_t>{}; + static auto cpu_frametimes = std::deque<uint64_t>{}; + + const auto S = std::size(this->in_flight_frames); + + std::cout << "\nSTART FRAME READOUT\n"; + std::cout << "error bound: " << this->device_context.clock.error_bound + << '\n'; + std::cout << "num frames in flight: " << S << '\n'; + std::cout << "from oldest -> newest\n"; + + // const auto b_seq = semaphore_from_context(*this); + const auto now = std::chrono::steady_clock::now(); + + auto i = std::size_t{0}; + for (; i < std::size(this->in_flight_frames); ++i) { + const auto& frame = this->in_flight_frames[i]; + std::cout << " Evaluating the frame that's " << S - i - 1 + << " behind\n"; + if (!frame) { + std::cout << " nullptr!\n"; + continue; + } + + std::cout << " target start: " << frame->target_start_sequence << '\n'; + std::cout << " target end: " << frame->target_end_sequence << '\n'; + if (seq < frame->target_start_sequence) { + std::cout << " frame hasn't started yet!\n"; + continue; + } + + const auto start_ticks = + frame->start_context.timestamp_pool->get_polled(*frame->start); + std::cout << " START TICKS: " << start_ticks << '\n'; + const auto& a_clock = frame->start_context.device_context.clock; + const auto a = a_clock.ticks_to_time(start_ticks); + + { + using namespace std::chrono; + const auto diff = now - a; + const auto ms = duration_cast<milliseconds>(diff); + const auto us = duration_cast<microseconds>(diff - ms); + const auto ns = duration_cast<nanoseconds>(diff - ms - us); + std::cout << " frame started: " << ms << " ms " << us + << " us " << ns << " ns ago\n"; + } + + if (seq < frame->target_end_sequence) { + std::cout << " frame hasn't ended yet!\n"; + continue; + } + + + const auto end_ticks = + frame->end_context.timestamp_pool->get_polled(*frame->end, true); + const auto& b_clock = frame->end_context.device_context.clock; + std::cout << " END_TICKS: " << end_ticks << '\n'; + const auto b = b_clock.ticks_to_time(end_ticks); + { + using namespace std::chrono; + if (now <= b) { + std::cout << "b happened before now?\n"; + } + const auto diff = now - b; + const auto ms = duration_cast<milliseconds>(diff); + const auto us = duration_cast<microseconds>(diff - ms); + const auto ns = duration_cast<nanoseconds>(diff - ms - us); + std::cout << " frame ended: " << ms << " ms " << us + << " us " << ns << " ns ago\n"; + } + + const auto gpu_time = b - a; + { + using namespace std::chrono; + const auto diff = gpu_time; + const auto ms = duration_cast<milliseconds>(diff); + const auto us = duration_cast<microseconds>(diff - ms); + const auto ns = duration_cast<nanoseconds>(diff - ms - us); + std::cout << " gpu_time: " << ms << " ms " << us + << " us " << ns << " ns ago\n"; + } + + /* + cpu_frametimes.emplace_back(cpu_time); + gpu_frametimes.emplace_back(gpu_time); + */ + } + + /* + if (remove_index.has_value()) { + this->in_flight_frames.erase(std::begin(this->in_flight_frames), + std::begin(this->in_flight_frames) + + *remove_index); + } + */ + + /* + auto g_copy = gpu_frametimes; + auto c_copy = cpu_frametimes; + std::ranges::sort(g_copy); + std::ranges::sort(c_copy); + + constexpr auto N = 49; + if (std::size(cpu_frametimes) < N) { + return std::nullopt; + } + + const auto F = std::size(g_copy); + // close enough to median lol + const auto g = g_copy[F / 2]; + const auto c = c_copy[F / 2]; + + std::cout << g << '\n'; + + std::cout << " median gpu: " << (g / 1'000'000) << " ms " << g / 1'000 + << " us " << g << " ns\n"; + std::cout << " median cpu: " << c / 1'000'000 << " ms " << c / 1'000 + << " us " << c << " ns\n"; + + if (F > N) { + gpu_frametimes.pop_front(); + cpu_frametimes.pop_front(); + } + */ + + return std::nullopt; +} + } // namespace low_latency
\ No newline at end of file |
