diff options
| -rw-r--r-- | src/device_context.cc | 8 | ||||
| -rw-r--r-- | src/layer.cc | 8 | ||||
| -rw-r--r-- | src/queue_context.cc | 37 | ||||
| -rw-r--r-- | src/queue_context.hh | 6 |
4 files changed, 31 insertions, 28 deletions
diff --git a/src/device_context.cc b/src/device_context.cc index b8d3482..67af3aa 100644 --- a/src/device_context.cc +++ b/src/device_context.cc @@ -53,14 +53,6 @@ void DeviceContext::Clock::calibrate() { }; auto calibrated_result = CalibratedResult{}; - // we probably want to use this instead bc clock_gettime isn't guaranteed - // by steady clock afaik - /* - struct timespec tv; - clock_gettime(CLOCK_MONOTONIC, &tv); - return tv.tv_nsec + tv.tv_sec*1000000000ull; - */ - const auto steady_before = std::chrono::steady_clock::now(); device.vtable.GetCalibratedTimestampsKHR(device.device, 2, std::data(infos), &calibrated_result.device, diff --git a/src/layer.cc b/src/layer.cc index 24ee72e..e2777b6 100644 --- a/src/layer.cc +++ b/src/layer.cc @@ -600,14 +600,6 @@ vkQueuePresentKHR(VkQueue queue, const VkPresentInfoKHR* present_info) { queue_context->notify_present(*present_info); } - const auto debug_log_time = [](const auto& diff) { - using namespace std::chrono; - const auto ms = duration_cast<milliseconds>(diff); - const auto us = duration_cast<microseconds>(diff - ms); - const auto ns = duration_cast<nanoseconds>(diff - ms - us); - std::cerr << ms << " " << us << " " << ns << "\n"; - }; - queue_context->sleep_in_present(); return VK_SUCCESS; diff --git a/src/queue_context.cc b/src/queue_context.cc index 342f393..4c8f776 100644 --- a/src/queue_context.cc +++ b/src/queue_context.cc @@ -319,14 +319,23 @@ void QueueContext::process_frames() { const auto frametime = *b - *a; + const auto cpu_time = + [&]() -> DeviceContext::Clock::time_point_t::duration { + const auto latest_iter = std::rbegin(this->timings); + if (latest_iter == std::rend(this->timings)) { + return DeviceContext::Clock::time_point_t::duration::zero(); + } + return *a - (*latest_iter)->gpu_end; + }(); + std::cerr << " calculated total time from last frame (frametime): "; debug_log_time(*b - *a); this->timings.emplace_back(std::make_unique<Timing>( - Timing{.gpu_start = *a, - .gpu_end = *b, + 
Timing{.gpu_end = *b, .gpu_time = frametime, + .cpu_time = cpu_time, .frame = std::move(this->in_flight_frames.front())})); this->in_flight_frames.pop_front(); } @@ -393,12 +402,25 @@ void QueueContext::sleep_in_present() { // return vect[0]->frametime; return vect[std::size(vect) / 2]->gpu_time; }(); + + const auto expected_cputime = [&, this]() { + auto vect = std::vector<Timing*>{}; + std::ranges::transform(this->timings, std::back_inserter(vect), + [](const auto& timing) { return timing.get(); }); + std::ranges::sort(vect, [](const auto& a, const auto& b) { + return a->cpu_time < b->cpu_time; + }); + // middle element after sorting by cpu_time (upper median for even sizes) + return vect[std::size(vect) / 2]->cpu_time; + }(); std::cerr << " expected gputime: "; debug_log_time(expected_gputime); + std::cerr << " expected cputime: "; + debug_log_time(expected_cputime); - // PRESENT CALL - // |-------------------------|---------------------------| - // ^a ^swap_acquire ^b + // PRESENT CALL + // |--------------|-------------------|----------------| + // a swap_acquire b c // // Us, the CPU on the host, is approximately at 'b'. 
// We have a good guess for the distance between @@ -432,7 +454,7 @@ void QueueContext::sleep_in_present() { const auto now = std::chrono::steady_clock::now(); const auto dist = now - a; - const auto expected = expected_gputime - dist; + const auto expected = expected_gputime - dist - expected_cputime; const auto swi = VkSemaphoreWaitInfo{ .sType = VK_STRUCTURE_TYPE_SEMAPHORE_WAIT_INFO, @@ -440,7 +462,8 @@ void QueueContext::sleep_in_present() { .pSemaphores = &this->semaphore, .pValues = &frame->end.sequence, }; - vtable.WaitSemaphoresKHR(device.device, &swi, std::max(expected.count(), 0l)); + vtable.WaitSemaphoresKHR(device.device, &swi, + std::max(expected.count(), 0l)); /* diff --git a/src/queue_context.hh b/src/queue_context.hh index 356de33..172683d 100644 --- a/src/queue_context.hh +++ b/src/queue_context.hh @@ -22,8 +22,6 @@ class QueueContext final : public Context { const VkQueue queue; const std::uint32_t queue_family_index; - // I used to use these to signal when we could read timestamps until - // I realised you could use hostQueryReset. std::uint64_t semaphore_sequence = 0; VkSemaphore semaphore; @@ -64,11 +62,9 @@ class QueueContext final : public Context { std::deque<std::unique_ptr<Frame>> in_flight_frames; struct Timing { - DeviceContext::Clock::time_point_t gpu_start; DeviceContext::Clock::time_point_t gpu_end; - DeviceContext::Clock::time_point_t::duration cpu_time; - DeviceContext::Clock::time_point_t::duration gpu_time; + DeviceContext::Clock::time_point_t::duration gpu_time, cpu_time; std::unique_ptr<Frame> frame; }; |
