aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--src/device_context.cc8
-rw-r--r--src/layer.cc8
-rw-r--r--src/queue_context.cc37
-rw-r--r--src/queue_context.hh6
4 files changed, 31 insertions, 28 deletions
diff --git a/src/device_context.cc b/src/device_context.cc
index b8d3482..67af3aa 100644
--- a/src/device_context.cc
+++ b/src/device_context.cc
@@ -53,14 +53,6 @@ void DeviceContext::Clock::calibrate() {
};
auto calibrated_result = CalibratedResult{};
- // we probably want to use this instead bc clock_gettime isn't guaranteed
- // by steady clock afaik
- /*
- struct timespec tv;
- clock_gettime(CLOCK_MONOTONIC, &tv);
- return tv.tv_nsec + tv.tv_sec*1000000000ull;
- */
-
const auto steady_before = std::chrono::steady_clock::now();
device.vtable.GetCalibratedTimestampsKHR(device.device, 2, std::data(infos),
&calibrated_result.device,
diff --git a/src/layer.cc b/src/layer.cc
index 24ee72e..e2777b6 100644
--- a/src/layer.cc
+++ b/src/layer.cc
@@ -600,14 +600,6 @@ vkQueuePresentKHR(VkQueue queue, const VkPresentInfoKHR* present_info) {
queue_context->notify_present(*present_info);
}
- const auto debug_log_time = [](const auto& diff) {
- using namespace std::chrono;
- const auto ms = duration_cast<milliseconds>(diff);
- const auto us = duration_cast<microseconds>(diff - ms);
- const auto ns = duration_cast<nanoseconds>(diff - ms - us);
- std::cerr << ms << " " << us << " " << ns << "\n";
- };
-
queue_context->sleep_in_present();
return VK_SUCCESS;
diff --git a/src/queue_context.cc b/src/queue_context.cc
index 342f393..4c8f776 100644
--- a/src/queue_context.cc
+++ b/src/queue_context.cc
@@ -319,14 +319,23 @@ void QueueContext::process_frames() {
const auto frametime = *b - *a;
+        using cpu_gap_t = DeviceContext::Clock::time_point_t::duration;
+        // Gap between the newest recorded gpu_end and this frame's *a.
+        const auto cpu_time = [&]() -> cpu_gap_t {
+            if (std::empty(this->timings)) {
+                return cpu_gap_t::zero(); // nothing recorded yet to measure from
+            }
+            return *a - this->timings.back()->gpu_end;
+        }();
+
std::cerr
<< " calculated total time from last frame (frametime): ";
debug_log_time(*b - *a);
this->timings.emplace_back(std::make_unique<Timing>(
- Timing{.gpu_start = *a,
- .gpu_end = *b,
+ Timing{.gpu_end = *b,
.gpu_time = frametime,
+ .cpu_time = cpu_time,
.frame = std::move(this->in_flight_frames.front())}));
this->in_flight_frames.pop_front();
}
@@ -393,12 +402,25 @@ void QueueContext::sleep_in_present() {
// return vect[0]->frametime;
return vect[std::size(vect) / 2]->gpu_time;
}();
+
+    // Median (upper median for even counts) of the recorded per-frame cpu_time.
+    const auto expected_cputime = [&, this]() {
+        auto vect = std::vector<Timing*>{};
+        std::ranges::transform(this->timings, std::back_inserter(vect),
+                               [](const auto& timing) { return timing.get(); });
+        std::ranges::sort(vect, [](const auto& a, const auto& b) {
+            return a->cpu_time < b->cpu_time; // order by CPU time, not GPU time
+        });
+        return vect[std::size(vect) / 2]->cpu_time; // NOTE(review): unguarded index; assumes timings is non-empty here - confirm
+    }();
std::cerr << " expected gputime: ";
debug_log_time(expected_gputime);
+ std::cerr << " expected cputime: ";
+ debug_log_time(expected_cputime);
- // PRESENT CALL
- // |-------------------------|---------------------------|
- // ^a ^swap_acquire ^b
+ // PRESENT CALL
+ // |--------------|-------------------|----------------|
+ // a swap_acquire b c
//
// Us, the CPU on the host, is approximately at 'b'.
// We have a good guess for the distance between
@@ -432,7 +454,7 @@ void QueueContext::sleep_in_present() {
const auto now = std::chrono::steady_clock::now();
const auto dist = now - a;
- const auto expected = expected_gputime - dist;
+ const auto expected = expected_gputime - dist - expected_cputime;
const auto swi = VkSemaphoreWaitInfo{
.sType = VK_STRUCTURE_TYPE_SEMAPHORE_WAIT_INFO,
@@ -440,7 +462,8 @@ void QueueContext::sleep_in_present() {
.pSemaphores = &this->semaphore,
.pValues = &frame->end.sequence,
};
- vtable.WaitSemaphoresKHR(device.device, &swi, std::max(expected.count(), 0l));
+ vtable.WaitSemaphoresKHR(device.device, &swi,
+ std::max(expected.count(), 0l));
/*
diff --git a/src/queue_context.hh b/src/queue_context.hh
index 356de33..172683d 100644
--- a/src/queue_context.hh
+++ b/src/queue_context.hh
@@ -22,8 +22,6 @@ class QueueContext final : public Context {
const VkQueue queue;
const std::uint32_t queue_family_index;
- // I used to use these to signal when we could read timestamps until
- // I realised you could use hostQueryReset.
std::uint64_t semaphore_sequence = 0;
VkSemaphore semaphore;
@@ -64,11 +62,9 @@ class QueueContext final : public Context {
std::deque<std::unique_ptr<Frame>> in_flight_frames;
struct Timing {
- DeviceContext::Clock::time_point_t gpu_start;
DeviceContext::Clock::time_point_t gpu_end;
- DeviceContext::Clock::time_point_t::duration cpu_time;
- DeviceContext::Clock::time_point_t::duration gpu_time;
+ DeviceContext::Clock::time_point_t::duration gpu_time, cpu_time;
std::unique_ptr<Frame> frame;
};