aboutsummaryrefslogtreecommitdiff
path: root/src/queue_context.cc
diff options
context:
space:
mode:
authorNicolas James <Eele1Ephe7uZahRie@tutanota.com>2026-02-14 14:05:26 +1100
committerNicolas James <Eele1Ephe7uZahRie@tutanota.com>2026-02-14 14:05:26 +1100
commit0deb469d5a7c9a16179139dcff74a54aac1791a0 (patch)
tree2f791a72f0441a30321332f5ecea5865f357e2bb /src/queue_context.cc
parent8f4501215c0dbbbde59da2d015fdec3dbe5131bc (diff)
commit wip
Diffstat (limited to 'src/queue_context.cc')
-rw-r--r--src/queue_context.cc429
1 files changed, 267 insertions, 162 deletions
diff --git a/src/queue_context.cc b/src/queue_context.cc
index 9b46773..99cf51e 100644
--- a/src/queue_context.cc
+++ b/src/queue_context.cc
@@ -67,25 +67,20 @@ QueueContext::~QueueContext() {
}
void QueueContext::notify_submit(
- std::span<const VkSubmitInfo> infos,
- const std::uint64_t target_semaphore_sequence,
- std::shared_ptr<TimestampPool::Handle>&& handle) {
-
- // This has an issue where we're collecting all signals and waits and
- // treating a single submit call as finishing
+ const VkSubmitInfo& info, const std::uint64_t& target_semaphore_sequence,
+ const std::shared_ptr<TimestampPool::Handle> head_handle,
+ const std::shared_ptr<TimestampPool::Handle> tail_handle) {
auto signals = std::unordered_set<VkSemaphore>{};
auto waits = std::unordered_set<VkSemaphore>{};
- for (const auto& info : infos) {
- std::ranges::copy_n(info.pWaitSemaphores, info.waitSemaphoreCount,
- std::inserter(waits, std::end(waits)));
- std::ranges::copy_n(info.pSignalSemaphores, info.signalSemaphoreCount,
- std::inserter(signals, std::end(signals)));
- }
+ std::ranges::copy_n(info.pWaitSemaphores, info.waitSemaphoreCount,
+ std::inserter(waits, std::end(waits)));
+ std::ranges::copy_n(info.pSignalSemaphores, info.signalSemaphoreCount,
+ std::inserter(signals, std::end(signals)));
this->submissions.emplace_back(std::make_unique<Submission>(
std::move(signals), std::move(waits), target_semaphore_sequence,
- std::move(handle)));
+ head_handle, tail_handle));
// TODO HACK
if (std::size(this->submissions) > 100) {
@@ -93,6 +88,7 @@ void QueueContext::notify_submit(
}
}
+/*
void QueueContext::notify_submit(
std::span<const VkSubmitInfo2> infos,
const std::uint64_t target_semaphore_sequence,
@@ -100,6 +96,7 @@ void QueueContext::notify_submit(
auto signals = std::unordered_set<VkSemaphore>{};
auto waits = std::unordered_set<VkSemaphore>{};
+
for (const auto& info : infos) {
constexpr auto get_semaphore = [](const auto& semaphore_info) {
return semaphore_info.semaphore;
@@ -124,21 +121,18 @@ void QueueContext::notify_submit(
this->submissions.pop_front();
}
}
+*/
void QueueContext::notify_present(const VkPresentInfoKHR& info) {
- auto frame = [&]() -> std::unique_ptr<Frame> {
- const auto waits = [&]() {
- auto waits = std::unordered_set<VkSemaphore>{};
- std::ranges::copy_n(info.pWaitSemaphores, info.waitSemaphoreCount,
- std::inserter(waits, std::end(waits)));
- return waits;
- }();
-
- const auto wait_semaphores = std::unordered_set<VkSemaphore>{
- info.pWaitSemaphores,
- std::next(info.pWaitSemaphores, info.waitSemaphoreCount)};
+ const auto waits = [&]() {
+ auto waits = std::unordered_set<VkSemaphore>{};
+ std::ranges::copy_n(info.pWaitSemaphores, info.waitSemaphoreCount,
+ std::inserter(waits, std::end(waits)));
+ return waits;
+ }();
+ const auto collected_semaphores = [&info, this]() {
auto collected_semaphores = std::unordered_set<VkSemaphore>{};
for (auto i = std::uint32_t{0}; i < info.swapchainCount; ++i) {
const auto& swapchain = info.pSwapchains[i];
@@ -153,112 +147,147 @@ void QueueContext::notify_present(const VkPresentInfoKHR& info) {
const auto index_it = swapchain_it->second.find(index);
assert(index_it != std::end(swapchain_it->second));
- const auto semaphore = index_it->second;
+ const auto& semaphore = index_it->second;
collected_semaphores.emplace(index_it->second);
}
+ return collected_semaphores;
+ }();
- const auto start_submission_it = std::ranges::find_if(
- std::rbegin(this->submissions), std::rend(this->submissions),
- [&](const auto& submission) {
- return std::ranges::any_of(
- submission->waits, [&](const auto& wait) {
- return collected_semaphores.contains(wait);
- });
- });
-
- if (start_submission_it == std::rend(this->submissions)) {
- std::cout << "couldn't find starting submission!\n";
- return nullptr;
- }
- const auto& start_submission = *start_submission_it;
-
- const auto end_submission_it = std::ranges::find_if(
- std::rbegin(this->submissions), std::rend(this->submissions),
- [&](const auto& submission) {
- return std::ranges::any_of(
- submission->signals, [&](const auto& signal) {
- return wait_semaphores.contains(signal);
- });
- });
-
- if (end_submission_it == std::rend(this->submissions)) {
- std::cout << "couldn't find ending submission!\n";
- return nullptr;
- }
- const auto& end_submission = *end_submission_it;
-
- return std::make_unique<Frame>(Frame{
- .start_context = *this,
- .start = start_submission->timestamp_handle,
- .target_start_sequence =
- start_submission->target_semaphore_sequence,
- .end_context = *this,
- .end = start_submission->timestamp_handle,
- .target_end_sequence = start_submission->target_semaphore_sequence,
+ const auto start_iter = std::ranges::find_if(
+ std::rbegin(this->submissions), std::rend(this->submissions),
+ [&](const auto& submission) {
+ return std::ranges::any_of(
+ submission->waits, [&](const auto& wait) {
+ return collected_semaphores.contains(wait);
+ });
});
- }();
- this->in_flight_frames.emplace_back(std::move(frame));
-
+ if (start_iter == std::rend(this->submissions)) {
+ std::cout << "couldn't find starting submission!\n";
+ return;
+ }
+ const auto& start = *start_iter;
+
+ const auto end_iter = std::ranges::find_if(
+ std::rbegin(this->submissions), std::rend(this->submissions),
+ [&](const auto& submission) {
+ return std::ranges::any_of(
+ submission->signals,
+ [&](const auto& signal) { return waits.contains(signal); });
+ });
+
+ if (end_iter == std::rend(this->submissions)) {
+ std::cout << "couldn't find ending submission!\n";
+ return;
+ }
+ const auto& end = *end_iter;
+
+ auto frame = Frame{.start =
+ Frame::Timepoint{
+ .context = *this,
+ .handle = start->start_handle,
+ .sequence = start->sequence,
+ },
+ .end = Frame::Timepoint{
+ .context = *this,
+ .handle = end->end_handle,
+ .sequence = end->sequence,
+ }};
+ this->in_flight_frames.emplace_back(
+ std::make_unique<Frame>(std::move(frame)));
+
// hack
if (this->in_flight_frames.size() > 5) {
this->in_flight_frames.pop_front();
}
}
-// now it's all coming together
std::optional<QueueContext::duration_t> QueueContext::get_delay_time() {
if (!std::size(this->in_flight_frames)) {
return std::nullopt;
}
- auto seq = std::uint64_t{};
- this->device_context.vtable.GetSemaphoreCounterValueKHR(
- this->device_context.device, this->semaphore, &seq);
-
- // Get semaphore first, then poll!
- this->timestamp_pool->poll();
+ // We are about to query the wait semaphores of all of our current
+ // frames in flight. They may come from the same device, so we're going
+ // to build a mapping here to reduce vulkan calls. Not only that,
+ // we have to do this or else our timing information becomes broken
+ // as this loop iterates.
+ const auto target_devices = [this]() -> auto {
+ using context_ref_t = std::reference_wrapper<DeviceContext>;
+ auto target_devices = std::unordered_map<VkDevice, context_ref_t>{};
+ for (const auto& frame : this->in_flight_frames) {
+ auto& start = frame->start.context.device_context;
+ auto& end = frame->end.context.device_context;
+
+ target_devices.try_emplace(start.device, std::ref(start));
+ target_devices.try_emplace(end.device, std::ref(end));
+ }
+ return target_devices;
+ }();
- // idk how frequently we should call this.
- this->device_context.calibrate_timestamps();
+ // Calibrate timestamps before we acquire semaphores.
+ for (const auto& pair : target_devices) {
+ auto& device = pair.second;
+ device_context.clock.calibrate();
+ }
- static auto gpu_frametimes = std::deque<uint64_t>{};
- static auto cpu_frametimes = std::deque<uint64_t>{};
+ // Now we have all owned devices and their clocks are in a good state.
+ // We need to build another mapping of semaphores to their queries now.
+ const auto queue_sequences = [this]() -> auto {
+ auto queue_sequences = std::unordered_map<VkQueue, std::uint64_t>{};
+ for (const auto& frame : this->in_flight_frames) {
+ auto& start = frame->start.context;
+ auto& end = frame->end.context;
+
+ for (const auto& queue_ptr : {&start, &end}) {
+ if (queue_sequences.contains(queue_ptr->queue)) {
+ continue;
+ }
+
+ const auto& vtable = queue_ptr->device_context.vtable;
+ auto seq = std::uint64_t{};
+ vtable.GetSemaphoreCounterValueKHR(this->device_context.device,
+ this->semaphore, &seq);
+ queue_sequences.emplace(queue_ptr->queue, seq);
+ }
+ }
+ return queue_sequences;
+ }();
+ // Now all devices we are about to query are primed to query.
+ // We have all sequence numbers from all queus we could possibly query.
const auto S = std::size(this->in_flight_frames);
+ for (auto i = std::size_t{0}; i < S; ++i) {
+ assert(this->in_flight_frames[i]);
+ const auto& frame = *this->in_flight_frames[i];
+ const auto& start = frame.start;
+ const auto& end = frame.end;
- std::cout << "\nSTART FRAME READOUT\n";
- std::cout << "error bound: " << this->device_context.clock.error_bound
- << '\n';
- std::cout << "num frames in flight: " << S << '\n';
- std::cout << "from oldest -> newest\n";
-
- // const auto b_seq = semaphore_from_context(*this);
- const auto now = std::chrono::steady_clock::now();
-
- auto i = std::size_t{0};
- for (; i < std::size(this->in_flight_frames); ++i) {
- const auto& frame = this->in_flight_frames[i];
std::cout << " Evaluating the frame that's " << S - i - 1
<< " behind\n";
- if (!frame) {
- std::cout << " nullptr!\n";
+
+ std::cout << " target start seq: " << start.sequence << '\n';
+ std::cout << " target end seq: " << end.sequence << '\n';
+
+ const auto start_seq_it = queue_sequences.find(start.context.queue);
+ assert(start_seq_it != std::end(queue_sequences));
+ const auto& start_seq = start_seq_it->second;
+ if (start_seq < start.sequence) {
+ std::cout << " frame hasn't started yet !\n ";
continue;
}
- std::cout << " target start: " << frame->target_start_sequence << '\n';
- std::cout << " target end: " << frame->target_end_sequence << '\n';
- if (seq < frame->target_start_sequence) {
- std::cout << " frame hasn't started yet!\n";
- continue;
+ /*
+ const auto start_ticks_opt =
+ start.handle->get_ticks(*start.context.timestamp_pool);
+ if (!start_ticks_opt.has_value()) {
+ std::cout << " frame hasn't started yet !\n ";
}
- const auto start_ticks =
- frame->start_context.timestamp_pool->get_polled(*frame->start);
std::cout << " START TICKS: " << start_ticks << '\n';
- const auto& a_clock = frame->start_context.device_context.clock;
- const auto a = a_clock.ticks_to_time(start_ticks);
-
+ const auto start_time =
+ start.context.device_context.clock.ticks_to_time(start_ticks);
+
{
using namespace std::chrono;
const auto diff = now - a;
@@ -269,85 +298,161 @@ std::optional<QueueContext::duration_t> QueueContext::get_delay_time() {
<< " us " << ns << " ns ago\n";
}
- if (seq < frame->target_end_sequence) {
- std::cout << " frame hasn't ended yet!\n";
+ const auto end_seq_it = queue_sequences.find(end.context.queue);
+ assert(end_seq_it != std::end(queue_sequences));
+ const auto& end_seq = end_seq_it->second;
+ if (start_seq < end.sequence) {
+ std::cout << " frame hasn't started yet !\n ";
continue;
}
+ */
+ }
+ return std::nullopt;
+ //
+}
- const auto end_ticks =
- frame->end_context.timestamp_pool->get_polled(*frame->end, true);
- const auto& b_clock = frame->end_context.device_context.clock;
- std::cout << " END_TICKS: " << end_ticks << '\n';
- const auto b = b_clock.ticks_to_time(end_ticks);
- {
- using namespace std::chrono;
- if (now <= b) {
- std::cout << "b happened before now?\n";
- }
- const auto diff = now - b;
- const auto ms = duration_cast<milliseconds>(diff);
- const auto us = duration_cast<microseconds>(diff - ms);
- const auto ns = duration_cast<nanoseconds>(diff - ms - us);
- std::cout << " frame ended: " << ms << " ms " << us
- << " us " << ns << " ns ago\n";
- }
+// now it's all coming together
+// std::optional<QueueContext::duration_t> QueueContext::get_delay_time() {
+/*
+if (!std::size(this->in_flight_frames)) {
+ return std::nullopt;
+}
- const auto gpu_time = b - a;
- {
- using namespace std::chrono;
- const auto diff = gpu_time;
- const auto ms = duration_cast<milliseconds>(diff);
- const auto us = duration_cast<microseconds>(diff - ms);
- const auto ns = duration_cast<nanoseconds>(diff - ms - us);
- std::cout << " gpu_time: " << ms << " ms " << us
- << " us " << ns << " ns ago\n";
- }
+auto seq = std::uint64_t{};
+this->device_context.vtable.GetSemaphoreCounterValueKHR(
+ this->device_context.device, this->semaphore, &seq);
- /*
- cpu_frametimes.emplace_back(cpu_time);
- gpu_frametimes.emplace_back(gpu_time);
- */
- }
+// Get semaphore first, then poll!
+this->timestamp_pool->poll();
- /*
- if (remove_index.has_value()) {
- this->in_flight_frames.erase(std::begin(this->in_flight_frames),
- std::begin(this->in_flight_frames) +
- *remove_index);
+// idk how frequently we should call this.
+this->device_context.calibrate_timestamps();
+
+static auto gpu_frametimes = std::deque<uint64_t>{};
+static auto cpu_frametimes = std::deque<uint64_t>{};
+
+const auto S = std::size(this->in_flight_frames);
+
+std::cout << "\nSTART FRAME READOUT\n";
+std::cout << "error bound: " << this->device_context.clock.error_bound
+ << '\n';
+std::cout << "num frames in flight: " << S << '\n';
+std::cout << "from oldest -> newest\n";
+
+// const auto b_seq = semaphore_from_context(*this);
+const auto now = std::chrono::steady_clock::now();
+
+auto i = std::size_t{0};
+for (; i < std::size(this->in_flight_frames); ++i) {
+ const auto& frame = this->in_flight_frames[i];
+ std::cout << " Evaluating the frame that's " << S - i - 1
+ << " behind\n";
+ if (!frame) {
+ std::cout << " nullptr!\n";
+ continue;
}
- */
- /*
- auto g_copy = gpu_frametimes;
- auto c_copy = cpu_frametimes;
- std::ranges::sort(g_copy);
- std::ranges::sort(c_copy);
+ std::cout << " target start: " << frame->target_start_sequence <<
+'\n'; std::cout << " target end: " << frame->target_end_sequence << '\n'; if
+(seq < frame->target_start_sequence) { std::cout << " frame hasn't
+started yet!\n"; continue;
+ }
- constexpr auto N = 49;
- if (std::size(cpu_frametimes) < N) {
- return std::nullopt;
+ const auto start_ticks =
+ frame->start_context.timestamp_pool->get_polled(*frame->start);
+ std::cout << " START TICKS: " << start_ticks << '\n';
+ const auto& a_clock = frame->start_context.device_context.clock;
+ const auto a = a_clock.ticks_to_time(start_ticks);
+
+ {
+ using namespace std::chrono;
+ const auto diff = now - a;
+ const auto ms = duration_cast<milliseconds>(diff);
+ const auto us = duration_cast<microseconds>(diff - ms);
+ const auto ns = duration_cast<nanoseconds>(diff - ms - us);
+ std::cout << " frame started: " << ms << " ms " << us
+ << " us " << ns << " ns ago\n";
}
- const auto F = std::size(g_copy);
- // close enough to median lol
- const auto g = g_copy[F / 2];
- const auto c = c_copy[F / 2];
+ if (seq < frame->target_end_sequence) {
+ std::cout << " frame hasn't ended yet!\n";
+ continue;
+ }
- std::cout << g << '\n';
- std::cout << " median gpu: " << (g / 1'000'000) << " ms " << g / 1'000
- << " us " << g << " ns\n";
- std::cout << " median cpu: " << c / 1'000'000 << " ms " << c / 1'000
- << " us " << c << " ns\n";
+ const auto end_ticks =
+ frame->end_context.timestamp_pool->get_polled(*frame->end, true);
+ const auto& b_clock = frame->end_context.device_context.clock;
+ std::cout << " END_TICKS: " << end_ticks << '\n';
+ const auto b = b_clock.ticks_to_time(end_ticks);
+ {
+ using namespace std::chrono;
+ if (now <= b) {
+ std::cout << "b happened before now?\n";
+ }
+ const auto diff = now - b;
+ const auto ms = duration_cast<milliseconds>(diff);
+ const auto us = duration_cast<microseconds>(diff - ms);
+ const auto ns = duration_cast<nanoseconds>(diff - ms - us);
+ std::cout << " frame ended: " << ms << " ms " << us
+ << " us " << ns << " ns ago\n";
+ }
- if (F > N) {
- gpu_frametimes.pop_front();
- cpu_frametimes.pop_front();
+ const auto gpu_time = b - a;
+ {
+ using namespace std::chrono;
+ const auto diff = gpu_time;
+ const auto ms = duration_cast<milliseconds>(diff);
+ const auto us = duration_cast<microseconds>(diff - ms);
+ const auto ns = duration_cast<nanoseconds>(diff - ms - us);
+ std::cout << " gpu_time: " << ms << " ms " << us
+ << " us " << ns << " ns ago\n";
}
- */
+ /*
+ cpu_frametimes.emplace_back(cpu_time);
+ gpu_frametimes.emplace_back(gpu_time);
+}
+
+/*
+if (remove_index.has_value()) {
+ this->in_flight_frames.erase(std::begin(this->in_flight_frames),
+ std::begin(this->in_flight_frames) +
+ *remove_index);
+}
+*/
+
+/*
+auto g_copy = gpu_frametimes;
+auto c_copy = cpu_frametimes;
+std::ranges::sort(g_copy);
+std::ranges::sort(c_copy);
+
+constexpr auto N = 49;
+if (std::size(cpu_frametimes) < N) {
return std::nullopt;
}
+const auto F = std::size(g_copy);
+// close enough to median lol
+const auto g = g_copy[F / 2];
+const auto c = c_copy[F / 2];
+
+std::cout << g << '\n';
+
+std::cout << " median gpu: " << (g / 1'000'000) << " ms " << g / 1'000
+ << " us " << g << " ns\n";
+std::cout << " median cpu: " << c / 1'000'000 << " ms " << c / 1'000
+ << " us " << c << " ns\n";
+
+if (F > N) {
+ gpu_frametimes.pop_front();
+ cpu_frametimes.pop_front();
+}
+
+return std::nullopt;
+}
+*/
+
} // namespace low_latency \ No newline at end of file