From 7f3439714858d4c70f60db71543df15db5708d92 Mon Sep 17 00:00:00 2001 From: Nicolas James Date: Fri, 20 Feb 2026 20:57:37 +1100 Subject: Don't inject timestamps into unsupported queues --- src/layer.cc | 11 ++++++++++- src/physical_device_context.cc | 41 ++++++++++++++++++++++++++++++++++------- src/physical_device_context.hh | 5 ++++- src/queue_context.cc | 37 ++++++++++++++++++++++++++++++------- src/queue_context.hh | 3 +++ 5 files changed, 81 insertions(+), 16 deletions(-) (limited to 'src') diff --git a/src/layer.cc b/src/layer.cc index d09f7be..6dfff9b 100644 --- a/src/layer.cc +++ b/src/layer.cc @@ -92,6 +92,7 @@ CreateInstance(const VkInstanceCreateInfo* pCreateInfo, INSTANCE_VTABLE_LOAD(GetInstanceProcAddr); INSTANCE_VTABLE_LOAD(CreateDevice); INSTANCE_VTABLE_LOAD(EnumerateDeviceExtensionProperties); + INSTANCE_VTABLE_LOAD(GetPhysicalDeviceQueueFamilyProperties2); #undef INSTANCE_VTABLE_LOAD const auto lock = std::scoped_lock{layer_context.mutex}; @@ -384,7 +385,7 @@ static VKAPI_ATTR void VKAPI_CALL GetDeviceQueue2( if (!queue || !*queue) { return; } - + const auto key = layer_context.get_key(*queue); const auto lock = std::scoped_lock{layer_context.mutex}; const auto [it, inserted] = layer_context.contexts.try_emplace(key); @@ -443,6 +444,10 @@ vkQueueSubmit(VkQueue queue, std::uint32_t submit_count, if (!submit_count) { // no-op submit we shouldn't worry about return vtable.QueueSubmit(queue, submit_count, submit_infos, fence); } + + if (!queue_context->should_inject_timestamps()) { + return vtable.QueueSubmit(queue, submit_count, submit_infos, fence); + } // What's happening here? // We are making a very modest modification to all vkQueueSubmits where we @@ -527,6 +532,10 @@ vkQueueSubmit2(VkQueue queue, std::uint32_t submit_count, return vtable.QueueSubmit2(queue, submit_count, submit_infos, fence); } + if (!queue_context->should_inject_timestamps()) { + return vtable.QueueSubmit2(queue, submit_count, submit_infos, fence); + } + using cbs_t = std::vector; auto next_submits = std::vector{}; auto next_cbs = std::vector>{}; diff --git a/src/physical_device_context.cc b/src/physical_device_context.cc index d265c9d..2d1afc3 100644 --- a/src/physical_device_context.cc +++ b/src/physical_device_context.cc @@ -1,17 +1,44 @@ #include "physical_device_context.hh" +#include namespace low_latency { - -PhysicalDeviceContext::PhysicalDeviceContext( - InstanceContext& instance_context, const VkPhysicalDevice& physical_device) - : instance(instance_context), physical_device(physical_device) { + +static std::unique_ptr +make_pd_props(const InstanceContext& instance_context, + const VkPhysicalDevice& physical_device) { + const auto& vtable = instance_context.vtable; auto props = VkPhysicalDeviceProperties{}; - instance.vtable.GetPhysicalDeviceProperties(this->physical_device, &props); - this->properties = - std::make_unique(std::move(props)); + vtable.GetPhysicalDeviceProperties(physical_device, &props); + return std::make_unique(std::move(props)); +} + +static std::unique_ptr +make_qf_props(const InstanceContext& instance_context, + const VkPhysicalDevice& physical_device) { + + const auto& vtable = instance_context.vtable; + + auto count = std::uint32_t{}; + vtable.GetPhysicalDeviceQueueFamilyProperties2(physical_device, &count, + nullptr); + + auto result = std::vector( + count, VkQueueFamilyProperties2{ + .sType = VK_STRUCTURE_TYPE_QUEUE_FAMILY_PROPERTIES_2}); + vtable.GetPhysicalDeviceQueueFamilyProperties2(physical_device, &count, + std::data(result)); + + using qp_t = PhysicalDeviceContext::queue_properties_t; + return std::make_unique(std::move(result)); } +PhysicalDeviceContext::PhysicalDeviceContext( + InstanceContext& instance_context, const VkPhysicalDevice& physical_device) + : instance(instance_context), physical_device(physical_device), + properties(make_pd_props(instance, physical_device)), + queue_properties(make_qf_props(instance, physical_device)) {} + PhysicalDeviceContext::~PhysicalDeviceContext() {} } // namespace low_latency \ No newline at end of file diff --git a/src/physical_device_context.hh b/src/physical_device_context.hh index 8eb4a1a..35809ff 100644 --- a/src/physical_device_context.hh +++ b/src/physical_device_context.hh @@ -15,7 +15,10 @@ class PhysicalDeviceContext final : public Context { const VkPhysicalDevice physical_device; - std::unique_ptr properties; + const std::unique_ptr properties; + + using queue_properties_t = std::vector; + const std::unique_ptr queue_properties; public: PhysicalDeviceContext(InstanceContext& instance_context, diff --git a/src/queue_context.cc b/src/queue_context.cc index 6968720..d20cc79 100644 --- a/src/queue_context.cc +++ b/src/queue_context.cc @@ -4,6 +4,7 @@ #include #include +#include #include #include #include @@ -172,12 +173,16 @@ void QueueContext::notify_present(const VkPresentInfoKHR& info) { this->submissions.clear(); } -const auto debug_log_time = [](const auto& diff) { +const auto debug_log_time2 = [](auto& stream, const auto& diff) { using namespace std::chrono; const auto ms = duration_cast(diff); const auto us = duration_cast(diff - ms); const auto ns = duration_cast(diff - ms - us); - std::cerr << ms << " " << us << " " << ns << " ago\n"; + stream << ms << " " << us << " " << ns << " ago\n"; +}; + +const auto debug_log_time = [](const auto& diff) { + debug_log_time2(std::cerr, diff); }; void QueueContext::process_frames() { @@ -361,11 +366,6 @@ void QueueContext::sleep_in_present() { const auto expected_cputime = calc_median([](const auto& timing) { return timing->cputime; }); - std::cerr << " expected gputime: "; - debug_log_time(expected_gputime); - std::cerr << " expected cputime: "; - debug_log_time(expected_cputime); - // Should look like this: // total_length = expected_gputime // |------------------------x------------------------------| @@ -382,6 +382,29 @@ void QueueContext::sleep_in_present() { last_gpu_work->get_time_spinlock(now + wait_time); frame.cpu_post_present_time = std::chrono::steady_clock::now(); + + std::ofstream f("/tmp/times.txt", std::ios::trunc); + f << " expected gputime: "; + debug_log_time2(f, expected_gputime); + f << " expected cputime: "; + debug_log_time2(f, expected_cputime); + f << " requestd sleep: "; + debug_log_time2(f, wait_time); + f << " observed sleep: "; + debug_log_time2(f, frame.cpu_post_present_time - now); +} + +bool QueueContext::should_inject_timestamps() const { + const auto& pd = this->device_context.physical_device; + + assert(pd.queue_properties); + const auto& queue_props = *pd.queue_properties; + assert(this->queue_family_index < std::size(queue_props)); + + const auto& props = queue_props[this->queue_family_index]; + // Probably need at least 64, don't worry about it just yet and just ensure + // it's not zero (because that will cause a crash if we inject). + return props.queueFamilyProperties.timestampValidBits; } } // namespace low_latency \ No newline at end of file diff --git a/src/queue_context.hh b/src/queue_context.hh index 0c5e51f..67b9c5d 100644 --- a/src/queue_context.hh +++ b/src/queue_context.hh @@ -89,6 +89,9 @@ class QueueContext final : public Context { public: void sleep_in_present(); + + public: + bool should_inject_timestamps() const; }; }; // namespace low_latency -- cgit v1.2.3