aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--src/layer.cc11
-rw-r--r--src/physical_device_context.cc41
-rw-r--r--src/physical_device_context.hh5
-rw-r--r--src/queue_context.cc37
-rw-r--r--src/queue_context.hh3
5 files changed, 81 insertions, 16 deletions
diff --git a/src/layer.cc b/src/layer.cc
index d09f7be..6dfff9b 100644
--- a/src/layer.cc
+++ b/src/layer.cc
@@ -92,6 +92,7 @@ CreateInstance(const VkInstanceCreateInfo* pCreateInfo,
INSTANCE_VTABLE_LOAD(GetInstanceProcAddr);
INSTANCE_VTABLE_LOAD(CreateDevice);
INSTANCE_VTABLE_LOAD(EnumerateDeviceExtensionProperties);
+ INSTANCE_VTABLE_LOAD(GetPhysicalDeviceQueueFamilyProperties2);
#undef INSTANCE_VTABLE_LOAD
const auto lock = std::scoped_lock{layer_context.mutex};
@@ -384,7 +385,7 @@ static VKAPI_ATTR void VKAPI_CALL GetDeviceQueue2(
if (!queue || !*queue) {
return;
}
-
+
const auto key = layer_context.get_key(*queue);
const auto lock = std::scoped_lock{layer_context.mutex};
const auto [it, inserted] = layer_context.contexts.try_emplace(key);
@@ -443,6 +444,10 @@ vkQueueSubmit(VkQueue queue, std::uint32_t submit_count,
if (!submit_count) { // no-op submit we shouldn't worry about
return vtable.QueueSubmit(queue, submit_count, submit_infos, fence);
}
+
+ if (!queue_context->should_inject_timestamps()) {
+ return vtable.QueueSubmit(queue, submit_count, submit_infos, fence);
+ }
// What's happening here?
// We are making a very modest modification to all vkQueueSubmits where we
@@ -527,6 +532,10 @@ vkQueueSubmit2(VkQueue queue, std::uint32_t submit_count,
return vtable.QueueSubmit2(queue, submit_count, submit_infos, fence);
}
+ if (!queue_context->should_inject_timestamps()) {
+ return vtable.QueueSubmit2(queue, submit_count, submit_infos, fence);
+ }
+
using cbs_t = std::vector<VkCommandBufferSubmitInfo>;
auto next_submits = std::vector<VkSubmitInfo2>{};
auto next_cbs = std::vector<std::unique_ptr<cbs_t>>{};
diff --git a/src/physical_device_context.cc b/src/physical_device_context.cc
index d265c9d..2d1afc3 100644
--- a/src/physical_device_context.cc
+++ b/src/physical_device_context.cc
@@ -1,17 +1,44 @@
#include "physical_device_context.hh"
+#include <vulkan/vulkan_core.h>
namespace low_latency {
-
-PhysicalDeviceContext::PhysicalDeviceContext(
- InstanceContext& instance_context, const VkPhysicalDevice& physical_device)
- : instance(instance_context), physical_device(physical_device) {
+
+// Queries the core properties of `physical_device` through the instance's
+// dispatch table and returns them in a heap allocation owned by the caller.
+static std::unique_ptr<VkPhysicalDeviceProperties>
+make_pd_props(const InstanceContext& instance_context,
+              const VkPhysicalDevice& physical_device) {
+    const auto& vtable = instance_context.vtable;
auto props = VkPhysicalDeviceProperties{};
- instance.vtable.GetPhysicalDeviceProperties(this->physical_device, &props);
- this->properties =
- std::make_unique<VkPhysicalDeviceProperties>(std::move(props));
+    vtable.GetPhysicalDeviceProperties(physical_device, &props);
+    // VkPhysicalDeviceProperties is a plain C struct, so "moving" it is just a
+    // copy; hand it over directly instead of wrapping it in std::move.
+    return std::make_unique<VkPhysicalDeviceProperties>(props);
+}
+
+// Enumerates the queue family properties of `physical_device` using the
+// two-call pattern: the first call (null output array) reports the number of
+// families, the second fills one VkQueueFamilyProperties2 per family.
+//
+// NOTE(review): the GetPhysicalDeviceQueueFamilyProperties2 vtable entry is
+// called unconditionally -- confirm the loader macro guarantees it is
+// non-null (the entry point is only core since Vulkan 1.1).
+static std::unique_ptr<PhysicalDeviceContext::queue_properties_t>
+make_qf_props(const InstanceContext& instance_context,
+              const VkPhysicalDevice& physical_device) {
+    using families_t = PhysicalDeviceContext::queue_properties_t;
+    const auto& dispatch = instance_context.vtable;
+
+    // First call: ask only for the number of queue families.
+    std::uint32_t family_count = 0;
+    dispatch.GetPhysicalDeviceQueueFamilyProperties2(physical_device,
+                                                     &family_count, nullptr);
+
+    // Every element must carry its sType before being handed to Vulkan; the
+    // remaining fields start out zero-initialised.
+    const auto blank = VkQueueFamilyProperties2{
+        .sType = VK_STRUCTURE_TYPE_QUEUE_FAMILY_PROPERTIES_2};
+    auto families = std::make_unique<families_t>(family_count, blank);
+
+    // Second call: let the implementation fill in the pre-sized array.
+    dispatch.GetPhysicalDeviceQueueFamilyProperties2(
+        physical_device, &family_count, families->data());
+
+    return families;
}
+// Builds the context for one physical device and, as part of construction,
+// queries and stores its device properties and queue family properties.
+PhysicalDeviceContext::PhysicalDeviceContext(
+    InstanceContext& instance_context, const VkPhysicalDevice& physical_device)
+    : instance(instance_context), physical_device(physical_device),
+      // Query through the constructor arguments rather than the `instance`
+      // member so these initialisers do not depend on the order in which the
+      // members happen to be declared in the class.
+      properties(make_pd_props(instance_context, physical_device)),
+      queue_properties(make_qf_props(instance_context, physical_device)) {}
+
PhysicalDeviceContext::~PhysicalDeviceContext() {}
} // namespace low_latency \ No newline at end of file
diff --git a/src/physical_device_context.hh b/src/physical_device_context.hh
index 8eb4a1a..35809ff 100644
--- a/src/physical_device_context.hh
+++ b/src/physical_device_context.hh
@@ -15,7 +15,10 @@ class PhysicalDeviceContext final : public Context {
const VkPhysicalDevice physical_device;
- std::unique_ptr<VkPhysicalDeviceProperties> properties;
+ const std::unique_ptr<VkPhysicalDeviceProperties> properties;
+
+ using queue_properties_t = std::vector<VkQueueFamilyProperties2>;
+ const std::unique_ptr<queue_properties_t> queue_properties;
public:
PhysicalDeviceContext(InstanceContext& instance_context,
diff --git a/src/queue_context.cc b/src/queue_context.cc
index 6968720..d20cc79 100644
--- a/src/queue_context.cc
+++ b/src/queue_context.cc
@@ -4,6 +4,7 @@
#include <algorithm>
#include <chrono>
+#include <fstream>
#include <iostream>
#include <ranges>
#include <span>
@@ -172,12 +173,16 @@ void QueueContext::notify_present(const VkPresentInfoKHR& info) {
this->submissions.clear();
}
-const auto debug_log_time = [](const auto& diff) {
+const auto debug_log_time2 = [](auto& stream, const auto& diff) {
using namespace std::chrono;
const auto ms = duration_cast<milliseconds>(diff);
const auto us = duration_cast<microseconds>(diff - ms);
const auto ns = duration_cast<nanoseconds>(diff - ms - us);
- std::cerr << ms << " " << us << " " << ns << " ago\n";
+ stream << ms << " " << us << " " << ns << " ago\n";
+};
+
+// Convenience wrapper around debug_log_time2 that always writes to std::cerr.
+const auto debug_log_time = [](const auto& elapsed) {
+    debug_log_time2(std::cerr, elapsed);
};
void QueueContext::process_frames() {
@@ -361,11 +366,6 @@ void QueueContext::sleep_in_present() {
const auto expected_cputime =
calc_median([](const auto& timing) { return timing->cputime; });
- std::cerr << " expected gputime: ";
- debug_log_time(expected_gputime);
- std::cerr << " expected cputime: ";
- debug_log_time(expected_cputime);
-
// Should look like this:
// total_length = expected_gputime
// |------------------------x------------------------------|
@@ -382,6 +382,29 @@ void QueueContext::sleep_in_present() {
last_gpu_work->get_time_spinlock(now + wait_time);
frame.cpu_post_present_time = std::chrono::steady_clock::now();
+
+ std::ofstream f("/tmp/times.txt", std::ios::trunc);
+ f << " expected gputime: ";
+ debug_log_time2(f, expected_gputime);
+ f << " expected cputime: ";
+ debug_log_time2(f, expected_cputime);
+ f << " requestd sleep: ";
+ debug_log_time2(f, wait_time);
+ f << " observed sleep: ";
+ debug_log_time2(f, frame.cpu_post_present_time - now);
+}
+
+// Reports whether timestamp writes may be injected into submissions on this
+// queue, i.e. whether the queue's family advertises any valid timestamp bits.
+bool QueueContext::should_inject_timestamps() const {
+    const auto& physical = this->device_context.physical_device;
+    assert(physical.queue_properties);
+
+    const auto& families = *physical.queue_properties;
+    assert(this->queue_family_index < std::size(families));
+
+    // A stricter minimum (probably 64 bits) may be wanted eventually; for now
+    // only a count of zero is rejected, because injecting timestamps on such
+    // a queue will cause a crash.
+    const auto valid_bits = families[this->queue_family_index]
+                                .queueFamilyProperties.timestampValidBits;
+    return valid_bits != 0;
}
} // namespace low_latency \ No newline at end of file
diff --git a/src/queue_context.hh b/src/queue_context.hh
index 0c5e51f..67b9c5d 100644
--- a/src/queue_context.hh
+++ b/src/queue_context.hh
@@ -89,6 +89,9 @@ class QueueContext final : public Context {
public:
void sleep_in_present();
+
+ public:
+ bool should_inject_timestamps() const;
};
}; // namespace low_latency