diff options
| author | Nicolas James <nj3ahxac@gmail.com> | 2026-03-12 20:42:37 +1100 |
|---|---|---|
| committer | Nicolas James <nj3ahxac@gmail.com> | 2026-03-12 20:42:37 +1100 |
| commit | 8ea01a571be073be00f8a77150f3d62ef5600b52 (patch) | |
| tree | ab4e262cff0d591e1145e4986f2d635527f63163 | |
| parent | f16c927ab9083ac4d0ce38ec3b62ca8677055b90 (diff) | |
Fix potential clock domain mismatch when using chrono::now()
| -rw-r--r-- | src/device_context.cc | 20 | ||||
| -rw-r--r-- | src/device_context.hh | 7 | ||||
| -rw-r--r-- | src/layer.cc | 4 | ||||
| -rw-r--r-- | src/queue_context.cc | 19 | ||||
| -rw-r--r-- | src/queue_context.hh | 4 | ||||
| -rw-r--r-- | src/timestamp_pool.cc | 2 |
6 files changed, 36 insertions, 20 deletions
diff --git a/src/device_context.cc b/src/device_context.cc index 0f606b5..99a4979 100644 --- a/src/device_context.cc +++ b/src/device_context.cc @@ -1,6 +1,7 @@ #include "device_context.hh" #include "queue_context.hh" +#include <time.h> #include <utility> #include <vulkan/vulkan_core.h> @@ -36,6 +37,17 @@ DeviceContext::Clock::Clock(const DeviceContext& context) : device(context) { DeviceContext::Clock::~Clock() {} +DeviceContext::Clock::time_point_t DeviceContext::Clock::now() { + + auto ts = timespec{}; + if (clock_gettime(CLOCK_MONOTONIC, &ts)) { + throw errno; + } + + return time_point_t{std::chrono::seconds{ts.tv_sec} + + std::chrono::nanoseconds{ts.tv_nsec}}; +} + void DeviceContext::Clock::calibrate() { const auto infos = std::vector<VkCalibratedTimestampInfoKHR>{ {VK_STRUCTURE_TYPE_CALIBRATED_TIMESTAMP_INFO_EXT, nullptr, @@ -75,14 +87,10 @@ DeviceContext::Clock::ticks_to_time(const std::uint64_t& ticks) const { return is_negative ? -signed_abs_diff : signed_abs_diff; }(); - // This will have issues because std::chrono::steady_clock::now(), which - // we use for cpu time, may not be on the same time domain what was returned - // by GetCalibratedTimestamps. It would be more robust to use the posix - // gettime that vulkan guarantees it can be compared to instead. - const auto diff_nsec = static_cast<std::int64_t>(static_cast<double>(diff) * ns_tick + 0.5); - const auto delta = std::chrono::nanoseconds(this->host_ns + diff_nsec); + const auto delta = std::chrono::nanoseconds( + this->host_ns + static_cast<std::uint64_t>(diff_nsec)); return time_point_t{delta}; } diff --git a/src/device_context.hh b/src/device_context.hh index 18b89be..c76f376 100644 --- a/src/device_context.hh +++ b/src/device_context.hh @@ -46,6 +46,13 @@ struct DeviceContext final : public Context { ~Clock(); public: + // WARNING: This *MUST* be used over std::chrono::steady_clock::now if + // you're planning on comparing it to a device's clock. If it isn't, the + // timestamps might from different domains and will be completely + // nonsensical. + static time_point_t now(); + + public: void calibrate(); time_point_t ticks_to_time(const std::uint64_t& ticks) const; }; diff --git a/src/layer.cc b/src/layer.cc index 24898a3..b4766eb 100644 --- a/src/layer.cc +++ b/src/layer.cc @@ -438,7 +438,7 @@ vkQueueSubmit(VkQueue queue, std::uint32_t submit_count, // more explicit + insurance if that changes. auto handles = std::vector<std::shared_ptr<TimestampPool::Handle>>{}; - const auto now = std::chrono::steady_clock::now(); + const auto now = DeviceContext::Clock::now(); std::ranges::transform( std::span{submit_infos, submit_count}, std::back_inserter(next_submits), @@ -489,7 +489,7 @@ vkQueueSubmit2(VkQueue queue, std::uint32_t submit_count, auto next_cbs = std::vector<std::unique_ptr<cbs_t>>{}; auto handles = std::vector<std::shared_ptr<TimestampPool::Handle>>{}; - const auto now = std::chrono::steady_clock::now(); + const auto now = DeviceContext::Clock::now(); std::ranges::transform( std::span{submit_infos, submit_count}, std::back_inserter(next_submits), diff --git a/src/queue_context.cc b/src/queue_context.cc index 6367e16..69bcb13 100644 --- a/src/queue_context.cc +++ b/src/queue_context.cc @@ -143,7 +143,7 @@ void QueueContext::drain_submissions_to_frame() { this->in_flight_frames.emplace_back( Frame{.submissions = std::move(this->submissions), - .cpu_post_present_time = std::chrono::steady_clock::now()}); + .cpu_post_present_time = DeviceContext::Clock::now()}); assert(std::size(this->in_flight_frames.back().submissions)); // *valid but unspecified state after move, so clear!* this->submissions.clear(); @@ -293,8 +293,9 @@ void QueueContext::drain_frames_to_timings() { if (const auto T = std::size(this->timings); T > this->MAX_TRACKED_TIMINGS) { - const auto dist = T - this->MAX_TRACKED_TIMINGS; - const auto erase_to_iter = std::next(std::begin(this->timings), dist); + const auto erase_to_iter = + std::next(std::begin(this->timings), + static_cast<long>(T - MAX_TRACKED_TIMINGS)); this->timings.erase(std::begin(this->timings), erase_to_iter); } } @@ -351,7 +352,7 @@ void QueueContext::sleep_in_present() { // |------------------------x------------------------------| // ^ first_gpu_work now last_gpu_work ^ - const auto now = std::chrono::steady_clock::now(); + const auto now = DeviceContext::Clock::now(); const auto dist = now - first_gpu_work; const auto expected_dist_to_last = expected_gputime - dist; @@ -375,22 +376,22 @@ void QueueContext::sleep_in_present() { } bool QueueContext::should_inject_timestamps() const { - const auto& pd = this->device_context.physical_device; + const auto& physical_device = this->device_context.physical_device; - if (!pd.supports_required_extensions) { + if (!physical_device.supports_required_extensions) { return false; } // Don't bother injecting timestamps during queue submission if both AL1 and // AL2 are disabled. if (!this->device_context.was_antilag_requested && - !pd.instance.layer.is_antilag_1_enabled) { + !physical_device.instance.layer.is_antilag_1_enabled) { return false; } - assert(pd.queue_properties); - const auto& queue_props = *pd.queue_properties; + assert(physical_device.queue_properties); + const auto& queue_props = *physical_device.queue_properties; assert(this->queue_family_index < std::size(queue_props)); const auto& props = queue_props[this->queue_family_index]; diff --git a/src/queue_context.hh b/src/queue_context.hh index fbb04e8..0e6441e 100644 --- a/src/queue_context.hh +++ b/src/queue_context.hh @@ -20,7 +20,7 @@ class QueueContext final : public Context { // The amount of finished frame timing data we keep before eviction. // For now, this value is also the number of data points used in the // calculation of gpu timing information. - static constexpr auto MAX_TRACKED_TIMINGS = 50; + static constexpr auto MAX_TRACKED_TIMINGS = 50u; // The amount of queue submissions we allow tracked per queue before // we give up tracking them. For a queue that is presented to, // these submissions will be constantly moved to Frame structs so @@ -29,7 +29,7 @@ class QueueContext final : public Context { // amount of vkQueueSubmit's per frame. For queues which don't // present, this limit stops them from growing limitlessly in memory // as we may not necessarily manually evict them yet. - static constexpr auto MAX_TRACKED_SUBMISSIONS = 50; + static constexpr auto MAX_TRACKED_SUBMISSIONS = 50u; public: DeviceContext& device_context; diff --git a/src/timestamp_pool.cc b/src/timestamp_pool.cc index 4ca1d5a..e02a4fc 100644 --- a/src/timestamp_pool.cc +++ b/src/timestamp_pool.cc @@ -167,7 +167,7 @@ TimestampPool::Handle::get_time_spinlock( auto time = this->get_time(); for (; !time.has_value(); time = this->get_time()) { - if (const auto now = std::chrono::steady_clock::now(); now >= until) { + if (const auto now = DeviceContext::Clock::now(); now >= until) { break; } } |
