aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--src/device_context.cc20
-rw-r--r--src/device_context.hh7
-rw-r--r--src/layer.cc4
-rw-r--r--src/queue_context.cc19
-rw-r--r--src/queue_context.hh4
-rw-r--r--src/timestamp_pool.cc2
6 files changed, 36 insertions, 20 deletions
diff --git a/src/device_context.cc b/src/device_context.cc
index 0f606b5..99a4979 100644
--- a/src/device_context.cc
+++ b/src/device_context.cc
@@ -1,6 +1,7 @@
#include "device_context.hh"
#include "queue_context.hh"
+#include <time.h>
#include <utility>
#include <vulkan/vulkan_core.h>
@@ -36,6 +37,17 @@ DeviceContext::Clock::Clock(const DeviceContext& context) : device(context) {
DeviceContext::Clock::~Clock() {}
+DeviceContext::Clock::time_point_t DeviceContext::Clock::now() {
+
+ auto ts = timespec{};
+ if (clock_gettime(CLOCK_MONOTONIC, &ts)) {
+ throw errno;
+ }
+
+ return time_point_t{std::chrono::seconds{ts.tv_sec} +
+ std::chrono::nanoseconds{ts.tv_nsec}};
+}
+
void DeviceContext::Clock::calibrate() {
const auto infos = std::vector<VkCalibratedTimestampInfoKHR>{
{VK_STRUCTURE_TYPE_CALIBRATED_TIMESTAMP_INFO_EXT, nullptr,
@@ -75,14 +87,10 @@ DeviceContext::Clock::ticks_to_time(const std::uint64_t& ticks) const {
return is_negative ? -signed_abs_diff : signed_abs_diff;
}();
- // This will have issues because std::chrono::steady_clock::now(), which
- // we use for cpu time, may not be on the same time domain what was returned
- // by GetCalibratedTimestamps. It would be more robust to use the posix
- // gettime that vulkan guarantees it can be compared to instead.
-
const auto diff_nsec =
static_cast<std::int64_t>(static_cast<double>(diff) * ns_tick + 0.5);
- const auto delta = std::chrono::nanoseconds(this->host_ns + diff_nsec);
+ const auto delta = std::chrono::nanoseconds(
+ this->host_ns + static_cast<std::uint64_t>(diff_nsec));
return time_point_t{delta};
}
diff --git a/src/device_context.hh b/src/device_context.hh
index 18b89be..c76f376 100644
--- a/src/device_context.hh
+++ b/src/device_context.hh
@@ -46,6 +46,13 @@ struct DeviceContext final : public Context {
~Clock();
public:
+ // WARNING: This *MUST* be used over std::chrono::steady_clock::now if
+ // you're planning on comparing it to a device's clock. If it isn't, the
+ // timestamps might from different domains and will be completely
+ // nonsensical.
+ static time_point_t now();
+
+ public:
void calibrate();
time_point_t ticks_to_time(const std::uint64_t& ticks) const;
};
diff --git a/src/layer.cc b/src/layer.cc
index 24898a3..b4766eb 100644
--- a/src/layer.cc
+++ b/src/layer.cc
@@ -438,7 +438,7 @@ vkQueueSubmit(VkQueue queue, std::uint32_t submit_count,
// more explicit + insurance if that changes.
auto handles = std::vector<std::shared_ptr<TimestampPool::Handle>>{};
- const auto now = std::chrono::steady_clock::now();
+ const auto now = DeviceContext::Clock::now();
std::ranges::transform(
std::span{submit_infos, submit_count}, std::back_inserter(next_submits),
@@ -489,7 +489,7 @@ vkQueueSubmit2(VkQueue queue, std::uint32_t submit_count,
auto next_cbs = std::vector<std::unique_ptr<cbs_t>>{};
auto handles = std::vector<std::shared_ptr<TimestampPool::Handle>>{};
- const auto now = std::chrono::steady_clock::now();
+ const auto now = DeviceContext::Clock::now();
std::ranges::transform(
std::span{submit_infos, submit_count}, std::back_inserter(next_submits),
diff --git a/src/queue_context.cc b/src/queue_context.cc
index 6367e16..69bcb13 100644
--- a/src/queue_context.cc
+++ b/src/queue_context.cc
@@ -143,7 +143,7 @@ void QueueContext::drain_submissions_to_frame() {
this->in_flight_frames.emplace_back(
Frame{.submissions = std::move(this->submissions),
- .cpu_post_present_time = std::chrono::steady_clock::now()});
+ .cpu_post_present_time = DeviceContext::Clock::now()});
assert(std::size(this->in_flight_frames.back().submissions));
// *valid but unspecified state after move, so clear!*
this->submissions.clear();
@@ -293,8 +293,9 @@ void QueueContext::drain_frames_to_timings() {
if (const auto T = std::size(this->timings);
T > this->MAX_TRACKED_TIMINGS) {
- const auto dist = T - this->MAX_TRACKED_TIMINGS;
- const auto erase_to_iter = std::next(std::begin(this->timings), dist);
+ const auto erase_to_iter =
+ std::next(std::begin(this->timings),
+ static_cast<long>(T - MAX_TRACKED_TIMINGS));
this->timings.erase(std::begin(this->timings), erase_to_iter);
}
}
@@ -351,7 +352,7 @@ void QueueContext::sleep_in_present() {
// |------------------------x------------------------------|
// ^ first_gpu_work now last_gpu_work ^
- const auto now = std::chrono::steady_clock::now();
+ const auto now = DeviceContext::Clock::now();
const auto dist = now - first_gpu_work;
const auto expected_dist_to_last = expected_gputime - dist;
@@ -375,22 +376,22 @@ void QueueContext::sleep_in_present() {
}
bool QueueContext::should_inject_timestamps() const {
- const auto& pd = this->device_context.physical_device;
+ const auto& physical_device = this->device_context.physical_device;
- if (!pd.supports_required_extensions) {
+ if (!physical_device.supports_required_extensions) {
return false;
}
// Don't bother injecting timestamps during queue submission if both AL1 and
// AL2 are disabled.
if (!this->device_context.was_antilag_requested &&
- !pd.instance.layer.is_antilag_1_enabled) {
+ !physical_device.instance.layer.is_antilag_1_enabled) {
return false;
}
- assert(pd.queue_properties);
- const auto& queue_props = *pd.queue_properties;
+ assert(physical_device.queue_properties);
+ const auto& queue_props = *physical_device.queue_properties;
assert(this->queue_family_index < std::size(queue_props));
const auto& props = queue_props[this->queue_family_index];
diff --git a/src/queue_context.hh b/src/queue_context.hh
index fbb04e8..0e6441e 100644
--- a/src/queue_context.hh
+++ b/src/queue_context.hh
@@ -20,7 +20,7 @@ class QueueContext final : public Context {
// The amount of finished frame timing data we keep before eviction.
// For now, this value is also the number of data points used in the
// calculation of gpu timing information.
- static constexpr auto MAX_TRACKED_TIMINGS = 50;
+ static constexpr auto MAX_TRACKED_TIMINGS = 50u;
// The amount of queue submissions we allow tracked per queue before
// we give up tracking them. For a queue that is presented to,
// these submissions will be constantly moved to Frame structs so
@@ -29,7 +29,7 @@ class QueueContext final : public Context {
// amount of vkQueueSubmit's per frame. For queues which don't
// present, this limit stops them from growing limitlessly in memory
// as we may not necessarily manually evict them yet.
- static constexpr auto MAX_TRACKED_SUBMISSIONS = 50;
+ static constexpr auto MAX_TRACKED_SUBMISSIONS = 50u;
public:
DeviceContext& device_context;
diff --git a/src/timestamp_pool.cc b/src/timestamp_pool.cc
index 4ca1d5a..e02a4fc 100644
--- a/src/timestamp_pool.cc
+++ b/src/timestamp_pool.cc
@@ -167,7 +167,7 @@ TimestampPool::Handle::get_time_spinlock(
auto time = this->get_time();
for (; !time.has_value(); time = this->get_time()) {
- if (const auto now = std::chrono::steady_clock::now(); now >= until) {
+ if (const auto now = DeviceContext::Clock::now(); now >= until) {
break;
}
}