aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--src/queue_context.cc78
-rw-r--r--src/queue_context.hh4
-rw-r--r--src/timestamp_pool.cc45
-rw-r--r--src/timestamp_pool.hh8
4 files changed, 77 insertions, 58 deletions
diff --git a/src/queue_context.cc b/src/queue_context.cc
index 7121898..363149b 100644
--- a/src/queue_context.cc
+++ b/src/queue_context.cc
@@ -152,7 +152,6 @@ void QueueContext::notify_present(const VkPresentInfoKHR& info) {
// completely redundant, in all cases it was exactly what we have here. But
// I could be wrong.
- // that wa
const auto start_iter = std::begin(this->submissions);
// no op submit?
if (start_iter == std::end(this->submissions)) {
@@ -164,13 +163,31 @@ void QueueContext::notify_present(const VkPresentInfoKHR& info) {
(*start_iter)->debug += "first_during_present ";
(*last_iter)->debug += "last_during_present ";
- // TODO We probably want to walk through other graphics queues and get
- // the time that they submitted at some stage. Somewhat difficult to do,
- // we probably need to create a graph of dependencies but it seems like
- // overengineering. I will just get this working well before I deal
- // with complicated edge cases.
+ // The last submission is either in flight, already processed, or we
+ // just happen to be the first frame and we can just set it to our start
+ // with little consequence.
+ const auto prev_frame_last_submit = [&]() -> auto {
+ if (const auto iter = std::rbegin(this->in_flight_frames);
+ iter != std::rend(this->in_flight_frames)) {
+
+ assert(!iter->submissions.empty());
+ return iter->submissions.back();
+ }
+
+ if (const auto iter = std::rbegin(this->timings);
+ iter != std::rend(this->timings)) {
+
+ const auto& submissions = (*iter)->frame.submissions;
+ assert(!submissions.empty());
+
+ return submissions.back();
+ }
+
+ return *start_iter;
+ }();
this->in_flight_frames.emplace_back(Frame{
+ .prev_frame_last_submit = prev_frame_last_submit,
.submissions = std::move(this->submissions),
.sequence = (*last_iter)->sequence,
});
@@ -240,9 +257,7 @@ void QueueContext::process_frames() {
frame.submissions, std::back_inserter(intervals),
[&, this](const auto& submission) {
const auto get_time = [&, this](const auto& handle) {
- const auto t = handle->get_ticks(*this->timestamp_pool);
- assert(t.has_value()); // guaranteed from seq.
- return clock.ticks_to_time(*t);
+ return handle->get_time();
};
return Interval{
@@ -280,10 +295,15 @@ void QueueContext::process_frames() {
return gputime + (end - start);
});
- const auto start = merged.front().start;
+ // The start should be the previous frame's last submission, NOT when we
+ // start here. Otherwise, we won't account for the time between the
+ // previous frame's last submission and our first submission, which
+ // could genuinely be a large period of time. So look for it in timings,
+ // because it's guaranteed to be there at this stage. Either that, or
+ // we're the first frame, and it doesn't really matter.
+ const auto start = frame.prev_frame_last_submit->end_handle->get_time();
const auto end = merged.back().end;
- const auto not_gputime =
- (merged.back().end - merged.front().start) - gputime;
+ const auto not_gputime = (end - start) - gputime;
auto timing = Timing{
.gputime = gputime,
@@ -304,7 +324,6 @@ void QueueContext::process_frames() {
}
}
-using opt_time_point_t = std::optional<DeviceContext::Clock::time_point_t>;
void QueueContext::sleep_in_present() {
const auto& device = this->device_context;
const auto& vtable = device.vtable;
@@ -350,27 +369,20 @@ void QueueContext::sleep_in_present() {
return;
}
- const auto expected_gputime = [&, this]() {
+ const auto calc_median = [&, this](const auto& getter) {
auto vect = std::vector<Timing*>{};
std::ranges::transform(this->timings, std::back_inserter(vect),
[](const auto& timing) { return timing.get(); });
- std::ranges::sort(vect, [](const auto& a, const auto& b) {
- return a->gputime < b->gputime;
+ std::ranges::sort(vect, [&](const auto& a, const auto& b) {
+ return getter(a) < getter(b);
});
- // return vect[0]->frametime;
- return vect[std::size(vect) / 2]->gputime;
- }();
+ return getter(vect[std::size(vect) / 2]);
+ };
- const auto expected_not_gputime = [&, this]() {
- auto vect = std::vector<Timing*>{};
- std::ranges::transform(this->timings, std::back_inserter(vect),
- [](const auto& timing) { return timing.get(); });
- std::ranges::sort(vect, [](const auto& a, const auto& b) {
- return a->not_gputime < b->not_gputime;
- });
- // return vect[0]->frametime;
- return vect[std::size(vect) / 2]->not_gputime;
- }();
+ const auto expected_gputime =
+ calc_median([](const auto& timing) { return timing->gputime; });
+ const auto expected_not_gputime =
+ calc_median([](const auto& timing) { return timing->not_gputime; });
std::cerr << " expected gputime: ";
debug_log_time(expected_gputime);
@@ -413,13 +425,7 @@ void QueueContext::sleep_in_present() {
// We now know that A is available because its semaphore has been
// signalled.
-
- const auto a = [&, this]() {
- const auto& handle = frame.submissions.front()->start_handle;
- const auto ticks = handle->get_ticks(*this->timestamp_pool);
- assert(ticks.has_value());
- return device_context.clock.ticks_to_time(*ticks);
- }();
+ const auto a = frame.prev_frame_last_submit->end_handle->get_time();
const auto now = std::chrono::steady_clock::now();
const auto dist = now - a;
diff --git a/src/queue_context.hh b/src/queue_context.hh
index ed13465..219e6fb 100644
--- a/src/queue_context.hh
+++ b/src/queue_context.hh
@@ -48,10 +48,12 @@ class QueueContext final : public Context {
std::deque<submission_ptr_t> submissions;
// In flight frame submissions grouped together.
- // The first element in the vector refers to the first submission that
+ // The first element in the deque refers to the first submission that
// contributed to that frame. The last element is the last submission before
// present was called.
+ // std::size(submissions) >= 1 btw
struct Frame {
+ submission_ptr_t prev_frame_last_submit;
std::deque<submission_ptr_t> submissions;
std::uint64_t sequence;
};
diff --git a/src/timestamp_pool.cc b/src/timestamp_pool.cc
index 01afe07..854fae1 100644
--- a/src/timestamp_pool.cc
+++ b/src/timestamp_pool.cc
@@ -76,18 +76,19 @@ std::shared_ptr<TimestampPool::Handle> TimestampPool::acquire() {
const auto query_index = *std::begin(indices);
assert(indices.erase(query_index));
- return std::make_shared<Handle>(*not_empty_iter, query_index);
+ return std::make_shared<Handle>(*this, *not_empty_iter, query_index);
}
-TimestampPool::Handle::Handle(const std::shared_ptr<QueryChunk>& origin_chunk,
+TimestampPool::Handle::Handle(const TimestampPool& timestamp_pool,
+ const std::shared_ptr<QueryChunk>& origin_chunk,
const std::uint64_t& query_index)
- : query_pool(origin_chunk->query_pool), query_index(query_index),
- origin_chunk(origin_chunk),
+ : timestamp_pool(timestamp_pool), query_pool(origin_chunk->query_pool),
+ query_index(query_index), origin_chunk(origin_chunk),
command_buffer((*origin_chunk->command_buffers)[query_index]) {}
TimestampPool::Handle::~Handle() {
- // Parent destructing shouldn't mean we should have a bunch of insertions
- // for zero reason.
+ // Parent destructing shouldn't mean we should have a bunch of
+ // insertions for zero reason.
if (const auto ptr = this->origin_chunk.lock(); ptr) {
assert(ptr->free_indices->insert(this->query_index).second);
}
@@ -122,29 +123,35 @@ void TimestampPool::Handle::setup_command_buffers(
vtable.EndCommandBuffer(tail.command_buffer);
}
-std::optional<std::uint64_t>
-TimestampPool::Handle::get_ticks(const TimestampPool& pool) {
-
- const auto& device_context = pool.queue_context.device_context;
- const auto& vtable = device_context.vtable;
+DeviceContext::Clock::time_point_t TimestampPool::Handle::get_time() {
+ const auto& device_ctx = this->timestamp_pool.queue_context.device_context;
+ const auto& vtable = device_ctx.vtable;
+ // For debug builds, we're going to query the availability bit so we can
+ // assert that after the semaphore has flagged it as naturally available.
struct QueryResult {
std::uint64_t value;
+#ifndef NDEBUG
std::uint64_t available;
+#endif
};
auto query_result = QueryResult{};
+ constexpr auto query_flags = []() -> auto {
+ auto flag = VkQueryResultFlags{VK_QUERY_RESULT_64_BIT};
+#ifndef NDEBUG
+ flag |= VK_QUERY_RESULT_WITH_AVAILABILITY_BIT;
+#endif
+ return flag;
+ }();
+
const auto r = vtable.GetQueryPoolResults(
- device_context.device, query_pool, this->query_index, 1,
- sizeof(query_result), &query_result, sizeof(query_result),
- VK_QUERY_RESULT_64_BIT | VK_QUERY_RESULT_WITH_AVAILABILITY_BIT);
+ device_ctx.device, query_pool, this->query_index, 1,
+ sizeof(query_result), &query_result, sizeof(query_result), query_flags);
- assert(r == VK_SUCCESS || r == VK_NOT_READY);
+ assert(r == VK_SUCCESS && query_result.available);
- if (!query_result.available) {
- return std::nullopt;
- }
- return query_result.value;
+ return device_ctx.clock.ticks_to_time(query_result.value);
}
TimestampPool::~TimestampPool() {
diff --git a/src/timestamp_pool.hh b/src/timestamp_pool.hh
index 681c9e2..b7aa54e 100644
--- a/src/timestamp_pool.hh
+++ b/src/timestamp_pool.hh
@@ -18,6 +18,8 @@
#include <unordered_set>
#include <vector>
+#include "device_context.hh"
+
namespace low_latency {
class QueueContext;
@@ -58,6 +60,7 @@ class TimestampPool final {
friend class TimestampPool;
private:
+ const TimestampPool& timestamp_pool;
const std::weak_ptr<QueryChunk> origin_chunk;
public:
@@ -66,7 +69,8 @@ class TimestampPool final {
const VkCommandBuffer command_buffer;
public:
- Handle(const std::shared_ptr<QueryChunk>& origin_chunk,
+ Handle(const TimestampPool& timestamp_pool,
+ const std::shared_ptr<QueryChunk>& origin_chunk,
const std::uint64_t& query_index);
Handle(const Handle& handle) = delete;
Handle(Handle&&) = delete;
@@ -78,7 +82,7 @@ class TimestampPool final {
void setup_command_buffers(const Handle& tail,
const QueueContext& queue_context) const;
- std::optional<std::uint64_t> get_ticks(const TimestampPool& pool);
+ DeviceContext::Clock::time_point_t get_time();
};
public: