Skip to content

Commit

Permalink
PerformanceTracker: Use SPSCQueue and atomic to eliminate need for a …
Browse files Browse the repository at this point in the history
…mutex. Clean up some math.
  • Loading branch information
jordan-woyak committed Mar 6, 2025
1 parent afa5285 commit f87093a
Show file tree
Hide file tree
Showing 3 changed files with 91 additions and 112 deletions.
3 changes: 3 additions & 0 deletions Source/Core/VideoCommon/PerformanceMetrics.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,9 @@ double PerformanceMetrics::GetLastSpeedDenominator() const

void PerformanceMetrics::DrawImGuiStats(const float backbuffer_scale)
{
m_vps_counter.UpdateStats();
m_fps_counter.UpdateStats();

const int movable_flag = Config::Get(Config::GFX_MOVABLE_PERFORMANCE_METRICS) ?
ImGuiWindowFlags_None :
ImGuiWindowFlags_NoMove;
Expand Down
162 changes: 68 additions & 94 deletions Source/Core/VideoCommon/PerformanceTracker.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,12 @@
#include <algorithm>
#include <cmath>
#include <iomanip>
#include <mutex>

#include <implot.h>

#include "Common/CommonTypes.h"
#include "Common/FileUtil.h"
#include "Common/MathUtil.h"
#include "Core/Core.h"
#include "VideoCommon/VideoConfig.h"

Expand All @@ -21,14 +21,11 @@ static constexpr u64 MAX_QUALITY_GRAPH_SIZE = 1UL << 8;

PerformanceTracker::PerformanceTracker(const std::optional<std::string> log_name,
const std::optional<DT> sample_window_duration)
: m_on_state_changed_handle{Core::AddOnStateChangedCallback([this](Core::State state) {
if (state == Core::State::Paused)
SetPaused(true);
else if (state == Core::State::Running)
SetPaused(false);
})},
m_log_name{log_name}, m_sample_window_duration{sample_window_duration}
: m_log_name{log_name}, m_sample_window_duration{sample_window_duration}
{
m_on_state_changed_handle =
Core::AddOnStateChangedCallback([this](Core::State state) { m_is_last_time_sane = false; });

Reset();
}

Expand All @@ -39,151 +36,143 @@ PerformanceTracker::~PerformanceTracker()

void PerformanceTracker::Reset()
{
std::unique_lock lock{m_mutex};
m_raw_dts.Clear();
m_dt_queue.clear();

m_dt_total = DT::zero();
m_dt_queue.clear();
m_last_raw_dt = DT::zero();
m_last_time = Clock::now();
m_hz_avg = 0.0;
m_dt_avg = DT::zero();
m_dt_std = std::nullopt;
m_dt_std = DT::zero();
m_is_last_time_sane = false;
}

void PerformanceTracker::Count()
{
std::unique_lock lock{m_mutex};
const TimePoint current_time{Clock::now()};

const DT diff{current_time - m_last_time};
m_last_time = current_time;

if (m_paused)
if (!m_is_last_time_sane)
{
m_is_last_time_sane = true;
return;
}

const DT window{GetSampleWindow()};
m_last_raw_dt = diff;
m_raw_dts.Push(diff);
}

const TimePoint time{Clock::now()};
const DT diff{time - m_last_time};
void PerformanceTracker::UpdateStats()
{
DT diff{};
while (m_raw_dts.Pop(diff))
HandleRawDt(diff);
}

m_last_time = time;
void PerformanceTracker::HandleRawDt(DT diff)
{
if (m_dt_queue.size() == MAX_DT_QUEUE_SIZE)
PopBack();

PushFront(diff);

if (m_dt_queue.size() == MAX_DT_QUEUE_SIZE)
PopBack();
const DT window{GetSampleWindow()};

while (m_dt_total - m_dt_queue.back() >= window)
PopBack();

// Simple Moving Average Throughout the Window
m_dt_avg = m_dt_total / m_dt_queue.size();
const double hz = DT_s(1.0) / m_dt_avg;
const DT dt_avg = m_dt_total / m_dt_queue.size();
const double hz = DT_s(1.0) / dt_avg;
m_dt_avg = dt_avg;

// Exponential Moving Average
const DT_s rc = SAMPLE_RC_RATIO * std::min(window, m_dt_total);
const double a = 1.0 - std::exp(-(DT_s(diff) / rc));

// Sometimes euler averages can break when the average is inf/nan
if (std::isfinite(m_hz_avg))
m_hz_avg += a * (hz - m_hz_avg);
const auto hz_avg = m_hz_avg.load();
if (std::isfinite(hz_avg))
m_hz_avg = hz_avg + a * (hz - hz_avg);
else
m_hz_avg = hz;

m_dt_std = std::nullopt;
// Update Std Dev
MathUtil::RunningVariance<double> variance;
for (auto dt : m_dt_queue)
variance.Push(DT_s(dt).count());
m_dt_std = std::chrono::duration_cast<DT>(DT_s(variance.PopulationStandardDeviation()));

LogRenderTimeToFile(diff);
}

DT PerformanceTracker::GetSampleWindow() const
{
// This reads a constant value and thus does not need a mutex
return m_sample_window_duration.value_or(
duration_cast<DT>(DT_us{std::max(1, g_ActiveConfig.iPerfSampleUSec)}));
}

double PerformanceTracker::GetHzAvg() const
{
std::shared_lock lock{m_mutex};
return m_hz_avg;
}

DT PerformanceTracker::GetDtAvg() const
{
std::shared_lock lock{m_mutex};
return m_dt_avg;
}

DT PerformanceTracker::GetDtStd() const
{
std::unique_lock lock{m_mutex};

if (m_dt_std)
return *m_dt_std;

if (m_dt_queue.empty())
return *(m_dt_std = DT::zero());

double total = 0.0;
for (auto dt : m_dt_queue)
{
double diff = DT_s(dt - m_dt_avg).count();
total += diff * diff;
}

// This is a weighted standard deviation
return *(m_dt_std = std::chrono::duration_cast<DT>(DT_s(std::sqrt(total / m_dt_queue.size()))));
return m_dt_std;
}

DT PerformanceTracker::GetLastRawDt() const
{
std::shared_lock lock{m_mutex};

if (m_dt_queue.empty())
return DT::zero();

return m_dt_queue.front();
return m_last_raw_dt;
}

void PerformanceTracker::ImPlotPlotLines(const char* label) const
{
static std::array<float, MAX_DT_QUEUE_SIZE + 2> x, y;

std::shared_lock lock{m_mutex};
// "quality" graph uses twice as many points.
static_assert(MAX_QUALITY_GRAPH_SIZE * 2 <= MAX_DT_QUEUE_SIZE);
static std::array<float, MAX_DT_QUEUE_SIZE + 1> x, y;

if (m_dt_queue.empty())
return;

// Decides if there are too many points to plot using rectangles
const bool quality = m_dt_queue.size() < MAX_QUALITY_GRAPH_SIZE;

const DT update_time = Clock::now() - m_last_time;
const float predicted_frame_time = DT_ms(std::max(update_time, m_dt_queue.front())).count();

std::size_t points = 0;
if (quality)
{
x[points] = 0.f;
y[points] = predicted_frame_time;
++points;
}

x[points] = DT_ms(update_time).count();
y[points] = predicted_frame_time;
++points;

for (auto dt : m_dt_queue)
{
const float frame_time_ms = DT_ms(dt).count();
std::size_t point_index = 0;
const auto add_point = [&](DT dt, DT shift_x, float prev_ms) {
const float ms = DT_ms{dt}.count();

if (quality)
{
x[points] = x[points - 1];
y[points] = frame_time_ms;
++points;
x[point_index] = prev_ms;
y[point_index] = ms;
++point_index;
}

x[points] = x[points - 1] + frame_time_ms;
y[points] = frame_time_ms;
++points;
}
x[point_index] = prev_ms + DT_ms{shift_x}.count();
y[point_index] = ms;
++point_index;
};

ImPlot::PlotLine(label, x.data(), y.data(), static_cast<int>(points));
// Rightmost point.
const auto update_time = Clock::now() - m_last_time;
const auto predicted_frame_time = std::max(update_time, m_dt_queue.front());
add_point(predicted_frame_time, DT{}, 0);

// Other points, right to left.
for (auto dt : m_dt_queue)
add_point(dt, dt, x[point_index - 1]);

ImPlot::PlotLine(label, x.data(), y.data(), static_cast<int>(point_index));
}

void PerformanceTracker::PushFront(DT value)
Expand Down Expand Up @@ -211,18 +200,3 @@ void PerformanceTracker::LogRenderTimeToFile(DT val)

m_bench_file << std::fixed << std::setprecision(8) << DT_ms(val).count() << std::endl;
}

void PerformanceTracker::SetPaused(bool paused)
{
std::unique_lock lock{m_mutex};

m_paused = paused;
if (m_paused)
{
m_last_time = TimePoint::max();
}
else
{
m_last_time = Clock::now();
}
}
38 changes: 20 additions & 18 deletions Source/Core/VideoCommon/PerformanceTracker.h
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,13 @@

#pragma once

#include <atomic>
#include <deque>
#include <fstream>
#include <optional>
#include <shared_mutex>

#include "Common/CommonTypes.h"
#include "Common/SPSCQueue.h"

class PerformanceTracker
{
Expand All @@ -22,30 +23,30 @@ class PerformanceTracker
PerformanceTracker(PerformanceTracker&&) = delete;
PerformanceTracker& operator=(PerformanceTracker&&) = delete;

// Functions for recording performance information
void Reset();

// Calls must come from the same thread.
// UpdateStats is expected to be called regularly to empty the SPSC queue.
void UpdateStats();
void ImPlotPlotLines(const char* label) const;

// May call from any thread, but not concurrently, not that you'd want to..
void Count();

// Functions for reading performance information
// May call from any thread.
DT GetSampleWindow() const;

double GetHzAvg() const;

DT GetDtAvg() const;
DT GetDtStd() const;

DT GetLastRawDt() const;

void ImPlotPlotLines(const char* label) const;

private:
void LogRenderTimeToFile(DT val);
void SetPaused(bool paused);

void HandleRawDt(DT value);
void PushFront(DT value);
void PopBack();

bool m_paused = false;
int m_on_state_changed_handle;

// Name of log file and file stream
Expand All @@ -54,6 +55,12 @@ class PerformanceTracker

// Last time Count() was called
TimePoint m_last_time;
std::atomic<bool> m_is_last_time_sane = false;

// Push'd from Count()
// and Pop'd from UpdateStats()
Common::SPSCQueue<DT, false> m_raw_dts;
std::atomic<DT> m_last_raw_dt = DT::zero();

// Amount of time to sample dt's over (defaults to config)
const std::optional<DT> m_sample_window_duration;
Expand All @@ -63,12 +70,7 @@ class PerformanceTracker
std::deque<DT> m_dt_queue;

// Average rate/time throughout the window
DT m_dt_avg = DT::zero(); // Uses Moving Average
double m_hz_avg = 0.0; // Uses Moving Average + Euler Average

// Used to initialize this on demand instead of on every Count()
mutable std::optional<DT> m_dt_std = std::nullopt;

// Used to enable thread safety with the performance tracker
mutable std::shared_mutex m_mutex;
std::atomic<DT> m_dt_avg = DT::zero(); // Uses Moving Average
std::atomic<double> m_hz_avg = 0.0; // Uses Moving Average + Euler Average
std::atomic<DT> m_dt_std = DT::zero();
};

0 comments on commit f87093a

Please sign in to comment.