diff --git a/README.md b/README.md
index e1507d715f..7183104ea9 100644
--- a/README.md
+++ b/README.md
@@ -417,6 +417,7 @@ Options starting with "gl_*" are for OpenGL.
 - `gl_size_query = viewport` : Specify what to use for getting display size. Options are "viewport", "scissorbox" or disabled. Defaults to using glXQueryDrawable.
 - `gl_bind_framebuffer = 0..N` : (Re)bind given framebuffer before MangoHud gets drawn. Helps with Crusader Kings III.
 - `gl_dont_flip = 1` : Don't swap origin if using GL_UPPER_LEFT. Helps with Ryujinx.
+- `libdrm_sampling` : Use libdrm_amdgpu to calculate GPU utilization. Helps with some problematic Vega GPUs.
 
 ## MangoHud FPS logging
 
diff --git a/data/MangoHud.conf b/data/MangoHud.conf
index b4eed108fe..c053669801 100644
--- a/data/MangoHud.conf
+++ b/data/MangoHud.conf
@@ -215,6 +215,9 @@ frame_timing
 ### Don't swap origin if using GL_UPPER_LEFT. Helps with Ryujinx
 # gl_dont_flip=1
 
+### Use libdrm_amdgpu to calculate GPU utilization. Helps with some problematic Vega GPUs.
+# libdrm_sampling
+
 ################ INTERACTION #################
 
 ### Change toggle keybinds for the hud & logging
diff --git a/meson.build b/meson.build
index e87bff9784..2b18b96f97 100644
--- a/meson.build
+++ b/meson.build
@@ -88,10 +88,14 @@ if is_unixy
   dep_wayland_client = dependency('wayland-client',
       required: get_option('with_wayland'), version : '>=1.11')
   dbus_dep = dependency('dbus-1', required: get_option('with_dbus')).partial_dependency(compile_args : true, includes : true)
+  libdrm_dep = dependency('libdrm')
+  libdrm_amdgpu_dep = dependency('libdrm_amdgpu')
 else
   dep_x11 = null_dep
   dep_wayland_client = null_dep
   dbus_dep = null_dep
+  libdrm_dep = null_dep
+  libdrm_amdgpu_dep = null_dep
 endif
 
 if dep_x11.found()
diff --git a/src/amdgpu_libdrm.cpp b/src/amdgpu_libdrm.cpp
new file mode 100644
index 0000000000..bb6b551a15
--- /dev/null
+++ b/src/amdgpu_libdrm.cpp
@@ -0,0 +1,158 @@
+// NOTE(review): the header names on the #include lines below were lost when
+// this patch was extracted; they are restored from the symbols this file
+// uses. Confirm against the original commit before applying.
+#include <fcntl.h>
+#include <unistd.h>
+#include <xf86drm.h>
+#include <libdrm/amdgpu.h>
+#include <spdlog/spdlog.h>
+#include <chrono>
+#include <cstdint>
+#include <deque>
+#include <mutex>
+#include <string>
+#include <thread>
+#include "gpu.h"
+
+#include "amdgpu_libdrm.h"
+
+#define LIBDRM_MAX_DEVICES 32
+
+// Sampling period in microseconds and number of samples kept for averaging.
+#define LIBDRM_SAMPLE_DELAY 3500
+#define LIBDRM_SAMPLE_BUF_SIZE 512
+
+// Byte offset of the GRBM_STATUS register (converted to a dword index on use).
+#define LIBDRM_GRBM_STATUS 0x8010
+
+enum LIBDRM_GRBM_BITS {
+    LIBDRM_GRBM_BUSY_BIT = 1U << 31
+};
+
+struct libdrm_sample {
+    bool busy_bit;
+};
+
+struct libdrm_stats {
+    int busy;
+};
+
+std::string dri_device_path;
+bool do_libdrm_sampling = false;
+
+// Ring of the most recent samples; written by the sampling thread and read
+// by libdrm_get_info(), always under sample_buf_m.
+static std::deque<libdrm_sample> sample_buf(LIBDRM_SAMPLE_BUF_SIZE, libdrm_sample{false});
+static std::mutex sample_buf_m;
+
+static amdgpu_device_handle amdgpu_handle;
+
+// Reads GRBM_STATUS once and sets sample->busy_bit when the busy bit is set.
+// A failed register read leaves the sample untouched (counted as idle).
+static void libdrm_do_sample(libdrm_sample *sample) {
+    uint32_t registers = 0;
+    if (amdgpu_read_mm_registers(amdgpu_handle, LIBDRM_GRBM_STATUS / 4, 1, 0xffffffff, 0, &registers)) {
+        return;
+    }
+
+    if (registers & LIBDRM_GRBM_BUSY_BIT) sample->busy_bit = true;
+}
+
+// Sampling loop: takes one sample per LIBDRM_SAMPLE_DELAY microseconds and
+// rotates it into sample_buf. Never returns.
+static void libdrm_thread() {
+    const auto sample_period = std::chrono::microseconds(LIBDRM_SAMPLE_DELAY);
+    while (true) {
+        // steady_clock: the interval must not be affected by wall-clock changes
+        const auto start_time = std::chrono::steady_clock::now();
+
+        libdrm_sample sample {false};
+        libdrm_do_sample(&sample);
+
+        {
+            std::lock_guard<std::mutex> lock(sample_buf_m);
+            sample_buf.pop_front();
+            sample_buf.push_back(sample);
+        }
+
+        const auto elapsed = std::chrono::steady_clock::now() - start_time;
+        if (elapsed < sample_period) {
+            std::this_thread::sleep_for(sample_period - elapsed);
+        }
+    }
+}
+
+// Finds the render node that belongs to dri_device_path and initializes
+// amdgpu_handle from it. Returns 0 on success, -1 on failure.
+static int libdrm_initialize() {
+    drmDevicePtr devices[LIBDRM_MAX_DEVICES];
+    int device_count = drmGetDevices2(0, devices, LIBDRM_MAX_DEVICES);
+    if (device_count < 0) {
+        SPDLOG_ERROR("drmGetDevices2 failed");
+        return -1;
+    }
+
+    // Copy the node path so it stays valid after the device list is freed
+    std::string renderd_node;
+    for (int i = 0; i < device_count; i++) {
+        constexpr int required_nodes = (1 << DRM_NODE_PRIMARY) | (1 << DRM_NODE_RENDER);
+        if ((devices[i]->available_nodes & required_nodes) != required_nodes) {
+            continue;
+        }
+
+        if (devices[i]->nodes[DRM_NODE_PRIMARY] == dri_device_path) {
+            renderd_node = devices[i]->nodes[DRM_NODE_RENDER];
+            break;
+        }
+    }
+    drmFreeDevices(devices, device_count);
+
+    if (renderd_node.empty()) {
+        SPDLOG_ERROR("No renderD node found for '{}'", dri_device_path);
+        return -1;
+    }
+
+    int fd = open(renderd_node.c_str(), O_RDWR);
+    if (fd < 0) {
+        SPDLOG_ERROR("renderD node open failed: '{}'", renderd_node);
+        return -1;
+    }
+
+    uint32_t libdrm_minor, libdrm_major;
+    if (amdgpu_device_initialize(fd, &libdrm_major, &libdrm_minor, &amdgpu_handle)) {
+        SPDLOG_ERROR("amdgpu_device_initialize failed");
+        close(fd);
+        return -1;
+    }
+
+    close(fd); // amdgpu_device_initialize should F_DUPFD it internally, so there is no need to keep fd open
+    return 0;
+}
+
+// Lazily starts sampling on first call, then publishes the share of busy
+// samples in the buffer as gpu_info.load (0..100). Disables
+// do_libdrm_sampling if initialization fails.
+void libdrm_get_info() {
+    static bool init = false;
+    if (!init) {
+        if (libdrm_initialize()) {
+            do_libdrm_sampling = false;
+            SPDLOG_ERROR("Could not initialize libdrm");
+            return;
+        }
+        std::thread(libdrm_thread).detach();
+        init = true;
+        SPDLOG_INFO("Initialized libdrm sampling");
+    }
+
+    libdrm_stats stats {0};
+
+    {
+        std::lock_guard<std::mutex> lock(sample_buf_m);
+        for (const auto &sample : sample_buf) {
+            stats.busy += sample.busy_bit ? 1 : 0;
+        }
+    }
+
+    gpu_info.load = stats.busy * 100 / LIBDRM_SAMPLE_BUF_SIZE;
+}
diff --git a/src/amdgpu_libdrm.h b/src/amdgpu_libdrm.h
new file mode 100644
index 0000000000..aad4bf855a
--- /dev/null
+++ b/src/amdgpu_libdrm.h
@@ -0,0 +1,8 @@
+#pragma once
+
+#include <string>
+
+extern std::string dri_device_path;
+extern bool do_libdrm_sampling;
+
+void libdrm_get_info();
diff --git a/src/amdgpu.cpp b/src/amdgpu_metrics.cpp
similarity index 99%
rename from src/amdgpu.cpp
rename to src/amdgpu_metrics.cpp
index 865f773110..2d2a5c519c 100644
--- a/src/amdgpu.cpp
+++ b/src/amdgpu_metrics.cpp
@@ -1,7 +1,7 @@
 #include <spdlog/spdlog.h>
 #include <thread>
 #include <sys/sysinfo.h>
-#include "amdgpu.h"
+#include "amdgpu_metrics.h"
 #include "gpu.h"
 #include "cpu.h"
 #include "overlay.h"
@@ -172,7 +172,6 @@ void amdgpu_get_metrics(){
 
 	amdgpu_common_metrics_m.lock();
 	gpu_info.load = amdgpu_common_metrics.gpu_load_percent;
-
 	gpu_info.powerUsage = amdgpu_common_metrics.average_gfx_power_w;
 	gpu_info.CoreClock = amdgpu_common_metrics.current_gfxclk_mhz;
 	gpu_info.MemClock = amdgpu_common_metrics.current_uclk_mhz;
diff --git a/src/amdgpu.h b/src/amdgpu_metrics.h
similarity index 100%
rename from src/amdgpu.h
rename to src/amdgpu_metrics.h
diff --git a/src/gpu.cpp b/src/gpu.cpp
index 549f6fdf45..957ce70d41 100644
--- a/src/gpu.cpp
+++ b/src/gpu.cpp
@@ -7,11 +7,12 @@
 #include <spdlog/spdlog.h>
 #include "nvctrl.h"
 #include "timing.hpp"
+#include "amdgpu_libdrm.h"
 #ifdef HAVE_NVML
 #include "nvidia_info.h"
 #endif
 
-#include "amdgpu.h"
+#include "amdgpu_metrics.h"
 
 using namespace std::chrono_literals;
 
@@ -74,7 +75,7 @@ nvapi_util();
 void getAmdGpuInfo(){
     int64_t value = 0;
     if (metrics_path.empty()){
-        if (amdgpu.busy) {
+        if (!do_libdrm_sampling && amdgpu.busy) {
             rewind(amdgpu.busy);
             fflush(amdgpu.busy);
             int value = 0;
diff --git a/src/meson.build b/src/meson.build
index 6849ecbe0d..73fee49c8e 100644
--- a/src/meson.build
+++ b/src/meson.build
@@ -59,7 +59,7 @@ vklayer_files = files(
   'vulkan.cpp',
   'blacklist.cpp',
   'file_utils.cpp',
-  'amdgpu.cpp',
+  'amdgpu_metrics.cpp',
   'intel.cpp'
 )
 opengl_files = []
@@ -89,6 +89,7 @@ if is_unixy
     'battery.cpp',
     'control.cpp',
     'gamepad.cpp',
+    'amdgpu_libdrm.cpp',
   )
 
   opengl_files = files(
@@ -187,7 +188,10 @@ mangohud_static_lib = static_library(
     dep_pthread,
     dep_vulkan,
     windows_deps,
-    json_dep],
+    json_dep,
+    libdrm_dep,
+    libdrm_amdgpu_dep,
+    ],
   include_directories : [inc_common],
   link_args : link_args,
   install_dir : libdir_mangohud,
@@ -249,6 +253,8 @@ if get_option('mangoapp')
       dep_x11,
       glfw3_dep,
       json_dep,
+      libdrm_dep,
+      libdrm_amdgpu_dep,
     ],
     include_directories : [inc_common],
     install_tag : 'mangoapp',
diff --git a/src/overlay.cpp b/src/overlay.cpp
index 3fca18e7da..bcee4d3c01 100644
--- a/src/overlay.cpp
+++ b/src/overlay.cpp
@@ -22,7 +22,8 @@
 #include "file_utils.h"
 #include "pci_ids.h"
 #include "iostats.h"
-#include "amdgpu.h"
+#include "amdgpu_metrics.h"
+#include "amdgpu_libdrm.h"
 
 
 #ifdef __linux__
@@ -122,6 +123,9 @@ void update_hw_info(const struct overlay_params& params, uint32_t vendorID)
       if (gpu_metrics_exists)
          amdgpu_get_metrics();
 
+      if (do_libdrm_sampling)
+         libdrm_get_info();
+
       if (vendorID == 0x10de)
          getNvidiaGpuInfo(params);
 
@@ -788,6 +792,12 @@ void init_gpu_stats(uint32_t& vendorID, uint32_t reported_deviceID, overlay_para
          break;
       }
 
+      if (params.enabled[OVERLAY_PARAM_ENABLED_libdrm_sampling]) {
+         do_libdrm_sampling = true;
+         dri_device_path = string("/dev/dri") + path.substr(path.find_last_of("/"));
+         SPDLOG_INFO("Using DRI device for libdrm sampling: '{}'", dri_device_path);
+      }
+
       // don't bother then
       if (metrics_path.empty() && !amdgpu.busy && vendorID != 0x8086) {
          params.enabled[OVERLAY_PARAM_ENABLED_gpu_stats] = false;
diff --git a/src/overlay_params.h b/src/overlay_params.h
index da32af5868..77f86ebdc7 100644
--- a/src/overlay_params.h
+++ b/src/overlay_params.h
@@ -89,6 +89,7 @@ typedef unsigned long KeySym;
    OVERLAY_PARAM_BOOL(hud_no_margin)                 \
    OVERLAY_PARAM_BOOL(hud_compact)                   \
    OVERLAY_PARAM_BOOL(exec_name)                     \
+   OVERLAY_PARAM_BOOL(libdrm_sampling)               \
    OVERLAY_PARAM_CUSTOM(fps_sampling_period)         \
    OVERLAY_PARAM_CUSTOM(output_folder)               \
    OVERLAY_PARAM_CUSTOM(output_file)                 \