Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use libdrm_amdgpu as an alternative GPU load information source #925

Open
wants to merge 31 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
31 commits
Select commit Hold shift + click to select a range
6223a1b
Add libdrm_amdgpu dependencies
bsolos Feb 14, 2023
c966247
Implement basic sampling logic
bsolos Feb 14, 2023
fe7e321
Fix buildscript
bsolos Feb 15, 2023
19c2cb6
Finish basic implementation of libdrm sampling
bsolos Feb 15, 2023
16f65f9
Implemented basic libdrm error handling
bsolos Feb 15, 2023
dfcd0d0
Added 'libdrm_sampling' parameter to toggle libdrm sampling from the …
bsolos Feb 15, 2023
f603312
Add 'libdrm_sampling' to the workarounds section in README.md
bsolos Feb 15, 2023
1c01bf9
Add '# libdrm_sampling' to MangoHud.conf
bsolos Feb 17, 2023
9f9869f
Check for OVERLAY_PARAM_ENABLED_libdrm_sampling only in init_gpu_stats
bsolos Feb 17, 2023
c92c5a4
Only use other GPU load calculation options if libdrm sampling is di…
bsolos Feb 17, 2023
aeaec25
Attempt to stabilize the sampling rate
bsolos Feb 19, 2023
4698bc5
Replaced cpp.find_library('drm_amdgpu',...) with dependency('libdrm_a…
bsolos Mar 6, 2023
9443da7
Unconditionally build with libdrm_sampling
bsolos Mar 6, 2023
5c7c5b1
Remove unnecessary declarations from amdgpu_libdrm.h and mark them as…
bsolos Mar 6, 2023
7f8c05c
Remove an unnecessary check in amdgpu_get_metrics
bsolos Mar 6, 2023
4e6a492
Undo all changes in amdgpu.cpp
bsolos Mar 6, 2023
0bf914e
Open the render device node instead of the card node
bsolos Mar 6, 2023
57c4da4
Remove the accidental LF from amdgpu.cpp
bsolos Mar 6, 2023
db32a41
Use drmGetDevices2 to find the renderD node for the DRI device
bsolos Mar 6, 2023
7099429
Find the renderD node more efficiently
bsolos Mar 7, 2023
0ecf9dd
Fixed the available_nodes check
bsolos Mar 7, 2023
e006789
Wrap the & operation in parentheses
bsolos Mar 7, 2023
effac71
Minor style changes
bsolos Mar 7, 2023
e8f8bb4
Move information used only by amdgpu_libdrm.cpp to the .cpp
bsolos Mar 7, 2023
4f2d394
Free resources on error in libdrm_initialize
bsolos Mar 7, 2023
42a92c7
Remove redundant 'required: true' from meson.build
bsolos Mar 7, 2023
00a203d
Make the renderd_node search code more readable
bsolos Mar 7, 2023
17c920f
Rename 'amdgpu.(cpp|h)' to 'amdgpu_metrics.(cpp|h)'
bsolos Mar 7, 2023
bf4ee87
Drop 'libdrm/' from the include
bsolos Mar 7, 2023
cb54fbe
Resolve some rebasing artifacts
bsolos Mar 7, 2023
8adaf7a
Do cleanup properly in libdrm_initialize
bsolos Mar 8, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -417,6 +417,7 @@ Options starting with "gl_*" are for OpenGL.
- `gl_size_query = viewport` : Specify what to use for getting display size. Options are "viewport", "scissorbox" or disabled. Defaults to using glXQueryDrawable.
- `gl_bind_framebuffer = 0..N` : (Re)bind given framebuffer before MangoHud gets drawn. Helps with Crusader Kings III.
- `gl_dont_flip = 1` : Don't swap origin if using GL_UPPER_LEFT. Helps with Ryujinx.
- `libdrm_sampling` : Use libdrm_amdgpu to calculate GPU utilization. Helps with some problematic Vega GPUs.

## MangoHud FPS logging

Expand Down
3 changes: 3 additions & 0 deletions data/MangoHud.conf
Original file line number Diff line number Diff line change
Expand Up @@ -215,6 +215,9 @@ frame_timing
### Don't swap origin if using GL_UPPER_LEFT. Helps with Ryujinx
# gl_dont_flip=1

### Use libdrm_amdgpu to calculate GPU utilization. Helps with some problematic Vega GPUs.
# libdrm_sampling

################ INTERACTION #################

### Change toggle keybinds for the hud & logging
Expand Down
4 changes: 4 additions & 0 deletions meson.build
Original file line number Diff line number Diff line change
Expand Up @@ -88,10 +88,14 @@ if is_unixy
dep_wayland_client = dependency('wayland-client',
required: get_option('with_wayland'), version : '>=1.11')
dbus_dep = dependency('dbus-1', required: get_option('with_dbus')).partial_dependency(compile_args : true, includes : true)
libdrm_dep = dependency('libdrm')
libdrm_amdgpu_dep = dependency('libdrm_amdgpu')
else
dep_x11 = null_dep
dep_wayland_client = null_dep
dbus_dep = null_dep
libdrm_dep = null_dep
libdrm_amdgpu_dep = null_dep
endif

if dep_x11.found()
Expand Down
135 changes: 135 additions & 0 deletions src/amdgpu_libdrm.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,135 @@
#include <deque>
#include <mutex>
#include <thread>
#include <chrono>
#include <xf86drm.h>
#include <amdgpu.h>
#include <spdlog/spdlog.h>
#include <fcntl.h>
#include <unistd.h>
#include "gpu.h"

#include "amdgpu_libdrm.h"

// Capacity of the device array handed to drmGetDevices2() in libdrm_initialize().
#define LIBDRM_MAX_DEVICES 32

// Target duration of one sampling iteration in libdrm_thread(), in microseconds.
#define LIBDRM_SAMPLE_DELAY 3500
// Number of samples held in sample_buf; also the divisor used to turn the
// busy-sample count into a load percentage in libdrm_get_info().
#define LIBDRM_SAMPLE_BUF_SIZE 512

// Offset of the register read by libdrm_do_sample(). It is divided by 4 before
// being passed to amdgpu_read_mm_registers() (presumably a byte offset being
// converted to a dword offset; confirm against the libdrm_amdgpu API).
#define LIBDRM_GRBM_STATUS 0x8010

// Bit masks applied to the value read from the LIBDRM_GRBM_STATUS register.
enum LIBDRM_GRBM_BITS {
// Bit 31; libdrm_do_sample() treats it as "GPU busy".
// NOTE(review): presumably the GUI_ACTIVE field; confirm against the AMD register reference.
LIBDRM_GRBM_BUSY_BIT = 1U << 31
};

// One reading taken by libdrm_do_sample().
struct libdrm_sample {
// True when LIBDRM_GRBM_BUSY_BIT was set in the register value that was read.
bool busy_bit;
};

// Aggregate computed over the whole sample buffer in libdrm_get_info().
struct libdrm_stats {
// Number of buffered samples whose busy_bit was set.
int busy;
};

// Path of the DRI device to sample; libdrm_initialize() compares it against each
// enumerated device's primary node to find the matching render node.
std::string dri_device_path;
// Enables libdrm-based load sampling; libdrm_get_info() resets it to false when
// initialization fails.
bool do_libdrm_sampling = false;

// Fixed-size window of the most recent samples, pre-filled with "not busy" entries.
std::deque<struct libdrm_sample> sample_buf(LIBDRM_SAMPLE_BUF_SIZE, {0});
// Guards sample_buf, which libdrm_thread() writes and libdrm_get_info() reads.
std::mutex sample_buf_m;

// Device handle filled in by amdgpu_device_initialize() in libdrm_initialize()
// and used for register reads in libdrm_do_sample().
amdgpu_device_handle amdgpu_handle;

// Reads the GRBM status register once and records whether the busy bit is set.
//
// sample->busy_bit is set to true when the bit is set and is left untouched
// otherwise, so callers are expected to pass in a zero-initialized sample.
// A failed register read is treated as "not busy".
static void libdrm_do_sample(libdrm_sample *sample) {
    // Zero-initialized so that a failed read can never expose indeterminate data.
    uint32_t registers = 0;

    // The register offset is passed in units of 4 bytes, hence the division.
    const int ret = amdgpu_read_mm_registers(amdgpu_handle, LIBDRM_GRBM_STATUS / 4, 1, 0xffffffff, 0, &registers);
    if (ret != 0) {
        // Do not log here: this runs every few milliseconds on the sampling thread.
        return;
    }

    if (registers & LIBDRM_GRBM_BUSY_BIT) {
        sample->busy_bit = true;
    }
}

static void libdrm_thread() {
while (true) {
auto start_time = std::chrono::system_clock::now().time_since_epoch();

struct libdrm_sample sample {0};
libdrm_do_sample(&sample);

sample_buf_m.lock();
sample_buf.pop_front();
sample_buf.push_back(sample);
sample_buf_m.unlock();

auto end_time = std::chrono::system_clock::now().time_since_epoch();
auto sleep_duration = std::chrono::microseconds(LIBDRM_SAMPLE_DELAY) - (end_time - start_time);
if (sleep_duration > std::chrono::nanoseconds(0)) {
std::this_thread::sleep_for(sleep_duration);
}
}
}

// Locates the render node that belongs to dri_device_path and initializes
// amdgpu_handle from it.
//
// Returns 0 on success and -1 on any failure (device enumeration failed, no
// matching device with both a primary and a render node, the render node could
// not be opened, or amdgpu_device_initialize failed). Each failure is logged.
static int libdrm_initialize() {
    drmDevicePtr devices[LIBDRM_MAX_DEVICES];
    const int device_count = drmGetDevices2(0, devices, LIBDRM_MAX_DEVICES);
    if (device_count < 0) {
        SPDLOG_ERROR("drmGetDevices2 failed");
        return -1;
    }

    // The node path is copied because the device list is released before the
    // result of open() is examined and logged.
    std::string renderd_node;
    for (int i = 0; i < device_count; i++) {
        // Only devices exposing both a primary (card) and a render node qualify.
        constexpr int required_nodes = (1 << DRM_NODE_PRIMARY) | (1 << DRM_NODE_RENDER);
        if ((devices[i]->available_nodes & required_nodes) != required_nodes) {
            continue;
        }

        if (devices[i]->nodes[DRM_NODE_PRIMARY] == dri_device_path) {
            renderd_node = devices[i]->nodes[DRM_NODE_RENDER];
            break;
        }
    }
    drmFreeDevices(devices, device_count);

    if (renderd_node.empty()) {
        SPDLOG_ERROR("No renderD node found for '{}'", dri_device_path);
        return -1;
    }

    // O_CLOEXEC keeps the descriptor from leaking into a child process if the
    // host application forks/execs while it is briefly open.
    const int fd = open(renderd_node.c_str(), O_RDWR | O_CLOEXEC);
    if (fd < 0) {
        // Report the node that actually failed to open, not the primary node.
        SPDLOG_ERROR("renderD node open failed: '{}'", renderd_node);
        return -1;
    }

    uint32_t libdrm_minor, libdrm_major;
    if (amdgpu_device_initialize(fd, &libdrm_major, &libdrm_minor, &amdgpu_handle)) {
        SPDLOG_ERROR("amdgpu_device_initialize failed");
        close(fd);
        return -1;
    }

    close(fd); // amdgpu_device_initialize should F_DUPFD it internally, so there is no need to keep fd open
    return 0;
}

void libdrm_get_info() {
static bool init = false;
if (!init) {
if (libdrm_initialize()) {
do_libdrm_sampling = false;
SPDLOG_ERROR("Could not initialize libdrm");
return;
}
std::thread(libdrm_thread).detach();
init = true;
SPDLOG_INFO("Initialized libdrm sampling");
}

struct libdrm_stats stats {0};

sample_buf_m.lock();
for (auto sample : sample_buf) {
stats.busy += sample.busy_bit ? 1 : 0; // the ternary is probably not needed
}
sample_buf_m.unlock();

gpu_info.load = (int)(((double)stats.busy / LIBDRM_SAMPLE_BUF_SIZE) * 100);
}
6 changes: 6 additions & 0 deletions src/amdgpu_libdrm.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
#pragma once

// Included here so the header is self-contained: it declares a std::string.
#include <string>

// Path of the DRI device whose GPU load should be sampled through libdrm.
extern std::string dri_device_path;
// True when GPU load should be obtained via libdrm_get_info().
extern bool do_libdrm_sampling;

// Updates gpu_info.load from the libdrm samples, initializing libdrm sampling
// on first use.
void libdrm_get_info();
3 changes: 1 addition & 2 deletions src/amdgpu.cpp → src/amdgpu_metrics.cpp
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#include <spdlog/spdlog.h>
#include <thread>
#include <sys/sysinfo.h>
#include "amdgpu.h"
#include "amdgpu_metrics.h"
#include "gpu.h"
#include "cpu.h"
#include "overlay.h"
Expand Down Expand Up @@ -172,7 +172,6 @@ void amdgpu_get_metrics(){

amdgpu_common_metrics_m.lock();
gpu_info.load = amdgpu_common_metrics.gpu_load_percent;

gpu_info.powerUsage = amdgpu_common_metrics.average_gfx_power_w;
gpu_info.CoreClock = amdgpu_common_metrics.current_gfxclk_mhz;
gpu_info.MemClock = amdgpu_common_metrics.current_uclk_mhz;
Expand Down
File renamed without changes.
5 changes: 3 additions & 2 deletions src/gpu.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,12 @@
#include <spdlog/spdlog.h>
#include "nvctrl.h"
#include "timing.hpp"
#include "amdgpu_libdrm.h"
#ifdef HAVE_NVML
#include "nvidia_info.h"
#endif

#include "amdgpu.h"
#include "amdgpu_metrics.h"

using namespace std::chrono_literals;

Expand Down Expand Up @@ -74,7 +75,7 @@ nvapi_util();
void getAmdGpuInfo(){
int64_t value = 0;
if (metrics_path.empty()){
if (amdgpu.busy) {
if (!do_libdrm_sampling && amdgpu.busy) {
rewind(amdgpu.busy);
fflush(amdgpu.busy);
int value = 0;
Expand Down
10 changes: 8 additions & 2 deletions src/meson.build
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ vklayer_files = files(
'vulkan.cpp',
'blacklist.cpp',
'file_utils.cpp',
'amdgpu.cpp',
'amdgpu_metrics.cpp',
'intel.cpp'
)
opengl_files = []
Expand Down Expand Up @@ -89,6 +89,7 @@ if is_unixy
'battery.cpp',
'control.cpp',
'gamepad.cpp',
'amdgpu_libdrm.cpp',
)

opengl_files = files(
Expand Down Expand Up @@ -187,7 +188,10 @@ mangohud_static_lib = static_library(
dep_pthread,
dep_vulkan,
windows_deps,
json_dep],
json_dep,
libdrm_dep,
libdrm_amdgpu_dep,
],
include_directories : [inc_common],
link_args : link_args,
install_dir : libdir_mangohud,
Expand Down Expand Up @@ -249,6 +253,8 @@ if get_option('mangoapp')
dep_x11,
glfw3_dep,
json_dep,
libdrm_dep,
libdrm_amdgpu_dep,
],
include_directories : [inc_common],
install_tag : 'mangoapp',
Expand Down
12 changes: 11 additions & 1 deletion src/overlay.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,8 @@
#include "file_utils.h"
#include "pci_ids.h"
#include "iostats.h"
#include "amdgpu.h"
#include "amdgpu_metrics.h"
#include "amdgpu_libdrm.h"


#ifdef __linux__
Expand Down Expand Up @@ -122,6 +123,9 @@ void update_hw_info(const struct overlay_params& params, uint32_t vendorID)
if (gpu_metrics_exists)
amdgpu_get_metrics();

if (do_libdrm_sampling)
libdrm_get_info();

if (vendorID == 0x10de)
getNvidiaGpuInfo(params);

Expand Down Expand Up @@ -788,6 +792,12 @@ void init_gpu_stats(uint32_t& vendorID, uint32_t reported_deviceID, overlay_para
break;
}

if (params.enabled[OVERLAY_PARAM_ENABLED_libdrm_sampling]) {
do_libdrm_sampling = true;
dri_device_path = string("/dev/dri") + path.substr(path.find_last_of("/"));
SPDLOG_INFO("Using DRI device for libdrm sampling: '{}'", dri_device_path);
}

// don't bother then
if (metrics_path.empty() && !amdgpu.busy && vendorID != 0x8086) {
params.enabled[OVERLAY_PARAM_ENABLED_gpu_stats] = false;
Expand Down
1 change: 1 addition & 0 deletions src/overlay_params.h
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,7 @@ typedef unsigned long KeySym;
OVERLAY_PARAM_BOOL(hud_no_margin) \
OVERLAY_PARAM_BOOL(hud_compact) \
OVERLAY_PARAM_BOOL(exec_name) \
OVERLAY_PARAM_BOOL(libdrm_sampling) \
OVERLAY_PARAM_CUSTOM(fps_sampling_period) \
OVERLAY_PARAM_CUSTOM(output_folder) \
OVERLAY_PARAM_CUSTOM(output_file) \
Expand Down