From 7e0a1fe9044d60549b757616af25523896e97a04 Mon Sep 17 00:00:00 2001
From: Jose Castillo
Date: Thu, 12 Sep 2024 11:16:24 +0100
Subject: [PATCH] [nvidia] Capture more nvidia commands

Capture commands related to nvidia container toolkit.

Related: RHEL-58172

Signed-off-by: Jose Castillo
---
 sos/report/plugins/nvidia.py | 14 ++++++++++----
 1 file changed, 10 insertions(+), 4 deletions(-)

diff --git a/sos/report/plugins/nvidia.py b/sos/report/plugins/nvidia.py
index 25d35e53a1..86e93ada43 100644
--- a/sos/report/plugins/nvidia.py
+++ b/sos/report/plugins/nvidia.py
@@ -16,9 +16,13 @@ class Nvidia(Plugin, IndependentPlugin):
 
     short_desc = 'Nvidia GPU information'
     plugin_name = 'nvidia'
-    commands = ('nvidia-smi',)
+    commands = ('nvidia-smi', 'nvidia-ctk',)
+    services = ('nvidia-persistenced', 'nvidia-fabricmanager',
+                'nvidia-toolkit-firstboot')
 
     def setup(self):
+        self.add_copy_spec("/etc/cdi/nvidia.yaml")
+
         subcmds = [
             '--list-gpus',
             '-q -d PERFORMANCE',
@@ -29,9 +33,12 @@ def setup(self):
             'nvlink -s',
             'nvlink -e'
         ]
-
-        self.add_service_status("nvidia-persistenced")
+        ctk_subcmds = [
+            'cdi list',
+            '--version',
+        ]
         self.add_cmd_output([f"nvidia-smi {cmd}" for cmd in subcmds])
+        self.add_cmd_output([f"nvidia-ctk {cmd}" for cmd in ctk_subcmds])
 
         query = ('gpu_name,gpu_bus_id,vbios_version,temperature.gpu,'
                  'utilization.gpu,memory.total,memory.free,memory.used,'
@@ -42,6 +49,5 @@ def setup(self):
         self.add_cmd_output(
             f"nvidia-smi --query-retired-pages={querypages} --format=csv"
         )
-        self.add_journal(boot=0, identifier='nvidia-persistenced')
 
 # vim: set et ts=4 sw=4 :