From 1ae2884cd6cfa6c3a5ff45b6992730d1168ffdf1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20P=C4=99czek?= <146137186+PawelPeczek-Roboflow@users.noreply.github.com> Date: Fri, 4 Oct 2024 14:42:27 +0200 Subject: [PATCH] Revert "Bring back Prometheus GPU" --- inference/core/interfaces/http/http_api.py | 15 +----- inference/core/managers/metrics.py | 55 ---------------------- requirements/requirements.gpu.txt | 3 +- 3 files changed, 2 insertions(+), 71 deletions(-) diff --git a/inference/core/interfaces/http/http_api.py b/inference/core/interfaces/http/http_api.py index b23a83e75b..ad1ebd3683 100644 --- a/inference/core/interfaces/http/http_api.py +++ b/inference/core/interfaces/http/http_api.py @@ -13,7 +13,6 @@ from fastapi.staticfiles import StaticFiles from fastapi_cprofile.profiler import CProfileMiddleware from prometheus_fastapi_instrumentator import Instrumentator -from prometheus_fastapi_instrumentator import metrics as prom_metrics from starlette.convertors import StringConvertor, register_url_convertor from starlette.middleware.base import BaseHTTPMiddleware @@ -188,10 +187,6 @@ MessageToBigError, ) from inference.core.managers.base import ModelManager -from inference.core.managers.metrics import ( - prom_cpu_utilization_total, - prom_gpu_utilization_total, -) from inference.core.roboflow_api import ( get_roboflow_dataset_type, get_roboflow_workspace, @@ -509,15 +504,7 @@ def __init__( ) if ENABLE_PROMETHEUS: - instrumentator = Instrumentator().instrument(app) - instrumentator.add(prom_cpu_utilization_total()) - instrumentator.add(prom_gpu_utilization_total()) - instrumentator.add( - prom_metrics.latency( - buckets=(1,), - ) - ) - instrumentator.expose(app, endpoint="/metrics") + Instrumentator().expose(app, endpoint="/metrics") if METLO_KEY: app.add_middleware( diff --git a/inference/core/managers/metrics.py b/inference/core/managers/metrics.py index 44af106cea..01af47c60c 100644 --- a/inference/core/managers/metrics.py +++ b/inference/core/managers/metrics.py @@ -1,23 +1,13 @@ -import os import platform import re import socket import time import uuid -from typing import Callable - -import GPUtil -from prometheus_client import REGISTRY, Gauge -from prometheus_fastapi_instrumentator.metrics import Info from inference.core.cache import cache from inference.core.logger import logger from inference.core.version import __version__ -previous_cpu_total = None -previous_time = None -NUM_CPU_CORES = os.cpu_count() - def get_model_metrics( inference_server_id: str, model_id: str, min: float = -1, max: float = float("inf") @@ -109,48 +99,3 @@ def get_inference_results_for_model( inference_results.append({"request_time": score, "inference": result}) return inference_results - - -def prom_cpu_utilization_total() -> Callable[[Info], None]: - cpu_utilization_gauge = Gauge( - "process_cpu_utilization_total", "Total CPU utilization" - ) - - def instrumentation(info: Info) -> None: - global previous_cpu_total, previous_time - cpu_metric = REGISTRY.get_sample_value("process_cpu_seconds_total") - if cpu_metric is None: - return - current_time = time.time() - if previous_cpu_total is None: - previous_time = current_time - previous_cpu_total = cpu_metric - else: - cpu_delta = cpu_metric - previous_cpu_total - time_delta = current_time - previous_time - if time_delta > 0: - cpu_utilization_percent = 100 * (cpu_delta / time_delta) / NUM_CPU_CORES - cpu_utilization_gauge.set(cpu_utilization_percent) - previous_cpu_total = cpu_metric - previous_time = current_time - - return instrumentation - - -def prom_gpu_utilization_total() -> Callable[[Info], None]: - gpu_load_gauge = Gauge("gpu_load_percentage", "GPU Load", ["gpu_id"]) - gpu_memory_gauge = Gauge( - "gpu_memory_utilization", "GPU Memory Utilization", ["gpu_id"] - ) - gpu_temp_gauge = Gauge("gpu_temperature_celsius", "GPU Temperature", ["gpu_id"]) - - def instrumentation(info: Info) -> None: - gpus = GPUtil.getGPUs() - if not gpus: - return - for gpu in gpus: - gpu_load_gauge.labels(gpu_id=gpu.id).set(gpu.load * 100) - gpu_memory_gauge.labels(gpu_id=gpu.id).set(gpu.memoryUtil * 100) - gpu_temp_gauge.labels(gpu_id=gpu.id).set(gpu.temperature) - - return instrumentation diff --git a/requirements/requirements.gpu.txt b/requirements/requirements.gpu.txt index b465e787d7..4347b46d43 100644 --- a/requirements/requirements.gpu.txt +++ b/requirements/requirements.gpu.txt @@ -1,2 +1 @@ -onnxruntime-gpu<=1.15.1 -GPUtil==1.4.0 +onnxruntime-gpu<=1.15.1 \ No newline at end of file