feat: add hardware info reporting (#273)

This commit adds routes for `/hardware/info` and `/hardware/stats` that report the GPUs available to a pipeline/model_id, along with some basic information from torch.cuda. --------- Co-authored-by: Rick Staa <[email protected]>
livepeer · Nov 27, 2024 · 28e0095 · 28e0095
1 parent ee40bd6
commit 28e0095
Show file tree

Hide file tree

Showing 10 changed files with 787 additions and 99 deletions.
diff --git a/runner/app/main.py b/runner/app/main.py
@@ -2,10 +2,12 @@
 import os
 from contextlib import asynccontextmanager
 
+import app
+from app.routes import health, hardware
 from fastapi import FastAPI
 from fastapi.routing import APIRoute
-
-from app.routes import health
+from app.utils.hardware import get_gpu_info
+from app.utils.nvml_manager import nvml_manager
 
 logger = logging.getLogger(__name__)
 
@@ -14,16 +16,24 @@
 async def lifespan(app: FastAPI):
     config_logging()
 
+    nvml_manager.initialize()
+
     app.include_router(health.router)
+    app.include_router(hardware.router)
 
     pipeline = os.environ["PIPELINE"]
     model_id = os.environ["MODEL_ID"]
 
     app.pipeline = load_pipeline(pipeline, model_id)
     app.include_router(load_route(pipeline))
 
+    print_cuda_devices()
     logger.info(f"Started up with pipeline {app.pipeline}")
+
     yield
+
+    nvml_manager.shutdown()
+
     logger.info("Shutting down")
 
 
@@ -133,6 +143,13 @@ def config_logging():
     )
 
 
+def print_cuda_devices():
+    """Log the GPUs reported by `get_gpu_info`, one log line per device.
+
+    This is purely diagnostic startup output, so a failing GPU query is logged
+    and skipped instead of propagating to the caller.
+    """
+    try:
+        devices = get_gpu_info()
+    except Exception:
+        # `nvml_manager.initialize()` only logs NVML initialization failures, so
+        # NVML may be unusable at this point; a diagnostic log step should not
+        # abort application startup because of that.
+        logger.exception("Failed to retrieve Cuda device information.")
+        return
+
+    logger.info("Cuda devices available:")
+    # Only the per-device records are logged; the index keys are not needed.
+    for device_info in devices.values():
+        logger.info(device_info)
+
+
 def use_route_names_as_operation_ids(app: FastAPI) -> None:
     for route in app.routes:
         if isinstance(route, APIRoute):

diff --git a/runner/app/routes/hardware.py b/runner/app/routes/hardware.py
@@ -0,0 +1,65 @@
+import os
+from typing import Dict
+
+from app.utils.hardware import (
+    GpuComputeInfo,
+    GpuUtilizationInfo,
+    get_gpu_info,
+    get_gpu_stats,
+)
+from fastapi import APIRouter
+from pydantic import BaseModel
+
+router = APIRouter()
+
+
+class HardwareInformation(BaseModel):
+    """Response model for GPU information."""
+
+    # Pipeline name; the `/hardware/info` handler fills it from the `PIPELINE`
+    # environment variable.
+    pipeline: str
+    # Model identifier; the handler fills it from the `MODEL_ID` environment
+    # variable.
+    model_id: str
+    # Per-GPU compute information as returned by `get_gpu_info()`, keyed by the
+    # integer device index.
+    gpu_info: Dict[int, GpuComputeInfo]
+
+
+class HardwareStats(BaseModel):
+    """Response model for real-time GPU statistics."""
+
+    # Pipeline name; the `/hardware/stats` handler fills it from the `PIPELINE`
+    # environment variable.
+    pipeline: str
+    # Model identifier; the handler fills it from the `MODEL_ID` environment
+    # variable.
+    model_id: str
+    # Per-GPU utilization statistics as returned by `get_gpu_stats()`, keyed by
+    # the integer device index.
+    gpu_stats: Dict[int, GpuUtilizationInfo]
+
+
+@router.get(
+    "/hardware/info",
+    operation_id="hardware_info",
+    response_model=HardwareInformation,
+)
+@router.get(
+    "/hardware/info/",
+    response_model=HardwareInformation,
+    include_in_schema=False,
+)
+async def hardware_info():
+    # Served on both "/hardware/info" and "/hardware/info/"; only the former is
+    # included in the schema. Reports the configured pipeline/model together
+    # with the compute information of each GPU.
+    pipeline = os.environ["PIPELINE"]
+    model_id = os.environ["MODEL_ID"]
+    gpus = get_gpu_info()
+    return HardwareInformation(pipeline=pipeline, model_id=model_id, gpu_info=gpus)
+
+
+@router.get(
+    "/hardware/stats",
+    operation_id="hardware_stats",
+    response_model=HardwareStats,
+)
+@router.get(
+    "/hardware/stats/",
+    response_model=HardwareStats,
+    include_in_schema=False,
+)
+async def hardware_stats():
+    # Served on both "/hardware/stats" and "/hardware/stats/"; only the former
+    # is included in the schema. Reports the configured pipeline/model together
+    # with the current utilization statistics of each GPU.
+    pipeline = os.environ["PIPELINE"]
+    model_id = os.environ["MODEL_ID"]
+    stats = get_gpu_stats()
+    return HardwareStats(pipeline=pipeline, model_id=model_id, gpu_stats=stats)
diff --git a/runner/app/utils/hardware.py b/runner/app/utils/hardware.py
@@ -0,0 +1,104 @@
+"""Contains utility functions for hardware information."""
+
+from typing import Dict
+from pydantic import BaseModel
+import logging
+import pynvml
+
+logger = logging.getLogger(__name__)
+
+
+class GpuBaseInfo(BaseModel):
+    """Model for general GPU information."""
+
+    # Device UUID as returned by `pynvml.nvmlDeviceGetUUID`.
+    id: str
+    # Device name as returned by `pynvml.nvmlDeviceGetName`.
+    name: str
+    # `total` field of `pynvml.nvmlDeviceGetMemoryInfo`
+    # (presumably bytes -- confirm against the NVML documentation).
+    memory_total: int
+    # `free` field of `pynvml.nvmlDeviceGetMemoryInfo` (same unit as above).
+    memory_free: int
+
+
+class GpuComputeInfo(GpuBaseInfo):
+    """Model for detailed GPU compute information."""
+
+    # First and second element of the pair returned by
+    # `pynvml.nvmlDeviceGetCudaComputeCapability`.
+    major: int
+    minor: int
+
+
+class GpuUtilizationInfo(GpuBaseInfo):
+    """Model for real-time GPU utilization statistics."""
+
+    # `gpu` field of `pynvml.nvmlDeviceGetUtilizationRates`
+    # (presumably a percentage -- confirm against the NVML documentation).
+    utilization_compute: int
+    # `memory` field of `pynvml.nvmlDeviceGetUtilizationRates`
+    # (presumably a percentage -- confirm against the NVML documentation).
+    utilization_memory: int
+
+
+class GpuInfo(GpuComputeInfo, GpuUtilizationInfo):
+    """Model for full CUDA device information."""
+
+    # Declares no fields of its own: it combines the compute-capability fields
+    # of `GpuComputeInfo` with the utilization fields of `GpuUtilizationInfo`.
+    pass
+
+
+def retrieve_cuda_info() -> Dict[int, GpuInfo]:
+    """Query NVML for every device it reports and collect the results.
+
+    Returns:
+        A mapping from NVML device index to the full information of that device.
+    """
+    gpus = {}
+    device_count = pynvml.nvmlDeviceGetCount()
+    for index in range(device_count):
+        handle = pynvml.nvmlDeviceGetHandleByIndex(index)
+
+        # Gather the raw NVML readings for this device.
+        device_uuid = pynvml.nvmlDeviceGetUUID(handle)
+        device_name = pynvml.nvmlDeviceGetName(handle)
+        memory = pynvml.nvmlDeviceGetMemoryInfo(handle)
+        cc_major, cc_minor = pynvml.nvmlDeviceGetCudaComputeCapability(handle)
+        usage = pynvml.nvmlDeviceGetUtilizationRates(handle)
+
+        gpus[index] = GpuInfo(
+            id=device_uuid,
+            name=device_name,
+            memory_total=memory.total,
+            memory_free=memory.free,
+            major=cc_major,
+            minor=cc_minor,
+            utilization_compute=usage.gpu,
+            utilization_memory=usage.memory,
+        )
+    return gpus
+
+
+def get_gpu_info() -> Dict[int, GpuComputeInfo]:
+    """Return the compute-related subset of the information of each GPU.
+
+    Returns:
+        A mapping from device index to its `GpuComputeInfo`.
+    """
+    compute_info = {}
+    # Narrow each full device record down to the fields of `GpuComputeInfo`.
+    for index, device in retrieve_cuda_info().items():
+        compute_info[index] = GpuComputeInfo(
+            id=device.id,
+            name=device.name,
+            memory_total=device.memory_total,
+            memory_free=device.memory_free,
+            major=device.major,
+            minor=device.minor,
+        )
+    return compute_info
+
+
+def get_gpu_stats() -> Dict[int, GpuUtilizationInfo]:
+    """Return the utilization-related subset of the information of each GPU.
+
+    Returns:
+        A mapping from device index to its `GpuUtilizationInfo`.
+    """
+    utilization = {}
+    # Narrow each full device record down to the fields of `GpuUtilizationInfo`.
+    for index, device in retrieve_cuda_info().items():
+        utilization[index] = GpuUtilizationInfo(
+            id=device.id,
+            name=device.name,
+            memory_total=device.memory_total,
+            memory_free=device.memory_free,
+            utilization_compute=device.utilization_compute,
+            utilization_memory=device.utilization_memory,
+        )
+    return utilization
diff --git a/runner/app/utils/nvml_manager.py b/runner/app/utils/nvml_manager.py
@@ -0,0 +1,37 @@
+"""
+This module manages NVML (NVIDIA Management Library) initialization and shutdown,
+ensuring efficient resource management and improved performance for GPU operations.
+"""
+import pynvml
+import logging
+import atexit
+
+logger = logging.getLogger(__name__)
+
+class NVMLManager:
+    """Tracks whether NVML is initialized and pairs each init with a shutdown."""
+
+    def __init__(self):
+        # Nothing is initialized on construction; `initialize` must be called.
+        self._initialized = False
+        # Make sure NVML is released at interpreter exit even when `shutdown`
+        # is never called explicitly.
+        atexit.register(self.shutdown)
+
+    def initialize(self):
+        """Initialize NVML unless it is already initialized.
+
+        An `NVMLError` raised by NVML is logged rather than propagated; the
+        manager then stays in the uninitialized state.
+        """
+        if self._initialized:
+            return
+        try:
+            pynvml.nvmlInit()
+        except pynvml.NVMLError as e:
+            logger.error(f"Failed to initialize NVML: {e}")
+        else:
+            self._initialized = True
+            logger.info("NVML initialized successfully.")
+
+    def shutdown(self):
+        """Shut NVML down if this manager initialized it.
+
+        An `NVMLError` raised by NVML is logged rather than propagated; the
+        manager then stays in the initialized state.
+        """
+        if not self._initialized:
+            return
+        try:
+            pynvml.nvmlShutdown()
+        except pynvml.NVMLError as e:
+            logger.error(f"Failed to shutdown NVML: {e}")
+        else:
+            self._initialized = False
+            logger.info("NVML shutdown successfully.")
+
+nvml_manager = NVMLManager()
diff --git a/runner/gen_openapi.py b/runner/gen_openapi.py
@@ -9,6 +9,7 @@
 from app.main import app
 from app.routes import (
     audio_to_text,
+    hardware,
     health,
     image_to_image,
     image_to_text,
@@ -47,7 +48,6 @@ def translate_to_gateway(openapi: dict) -> dict:
 
     .. note::
         Differences between 'runner' and 'gateway' entrypoints:
-        - 'health' endpoint is removed.
         - 'model_id' is enforced in all endpoints.
         - 'metadata' property is removed from all schemas.
         - 'VideoResponse' schema is updated to match the Gateway's transcoded mp4
@@ -59,11 +59,8 @@ def translate_to_gateway(openapi: dict) -> dict:
     Returns:
         The translated OpenAPI schema.
     """
-    # Remove 'health' related endpoints and schemas.
-    openapi["paths"].pop("/health")
-    openapi["components"]["schemas"].pop("HealthCheck")
-
     # Enforce 'model_id' in all endpoints
+    logger.debug("Enforcing 'model_id' in all endpoints...")
     for _, methods in openapi["paths"].items():
         for _, details in methods.items():
             if "requestBody" in details:
@@ -86,6 +83,7 @@ def translate_to_gateway(openapi: dict) -> dict:
     # Update the 'VideoResponse' schema to match the Gateway's response.
     # NOTE: This is necessary because the Gateway transcodes the runner's response and
     # returns an mp4 file.
+    logger.debug("Updating 'VideoResponse' schema...")
     openapi["components"]["schemas"]["VideoResponse"] = copy.deepcopy(
         openapi["components"]["schemas"]["ImageResponse"]
     )
@@ -103,7 +101,9 @@ def write_openapi(fname: str, entrypoint: str = "runner"):
         entrypoint: The entrypoint to generate the OpenAPI schema for, either
             'gateway' or 'runner'. Default is 'runner'.
     """
-    app.include_router(health.router)
+    if entrypoint != "gateway":
+        app.include_router(health.router)
+        app.include_router(hardware.router)
     app.include_router(text_to_image.router)
     app.include_router(image_to_image.router)
     app.include_router(image_to_video.router)
@@ -164,8 +164,8 @@ def write_openapi(fname: str, entrypoint: str = "runner"):
     parser.add_argument(
         "--entrypoint",
         type=str,
-        choices=["runner", "gateway"],
-        default=["runner", "gateway"],
+        choices=["gateway","runner"],
+        default=["gateway","runner"],
         nargs="+",
         help=(
             "The entrypoint to generate the OpenAPI schema for, options are 'runner' "
@@ -176,5 +176,6 @@ def write_openapi(fname: str, entrypoint: str = "runner"):
 
     # Generate orchestrator and Gateway facing OpenAPI schemas.
     logger.info("Generating OpenAPI schema.")
-    for entrypoint in args.entrypoint:
+    entrypoints = sorted(args.entrypoint, key=lambda x: x != "gateway")
+    for entrypoint in entrypoints:
         write_openapi(f"openapi.{args.type.lower()}", entrypoint=entrypoint)