Skip to content

[wip] PTX command prototype #208

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 5 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
182 changes: 181 additions & 1 deletion src/discord-cluster-manager/cogs/submit_cog.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,17 @@
import subprocess
import tempfile
from enum import Enum
from pathlib import Path
from typing import TYPE_CHECKING, Optional, Tuple, Type

if TYPE_CHECKING:
from bot import ClusterBot

import discord
from better_profanity import profanity
from consts import SubmissionMode
from consts import CUDA_FLAGS, GPU_TO_SM, SubmissionMode
from discord import app_commands
from discord.app_commands import Choice
from discord.ext import commands
from report import generate_report
from run_eval import FullResult
Expand Down Expand Up @@ -228,3 +232,179 @@ async def _run_submission(

def _get_arch(self, gpu_type: app_commands.Choice[str]):
    """Return the target architecture identifier for *gpu_type*.

    Abstract hook: runner-specific subclasses must override this to map a
    GPU choice to whatever arch string their backend expects.

    Raises:
        NotImplementedError: always, in this base class.
    """
    raise NotImplementedError()

async def generate_ptx_code(self, source_code: str, gpu_type: str, include_sass: bool = False) -> tuple[bool, str]:
    """
    Generate PTX code for a CUDA submission by compiling it with nvcc -ptx.

    Args:
        source_code (str): The CUDA source code.
        gpu_type (str): The GPU architecture to target (a key of GPU_TO_SM).
        include_sass (bool): Whether to include SASS/ptxas verbose output.

    Returns:
        tuple[bool, str]: Success status and the PTX output or error message.
    """
    import asyncio
    import shutil

    # Map the user-facing GPU name to its SM architecture number.
    arch = GPU_TO_SM.get(gpu_type)
    if not arch:
        return False, f"Unknown GPU type: {gpu_type}. Available types: {', '.join(GPU_TO_SM.keys())}"

    # Fail fast before doing any file work. shutil.which is portable,
    # unlike shelling out to the external `which` binary.
    if shutil.which("nvcc") is None:
        return False, "NVCC (CUDA compiler) not found. Is CUDA installed?"

    try:
        # Compile inside a throwaway directory that is cleaned up for us.
        with tempfile.TemporaryDirectory() as temp_dir:
            temp_path = Path(temp_dir)
            source_file = temp_path / "submission.cu"
            source_file.write_text(source_code)

            # `+` already produces a fresh list; no need to .copy() first.
            ptx_flags = CUDA_FLAGS + ["-ptx"]
            if include_sass:
                # -Xptxas=-v makes ptxas print resource-usage info on stderr.
                ptx_flags.append("-Xptxas=-v")

            arch_flag = f"-gencode=arch=compute_{arch},code=compute_{arch}"
            output_file = temp_path / "output.ptx"
            command = ["nvcc"] + ptx_flags + [str(source_file), arch_flag, "-o", str(output_file)]

            # Run the (slow, blocking) compiler off the event loop so the
            # bot stays responsive while nvcc works.
            process = await asyncio.to_thread(
                subprocess.run, command, capture_output=True, text=True
            )

            result = ""
            # ptxas' verbose output (the "SASS information") arrives on stderr.
            if include_sass and process.stderr:
                result += "SASS Assembly Information:\n"
                result += "-" * 40 + "\n"
                result += process.stderr + "\n"
                result += "-" * 40 + "\n\n"

            if process.returncode != 0:
                # Compilation failed; surface the compiler diagnostics.
                return False, f"PTX generation failed:\n{process.stderr}"

            if output_file.exists():
                result += "PTX Code:\n"
                result += "-" * 40 + "\n"
                result += output_file.read_text()
                return True, result
            return False, "PTX file was not generated"
    except Exception as e:
        return False, f"Error generating PTX: {str(e)}"

@app_commands.command(name="ptx")
@app_commands.describe(
    submission="The CUDA submission file (.cu extension)",
    gpu_type="The GPU architecture to target",
    include_sass="Whether to include SASS/assembly output",
    as_file="Return the PTX code as a downloadable file instead of text messages"
)
@app_commands.choices(
    gpu_type=[
        Choice(name=gpu, value=gpu) for gpu in GPU_TO_SM.keys()
    ]
)
@with_error_handling
async def ptx_command(self, interaction: discord.Interaction,
                      submission: discord.Attachment,
                      gpu_type: Optional[Choice[str]] = None,
                      include_sass: bool = False,
                      as_file: bool = False):
    """
    Generate PTX code from a CUDA submission.

    Parameters
    ------------
    submission: File
        The CUDA submission file (.cu extension)
    gpu_type: Choice[str]
        The GPU architecture to target (defaults to T4 when omitted)
    include_sass: bool
        Whether to include SASS assembly code in the output
    as_file: bool
        Return the PTX code as a downloadable file instead of text messages
    """
    if not interaction.response.is_done():
        await interaction.response.defer()

    # Only raw CUDA sources can be compiled to PTX.
    if not submission.filename.endswith('.cu'):
        await send_discord_message(interaction, "❌ Only .cu file extensions are supported for PTX generation")
        return

    # Default to T4 when the user did not pick a GPU.
    target_gpu = gpu_type.value if gpu_type else "T4"

    try:
        content = await submission.read()
        source_code = content.decode('utf-8')

        # All output goes into a dedicated public thread to keep the
        # originating channel uncluttered.
        thread_name = f"PTX Generation - {submission.filename} - {target_gpu}"
        if include_sass:
            thread_name += " with SASS"

        thread = await interaction.channel.create_thread(
            name=thread_name,
            type=discord.ChannelType.public_thread,
        )

        await thread.send(f"Generating PTX code for {submission.filename} targeting {target_gpu}..." +
                          (" (including SASS output)" if include_sass else ""))

        success, result = await self.generate_ptx_code(source_code, target_gpu, include_sass)

        if success:
            if as_file:
                # Write the PTX to a temp file so it can be attached.
                # delete=False is required because discord.File reopens the
                # path after the context manager has closed it.
                with tempfile.NamedTemporaryFile('w', suffix='.ptx', delete=False) as temp_file:
                    temp_file.write(result)
                    temp_file_path = temp_file.name

                try:
                    base_filename = Path(submission.filename).stem
                    output_filename = f"{base_filename}_{target_gpu}.ptx"

                    await thread.send(
                        f"PTX code for {submission.filename} targeting {target_gpu}:",
                        file=discord.File(temp_file_path, filename=output_filename)
                    )
                finally:
                    # Always remove the temp file, even if the upload fails.
                    Path(temp_file_path).unlink(missing_ok=True)
            else:
                # Discord caps messages at 2000 chars; leave headroom for the
                # code-fence markdown wrapping each chunk.
                max_msg_length = 1900
                chunks = [result[i:i+max_msg_length] for i in range(0, len(result), max_msg_length)]

                for chunk in chunks:
                    await thread.send(f"```{chunk}```")

            await thread.send(f"✅ PTX code generation complete for {target_gpu} GPU" +
                              (" with SASS assembly" if include_sass else ""))
        else:
            await thread.send(f"❌ Failed to generate PTX code: {result}")

        # Point the user at the thread from the original channel.
        await send_discord_message(interaction, f"PTX generation for {submission.filename} is complete. Check the thread for results.")

    except Exception as e:
        logger.error(f"Error generating PTX: {e}", exc_info=True)
        await send_discord_message(interaction, f"❌ Error generating PTX: {str(e)}")
1 change: 1 addition & 0 deletions src/discord-cluster-manager/cogs/verify_run_cog.py
Original file line number Diff line number Diff line change
Expand Up @@ -261,6 +261,7 @@ async def verify_submission( # noqa: C901
if report_success:
reports.append(f"✅ {run_id:20} {mode.name} behaved as expected")


@app_commands.command(name="verifyruns")
async def verify_runs(self, interaction: discord.Interaction):
"""Verify runs on Modal, GitHub Nvidia, and GitHub AMD."""
Expand Down
Loading