aws-neuron · aws-qieqingy · Jan 2, 2025 · Jan 2, 2025
diff --git a/.github/workflows/deploy_doc.yml b/.github/workflows/deploy_doc.yml
@@ -0,0 +1,58 @@
+# Builds the Sphinx documentation and publishes the generated HTML to GitHub Pages.
+name: Deploy static content to Pages
+
+on:
+  # Trigger on every push to the main branch.
+  push:
+    branches:
+      - main
+
+  # Also allow manual runs from the Actions tab.
+  workflow_dispatch:
+
+# GITHUB_TOKEN scopes required for a GitHub Pages deployment.
+permissions:
+  contents: read
+  pages: write
+  id-token: write
+
+# Serialize deployments in a single "pages" group: runs queued between the in-progress
+# run and the newest queued run are skipped, but an in-progress run is never cancelled
+# so that a production deployment can always finish.
+concurrency:
+  group: pages
+  cancel-in-progress: false
+
+jobs:
+  # Single job: build the Sphinx docs and deploy the result to GitHub Pages.
+  deploy:
+    environment:
+      name: github-pages
+      # Taken from the output of the step with id "deployment" below.
+      url: ${{ steps.deployment.outputs.page_url }}
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+      - name: Set up Python 3.10
+        uses: actions/setup-python@v5
+        with:
+          # Quoted so YAML keeps the string "3.10" instead of the float 3.1.
+          python-version: "3.10"
+      # The version spec is quoted so the shell never expands `*` as a filename glob.
+      - name: Install Dependencies
+        run: |
+          python -m pip install --upgrade pip
+          python -m pip config set global.extra-index-url https://pip.repos.neuron.amazonaws.com
+          python -m pip install wget awscli
+          python -m pip install pytest
+          python -m pip install "neuronx-cc==2.*"
+          python -m pip install Sphinx
+      - name: Generate Docs
+        run: |
+          sphinx-build doc _apidoc
+      - name: Setup Pages
+        uses: actions/configure-pages@v5
+      - name: Upload artifact
+        uses: actions/upload-pages-artifact@v3
+        with:
+          # Upload only the HTML written by sphinx-build in the previous step
+          path: '_apidoc'
+      - name: Deploy to GitHub Pages
+        id: deployment
+        uses: actions/deploy-pages@v4
diff --git a/.gitignore b/.gitignore
@@ -1 +1,5 @@
-build
+build
+_apidoc
+doc/_apidoc
+doc/generated
+__pycache__
diff --git a/doc/README.md b/doc/README.md
@@ -0,0 +1,20 @@
+## View Documentation
+
+The documentation for this repo is built with GitHub Actions and is available at `<insert_link_when_deployed>`.
+
+## Build Documentation Locally
+
+To build the documentation locally, install Sphinx (which provides [sphinx-build](https://www.sphinx-doc.org/en/master/man/sphinx-build.html)) with:
+
+```
+pip install -U sphinx
+```
+
+Then run the following command from the root of the repo, installing any
+missing dependencies if needed:
+
+```
+PYTHONPATH=$PYTHONPATH:<path to src/nki_samples> sphinx-build doc <dst_folder>
+```
+
+The HTML file of the doc will be available at `<dst_folder>/index.html`
diff --git a/doc/conf.py b/doc/conf.py
@@ -0,0 +1,95 @@
+"""Sphinx configuration."""
+
+import datetime
+import os
+import shutil
+
+import sys
+from pathlib import Path
+
+# Put the sibling ``src`` directory at the front of sys.path so the
+# ``nki_samples`` imports below resolve.
+# NOTE: '..' is resolved against the current working directory, not this file's
+# location; this relies on Sphinx executing conf.py with the working directory
+# set to the configuration directory (see the Sphinx configuration docs).
+sys.path.insert(0, str(Path('..', 'src/').resolve()))
+
+def _insert_doc(decorated_nki_func):
+    """Copy the wrapped function's docstring and name onto its decorated object.
+
+    Args:
+        decorated_nki_func: Object that exposes the original Python function as
+            its ``func`` attribute. Its ``__doc__`` and ``__name__`` attributes
+            are overwritten in place.
+    """
+    wrapped = decorated_nki_func.func
+    decorated_nki_func.__doc__ = wrapped.__doc__
+    # The dunder ``__name__`` is the attribute that carries a callable's name;
+    # assigning to ``__name`` would only create an unrelated attribute.
+    decorated_nki_func.__name__ = wrapped.__name__
+
+# Import each reference kernel module and run _insert_doc on the kernels that the
+# matching .rst page lists in its autosummary table.
+
+# Kernels listed in nki_samples.reference.attention.rst
+import nki_samples.reference.attention as attn
+_insert_doc(attn.flash_fwd)
+_insert_doc(attn.flash_attn_bwd)
+_insert_doc(attn.fused_self_attn_for_SD_small_head_size)
+
+# Kernels listed in nki_samples.reference.vision.rst
+import nki_samples.reference.vision as vision
+_insert_doc(vision.select_and_scatter_kernel)
+_insert_doc(vision.resize_nearest_fixed_dma_kernel)
+
+# Kernel listed in nki_samples.reference.allocated_attention.rst
+import nki_samples.reference.allocated_attention as alloc_attn
+_insert_doc(alloc_attn.allocated_fused_self_attn_for_SD_small_head_size)
+
+# Kernel listed in nki_samples.reference.allocated_fused_linear.rst
+import nki_samples.reference.allocated_fused_linear as alloc_fl
+_insert_doc(alloc_fl.allocated_fused_rms_norm_qkv)
+
+def run_apidoc(app):
+    """Regenerate the sphinx-apidoc stub files for the code under ``../src/``.
+
+    Stubs are written to ``_apidoc`` inside the Sphinx source directory; any
+    existing copy of that directory is deleted first.
+
+    Args:
+        app: The Sphinx application object; only ``app.srcdir`` is read.
+    """
+    source_root = os.path.join(app.srcdir, "../src/")
+    stub_dir = os.path.join(app.srcdir, "_apidoc")
+    extra_excludes = []  # no exclude patterns are passed at present
+
+    # Remove stubs left over from an earlier build so stale files never linger.
+    if os.path.exists(stub_dir):
+        shutil.rmtree(stub_dir)
+
+    apidoc_args = ["--separate", "--module-first", "--doc-project=API Reference"]
+    apidoc_args += ["-o", stub_dir, source_root]
+    apidoc_args += extra_excludes
+
+    try:
+        from sphinx.ext import apidoc  # Sphinx >= 1.7
+        apidoc.main(apidoc_args)
+    except ImportError:
+        from sphinx import apidoc  # Sphinx < 1.7
+        # The module path is prepended as the first argument on this path,
+        # presumably because the old entry point expects an argv-style list.
+        apidoc.main([apidoc.__file__] + apidoc_args)
+
+
+def setup(app):
+    """Hook :func:`run_apidoc` onto Sphinx's ``builder-inited`` event."""
+    hook_event = "builder-inited"
+    app.connect(hook_event, run_apidoc)
+
+
+# ---- Sphinx configuration values ----
+
+# Project identity shown in the generated pages.
+project = "nki_samples"
+version = "1.x"
+release = "mainline"
+copyright = f"{datetime.datetime.now().year}, Amazon.com"
+
+# Sphinx extensions enabled for this build.
+extensions = [
+    "sphinx.ext.autodoc",
+    "sphinx.ext.autosummary",
+    "sphinx.ext.intersphinx",
+    "sphinx.ext.napoleon",
+    "sphinx.ext.todo",
+    "sphinx.ext.viewcode",
+]
+
+# Turn on stub generation by sphinx.ext.autosummary.
+autosummary_generate = True
+
+html_theme = "sphinxdoc"
+
+# Source files are reStructuredText and the root document is index.rst.
+source_suffix = ".rst"
+master_doc = "index"
+
+# autodoc behaviour and the role applied to bare `backtick` text.
+autoclass_content = "class"
+autodoc_member_order = "bysource"
+default_role = "py:obj"
+
+htmlhelp_basename = f"{project}doc"
+
+napoleon_use_rtype = False
diff --git a/doc/index.rst b/doc/index.rst
@@ -0,0 +1,46 @@
+NKI Samples
+==============
+
+.. currentmodule:: nki_samples.reference
+
+.. _nki_kernels:
+
+nki_samples.reference
+---------------------
+
+All kernels located in this folder have numeric accuracy tests and 
+performance benchmarks defined in the test directory. We also demonstrate 
+using these kernels end-to-end in our integration tests.
+
+You are welcome to customize them to fit your unique workloads, and to contribute to the repository by opening a PR.
+Note that these kernels are already being deployed as part of the Neuron stack. With flash attention as an example,
+`compiling Llama models with transformers-neuronx <https://awsdocs-neuron.readthedocs-hosted.com/en/latest/libraries/transformers-neuronx/transformers-neuronx-developer-guide.html>`_
+will automatically invoke the `flash_fwd` kernel listed here. Therefore, replacing the framework operators with these 
+NKI kernels likely won't result in extra performance benefit.
+
+Please see the `README <https://github.com/aws-neuron/nki-samples>`_ page 
+of the GitHub Repository `nki-samples <https://github.com/aws-neuron/nki-samples>`_ for more details.
+
+For NKI documentation, please refer to the main `Neuron SDK documentation page <https://awsdocs-neuron.readthedocs-hosted.com/en/latest/general/nki/index.html>`_.
+
+Relationship to `neuronxcc.nki.kernels`
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+The kernels under the `reference` folder are also available in the `neuronxcc.nki.kernels` namespace. The
+kernels in `neuronxcc` are synced with this repository on every Neuron SDK release.
+
+
+.. toctree::
+    :maxdepth: 2
+
+    nki_samples.reference.attention
+    nki_samples.reference.vision
+    nki_samples.reference.allocated_fused_linear
+    nki_samples.reference.allocated_attention
+
+
+nki_samples.tutorial
+---------------------
+
+Please refer to `this page <https://awsdocs-neuron.readthedocs-hosted.com/en/latest/general/nki/tutorials.html>`_ for the 
+tutorials. The code associated with the tutorial can be found at `nki-samples/src/tutorials <https://github.com/aws-neuron/nki-samples/tree/main/src/tutorials>`_
diff --git a/doc/nki_samples.reference.allocated_attention.rst b/doc/nki_samples.reference.allocated_attention.rst
@@ -0,0 +1,16 @@
+Allocated Attention
+=======================
+
+.. currentmodule:: nki_samples.reference.allocated_attention
+
+This file hosts the high-performance reference implementation for
+the attention blocks that are used
+in `Stable Diffusion <https://huggingface.co/spaces/stabilityai/stable-diffusion>`_ models.
+This implementation uses 
+the `direct allocation API <https://awsdocs-neuron.readthedocs-hosted.com/en/latest/general/nki/nki_direct_allocation_guide.html>`_ to achieve better performance.
+
+.. autosummary::
+    :toctree: generated
+
+    allocated_fused_self_attn_for_SD_small_head_size
+
diff --git a/doc/nki_samples.reference.allocated_fused_linear.rst b/doc/nki_samples.reference.allocated_fused_linear.rst
@@ -0,0 +1,14 @@
+Allocated Fused Linear
+=======================
+
+.. currentmodule:: nki_samples.reference.allocated_fused_linear
+
+This file hosts the high-performance kernel that computes `RMSNorm(hidden) @ wQKV`.
+This implementation uses
+the `direct allocation API <https://awsdocs-neuron.readthedocs-hosted.com/en/latest/general/nki/nki_direct_allocation_guide.html>`_ to achieve better performance.
+
+.. autosummary::
+    :toctree: generated
+
+    allocated_fused_rms_norm_qkv
+
diff --git a/doc/nki_samples.reference.attention.rst b/doc/nki_samples.reference.attention.rst
@@ -0,0 +1,16 @@
+Attention
+=======================
+
+.. currentmodule:: nki_samples.reference.attention
+
+This file hosts the high-performance reference implementation for
+`FlashAttention <https://arxiv.org/abs/2205.14135>`_ (forward & backward), and attention blocks that are used
+in `Stable Diffusion <https://huggingface.co/spaces/stabilityai/stable-diffusion>`_ models.
+
+.. autosummary::
+    :toctree: generated
+
+    flash_fwd
+    flash_attn_bwd
+    fused_self_attn_for_SD_small_head_size
+
diff --git a/doc/nki_samples.reference.vision.rst b/doc/nki_samples.reference.vision.rst
@@ -0,0 +1,12 @@
+Vision
+=======================
+
+.. currentmodule:: nki_samples.reference.vision
+
+This file hosts the reference implementation for vision operators. 
+
+.. autosummary::
+    :toctree: generated
+
+    select_and_scatter_kernel
+    resize_nearest_fixed_dma_kernel