Skip to content

Commit

Permalink
Release 026 (#546)
Browse files Browse the repository at this point in the history
  • Loading branch information
casper-hansen authored Jul 23, 2024
1 parent ca54dea commit 44e0479
Show file tree
Hide file tree
Showing 4 changed files with 160 additions and 162 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/build.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -130,7 +130,7 @@ jobs:
matrix:
os: [ubuntu-20.04]
python: ["3.8", "3.9", "3.10", "3.11"]
rocm: ["5.6.1", "5.7.1"]
rocm: ["5.7.1"]
defaults:
run:
shell: bash
Expand Down
2 changes: 1 addition & 1 deletion awq/__init__.py
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
__version__ = "0.2.5"
__version__ = "0.2.6"
from awq.models.auto import AutoAWQForCausalLM
2 changes: 1 addition & 1 deletion scripts/download_wheels.sh
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#!/bin/bash

# Set variables
AWQ_VERSION="0.2.5"
AWQ_VERSION="0.2.6"
RELEASE_URL="https://api.github.com/repos/casper-hansen/AutoAWQ/releases/tags/v${AWQ_VERSION}"

# Create a directory to download the wheels
Expand Down
316 changes: 157 additions & 159 deletions setup.py
Original file line number Diff line number Diff line change
@@ -1,159 +1,157 @@
import os
import torch
import platform
import requests
from pathlib import Path
from setuptools import setup, find_packages
from torch.utils.cpp_extension import CUDAExtension


def get_latest_kernels_version(repo):
    """Return the latest kernels release version published on a GitHub repo.

    Args:
        repo: GitHub repository slug in ``owner/name`` form.

    Returns:
        The latest release tag with the ``v`` prefix stripped, e.g. ``"0.0.6"``.

    Raises:
        requests.HTTPError: if the GitHub API responds with an error status
            (e.g. rate limiting).
        requests.Timeout: if the API does not answer within the timeout.
    """
    # A timeout prevents `pip install` / builds from hanging indefinitely
    # when the GitHub API is unreachable.
    response = requests.get(
        f"https://api.github.com/repos/{repo}/releases/latest", timeout=30
    )
    # Fail loudly on API errors instead of raising a confusing KeyError on
    # the missing "tag_name" field below.
    response.raise_for_status()
    data = response.json()
    tag_name = data["tag_name"]
    version = tag_name.replace("v", "")
    return version


def get_kernels_whl_url(
    gpu_system_version,
    release_version,
    python_version,
    platform,
    architecture,
):
    """Build the download URL of a prebuilt AutoAWQ kernels wheel.

    Args:
        gpu_system_version: accelerator tag, e.g. ``"cu118"`` or ``"rocm571"``.
        release_version: kernels release version, e.g. ``"0.0.6"``.
        python_version: CPython tag digits, e.g. ``"310"``.
        platform: lowercase platform name, e.g. ``"linux"``.
        architecture: lowercase machine architecture, e.g. ``"x86_64"``.

    Returns:
        Full URL of the wheel asset on the AutoAWQ_kernels release page.
    """
    base_url = "https://github.com/casper-hansen/AutoAWQ_kernels/releases/download"
    wheel_name = (
        f"autoawq_kernels-{release_version}+{gpu_system_version}"
        f"-cp{python_version}-cp{python_version}"
        f"-{platform}_{architecture}.whl"
    )
    return f"{base_url}/v{release_version}/{wheel_name}"


# -----------------------------------------------------------------------------
# Build-time configuration: detect the local accelerator stack (CUDA / ROCm /
# CPU-only) and derive the package version and dependency list from it.
# -----------------------------------------------------------------------------

AUTOAWQ_VERSION = "0.2.5"
# PyPI uploads must not carry a local version tag (+cuXXX / +rocmXXX / +cpu).
PYPI_BUILD = os.getenv("PYPI_BUILD", "0") == "1"
# Neither MPS nor CUDA available -> build the CPU-only flavor.
IS_CPU_ONLY = not torch.backends.mps.is_available() and not torch.cuda.is_available()

# Env var override first; otherwise the CUDA version the installed torch wheel
# was built against (None on non-CUDA torch builds).
CUDA_VERSION = os.getenv("CUDA_VERSION", None) or torch.version.cuda
if CUDA_VERSION:
    # e.g. "11.8" -> "118" (dots removed, truncated to three characters).
    CUDA_VERSION = "".join(CUDA_VERSION.split("."))[:3]

ROCM_VERSION = os.getenv("ROCM_VERSION", None) or torch.version.hip
if ROCM_VERSION:
    # Normalize to the exact patch releases the prebuilt kernels target.
    if ROCM_VERSION.startswith("5.6"):
        ROCM_VERSION = "5.6.1"
    elif ROCM_VERSION.startswith("5.7"):
        ROCM_VERSION = "5.7.1"

    # e.g. "5.7.1" -> "571"
    ROCM_VERSION = "".join(ROCM_VERSION.split("."))[:3]

if not PYPI_BUILD:
    # Non-PyPI builds advertise their accelerator flavor as a PEP 440 local
    # version identifier so wheels for different stacks don't collide.
    if IS_CPU_ONLY:
        AUTOAWQ_VERSION += "+cpu"
    elif CUDA_VERSION:
        AUTOAWQ_VERSION += f"+cu{CUDA_VERSION}"
    elif ROCM_VERSION:
        AUTOAWQ_VERSION += f"+rocm{ROCM_VERSION}"
    else:
        raise RuntimeError(
            "Your system must have either Nvidia or AMD GPU to build this package."
        )

common_setup_kwargs = {
    "version": AUTOAWQ_VERSION,
    "name": "autoawq",
    "author": "Casper Hansen",
    "license": "MIT",
    "python_requires": ">=3.8.0",
    "description": "AutoAWQ implements the AWQ algorithm for 4-bit quantization with a 2x speedup during inference.",
    "long_description": (Path(__file__).parent / "README.md").read_text(
        encoding="UTF-8"
    ),
    "long_description_content_type": "text/markdown",
    "url": "https://github.com/casper-hansen/AutoAWQ",
    "keywords": ["awq", "autoawq", "quantization", "transformers"],
    "platforms": ["linux", "windows"],
    "classifiers": [
        "Environment :: GPU :: NVIDIA CUDA :: 11.8",
        "Environment :: GPU :: NVIDIA CUDA :: 12",
        "License :: OSI Approved :: MIT License",
        "Natural Language :: English",
        "Programming Language :: Python :: 3.8",
        "Programming Language :: Python :: 3.9",
        "Programming Language :: Python :: 3.10",
        "Programming Language :: Python :: 3.11",
        "Programming Language :: C++",
    ],
}

requirements = [
    "torch>=2.0.1",
    "transformers>=4.35.0",
    "tokenizers>=0.12.1",
    "typing_extensions>=4.8.0",
    "accelerate",
    "datasets",
    "zstandard",
]

# Probe whether prebuilt kernels are already importable in this environment
# (exllamav2 kernels on ROCm, the AWQ CUDA extension otherwise).
try:
    if ROCM_VERSION:
        import exlv2_ext
    else:
        import awq_ext

    KERNELS_INSTALLED = True
except ImportError:
    KERNELS_INSTALLED = False

# kernels can be downloaded from pypi for cuda+121 only
# for everything else, we need to download the wheels from github
if not KERNELS_INSTALLED and (CUDA_VERSION or ROCM_VERSION):
    if CUDA_VERSION and CUDA_VERSION.startswith("12"):
        requirements.append("autoawq-kernels")
    # Explicit parentheses: `and` binds tighter than `or`, so the unparenthesized
    # original meant exactly this — made explicit for readability.
    elif (CUDA_VERSION and CUDA_VERSION.startswith("11")) or ROCM_VERSION in ["561", "571"]:
        gpu_system_version = (
            f"cu{CUDA_VERSION}" if CUDA_VERSION else f"rocm{ROCM_VERSION}"
        )
        kernels_version = get_latest_kernels_version("casper-hansen/AutoAWQ_kernels")
        python_version = "".join(platform.python_version_tuple()[:2])
        platform_name = platform.system().lower()
        architecture = platform.machine().lower()
        # Renamed from `latest_rocm_kernels_wheels`: this URL is used for both
        # CUDA 11.x and ROCm wheels.
        kernels_wheel_url = get_kernels_whl_url(
            gpu_system_version,
            kernels_version,
            python_version,
            platform_name,
            architecture,
        )
        requirements.append(f"autoawq-kernels@{kernels_wheel_url}")
    else:
        raise RuntimeError(
            "Your system has a GPU with an unsupported CUDA or ROCm version. "
            "Please install the kernels manually from https://github.com/casper-hansen/AutoAWQ_kernels"
        )
elif IS_CPU_ONLY:
    # CPU-only inference path.
    requirements.append("intel-extension-for-transformers>=1.4.2")

force_extension = os.getenv("PYPI_FORCE_TAGS", "0")
if force_extension == "1":
    # NOTE: We create an empty CUDAExtension because torch helps us with
    # creating the right boilerplate to enable correct targeting of
    # the autoawq-kernels package
    common_setup_kwargs["ext_modules"] = [
        CUDAExtension(
            name="test_kernel",
            sources=[],
        )
    ]

setup(
    packages=find_packages(),
    install_requires=requirements,
    extras_require={
        "eval": ["lm_eval==0.4.1", "tabulate", "protobuf", "evaluate", "scipy"],
        "dev": ["black", "mkdocstrings-python", "mkdocs-material", "griffe-typingdoc"],
    },
    **common_setup_kwargs,
)
import os
import torch
import platform
import requests
from pathlib import Path
from setuptools import setup, find_packages
from torch.utils.cpp_extension import CUDAExtension


def get_latest_kernels_version(repo):
    """Return the latest kernels release version published on a GitHub repo.

    Args:
        repo: GitHub repository slug in ``owner/name`` form.

    Returns:
        The latest release tag with the ``v`` prefix stripped, e.g. ``"0.0.6"``.

    Raises:
        requests.HTTPError: if the GitHub API responds with an error status
            (e.g. rate limiting).
        requests.Timeout: if the API does not answer within the timeout.
    """
    # A timeout prevents `pip install` / builds from hanging indefinitely
    # when the GitHub API is unreachable.
    response = requests.get(
        f"https://api.github.com/repos/{repo}/releases/latest", timeout=30
    )
    # Fail loudly on API errors instead of raising a confusing KeyError on
    # the missing "tag_name" field below.
    response.raise_for_status()
    data = response.json()
    tag_name = data["tag_name"]
    version = tag_name.replace("v", "")
    return version


def get_kernels_whl_url(
    gpu_system_version,
    release_version,
    python_version,
    platform,
    architecture,
):
    """Build the download URL of a prebuilt AutoAWQ kernels wheel.

    Args:
        gpu_system_version: accelerator tag, e.g. ``"cu118"`` or ``"rocm571"``.
        release_version: kernels release version, e.g. ``"0.0.6"``.
        python_version: CPython tag digits, e.g. ``"310"``.
        platform: lowercase platform name, e.g. ``"linux"``.
        architecture: lowercase machine architecture, e.g. ``"x86_64"``.

    Returns:
        Full URL of the wheel asset on the AutoAWQ_kernels release page.
    """
    base_url = "https://github.com/casper-hansen/AutoAWQ_kernels/releases/download"
    wheel_name = (
        f"autoawq_kernels-{release_version}+{gpu_system_version}"
        f"-cp{python_version}-cp{python_version}"
        f"-{platform}_{architecture}.whl"
    )
    return f"{base_url}/v{release_version}/{wheel_name}"


# -----------------------------------------------------------------------------
# Build-time configuration: detect the local accelerator stack (CUDA / ROCm /
# CPU-only) and derive the package version and dependency list from it.
# -----------------------------------------------------------------------------

AUTOAWQ_VERSION = "0.2.6"
# PyPI uploads must not carry a local version tag (+cuXXX / +rocmXXX / +cpu).
PYPI_BUILD = os.getenv("PYPI_BUILD", "0") == "1"
# Neither MPS nor CUDA available -> build the CPU-only flavor.
IS_CPU_ONLY = not torch.backends.mps.is_available() and not torch.cuda.is_available()

# Env var override first; otherwise the CUDA version the installed torch wheel
# was built against (None on non-CUDA torch builds).
CUDA_VERSION = os.getenv("CUDA_VERSION", None) or torch.version.cuda
if CUDA_VERSION:
    # e.g. "12.1.1" -> "121" (dots removed, truncated to three characters).
    CUDA_VERSION = "".join(CUDA_VERSION.split("."))[:3]

ROCM_VERSION = os.getenv("ROCM_VERSION", None) or torch.version.hip
if ROCM_VERSION:
    # Normalize to the exact patch release the prebuilt kernels target
    # (only ROCm 5.7 is supported in this release).
    if ROCM_VERSION.startswith("5.7"):
        ROCM_VERSION = "5.7.1"

    # e.g. "5.7.1" -> "571"
    ROCM_VERSION = "".join(ROCM_VERSION.split("."))[:3]

if not PYPI_BUILD:
    # Non-PyPI builds advertise their accelerator flavor as a PEP 440 local
    # version identifier so wheels for different stacks don't collide.
    if IS_CPU_ONLY:
        AUTOAWQ_VERSION += "+cpu"
    elif CUDA_VERSION:
        AUTOAWQ_VERSION += f"+cu{CUDA_VERSION}"
    elif ROCM_VERSION:
        AUTOAWQ_VERSION += f"+rocm{ROCM_VERSION}"
    else:
        raise RuntimeError(
            "Your system must have either Nvidia or AMD GPU to build this package."
        )

common_setup_kwargs = {
    "version": AUTOAWQ_VERSION,
    "name": "autoawq",
    "author": "Casper Hansen",
    "license": "MIT",
    "python_requires": ">=3.8.0",
    "description": "AutoAWQ implements the AWQ algorithm for 4-bit quantization with a 2x speedup during inference.",
    "long_description": (Path(__file__).parent / "README.md").read_text(
        encoding="UTF-8"
    ),
    "long_description_content_type": "text/markdown",
    "url": "https://github.com/casper-hansen/AutoAWQ",
    "keywords": ["awq", "autoawq", "quantization", "transformers"],
    "platforms": ["linux", "windows"],
    "classifiers": [
        "Environment :: GPU :: NVIDIA CUDA :: 11.8",
        "Environment :: GPU :: NVIDIA CUDA :: 12",
        "License :: OSI Approved :: MIT License",
        "Natural Language :: English",
        "Programming Language :: Python :: 3.8",
        "Programming Language :: Python :: 3.9",
        "Programming Language :: Python :: 3.10",
        "Programming Language :: Python :: 3.11",
        "Programming Language :: C++",
    ],
}

requirements = [
    # Pinned: prebuilt kernels are compiled against this exact torch release.
    "torch==2.3.1",
    "transformers>=4.35.0",
    "tokenizers>=0.12.1",
    "typing_extensions>=4.8.0",
    "accelerate",
    "datasets",
    "zstandard",
]

# Probe whether prebuilt kernels are already importable in this environment
# (exllamav2 kernels on ROCm, the AWQ CUDA extension otherwise).
try:
    if ROCM_VERSION:
        import exlv2_ext
    else:
        import awq_ext

    KERNELS_INSTALLED = True
except ImportError:
    KERNELS_INSTALLED = False

# kernels can be downloaded from pypi for cuda+121 only
# for everything else, we need to download the wheels from github
if not KERNELS_INSTALLED and (CUDA_VERSION or ROCM_VERSION):
    if CUDA_VERSION and CUDA_VERSION.startswith("12"):
        requirements.append("autoawq-kernels")
    # Explicit parentheses: `and` binds tighter than `or`, so the unparenthesized
    # original meant exactly this — made explicit for readability.
    elif (CUDA_VERSION and CUDA_VERSION.startswith("11")) or ROCM_VERSION in ["571"]:
        gpu_system_version = (
            f"cu{CUDA_VERSION}" if CUDA_VERSION else f"rocm{ROCM_VERSION}"
        )
        kernels_version = get_latest_kernels_version("casper-hansen/AutoAWQ_kernels")
        python_version = "".join(platform.python_version_tuple()[:2])
        platform_name = platform.system().lower()
        architecture = platform.machine().lower()
        # Renamed from `latest_rocm_kernels_wheels`: this URL is used for both
        # CUDA 11.x and ROCm wheels.
        kernels_wheel_url = get_kernels_whl_url(
            gpu_system_version,
            kernels_version,
            python_version,
            platform_name,
            architecture,
        )
        requirements.append(f"autoawq-kernels@{kernels_wheel_url}")
    else:
        raise RuntimeError(
            "Your system has a GPU with an unsupported CUDA or ROCm version. "
            "Please install the kernels manually from https://github.com/casper-hansen/AutoAWQ_kernels"
        )
elif IS_CPU_ONLY:
    # CPU-only inference path.
    requirements.append("intel-extension-for-transformers>=1.4.2")

force_extension = os.getenv("PYPI_FORCE_TAGS", "0")
if force_extension == "1":
    # NOTE: We create an empty CUDAExtension because torch helps us with
    # creating the right boilerplate to enable correct targeting of
    # the autoawq-kernels package
    common_setup_kwargs["ext_modules"] = [
        CUDAExtension(
            name="test_kernel",
            sources=[],
        )
    ]

setup(
    packages=find_packages(),
    install_requires=requirements,
    extras_require={
        "eval": ["lm_eval==0.4.1", "tabulate", "protobuf", "evaluate", "scipy"],
        "dev": ["black", "mkdocstrings-python", "mkdocs-material", "griffe-typingdoc"],
    },
    **common_setup_kwargs,
)

0 comments on commit 44e0479

Please sign in to comment.