diff --git a/awq/modules/fused/mlp.py b/awq/modules/fused/mlp.py
index 9236109b..ec49988c 100644
--- a/awq/modules/fused/mlp.py
+++ b/awq/modules/fused/mlp.py
@@ -2,13 +2,14 @@
 import torch.nn.functional as F
 from awq.modules.linear.gemm import WQLinear_GEMM
 from awq.modules.linear.gemv import WQLinear_GEMV
+import warnings
 
 try:
     import awq_ext  # with CUDA kernels
-
     AWQ_INSTALLED = True
-except:
+except Exception as e:
     AWQ_INSTALLED = False
+    warnings.warn(f"AWQ extension could not be imported. Error: {e}")
 
 
 class QuantFusedMLP(nn.Module):
diff --git a/awq/modules/fused/moe.py b/awq/modules/fused/moe.py
index 70431609..ad38909a 100644
--- a/awq/modules/fused/moe.py
+++ b/awq/modules/fused/moe.py
@@ -1,12 +1,13 @@
 import torch
 from typing import Dict
+import warnings
 
 try:
     import awq_ext  # with CUDA kernels
-
     AWQ_INSTALLED = True
-except:
+except Exception as e:
     AWQ_INSTALLED = False
+    warnings.warn(f"AWQ extension could not be imported. Error: {e}")
 
 
 class FusedSparseMoeBlock(torch.nn.Module):
diff --git a/awq/modules/fused/norm.py b/awq/modules/fused/norm.py
index 7a552afd..a29c4517 100644
--- a/awq/modules/fused/norm.py
+++ b/awq/modules/fused/norm.py
@@ -1,13 +1,13 @@
 import torch
 from torch import nn
+import warnings
 
 try:
     import awq_ext  # with CUDA kernels
-
     AWQ_INSTALLED = True
-except:
+except Exception as e:
     AWQ_INSTALLED = False
-
+    warnings.warn(f"AWQ extension could not be imported. Error: {e}")
 
 class FasterTransformerRMSNorm(nn.Module):
     def __init__(self, weight, eps=1e-6):
diff --git a/awq/modules/linear/gemm.py b/awq/modules/linear/gemm.py
index 23472a89..a4ca6296 100644
--- a/awq/modules/linear/gemm.py
+++ b/awq/modules/linear/gemm.py
@@ -3,13 +3,14 @@
 from torch.autograd import Function
 from awq.utils.utils import get_best_device
 from awq.utils.packing_utils import dequantize_gemm
+import warnings
 
 try:
-    import awq_ext  # with CUDA kernels (AutoAWQ_kernels)
-
+    import awq_ext  # with CUDA kernels
     AWQ_INSTALLED = True
-except:
+except Exception as e:
     AWQ_INSTALLED = False
+    warnings.warn(f"AWQ extension could not be imported. Error: {e}")
 
 
 # Adapted from https://github.com/compressa-ai/AutoAWQ/tree/dev
diff --git a/awq/modules/linear/gemv.py b/awq/modules/linear/gemv.py
index 91cc9458..c3b45ee0 100644
--- a/awq/modules/linear/gemv.py
+++ b/awq/modules/linear/gemv.py
@@ -1,13 +1,13 @@
 import torch
 import torch.nn as nn
+import warnings
 
 try:
     import awq_ext  # with CUDA kernels
-
     AWQ_INSTALLED = True
-except:
+except Exception as e:
     AWQ_INSTALLED = False
-
+    warnings.warn(f"AWQ extension could not be imported. Error: {e}")
 
 def make_divisible(c, divisor):
     return (c + divisor - 1) // divisor