support max_memory to specify mem usage for each GPU (#460)
laoda513 authored May 2, 2024
1 parent 33af761 commit 12581fb
Showing 2 changed files with 9 additions and 0 deletions.
2 changes: 2 additions & 0 deletions awq/models/auto.py
@@ -83,6 +83,7 @@ def from_quantized(
     batch_size=1,
     safetensors=True,
     device_map="balanced",
+    max_memory=None,
     offload_folder=None,
     download_kwargs=None,
     **config_kwargs,
@@ -108,6 +109,7 @@ def from_quantized(
     use_exllama_v2=use_exllama_v2,
     safetensors=safetensors,
     device_map=device_map,
+    max_memory=max_memory,
     offload_folder=offload_folder,
     download_kwargs=download_kwargs,
     **config_kwargs,
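
With this pass-through in place, a per-GPU memory cap can be supplied directly from the auto class. Below is a minimal usage sketch; the checkpoint path and the memory budgets are illustrative placeholders, not part of this commit.

# Hypothetical usage sketch: cap GPU 0 at 4 GB and GPU 1 at 10 GB while loading
# an AWQ checkpoint through the auto class.
from awq import AutoAWQForCausalLM

quant_path = "TheBloke/Mistral-7B-Instruct-v0.2-AWQ"  # illustrative checkpoint

model = AutoAWQForCausalLM.from_quantized(
    quant_path,
    device_map="balanced",             # default value from the signature above
    max_memory={0: "4GB", 1: "10GB"},  # new argument added by this commit
    safetensors=True,
)
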
7 changes: 7 additions & 0 deletions awq/models/base.py
@@ -393,6 +393,12 @@ def from_quantized(
             "A device map that will be passed onto the model loading method from transformers."
         ),
     ] = "balanced",
+    max_memory: Annotated[
+        Dict[Union[int, str], Union[int, str]],
+        Doc(
+            'A dictionary mapping device identifiers to maximum memory, which will be passed onto the model loading method from transformers. For example: {0: "4GB", 1: "10GB"}.'
+        ),
+    ] = None,
     offload_folder: Annotated[
         str,
         Doc("The folder to offload the model to."),
@@ -449,6 +455,7 @@
     model,
     checkpoint=model_weights_path,
     device_map=device_map,
+    max_memory=max_memory,
     no_split_module_classes=[self.layer_type],
     offload_folder=offload_folder,
     dtype=torch_dtype,
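
In base.py the new argument is forwarded verbatim to accelerate's load_checkpoint_and_dispatch, which takes it into account when the device map is inferred. The standalone sketch below uses accelerate's related infer_auto_device_map helper (not the exact call above) to show how a max_memory budget shapes placement; the toy model and memory caps are made up, and it assumes two visible GPUs.

# Illustration of how accelerate turns per-device caps into a device map.
# Toy model and budgets are placeholders; adjust the GPU keys to your hardware.
from torch import nn
from accelerate import infer_auto_device_map, init_empty_weights

with init_empty_weights():
    toy = nn.Sequential(*[nn.Linear(1024, 1024) for _ in range(8)])

device_map = infer_auto_device_map(
    toy,
    max_memory={0: "10MB", 1: "20MB", "cpu": "1GB"},  # per-device budgets
)
print(device_map)  # submodule -> device assignment computed under the caps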
