Skip to content

Commit

Permalink
Fix script
Browse files Browse the repository at this point in the history
  • Loading branch information
lixiang007666 committed Jul 31, 2024
1 parent 4cb851b commit 535c62e
Show file tree
Hide file tree
Showing 2 changed files with 30 additions and 31 deletions.
31 changes: 17 additions & 14 deletions onediff_diffusers_extensions/examples/quant_lora/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,10 @@ python3 onediff_diffusers_extensions/examples/quant_lora/test.py \
```

Performance:
- Iterations per second: 7.03 it/s
- Time taken: 4.65 seconds
- Max used CUDA memory: 10.467 GiB
- Iterations per second: 8.49 it/s
- Time taken: 3.92 seconds
- Max used CUDA memory: 10.465 GiB



### Using LoRA
Expand All @@ -22,39 +23,41 @@ python3 onediff_diffusers_extensions/examples/quant_lora/test.py \
```

Performance:
- Iterations per second: 6.28 it/s
- Time taken: 5.16 seconds
- Max used CUDA memory: 10.481 GiB
- Iterations per second: 8.53 it/s
- Time taken: 3.91 seconds
- Max used CUDA memory: 10.477 GiB



### Compile

Run:
```
python3 onediff_diffusers_extensions/examples/quant_lora/test.py \
--compiler-config '{"mode": "max-optimize:max-autotune:low-precision:cache-all", "memory_format": "channels_last"}' \
--compiler-config '{"mode": "max-optimize:max-autotune:low-precision", "memory_format": "channels_last", "options": {"triton.fuse_attention_allow_fp16_reduction": false}}' \
--saved-image sd_lora_compile.png \
--use_lora
```

Performance:
- Iterations per second: 13.29 it/s
- Time taken: 2.61 seconds
- Max used CUDA memory: 11.477 GiB
- Iterations per second: 14.94 it/s
- Time taken: 2.29 seconds
- Max used CUDA memory: 11.475 GiB



### Compiled with Quantization

Run:
```
python3 onediff_diffusers_extensions/examples/quant_lora/test.py \
--compiler-config '{"mode": "quant:max-optimize:max-autotune:low-precision", "memory_format": "channels_last"}' \
--compiler-config '{"mode": "max-optimize:max-autotune:low-precision", "memory_format": "channels_last", "options": {"triton.fuse_attention_allow_fp16_reduction": false}}' \
--quantize-config '{"quant_type": "int8_dynamic"}' \
--saved-image sd_lora_int8.png \
--use_lora
```

Performance:
- Iterations per second: 15.55 it/s
- Time taken: 2.22 seconds
- Max used CUDA memory: 8.804 GiB
- Iterations per second: 17.00 it/s
- Time taken: 2.04 seconds
- Max used CUDA memory: 8.808 GiB
30 changes: 13 additions & 17 deletions onediff_diffusers_extensions/examples/quant_lora/test.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,10 @@
import json
import time

import torch

from diffusers import AutoPipelineForText2Image as pipeline_cls
from onediffx import compile_pipe, quantize_pipe
import torch


def parse_args():
Expand All @@ -20,14 +21,14 @@ def parse_args():
parser.add_argument(
"--lora-model-id",
type=str,
default="ostris/watercolor_style_lora_sdxl",
help="LoRA model identifier for fine-tuning weights."
default="minimaxir/sdxl-wrong-lora",
help="LoRA model identifier for fine-tuning weights.",
)
parser.add_argument(
"--lora-filename",
type=str,
default="watercolor_v1_sdxl.safetensors",
help="Filename for LoRA weights."
default="pytorch_lora_weights.bin",
help="Filename for LoRA weights.",
)
parser.add_argument(
"--compiler-config", type=str, help="JSON string for compiler config."
Expand All @@ -38,7 +39,7 @@ def parse_args():
parser.add_argument(
"--prompt",
type=str,
default="A girl smiling",
default="anime style, 1 girl, indoors, sitting on the sofa, living room, pink hair, white sock, blue eyes, from back, from above, face towards viewer, playing video games, holding controller, black silk, parted lips.",
help="Prompt for the image generation.",
)
parser.add_argument(
Expand All @@ -63,7 +64,7 @@ def parse_args():
help="Path to save the generated image.",
)
parser.add_argument(
"--seed", type=int, default=3, help="Seed for random number generation."
"--seed", type=int, default=888, help="Seed for random number generation."
)
parser.add_argument(
"--warmup-iterations",
Expand All @@ -74,12 +75,6 @@ def parse_args():
parser.add_argument(
"--use_lora", action="store_true", help="Use LoRA weights for the generation"
)
parser.add_argument(
"--cross-attention-kwargs",
type=str,
default='{"scale": 0.5}',
help="JSON string for cross-attention configuration.",
)
return parser.parse_args()


Expand All @@ -94,8 +89,10 @@ def __init__(self, model, compiler_config=None, quantize_config=None):

if args.use_lora:
print("Use LoRA...")
self.pipe.load_lora_weights(args.lora_model_id, weight_name=args.lora_filename)
# self.pipe.fuse_lora()
self.pipe.load_lora_weights(
args.lora_model_id, weight_name=args.lora_filename
)
self.pipe.fuse_lora()

self.pipe.to(device)

Expand Down Expand Up @@ -147,17 +144,16 @@ def quantize_pipe(self, pipe, quantize_config):
def main():
compiler_config = json.loads(args.compiler_config) if args.compiler_config else None
quantize_config = json.loads(args.quantize_config) if args.quantize_config else None
cross_attention_kwargs = json.loads(args.cross_attention_kwargs)

sd = SDGenerator(args.model, compiler_config, quantize_config)

gen_args = {
"prompt": args.prompt,
"negative_prompt": "wrong",
"num_inference_steps": args.num_inference_steps,
"height": args.height,
"width": args.width,
"guidance_scale": args.guidance_scale,
"cross_attention_kwargs": cross_attention_kwargs,
}

sd.warmup(gen_args, args.warmup_iterations)
Expand Down

0 comments on commit 535c62e

Please sign in to comment.