diff --git a/.gitignore b/.gitignore index f021ffa..c887a25 100644 --- a/.gitignore +++ b/.gitignore @@ -53,3 +53,6 @@ Thumbs.db # Gradio cache .gradio/example/github.mp4 + +aws/ +checkpoints/ \ No newline at end of file diff --git a/README.md b/README.md index b246329..41b9f08 100644 --- a/README.md +++ b/README.md @@ -22,6 +22,9 @@ Running it now on website: [CATVTON-FLUX-TRY-ON](https://huggingface.co/spaces/x --- **Latest Achievement** +(2024/12/6): +- Released new weights for try-off. The model, named [cat-tryoff-flux](https://huggingface.co/xiaozaa/cat-tryoff-flux), can extract and reconstruct the front view of clothing items from images of people wearing them. [Showcase examples](#try-off-examples) are here. + (2024/12/1): - Community comfyui support [here](https://github.com/lujiazho/ComfyUI-CatvtonFluxWrapper). Thanks to [lujiazho](https://github.com/lujiazho) @@ -43,17 +46,32 @@ Running it now on website: [CATVTON-FLUX-TRY-ON](https://huggingface.co/spaces/x --- ## Showcase + +### Try-on examples | Original | Garment | Result | |----------|---------|---------| | ![Original](example/person/1.jpg) | ![Garment](example/garment/00035_00.jpg) | ![Result](example/result/1.png) | | ![Original](example/person/1.jpg) | ![Garment](example/garment/04564_00.jpg) | ![Result](example/result/2.png) | | ![Original](example/person/00008_00.jpg) | ![Garment](example/garment/00034_00.jpg) | ![Result](example/result/3.png) | +### Try-off examples +| Original clothed model | Restored garment result | +|------------------------|------------------------| +| ![Original](example/person/00055_00.jpg) | ![Restored garment result](example/tryoff_result/restored_garment2.png) | +| ![Original](example/person/00064_00.jpg) | ![Restored garment result](example/tryoff_result/restored_garment4.png) | +| ![Original](example/person/00069_00.jpg) | ![Restored garment result](example/tryoff_result/restored_garment6.png) | + + ## Model Weights +### Tryon Fine-tuning weights in Hugging Face: 🤗 
[catvton-flux-alpha](https://huggingface.co/xiaozaa/catvton-flux-alpha) LORA weights in Hugging Face: 🤗 [catvton-flux-lora-alpha](https://huggingface.co/xiaozaa/catvton-flux-lora-alpha) +### Tryoff +Fine-tuning weights in Hugging Face: 🤗 [cat-tryoff-flux](https://huggingface.co/xiaozaa/cat-tryoff-flux) + +### Dataset The model weights are trained on the [VITON-HD](https://github.com/shadow2496/VITON-HD) dataset. ## Prerequisites @@ -69,6 +87,19 @@ huggingface-cli login ## Usage +### Tryoff +Run the following command to restore the front side of the garment from the clothed model image: +```bash +python tryoff_inference.py \ +--image ./example/person/00069_00.jpg \ +--mask ./example/person/00069_00_mask.png \ +--seed 41 \ +--output_tryon test_original.png \ +--output_garment restored_garment6.png \ +--steps 30 +``` + +### Tryon Run the following command to try on an image: LORA version: @@ -104,7 +135,7 @@ python app_no_lora.py ``` Gradio demo: - +Hugging Face: 🤗 [CATVTON-FLUX-TRY-ON](https://huggingface.co/spaces/xiaozaa/catvton-flux-try-on) [![Demo](example/github.jpg)](https://upcdn.io/FW25b7k/raw/uploads/github.mp4) @@ -114,8 +145,8 @@ Gradio demo: - [x] Add gradio demo - [x] Release updated weights with better performance - [x] Train a smaller model -- [ ] Support comfyui - +- [x] Support comfyui +- [x] Release tryoff weights ## Citation ```bibtex diff --git a/example/tryoff_result/restored_garment1.png b/example/tryoff_result/restored_garment1.png new file mode 100644 index 0000000..1756e39 Binary files /dev/null and b/example/tryoff_result/restored_garment1.png differ diff --git a/example/tryoff_result/restored_garment2.png b/example/tryoff_result/restored_garment2.png new file mode 100644 index 0000000..d718319 Binary files /dev/null and b/example/tryoff_result/restored_garment2.png differ diff --git a/example/tryoff_result/restored_garment3.png b/example/tryoff_result/restored_garment3.png new file mode 100644 index 0000000..ec46806 Binary files /dev/null 
and b/example/tryoff_result/restored_garment3.png differ
diff --git a/example/tryoff_result/restored_garment4.png b/example/tryoff_result/restored_garment4.png
new file mode 100644
index 0000000..5b3229e
Binary files /dev/null and b/example/tryoff_result/restored_garment4.png differ
diff --git a/example/tryoff_result/restored_garment5.png b/example/tryoff_result/restored_garment5.png
new file mode 100644
index 0000000..4c17388
Binary files /dev/null and b/example/tryoff_result/restored_garment5.png differ
diff --git a/example/tryoff_result/restored_garment6.png b/example/tryoff_result/restored_garment6.png
new file mode 100644
index 0000000..fd22a51
Binary files /dev/null and b/example/tryoff_result/restored_garment6.png differ
diff --git a/requirements.txt b/requirements.txt
index a196962..7e2af66 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -11,3 +11,4 @@ sentencepiece
 peft==0.13.2
 huggingface-hub
 spaces
+protobuf
diff --git a/test.png b/test.png
deleted file mode 100644
index 11189c4..0000000
Binary files a/test.png and /dev/null differ
diff --git a/tryoff.sh b/tryoff.sh
new file mode 100644
index 0000000..e53764b
--- /dev/null
+++ b/tryoff.sh
@@ -0,0 +1,7 @@
+python tryoff_inference.py \
+--image ./example/person/00069_00.jpg \
+--mask ./example/person/00069_00_mask.png \
+--seed 41 \
+--output_tryon test_original.png \
+--output_garment restored_garment6.png \
+--steps 30
\ No newline at end of file
diff --git a/tryoff_inference.py b/tryoff_inference.py
new file mode 100644
index 0000000..6f4d8c6
--- /dev/null
+++ b/tryoff_inference.py
@@ -0,0 +1,152 @@
+import argparse
+from pathlib import Path
+import torch
+from diffusers.utils import load_image, check_min_version
+from diffusers import FluxPriorReduxPipeline, FluxFillPipeline
+from diffusers import FluxTransformer2DModel
+import numpy as np
+from torchvision import transforms
+
+def run_inference(
+    image_path,
+    mask_path,
+    size=(576, 768),
+    num_steps=50,
+    guidance_scale=30,
+    seed=42,
+    pipe=None,
+    device="cuda",
+):
+    """Restore the garment worn in a photo (try-off) with a FLUX fill pipeline.
+
+    The masked person image fills the right half of a double-width canvas and
+    the pipeline is asked to paint the empty left half.
+
+    Args:
+        image_path: Location of the clothed-person image, passed to ``load_image``.
+        mask_path: Location of the mask image, passed to ``load_image``. The
+            person image is multiplied by it, so only pixels under the white
+            areas of the mask are kept.
+        size: ``(width, height)`` of a single panel, in pixels.
+        num_steps: Number of inference steps.
+        guidance_scale: Guidance scale passed to the pipeline.
+        seed: Seed for the random generator.
+        pipe: Optional ready-made pipeline to reuse. When ``None``, a
+            ``FluxFillPipeline`` is built around the ``xiaozaa/cat-tryoff-flux``
+            transformer.
+        device: Device the pipeline and the random generator are placed on.
+
+    Returns:
+        ``(garment_result, tryon_result)``: the left and right halves of the
+        image produced by the pipeline.
+    """
+    # Build the pipeline unless the caller supplied one to reuse.
+    if pipe is None:
+        transformer = FluxTransformer2DModel.from_pretrained(
+            "xiaozaa/cat-tryoff-flux",
+            torch_dtype=torch.bfloat16
+        )
+        pipe = FluxFillPipeline.from_pretrained(
+            "black-forest-labs/FLUX.1-dev",
+            transformer=transformer,
+            torch_dtype=torch.bfloat16
+        ).to(device)
+    else:
+        pipe.to(device)
+
+    pipe.transformer.to(torch.bfloat16)
+
+    # ToTensor scales pixels to [0, 1]; Normalize then maps them to [-1, 1].
+    transform = transforms.Compose([
+        transforms.ToTensor(),
+        transforms.Normalize([0.5], [0.5])
+    ])
+    mask_transform = transforms.Compose([
+        transforms.ToTensor()
+    ])
+
+    # Load both inputs and bring them to the panel size.
+    image = load_image(image_path).convert("RGB").resize(size)
+    mask = load_image(mask_path).convert("RGB").resize(size)
+
+    image_tensor = transform(image)
+    mask_tensor = mask_transform(mask)[:1]  # Take only first channel
+    garment_tensor = torch.zeros_like(image_tensor)
+    # Zero out every person pixel that the mask does not select.
+    image_tensor = image_tensor * mask_tensor
+
+    # Left panel: empty canvas to be generated. Right panel: masked person.
+    inpaint_image = torch.cat([garment_tensor, image_tensor], dim=2)  # Concatenate along width
+    garment_mask = torch.zeros_like(mask_tensor)
+    # Ones over the left panel, zeros over the right panel.
+    extended_mask = torch.cat([1 - garment_mask, garment_mask], dim=2)
+
+    # NOTE(review): there is no separator between the [IMAGE1] and [IMAGE2]
+    # sentences; kept as-is since the weights may expect this exact prompt.
+    prompt = f"The pair of images highlights a clothing and its styling on a model, high resolution, 4K, 8K; " \
+             f"[IMAGE1] Detailed product shot of a clothing" \
+             f"[IMAGE2] The same cloth is worn by a model in a lifestyle setting."
+
+    generator = torch.Generator(device=device).manual_seed(seed)
+
+    result = pipe(
+        height=size[1],
+        width=size[0] * 2,
+        image=inpaint_image,
+        mask_image=extended_mask,
+        num_inference_steps=num_steps,
+        generator=generator,
+        max_sequence_length=512,
+        guidance_scale=guidance_scale,
+        prompt=prompt,
+    ).images[0]
+
+    # Split at the midpoint of the image that was actually returned, so the
+    # two halves stay aligned even if the output size differs from the request.
+    out_width, out_height = result.size
+    half_width = out_width // 2
+    garment_result = result.crop((0, 0, half_width, out_height))
+    tryon_result = result.crop((half_width, 0, out_width, out_height))
+
+    return garment_result, tryon_result
+
+def main():
+    """Parse the command line, run try-off inference and save both outputs."""
+    parser = argparse.ArgumentParser(description='Run FLUX virtual try-off inference')
+    parser.add_argument('--image', required=True, help='Path to the model image')
+    parser.add_argument('--mask', required=True, help='Path to the agnostic mask')
+    parser.add_argument('--output_garment', default='flux_inpaint_garment.png', help='Output path for garment result')
+    parser.add_argument('--output_tryon', default='flux_inpaint_tryon.png', help='Output path for try-on result')
+    parser.add_argument('--steps', type=int, default=50, help='Number of inference steps')
+    parser.add_argument('--guidance_scale', type=float, default=30, help='Guidance scale')
+    parser.add_argument('--seed', type=int, default=0, help='Random seed')
+    parser.add_argument('--width', type=int, default=576, help='Width')
+    parser.add_argument('--height', type=int, default=768, help='Height')
+
+    args = parser.parse_args()
+
+    check_min_version("0.30.2")
+
+    # Create the output directories before the expensive inference run, so a
+    # missing directory cannot make the final save fail.
+    garment_path = Path(args.output_garment)
+    tryon_path = Path(args.output_tryon)
+    for path in (garment_path, tryon_path):
+        path.parent.mkdir(parents=True, exist_ok=True)
+
+    garment_result, tryon_result = run_inference(
+        image_path=args.image,
+        mask_path=args.mask,
+        num_steps=args.steps,
+        guidance_scale=args.guidance_scale,
+        seed=args.seed,
+        size=(args.width, args.height)
+    )
+
+    tryon_result.save(tryon_path)
+    garment_result.save(garment_path)
+
+    print("Successfully saved garment and try-on images")
+
+if __name__ == "__main__":
+    main()
\ No newline at end of file
diff --git a/tryon.sh
b/tryon.sh new file mode 100644 index 0000000..0237787 --- /dev/null +++ b/tryon.sh @@ -0,0 +1,7 @@ +python tryon_inference.py \ +--image ./example/person/00008_00.jpg \ +--mask ./example/person/00008_00_mask.png \ +--garment ./example/garment/00034_00.jpg \ +--seed 42 \ +--output_tryon test.png \ +--steps 30 \ No newline at end of file