nftblackmagic · eltociear · Nov 24, 2024 · Nov 24, 2024 · Nov 24, 2024 · Nov 24, 2024
diff --git a/README.md b/README.md
@@ -27,11 +27,14 @@ Hugging Face: 🤗 [catvton-flux-alpha](https://huggingface.co/xiaozaa/catvton-f
 The model weights are trained on the [VITON-HD](https://github.com/shadow2496/VITON-HD) dataset.
 
 ## Prerequisites
+Make sure you are running the code on a GPU with at least 40GB of VRAM. (All of my experiments were run on an 80GB GPU; lower VRAM will cause OOM errors. Support for lower VRAM is planned for the future.)
+
 ```bash
 bash
 conda create -n flux python=3.10
 conda activate flux
 pip install -r requirements.txt
+huggingface-cli login
 ```
 
 ## Usage
@@ -42,13 +45,19 @@ python tryon_inference.py \
 --image ./example/person/00008_00.jpg \
 --mask ./example/person/00008_00_mask.png \
 --garment ./example/garment/00034_00.jpg \
---seed 42
+--seed 42 \
+--output_tryon test.png \
+--steps 30
 ```
 
 Run the following command to start a gradio demo:
 ```bash
 python app.py
 ```
+Gradio demo:
+
+<!-- Option 2: Using a thumbnail linked to the video -->
+[![Demo](example/github.jpg)](https://github.com/user-attachments/assets/e1e69dbf-f8a8-4f34-a84a-e7be5b3d0aec)
 
 
 ## TODO:
@@ -82,4 +91,4 @@ Thanks to [dingkang](https://github.com/dingkwang) [MoonBlvd](https://github.com
 
 ## License
 - The code is licensed under the MIT License.
-- The model weights have the same license as Flux.1 Fill and VITON-HD.
+- The model weights have the same license as Flux.1 Fill and VITON-HD.
diff --git a/app.py b/app.py
@@ -66,7 +66,7 @@ def create_demo():
                     image_input = gr.ImageMask(
                         label="Model Image (Draw mask where garment should go)", 
                         type="pil",
-                        height=576,
+                        height=600,
                     )
                     gr.Examples(
                         examples=[
@@ -80,7 +80,7 @@ def create_demo():
                         label="Person Images",
                     )
                 with gr.Column():
-                    garment_input = gr.Image(label="Garment Image", type="pil", height=576)
+                    garment_input = gr.Image(label="Garment Image", type="pil", height=600)
                     gr.Examples(
                         examples=[
                             ["./example/garment/04564_00.jpg"],

diff --git a/example/github.jpg b/example/github.jpg
diff --git a/example/github.mp4 b/example/github.mp4
diff --git a/requirements.txt b/requirements.txt
@@ -24,7 +24,6 @@ fsspec==2024.6.1
 gitdb==4.0.11
 GitPython==3.1.43
 hjson==3.1.0
-huggingface-hub==0.24.5
 humanfriendly==10.0
 idna==3.7
 importlib_metadata==8.2.0
@@ -97,4 +96,5 @@ bitsandbytes==0.44.1
 gradio==5.6.0
 gradio_client==1.4.3
 prodigyopt
+huggingface-hub
 git+https://github.com/huggingface/diffusers.git
diff --git a/test.png b/test.png
diff --git a/tryon_inference.py b/tryon_inference.py
@@ -42,7 +42,7 @@ def run_inference(
     ])
 
     # Load and process images
-    print("image_path", image_path)
+    # print("image_path", image_path)
     image = load_image(image_path).convert("RGB").resize(size)
     mask = load_image(mask_path).convert("RGB").resize(size)
     garment = load_image(garment_path).convert("RGB").resize(size)
@@ -79,7 +79,6 @@ def run_inference(
     width = size[0]
     garment_result = result.crop((0, 0, width, size[1]))
     tryon_result = result.crop((width, 0, width * 2, size[1]))
-
 
     return garment_result, tryon_result
 
@@ -88,13 +87,13 @@ def main():
     parser.add_argument('--image', required=True, help='Path to the model image')
     parser.add_argument('--mask', required=True, help='Path to the agnostic mask')
     parser.add_argument('--garment', required=True, help='Path to the garment image')
-    parser.add_argument('--output-garment', default='flux_inpaint_garment.png', help='Output path for garment result')
-    parser.add_argument('--output-tryon', default='flux_inpaint_tryon.png', help='Output path for try-on result')
+    parser.add_argument('--output_garment', default='flux_inpaint_garment.png', help='Output path for garment result')
+    parser.add_argument('--output_tryon', default='flux_inpaint_tryon.png', help='Output path for try-on result')
     parser.add_argument('--steps', type=int, default=50, help='Number of inference steps')
-    parser.add_argument('--guidance-scale', type=float, default=30, help='Guidance scale')
+    parser.add_argument('--guidance_scale', type=float, default=30, help='Guidance scale')
     parser.add_argument('--seed', type=int, default=0, help='Random seed')
-    parser.add_argument('--width', type=int, default=768, help='Width')
-    parser.add_argument('--height', type=int, default=576, help='Height')
+    parser.add_argument('--width', type=int, default=576, help='Width')
+    parser.add_argument('--height', type=int, default=768, help='Height')
 
     args = parser.parse_args()
 
@@ -104,18 +103,13 @@ def main():
         image_path=args.image,
         mask_path=args.mask,
         garment_path=args.garment,
-        output_garment_path=args.output_garment,
-        output_tryon_path=args.output_tryon,
         num_steps=args.steps,
         guidance_scale=args.guidance_scale,
         seed=args.seed,
         size=(args.width, args.height)
     )
-    output_garment_path=args.output_garment,
-    output_tryon_path=args.output_tryon,
+    output_tryon_path=args.output_tryon
 
-    if output_garment_path is not None:
-        garment_result.save(output_garment_path)
     tryon_result.save(output_tryon_path)
 
     print("Successfully saved garment and try-on images")