From 90601b82b194f59d044875553ae381cddfbff5f7 Mon Sep 17 00:00:00 2001
From: Fate_nihility <61657922+CodeLyokoscj@users.noreply.github.com>
Date: Mon, 5 Aug 2024 12:12:05 +0800
Subject: [PATCH 1/2] Update webgui.py for gpu settings

Replace the hard-coded "cuda" with device to provide a unified format for
changing GPU settings.
---
 webgui.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/webgui.py b/webgui.py
index 5e46781..b12eeb4 100644
--- a/webgui.py
+++ b/webgui.py
@@ -67,7 +67,7 @@
 ############# model_init started #############
 
 ## vae init
-vae = AutoencoderKL.from_pretrained(config.pretrained_vae_path).to("cuda", dtype=weight_dtype)
+vae = AutoencoderKL.from_pretrained(config.pretrained_vae_path).to(device, dtype=weight_dtype)
 
 ## reference net init
 reference_unet = UNet2DConditionModel.from_pretrained(
@@ -101,7 +101,7 @@
 denoising_unet.load_state_dict(torch.load(config.denoising_unet_path, map_location="cpu"), strict=False)
 
 ## face locator init
-face_locator = FaceLocator(320, conditioning_channels=1, block_out_channels=(16, 32, 96, 256)).to(dtype=weight_dtype, device="cuda")
+face_locator = FaceLocator(320, conditioning_channels=1, block_out_channels=(16, 32, 96, 256)).to(dtype=weight_dtype, device=device)
 face_locator.load_state_dict(torch.load(config.face_locator_path))
 
 ## load audio processor params
@@ -122,7 +122,7 @@
     audio_guider=audio_processor,
     face_locator=face_locator,
     scheduler=scheduler,
-).to("cuda", dtype=weight_dtype)
+).to(device, dtype=weight_dtype)
 
 def select_face(det_bboxes, probs):
     ## max face from faces that the prob is above 0.8
@@ -170,7 +170,7 @@ def process_video(uploaded_img, uploaded_audio, width, height, length, seed, fac
         face_mask = cv2.resize(face_mask, (width, height))
 
         ref_image_pil = Image.fromarray(face_img[:, :, [2, 1, 0]])
-        face_mask_tensor = torch.Tensor(face_mask).to(dtype=weight_dtype, device="cuda").unsqueeze(0).unsqueeze(0).unsqueeze(0) / 255.0
+        face_mask_tensor = torch.Tensor(face_mask).to(dtype=weight_dtype, device=device).unsqueeze(0).unsqueeze(0).unsqueeze(0) / 255.0
 
         video = pipe(
             ref_image_pil,

From a4e9973dd0fa3d59ffe3b9cc34885fe60d229501 Mon Sep 17 00:00:00 2001
From: Fate_nihility <61657922+CodeLyokoscj@users.noreply.github.com>
Date: Mon, 5 Aug 2024 12:18:13 +0800
Subject: [PATCH 2/2] Update mutual_self_attention.py for gpu settings

The GPU settings have already been written in webgui.py:

pipe = Audio2VideoPipeline(
    vae=vae,
    reference_unet=reference_unet,
    denoising_unet=denoising_unet,
    audio_guider=audio_processor,
    face_locator=face_locator,
    scheduler=scheduler,
).to(device, dtype=weight_dtype)

Thus, the redundant per-tensor device settings in mutual_self_attention.py
are not needed (and may cause index errors on the tensors).
---
 src/models/mutual_self_attention.py | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/src/models/mutual_self_attention.py b/src/models/mutual_self_attention.py
index b5d4a7f..e3e57e9 100644
--- a/src/models/mutual_self_attention.py
+++ b/src/models/mutual_self_attention.py
@@ -80,13 +80,11 @@ def register_reference_hooks(
                     [1] * batch_size * num_images_per_prompt * 16
                     + [0] * batch_size * num_images_per_prompt * 16
                 )
-                .to(device)
                 .bool()
             )
         else:
             uc_mask = (
                 torch.Tensor([0] * batch_size * num_images_per_prompt * 2)
-                .to(device)
                 .bool()
             )
 
@@ -170,7 +168,6 @@ def hacked_basic_transformer_inner_forward(
                         [1] * (hidden_states.shape[0] // 2)
                         + [0] * (hidden_states.shape[0] // 2)
                     )
-                    .to(device)
                     .bool()
                 )
                 hidden_states_c[_uc_mask] = (
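
Note (not part of the patches): both commits assume that webgui.py defines a device variable before the models are loaded. A minimal sketch of what that selection could look like is shown below; the fallback logic and the weight_dtype choice are illustrative assumptions, not code taken from the repository.

```python
import torch

# Hypothetical device selection for webgui.py: prefer CUDA when available,
# otherwise fall back to CPU, so every subsequent
# .to(device, dtype=weight_dtype) call targets the same device.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Illustrative dtype choice: half precision on GPU, full precision on CPU.
weight_dtype = torch.float16 if device == "cuda" else torch.float32
```

With device resolved once at startup, the whole pipeline is moved in one place (pipe = Audio2VideoPipeline(...).to(device, dtype=weight_dtype)), which is the setup the second commit relies on when it removes the per-tensor .to(device) calls in mutual_self_attention.py.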