faceswap with inpaint #89

Open · wants to merge 9 commits into main
177 changes: 177 additions & 0 deletions faceswap.py
@@ -0,0 +1,177 @@
# !pip install opencv-python transformers accelerate insightface
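# Swap the face in a target ("pose") image with the identity from one or more
# reference images: detect the face, crop around it, inpaint the crop with the
# InstantID SDXL inpainting pipeline (with LCM-LoRA for few-step sampling), and
# paste the result back into the original image.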
from diffusers.utils import load_image
from diffusers.models import ControlNetModel
from diffusers import LCMScheduler
import math
import cv2
import torch
import numpy as np
from PIL import Image
from insightface.app import FaceAnalysis
from pipeline_stable_diffusion_xl_instantid import draw_kps
from pipeline_stable_diffusion_xl_instantid_inpaint import StableDiffusionXLInstantIDInpaintPipeline


def resize_img(input_image, max_side=1280, min_side=1024, size=None,
               pad_to_max_side=False, mode=Image.BILINEAR, base_pixel_number=64):
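    # Scale the image so its longer side becomes max_side, then round both
    # dimensions down to multiples of base_pixel_number. pad_to_max_side
    # optionally centers the result on a white max_side x max_side canvas.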

    w, h = input_image.size
    if size is not None:
        w_resize_new, h_resize_new = size
    else:
        ratio = min_side / min(h, w)
        w, h = round(ratio*w), round(ratio*h)
        ratio = max_side / max(h, w)
        input_image = input_image.resize([round(ratio*w), round(ratio*h)], mode)
        w_resize_new = (round(ratio * w) // base_pixel_number) * base_pixel_number
        h_resize_new = (round(ratio * h) // base_pixel_number) * base_pixel_number
    input_image = input_image.resize([w_resize_new, h_resize_new], mode)

    if pad_to_max_side:
        res = np.ones([max_side, max_side, 3], dtype=np.uint8) * 255
        offset_x = (max_side - w_resize_new) // 2
        offset_y = (max_side - h_resize_new) // 2
        res[offset_y:offset_y+h_resize_new, offset_x:offset_x+w_resize_new] = np.array(input_image)
        input_image = Image.fromarray(res)
    return input_image

def prepare_average_embedding(face_list):
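    # Detect the largest face in each reference image and collect its identity
    # embedding; the concatenated embeddings are passed to the pipeline as image_embeds.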
    face_embeddings = []
    for face_path in face_list:
        face_image = load_image(face_path)
        face_image = resize_img(face_image)
        face_info = app.get(cv2.cvtColor(np.array(face_image), cv2.COLOR_RGB2BGR))
        face_info = sorted(face_info, key=lambda x: (x['bbox'][2] - x['bbox'][0]) * (x['bbox'][3] - x['bbox'][1]))[-1]  # only use the largest face
        face_emb = face_info['embedding']
        face_embeddings.append(face_emb)

    return np.concatenate(face_embeddings)

def prepareMaskAndPoseAndControlImage(pose_image, face_info, padding = 50, mask_grow = 20, resize = True):
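    # Crop the pose image to the detected face plus `padding` pixels, build a white
    # rectangle mask grown by `mask_grow` pixels around the face box, and draw the
    # keypoint control image on the crop. Returns the three PIL images plus the
    # crop origin and size, which are needed to paste the result back later.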
    if padding < mask_grow:
        raise ValueError('mask_grow cannot be greater than padding')

    kps = face_info['kps']
    width, height = pose_image.size

    x1, y1, x2, y2 = face_info['bbox']
    x1, y1, x2, y2 = int(x1), int(y1), int(x2), int(y2)

    # grow the face box by mask_grow, clamped to the image bounds
    m_x1 = max(0, x1 - mask_grow)
    m_y1 = max(0, y1 - mask_grow)
    m_x2 = min(width, x2 + mask_grow)
    m_y2 = min(height, y2 + mask_grow)

    m_x1, m_y1, m_x2, m_y2 = int(m_x1), int(m_y1), int(m_x2), int(m_y2)

    # grow the face box by padding, clamped to the image bounds
    p_x1 = max(0, x1 - padding)
    p_y1 = max(0, y1 - padding)
    p_x2 = min(width, x2 + padding)
    p_y2 = min(height, y2 + padding)

    p_x1, p_y1, p_x2, p_y2 = int(p_x1), int(p_y1), int(p_x2), int(p_y2)

    # mask
    mask = np.zeros([height, width, 3])
    mask[m_y1:m_y2, m_x1:m_x2] = 255
    mask = mask[p_y1:p_y2, p_x1:p_x2]
    mask = Image.fromarray(mask.astype(np.uint8))

    image = np.array(pose_image)[p_y1:p_y2, p_x1:p_x2]
    image = Image.fromarray(image.astype(np.uint8))

    # resize image and KPS
    original_width, original_height = image.size
    kps -= [p_x1, p_y1]
    if resize:
        mask = resize_img(mask)
        image = resize_img(image)
        new_width, new_height = image.size
        kps *= [new_width / original_width, new_height / original_height]
    control_image = draw_kps(image, kps)

    # (mask, pose, control PIL images), (original position of face + padding: x, y, w, h)
    return (mask, image, control_image), (p_x1, p_y1, original_width, original_height)

if __name__ == '__main__':

    # insightface face detection + recognition (antelopev2 weights are expected under ./models/antelopev2)
    app = FaceAnalysis(name='antelopev2', root='./', providers=['CUDAExecutionProvider', 'CPUExecutionProvider'])
    app.prepare(ctx_id=0, det_size=(640, 640))

    # Path to InstantID models
    face_adapter = './checkpoints/ip-adapter.bin'
    controlnet_path = './checkpoints/ControlNetModel'

    # Load pipeline
    controlnet = ControlNetModel.from_pretrained(controlnet_path, torch_dtype=torch.float16)

    # LCM LoRA path ( https://huggingface.co/latent-consistency/lcm-lora-sdxl )
    lora = 'loras/pytorch_lora_weights.safetensors'

    # You can use any base XL model (do not use models for inpainting!)
    base_model_path = 'stabilityai/stable-diffusion-xl-base-1.0'

    pipe = StableDiffusionXLInstantIDInpaintPipeline.from_pretrained(
        base_model_path,
        controlnet=controlnet,
        torch_dtype=torch.float16
    )
    pipe.cuda()

    pipe.scheduler = LCMScheduler.from_config(pipe.scheduler.config)

    # load adapter
    pipe.load_ip_adapter_instantid(face_adapter)
    pipe.load_lora_weights(lora)
    pipe.fuse_lora()
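    # LCM scheduler + fused LCM-LoRA enable few-step sampling
    # (see num_inference_steps and guidance_scale below)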

    # prepare images
    face_emb = prepare_average_embedding([
        'examples/kaifu_resize.png', # ..., ...
    ])

    pose_image = load_image('examples/musk_resize.jpeg')
    face_info = app.get(cv2.cvtColor(np.array(pose_image), cv2.COLOR_RGB2BGR))
    face_info = sorted(face_info, key=lambda x: (x['bbox'][2] - x['bbox'][0]) * (x['bbox'][3] - x['bbox'][1]))[-1]  # only use the largest face

    images, position = prepareMaskAndPoseAndControlImage(
        pose_image,
        face_info,
        60,   # padding
        40,   # grow mask
        True  # resize
    )
    mask, pose_image_preprocessed, control_image = images

    prompt = ''
    # negative_prompt is used only when guidance_scale > 1
    # https://huggingface.co/docs/diffusers/api/pipelines/controlnet_sdxl
    negative_prompt = '(lowres, low quality, worst quality:1.2), (text:1.2), watermark, painting, drawing, illustration, glitch, deformed, mutated, cross-eyed, ugly, disfigured'
    steps = 3
    mask_strength = 0.7  # value between 0 and 1
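    # The inpainting pipeline only runs about num_inference_steps * strength denoising
    # steps, so the requested step count is divided by the strength below to end up
    # with roughly `steps` actual steps.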

    image = pipe(
        prompt=prompt,
        negative_prompt=negative_prompt,
        image_embeds=face_emb,
        control_image=control_image,
        image=pose_image_preprocessed,
        mask_image=mask,
        controlnet_conditioning_scale=0.8,
        strength=mask_strength,
        ip_adapter_scale=0.3,  # keep it low
        num_inference_steps=int(math.ceil(steps / mask_strength)),
        guidance_scale=0.0
    ).images[0]

    # processed face with padding
    image.save('face.jpg')

    # integrate cropped result into the pose image
    x, y, w, h = position

    image = image.resize((w, h))
    pose_image.paste(image, (x, y))
    pose_image.save('result.jpg')