From 9c6377f57067fd06a1fabff2754c5f86ccb2da13 Mon Sep 17 00:00:00 2001
From: Johnson
Date: Fri, 3 Jul 2020 20:35:44 +0800
Subject: [PATCH] Add code and guide on using custom images

---
 README.md                  |  13 ++
 align_custom_faces.py      | 147 +++++++++++++++++++++
 interactive_demo_custom.py | 254 +++++++++++++++++++++++++++++++++++++
 3 files changed, 414 insertions(+)
 create mode 100644 align_custom_faces.py
 create mode 100644 interactive_demo_custom.py

diff --git a/README.md b/README.md
index bb80fb8d..835cdb11 100644
--- a/README.md
+++ b/README.md
@@ -76,6 +76,19 @@ You can change the config using `-c` parameter. To run on `celeb-hq` in 256x256
 
 However, for configs other then FFHQ, you need to obtain new principal direction vectors for the attributes.
 
+To run the demo on custom images, follow these steps:
+
+    mkdir custom
+    # Put your custom images in custom/
+    python align_custom_faces.py
+    # The cropped faces will be located at:
+    # dataset_samples/faces/realign_custom1024x1024
+    python interactive_demo_custom.py
+
+If the min/max range of the sliders isn't suitable for your custom image, modify the following line in `interactive_demo_custom.py`:
+
+    bimpy.slider_float(label, v, -40.0, 40.0)
+
 ## Repository organization
 
 #### Running scripts
diff --git a/align_custom_faces.py b/align_custom_faces.py
new file mode 100644
index 00000000..0af8a52e
--- /dev/null
+++ b/align_custom_faces.py
@@ -0,0 +1,147 @@
+# Copyright 2019-2020 Stanislav Pidhorskyi
+#
+# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
+#
+# This work is licensed under the Creative Commons Attribution-NonCommercial
+# 4.0 International License. To view a copy of this license, visit
+# http://creativecommons.org/licenses/by-nc/4.0/ or send a letter to
+# Creative Commons, PO Box 1866, Mountain View, CA 94042, USA.
+
+import os
+import numpy as np
+import dlib
+from PIL import Image
+import PIL
+import scipy
+import scipy.ndimage
+
+# lefteye_x lefteye_y righteye_x righteye_y nose_x nose_y leftmouth_x leftmouth_y rightmouth_x rightmouth_y
+# 69 111 108 111 88 136 72 152 105 152
+# 44 51 83 51 63 76 47 92 80 92
+
+use_1024 = True
+
+
+def align(img, parts, dst_dir='realign_custom1024x1024', output_size=1024, transform_size=4096, item_idx=0, enable_padding=True):
+    # Parse landmarks.
+    lm = np.array(parts)
+    lm_chin = lm[0: 17]  # left-right
+    lm_eyebrow_left = lm[17: 22]  # left-right
+    lm_eyebrow_right = lm[22: 27]  # left-right
+    lm_nose = lm[27: 31]  # top-down
+    lm_nostrils = lm[31: 36]  # top-down
+    lm_eye_left = lm[36: 42]  # left-clockwise
+    lm_eye_right = lm[42: 48]  # left-clockwise
+    lm_mouth_outer = lm[48: 60]  # left-clockwise
+    lm_mouth_inner = lm[60: 68]  # left-clockwise
+
+    # Calculate auxiliary vectors.
+    eye_left = np.mean(lm_eye_left, axis=0)
+    eye_right = np.mean(lm_eye_right, axis=0)
+    eye_avg = (eye_left + eye_right) * 0.5
+    eye_to_eye = eye_right - eye_left
+    mouth_left = lm_mouth_outer[0]
+    mouth_right = lm_mouth_outer[6]
+    mouth_avg = (mouth_left + mouth_right) * 0.5
+    eye_to_mouth = mouth_avg - eye_avg
+
+    # Choose oriented crop rectangle.
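+    # The crop is the FFHQ-style oriented square: x below is its horizontal
+    # half-axis, built from the eye-to-eye vector and the eye-to-mouth vector
+    # rotated by 90 degrees so the square tilts with the head, then scaled
+    # from the eye and mouth distances; y is x rotated by 90 degrees, and the
+    # centre c sits slightly below the midpoint between the eyes.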
+ x = eye_to_eye - np.flipud(eye_to_mouth) * [-1, 1] + x /= np.hypot(*x) + + if use_1024: + x *= max(np.hypot(*eye_to_eye) * 2.0, np.hypot(*eye_to_mouth) * 1.8) + else: + x *= (np.hypot(*eye_to_eye) * 1.6410 + np.hypot(*eye_to_mouth) * 1.560) / 2.0 + + y = np.flipud(x) * [-1, 1] + + if use_1024: + c = eye_avg + eye_to_mouth * 0.1 + quad = np.stack([c - x - y, c - x + y, c + x + y, c + x - y]) + else: + c = eye_avg + eye_to_mouth * 0.317 + quad = np.stack([c - x - y, c - x + y, c + x + y, c + x - y]) + + qsize = np.hypot(*x) * 2 + + img = Image.fromarray(img) + + # Shrink. + shrink = int(np.floor(qsize / output_size * 0.5)) + if shrink > 1: + rsize = (int(np.rint(float(img.size[0]) / shrink)), int(np.rint(float(img.size[1]) / shrink))) + img = img.resize(rsize, PIL.Image.ANTIALIAS) + quad /= shrink + qsize /= shrink + + # Crop. + border = max(int(np.rint(qsize * 0.1)), 3) + crop = (int(np.floor(min(quad[:, 0]))), int(np.floor(min(quad[:, 1]))), int(np.ceil(max(quad[:, 0]))), + int(np.ceil(max(quad[:, 1])))) + crop = (max(crop[0] - border, 0), max(crop[1] - border, 0), min(crop[2] + border, img.size[0]), + min(crop[3] + border, img.size[1])) + if crop[2] - crop[0] < img.size[0] or crop[3] - crop[1] < img.size[1]: + img = img.crop(crop) + quad -= crop[0:2] + + # Pad. + pad = (int(np.floor(min(quad[:, 0]))), int(np.floor(min(quad[:, 1]))), int(np.ceil(max(quad[:, 0]))), + int(np.ceil(max(quad[:, 1])))) + pad = (max(-pad[0] + border, 0), max(-pad[1] + border, 0), max(pad[2] - img.size[0] + border, 0), + max(pad[3] - img.size[1] + border, 0)) + if enable_padding and max(pad) > border - 4: + pad = np.maximum(pad, int(np.rint(qsize * 0.3))) + img = np.pad(np.float32(img), ((pad[1], pad[3]), (pad[0], pad[2]), (0, 0)), 'reflect') + h, w, _ = img.shape + y, x, _ = np.ogrid[:h, :w, :1] + mask = np.maximum(1.0 - np.minimum(np.float32(x) / pad[0], np.float32(w - 1 - x) / pad[2]), + 1.0 - np.minimum(np.float32(y) / pad[1], np.float32(h - 1 - y) / pad[3])) + blur = qsize * 0.02 + img += (scipy.ndimage.gaussian_filter(img, [blur, blur, 0]) - img) * np.clip(mask * 3.0 + 1.0, 0.0, 1.0) + img += (np.median(img, axis=(0, 1)) - img) * np.clip(mask, 0.0, 1.0) + img = PIL.Image.fromarray(np.uint8(np.clip(np.rint(img), 0, 255)), 'RGB') + quad += pad[:2] + + # Transform. + img = img.transform((transform_size, transform_size), PIL.Image.QUAD, (quad + 0.5).flatten(), PIL.Image.BILINEAR) + if output_size < transform_size: + img = img.resize((output_size, output_size), PIL.Image.ANTIALIAS) + + # Save aligned image. 
+ dst_subdir = dst_dir + os.makedirs(dst_subdir, exist_ok=True) + img.save(os.path.join(dst_subdir, '%05d.png' % item_idx)) + + +predictor_path = 'shape_predictor_68_face_landmarks.dat' + +detector = dlib.get_frontal_face_detector() +predictor = dlib.shape_predictor(predictor_path) + +item_idx = 0 + +for filename in os.listdir('custom'): + img = np.asarray(Image.open('custom/' + filename)) + if img.shape[2] == 4: + img = img[:, :, :3] + + dets = detector(img, 0) + print("Number of faces detected: {}".format(len(dets))) + + for i, d in enumerate(dets): + print("Detection {}: Left: {} Top: {} Right: {} Bottom: {}".format( + i, d.left(), d.top(), d.right(), d.bottom())) + + shape = predictor(img, d) + + parts = shape.parts() + + parts = [[part.x, part.y] for part in parts] + + if use_1024: + align(img, parts, dst_dir='dataset_samples/faces/realign_custom1024x1024', output_size=1024, transform_size=4098, item_idx=item_idx) + else: + align(img, parts, dst_dir='dataset_samples/faces/realign_custom128x128', output_size=128, transform_size=512, item_idx=item_idx) + + item_idx += 1 diff --git a/interactive_demo_custom.py b/interactive_demo_custom.py new file mode 100644 index 00000000..a4e45d11 --- /dev/null +++ b/interactive_demo_custom.py @@ -0,0 +1,254 @@ +# Copyright 2019-2020 Stanislav Pidhorskyi +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== + +import torch.utils.data +from net import * +from model import Model +from launcher import run +from checkpointer import Checkpointer +from dlutils.pytorch import count_parameters +from defaults import get_cfg_defaults +import lreq + +from PIL import Image +import bimpy + + +lreq.use_implicit_lreq.set(True) + + +indices = [0, 1, 2, 3, 4, 10, 11, 17, 19] + +labels = ["gender", + "smile", + "attractive", + "wavy-hair", + "young", + "big lips", + "big nose", + "chubby", + "glasses", + ] + + +def sample(cfg, logger): + torch.cuda.set_device(0) + model = Model( + startf=cfg.MODEL.START_CHANNEL_COUNT, + layer_count=cfg.MODEL.LAYER_COUNT, + maxf=cfg.MODEL.MAX_CHANNEL_COUNT, + latent_size=cfg.MODEL.LATENT_SPACE_SIZE, + truncation_psi=cfg.MODEL.TRUNCATIOM_PSI, + truncation_cutoff=cfg.MODEL.TRUNCATIOM_CUTOFF, + mapping_layers=cfg.MODEL.MAPPING_LAYERS, + channels=cfg.MODEL.CHANNELS, + generator=cfg.MODEL.GENERATOR, + encoder=cfg.MODEL.ENCODER) + model.cuda(0) + model.eval() + model.requires_grad_(False) + + decoder = model.decoder + encoder = model.encoder + mapping_tl = model.mapping_tl + mapping_fl = model.mapping_fl + dlatent_avg = model.dlatent_avg + + logger.info("Trainable parameters generator:") + count_parameters(decoder) + + logger.info("Trainable parameters discriminator:") + count_parameters(encoder) + + arguments = dict() + arguments["iteration"] = 0 + + model_dict = { + 'discriminator_s': encoder, + 'generator_s': decoder, + 'mapping_tl_s': mapping_tl, + 'mapping_fl_s': mapping_fl, + 'dlatent_avg': dlatent_avg + } + + checkpointer = Checkpointer(cfg, + model_dict, + {}, + logger=logger, + save=False) + + extra_checkpoint_data = checkpointer.load() + + model.eval() + + layer_count = cfg.MODEL.LAYER_COUNT + + def encode(x): + Z, _ = model.encode(x, layer_count - 1, 1) + Z = Z.repeat(1, model.mapping_fl.num_layers, 1) + return Z + + def decode(x): + layer_idx = torch.arange(2 * layer_count)[np.newaxis, :, np.newaxis] + ones = torch.ones(layer_idx.shape, dtype=torch.float32) + coefs = torch.where(layer_idx < model.truncation_cutoff, ones, ones) + # x = torch.lerp(model.dlatent_avg.buff.data, x, coefs) + return model.decoder(x, layer_count - 1, 1, noise=True) + + path = 'dataset_samples/faces/realign_custom1024x1024' + + paths = list(os.listdir(path)) + paths.sort() + paths_backup = paths[:] + randomize = bimpy.Bool(True) + current_file = bimpy.String("") + + ctx = bimpy.Context() + + attribute_values = [bimpy.Float(0) for i in indices] + + W = [torch.tensor(np.load("principal_directions/direction_%d.npy" % i), dtype=torch.float32) for i in indices] + + rnd = np.random.RandomState(5) + + def loadNext(): + img = np.asarray(Image.open(path + '/' + paths[0])) + current_file.value = paths[0] + paths.pop(0) + if len(paths) == 0: + paths.extend(paths_backup) + + if img.shape[2] == 4: + img = img[:, :, :3] + im = img.transpose((2, 0, 1)) + x = torch.tensor(np.asarray(im, dtype=np.float32), device='cpu', requires_grad=True).cuda() / 127.5 - 1. 
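+        # Pixels are rescaled from [0, 255] to [-1, 1] before encoding; the
+        # check below is a second guard that drops an alpha channel from the
+        # channels-first tensor, and the pooling further down brings the image
+        # to the resolution the decoder actually produces.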
+ if x.shape[0] == 4: + x = x[:3] + + needed_resolution = model.decoder.layer_to_resolution[-1] + while x.shape[2] > needed_resolution: + x = F.avg_pool2d(x, 2, 2) + if x.shape[2] != needed_resolution: + x = F.adaptive_avg_pool2d(x, (needed_resolution, needed_resolution)) + + img_src = ((x * 0.5 + 0.5) * 255).type(torch.long).clamp(0, 255).cpu().type(torch.uint8).transpose(0, 2).transpose(0, 1).numpy() + + latents_original = encode(x[None, ...].cuda()) + latents = latents_original[0, 0].clone() + latents -= model.dlatent_avg.buff.data[0] + + for v, w in zip(attribute_values, W): + v.value = (latents * w).sum() + + for v, w in zip(attribute_values, W): + latents = latents - v.value * w + + return latents, latents_original, img_src + + def loadRandom(): + latents = rnd.randn(1, cfg.MODEL.LATENT_SPACE_SIZE) + lat = torch.tensor(latents).float().cuda() + dlat = mapping_fl(lat) + layer_idx = torch.arange(2 * layer_count)[np.newaxis, :, np.newaxis] + ones = torch.ones(layer_idx.shape, dtype=torch.float32) + coefs = torch.where(layer_idx < model.truncation_cutoff, ones, ones) + dlat = torch.lerp(model.dlatent_avg.buff.data, dlat, coefs) + x = decode(dlat)[0] + img_src = ((x * 0.5 + 0.5) * 255).type(torch.long).clamp(0, 255).cpu().type(torch.uint8).transpose(0, 2).transpose(0, 1).numpy() + latents_original = dlat + latents = latents_original[0, 0].clone() + latents -= model.dlatent_avg.buff.data[0] + + for v, w in zip(attribute_values, W): + v.value = (latents * w).sum() + + for v, w in zip(attribute_values, W): + latents = latents - v.value * w + + return latents, latents_original, img_src + + latents, latents_original, img_src = loadNext() + + ctx.init(1800, 1600, "Styles") + + def update_image(w, latents_original): + with torch.no_grad(): + w = w + model.dlatent_avg.buff.data[0] + w = w[None, None, ...].repeat(1, model.mapping_fl.num_layers, 1) + + layer_idx = torch.arange(model.mapping_fl.num_layers)[np.newaxis, :, np.newaxis] + cur_layers = (7 + 1) * 2 + mixing_cutoff = cur_layers + styles = torch.where(layer_idx < mixing_cutoff, w, latents_original) + + x_rec = decode(styles) + resultsample = ((x_rec * 0.5 + 0.5) * 255).type(torch.long).clamp(0, 255) + resultsample = resultsample.cpu()[0, :, :, :] + return resultsample.type(torch.uint8).transpose(0, 2).transpose(0, 1) + + im_size = 2 ** (cfg.MODEL.LAYER_COUNT + 1) + im = update_image(latents, latents_original) + print(im.shape) + im = bimpy.Image(im) + + display_original = True + + seed = 0 + + while not ctx.should_close(): + with ctx: + new_latents = latents + sum([v.value * w for v, w in zip(attribute_values, W)]) + + if display_original: + im = bimpy.Image(img_src) + else: + im = bimpy.Image(update_image(new_latents, latents_original)) + + bimpy.begin("Principal directions") + bimpy.columns(2) + bimpy.set_column_width(0, im_size + 20) + bimpy.image(im) + bimpy.next_column() + + for v, label in zip(attribute_values, labels): + bimpy.slider_float(label, v, -40.0, 40.0) + + bimpy.checkbox("Randomize noise", randomize) + + if randomize.value: + seed += 1 + + torch.manual_seed(seed) + + if bimpy.button('Next'): + latents, latents_original, img_src = loadNext() + display_original = True + if bimpy.button('Display Reconstruction'): + display_original = False + if bimpy.button('Generate random'): + latents, latents_original, img_src = loadRandom() + display_original = False + + if bimpy.input_text("Current file", current_file, 64) and os.path.exists(path + '/' + current_file.value): + paths.insert(0, current_file.value) + latents, 
latents_original, img_src = loadNext() + + bimpy.end() + + +if __name__ == "__main__": + gpu_count = 1 + run(sample, get_cfg_defaults(), description='ALAE-interactive', default_config='configs/ffhq.yaml', + world_size=gpu_count, write_log=False)
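Note: `align_custom_faces.py` loads dlib's 68-point landmark model from `shape_predictor_68_face_landmarks.dat` in the working directory, and `interactive_demo_custom.py` relies on the same FFHQ checkpoint and `principal_directions/direction_*.npy` files as the stock `interactive_demo.py`. The landmark model is not included in this patch; a minimal sketch for fetching it, assuming the usual dlib.net download location:

    import bz2
    import shutil
    import urllib.request

    # Download the compressed dlib model and unpack it next to align_custom_faces.py.
    url = "http://dlib.net/files/shape_predictor_68_face_landmarks.dat.bz2"
    archive, _ = urllib.request.urlretrieve(url)
    with bz2.open(archive, "rb") as src, open("shape_predictor_68_face_landmarks.dat", "wb") as dst:
        shutil.copyfileobj(src, dst)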