diff --git a/.gitignore b/.gitignore index 9120b9c..b221ea6 100644 --- a/.gitignore +++ b/.gitignore @@ -11,4 +11,5 @@ gfpgan enhance_face_test.py checkpoints tmp -train_stage2.py \ No newline at end of file +train_stage2.py +espnet \ No newline at end of file diff --git a/.idea/.gitignore b/.idea/.gitignore new file mode 100644 index 0000000..13566b8 --- /dev/null +++ b/.idea/.gitignore @@ -0,0 +1,8 @@ +# Default ignored files +/shelf/ +/workspace.xml +# Editor-based HTTP Client requests +/httpRequests/ +# Datasource local storage ignored files +/dataSources/ +/dataSources.local.xml diff --git a/.idea/AniTalker.iml b/.idea/AniTalker.iml new file mode 100644 index 0000000..6b4d15b --- /dev/null +++ b/.idea/AniTalker.iml @@ -0,0 +1,12 @@ + + + + + + + + + + \ No newline at end of file diff --git a/.idea/inspectionProfiles/profiles_settings.xml b/.idea/inspectionProfiles/profiles_settings.xml new file mode 100644 index 0000000..105ce2d --- /dev/null +++ b/.idea/inspectionProfiles/profiles_settings.xml @@ -0,0 +1,6 @@ + + + + \ No newline at end of file diff --git a/.idea/misc.xml b/.idea/misc.xml new file mode 100644 index 0000000..b9670a5 --- /dev/null +++ b/.idea/misc.xml @@ -0,0 +1,7 @@ + + + + + + \ No newline at end of file diff --git a/.idea/modules.xml b/.idea/modules.xml new file mode 100644 index 0000000..da2e240 --- /dev/null +++ b/.idea/modules.xml @@ -0,0 +1,8 @@ + + + + + + + + \ No newline at end of file diff --git a/.idea/vcs.xml b/.idea/vcs.xml new file mode 100644 index 0000000..0eb2651 --- /dev/null +++ b/.idea/vcs.xml @@ -0,0 +1,7 @@ + + + + + + + \ No newline at end of file diff --git a/README.md b/README.md index 1899962..16271a8 100644 --- a/README.md +++ b/README.md @@ -95,6 +95,10 @@ ckpts/ [Explanation of Parameters for demo.py](md_docs/config.md) +## Run the demo on macOS + +[How to run on macOS](md_docs/run_on_macOS.md) + ### Main Inference Scripts (Hubert, Better Result 💪) - Recommended diff --git a/assets/models_huggingface.png 
b/assets/models_huggingface.png new file mode 100644 index 0000000..4ae31d9 Binary files /dev/null and b/assets/models_huggingface.png differ diff --git a/assets/results_run_on_macOS.png b/assets/results_run_on_macOS.png new file mode 100644 index 0000000..1ddbebf Binary files /dev/null and b/assets/results_run_on_macOS.png differ diff --git a/code/demo.py b/code/demo.py index 1843c0c..68b454e 100644 --- a/code/demo.py +++ b/code/demo.py @@ -205,7 +205,7 @@ def main(args): padding = np.tile(pose_obj[-1, :], (frame_end - pose_obj.shape[0], 1)) pose_obj = np.vstack((pose_obj, padding)) - pose_signal = torch.Tensor(pose_obj).unsqueeze(0).to(args.device) / 90 # 90 is for normalization here + pose_signal = torch.Tensor(pose_obj).unsqueeze(0).to(args.device)/ 90 # 90 is for normalization here else: yaw_signal = torch.zeros(1, frame_end, 1).to(args.device) + args.pose_yaw pitch_signal = torch.zeros(1, frame_end, 1).to(args.device) + args.pose_pitch @@ -292,4 +292,13 @@ def main(args): args = parser.parse_args() + # macOS Config + # Check if MPS is available + if torch.backends.mps.is_available(): + args.device = torch.device("mps") + print("MPS backend is available.") + # else: + # args.device = torch.device("cpu") + # print("MPS backend is not available. Using CPU instead.") + main(args) \ No newline at end of file diff --git a/code/diffusion/base.py b/code/diffusion/base.py index 2232de9..b87ad74 100644 --- a/code/diffusion/base.py +++ b/code/diffusion/base.py @@ -960,7 +960,14 @@ def _extract_into_tensor(arr, timesteps, broadcast_shape): dimension equal to the length of timesteps. :return: a tensor of shape [batch_size, 1, ...] where the shape has K dims. 
""" - res = th.from_numpy(arr).to(device=timesteps.device)[timesteps].float() + + if th.backends.mps.is_available(): + arr = arr.astype(np.float32) + # Convert the numpy array to a tensor and then move to the device + res = th.from_numpy(arr).to(device=timesteps.device)[timesteps] + else: + res = th.from_numpy(arr).to(device=timesteps.device)[timesteps].float() + while len(res.shape) < len(broadcast_shape): res = res[..., None] return res.expand(broadcast_shape) diff --git a/code/networks/styledecoder.py b/code/networks/styledecoder.py index e2ea080..6ab37a9 100644 --- a/code/networks/styledecoder.py +++ b/code/networks/styledecoder.py @@ -1,4 +1,6 @@ import math +# import os +# os.environ["PYTORCH_ENABLE_MPS_FALLBACK"] = "1" import torch from torch import nn from torch.nn import functional as F diff --git a/md_docs/run_on_macOS.md b/md_docs/run_on_macOS.md new file mode 100644 index 0000000..a15fcd1 --- /dev/null +++ b/md_docs/run_on_macOS.md @@ -0,0 +1,50 @@ +Successfully run on a MacBook Pro M3 Max (128GB + 8TB), Sonoma 14.6.1 + +# 1. Project Download + +``` +git clone https://github.com/X-LANCE/AniTalker.git +``` + +# 2. Dependencies Installation + +``` +conda create -n anitalker python==3.9.0 -c conda-forge +conda activate anitalker +conda install pytorch torchvision torchaudio -c pytorch +# install espnet +git clone https://github.com/espnet/espnet.git +cd espnet +pip install -e . +# install python_speech_features +pip install python_speech_features +conda install libffi +pip install -r requirements_macOS.txt +``` + +# 3. Assets Download + +``` +# Model +cd AniTalker +mkdir ckpts +Go to https://huggingface.co/taocode/anitalker_ckpts/tree/main +then download all six models into ~/AniTalker/ckpts/ + +# npy Go to https://huggingface.co/datasets/taocode/anitalker_hubert_feature_samples/blob/main/monalisa.npy +then download monalisa.npy into ~/AniTalker/test_demos/audios_hubert/ +``` +![](../assets/models_huggingface.png) +# 4. 
Run + +``` + PYTORCH_ENABLE_MPS_FALLBACK=1 python ./code/demo.py \ --infer_type 'hubert_audio_only' \ --stage1_checkpoint_path 'ckpts/stage1.ckpt' \ --stage2_checkpoint_path 'ckpts/stage2_audio_only_hubert.ckpt' \ --test_image_path 'test_demos/portraits/monalisa.jpg' \ --test_audio_path 'test_demos/audios/monalisa.wav' \ --test_hubert_path 'test_demos/audios_hubert/monalisa.npy' \ --result_path 'outputs/monalisa_hubert/' +``` +![](../assets/results_run_on_macOS.png) + +# 5. Modify log + +- dependencies: requirements.txt +- use mps instead of cuda +- change float64 to float32 +- PYTORCH_ENABLE_MPS_FALLBACK=1 diff --git a/outputs/monalisa_hubert/monalisa-monalisa.mp4 b/outputs/monalisa_hubert/monalisa-monalisa.mp4 index efdd70f..cd5290c 100644 Binary files a/outputs/monalisa_hubert/monalisa-monalisa.mp4 and b/outputs/monalisa_hubert/monalisa-monalisa.mp4 differ diff --git a/requirements.txt b/requirements.txt index abd5715..b2d6e2e 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,7 +2,7 @@ pytorch-lightning==1.6.5 torchmetrics==0.5.0 torch==1.8.0 torchvision -scipy==1.5.4 +scipy numpy==1.19.5 tqdm espnet==202301 diff --git a/requirements_macOS.txt b/requirements_macOS.txt new file mode 100644 index 0000000..32b3d69 --- /dev/null +++ b/requirements_macOS.txt @@ -0,0 +1,7 @@ +pytorch-lightning==1.6.5 +torchmetrics==0.5.0 +scipy +numpy==1.19.5 +tqdm +moviepy +transformers==4.19.2 \ No newline at end of file