From 8dc392e0b5a3ecf56f8d4430efa1b219e0fd1bf1 Mon Sep 17 00:00:00 2001 From: penghuailiang Date: Fri, 1 May 2020 20:59:12 +0800 Subject: [PATCH] code-optimus --- .gitignore | 1 + Assets/LipSync/Editor/LipSyncOfflineRecognizer.cs | 2 +- Assets/LipSync/Scripts/Core/LipSyncRecognizer.cs | 4 ++-- Assets/LipSync/Scripts/Core/LipSyncRuntimeRecognizer.cs | 2 +- Assets/LipSync/Scripts/Core/MathToolBox.cs | 8 ++++++-- 5 files changed, 11 insertions(+), 6 deletions(-) diff --git a/.gitignore b/.gitignore index 7a29e89..6387a89 100644 --- a/.gitignore +++ b/.gitignore @@ -19,6 +19,7 @@ Assets/FMODBuild* Assets/Plugins* Assets/StreamingAssets* fmod_editor.log +**/cn* # Autogenerated VS/MD solution and project files *.unityproj diff --git a/Assets/LipSync/Editor/LipSyncOfflineRecognizer.cs b/Assets/LipSync/Editor/LipSyncOfflineRecognizer.cs index 2b7db49..1ba8680 100644 --- a/Assets/LipSync/Editor/LipSyncOfflineRecognizer.cs +++ b/Assets/LipSync/Editor/LipSyncOfflineRecognizer.cs @@ -41,7 +41,7 @@ public string[] RecognizeAllByAudioClip(AudioClip audioClip) { MathToolBox.Convolute(currentAudioSpectrum, gaussianFilter, MathToolBox.EPaddleType.Repeat, smoothedAudioSpectrum); MathToolBox.FindLocalLargestPeaks(smoothedAudioSpectrum, peakValues, peakPositions); - frequencyUnit = audioClip.frequency / 2 / windowSize; + frequencyUnit = audioClip.frequency / windowSize; for (int l = 0; l < formantArray.Length; ++l) { formantArray[l] = peakPositions[l] * frequencyUnit; diff --git a/Assets/LipSync/Scripts/Core/LipSyncRecognizer.cs b/Assets/LipSync/Scripts/Core/LipSyncRecognizer.cs index 1c9fd7c..1477899 100644 --- a/Assets/LipSync/Scripts/Core/LipSyncRecognizer.cs +++ b/Assets/LipSync/Scripts/Core/LipSyncRecognizer.cs @@ -11,9 +11,9 @@ public class LipSyncRecognizer protected const int FORMANT_COUNT = 1; protected string[] vowelsByFormantJP = { "i", "u", "e", "o", "a" }; - protected float[] vowelFormantFloorJP = { 0.0f, 250.0f, 300.0f, 450.0f, 600.0f }; + protected float[] vowelFormantFloorJP = { 0.0f, 500.0f, 600.0f, 900.0f, 1200.0f }; protected string[] vowelsByFormantCN = { "i", "v", "u", "e", "o", "a" }; - protected float[] vowelFormantFloorCN = { 0.0f, 100.0f, 250.0f, 300.0f, 450.0f, 600.0f }; + protected float[] vowelFormantFloorCN = { 0.0f, 200.0f, 500.0f, 600.0f, 900.0f, 1200.0f }; protected string[] currentVowels; protected float[] currentVowelFormantCeilValues; diff --git a/Assets/LipSync/Scripts/Core/LipSyncRuntimeRecognizer.cs b/Assets/LipSync/Scripts/Core/LipSyncRuntimeRecognizer.cs index 2e2665d..c3a4e00 100644 --- a/Assets/LipSync/Scripts/Core/LipSyncRuntimeRecognizer.cs +++ b/Assets/LipSync/Scripts/Core/LipSyncRuntimeRecognizer.cs @@ -26,7 +26,7 @@ private void Recognize(ref string result, int sampleRate) { MathToolBox.Convolute(playingAudioSpectrum, gaussianFilter, MathToolBox.EPaddleType.Repeat, smoothedAudioSpectrum); MathToolBox.FindLocalLargestPeaks(smoothedAudioSpectrum, peakValues, peakPositions); - frequencyUnit = sampleRate / 2 / windowSize; + frequencyUnit = sampleRate / windowSize; for (int i = 0; i < formantArray.Length; ++i) { formantArray[i] = peakPositions[i] * frequencyUnit; diff --git a/Assets/LipSync/Scripts/Core/MathToolBox.cs b/Assets/LipSync/Scripts/Core/MathToolBox.cs index 9adaf5d..dcf6cc4 100644 --- a/Assets/LipSync/Scripts/Core/MathToolBox.cs +++ b/Assets/LipSync/Scripts/Core/MathToolBox.cs @@ -80,7 +80,7 @@ public static void Convolute(float[] data, float[] filter, EPaddleType paddleTyp /// Source data. /// Array to store peak values. /// Array to store peak values' positions. - public static void FindLocalLargestPeaks(float[] data, float[] peakValue, int[] peakPosition) + public static void FindLocalLargestPeaks(float[] data, float[] peakValue, int[] peakPosition) { int peakNum = 0; float lastPeak = 0.0f; @@ -88,6 +88,7 @@ public static void FindLocalLargestPeaks(float[] data, float[] peakValue, int[] bool isIncreasing = false; bool isPeakIncreasing = false; + string str = ""; for (int i = 0; i < data.Length - 1; ++i) { if (data[i] < data[i + 1]) @@ -96,8 +97,9 @@ public static void FindLocalLargestPeaks(float[] data, float[] peakValue, int[] } else { - if (isIncreasing) + if (isIncreasing) { + str += (i * 86) + "-"; if (lastPeak < data[i]) // Peak found. 找到峰值, 一般fft窗口有两三个峰值 { isPeakIncreasing = true; @@ -121,11 +123,13 @@ public static void FindLocalLargestPeaks(float[] data, float[] peakValue, int[] isIncreasing = false; } + if (peakNum >= peakValue.Length) { break; } } + Debug.Log(str + " " + peakNum + " " + peakPosition[0] * 86); } ///