diff --git a/.gitignore b/.gitignore
index 7a29e89..6387a89 100644
--- a/.gitignore
+++ b/.gitignore
@@ -19,6 +19,7 @@ Assets/FMODBuild*
Assets/Plugins*
Assets/StreamingAssets*
fmod_editor.log
+**/cn*
# Autogenerated VS/MD solution and project files
*.unityproj
diff --git a/Assets/LipSync/Editor/LipSyncOfflineRecognizer.cs b/Assets/LipSync/Editor/LipSyncOfflineRecognizer.cs
index 2b7db49..1ba8680 100644
--- a/Assets/LipSync/Editor/LipSyncOfflineRecognizer.cs
+++ b/Assets/LipSync/Editor/LipSyncOfflineRecognizer.cs
@@ -41,7 +41,7 @@ public string[] RecognizeAllByAudioClip(AudioClip audioClip)
{
MathToolBox.Convolute(currentAudioSpectrum, gaussianFilter, MathToolBox.EPaddleType.Repeat, smoothedAudioSpectrum);
MathToolBox.FindLocalLargestPeaks(smoothedAudioSpectrum, peakValues, peakPositions);
- frequencyUnit = audioClip.frequency / 2 / windowSize;
+ frequencyUnit = audioClip.frequency / windowSize;
for (int l = 0; l < formantArray.Length; ++l)
{
formantArray[l] = peakPositions[l] * frequencyUnit;
diff --git a/Assets/LipSync/Scripts/Core/LipSyncRecognizer.cs b/Assets/LipSync/Scripts/Core/LipSyncRecognizer.cs
index 1c9fd7c..1477899 100644
--- a/Assets/LipSync/Scripts/Core/LipSyncRecognizer.cs
+++ b/Assets/LipSync/Scripts/Core/LipSyncRecognizer.cs
@@ -11,9 +11,9 @@ public class LipSyncRecognizer
protected const int FORMANT_COUNT = 1;
protected string[] vowelsByFormantJP = { "i", "u", "e", "o", "a" };
- protected float[] vowelFormantFloorJP = { 0.0f, 250.0f, 300.0f, 450.0f, 600.0f };
+ protected float[] vowelFormantFloorJP = { 0.0f, 500.0f, 600.0f, 900.0f, 1200.0f };
protected string[] vowelsByFormantCN = { "i", "v", "u", "e", "o", "a" };
- protected float[] vowelFormantFloorCN = { 0.0f, 100.0f, 250.0f, 300.0f, 450.0f, 600.0f };
+ protected float[] vowelFormantFloorCN = { 0.0f, 200.0f, 500.0f, 600.0f, 900.0f, 1200.0f };
protected string[] currentVowels;
protected float[] currentVowelFormantCeilValues;
diff --git a/Assets/LipSync/Scripts/Core/LipSyncRuntimeRecognizer.cs b/Assets/LipSync/Scripts/Core/LipSyncRuntimeRecognizer.cs
index 2e2665d..c3a4e00 100644
--- a/Assets/LipSync/Scripts/Core/LipSyncRuntimeRecognizer.cs
+++ b/Assets/LipSync/Scripts/Core/LipSyncRuntimeRecognizer.cs
@@ -26,7 +26,7 @@ private void Recognize(ref string result, int sampleRate)
{
MathToolBox.Convolute(playingAudioSpectrum, gaussianFilter, MathToolBox.EPaddleType.Repeat, smoothedAudioSpectrum);
MathToolBox.FindLocalLargestPeaks(smoothedAudioSpectrum, peakValues, peakPositions);
- frequencyUnit = sampleRate / 2 / windowSize;
+ frequencyUnit = sampleRate / windowSize;
for (int i = 0; i < formantArray.Length; ++i)
{
formantArray[i] = peakPositions[i] * frequencyUnit;
diff --git a/Assets/LipSync/Scripts/Core/MathToolBox.cs b/Assets/LipSync/Scripts/Core/MathToolBox.cs
index 9adaf5d..dcf6cc4 100644
--- a/Assets/LipSync/Scripts/Core/MathToolBox.cs
+++ b/Assets/LipSync/Scripts/Core/MathToolBox.cs
@@ -80,7 +80,7 @@ public static void Convolute(float[] data, float[] filter, EPaddleType paddleTyp
/// Source data.
/// Array to store peak values.
/// Array to store peak values' positions.
- public static void FindLocalLargestPeaks(float[] data, float[] peakValue, int[] peakPosition)
+ public static void FindLocalLargestPeaks(float[] data, float[] peakValue, int[] peakPosition)
{
int peakNum = 0;
float lastPeak = 0.0f;
@@ -88,6 +88,7 @@ public static void FindLocalLargestPeaks(float[] data, float[] peakValue, int[]
bool isIncreasing = false;
bool isPeakIncreasing = false;
+ string str = "";
for (int i = 0; i < data.Length - 1; ++i)
{
if (data[i] < data[i + 1])
@@ -96,8 +97,9 @@ public static void FindLocalLargestPeaks(float[] data, float[] peakValue, int[]
}
else
{
- if (isIncreasing)
+ if (isIncreasing)
{
+ str += (i * 86) + "-";
if (lastPeak < data[i]) // Peak found. 找到峰值, 一般fft窗口有两三个峰值
{
isPeakIncreasing = true;
@@ -121,11 +123,13 @@ public static void FindLocalLargestPeaks(float[] data, float[] peakValue, int[]
isIncreasing = false;
}
+
if (peakNum >= peakValue.Length)
{
break;
}
}
+ Debug.Log(str + " " + peakNum + " " + peakPosition[0] * 86);
}
///