- Home /
Automatic lip syncing: how can I detect a human voice and respond to its frequency?
I'm new to Unity 3D and am developing an app that includes an "automatic" lip-syncing feature.
I'm following below tutorial
http://answers.unity3d.com/questions/139323/any-way-of-quotautomaticquot-lip-syncing.html
And look at below my code
using UnityEngine;
using System.Collections;
/// <summary>
/// Automatic lip synchronisation: each frame, samples the playing voice clip's
/// frequency spectrum, averages the energy in a rough human-voice band
/// (freqLow..freqHigh Hz), and displaces the six mouth transforms vertically
/// in proportion to that energy.
/// </summary>
public class lipmovement2 : MonoBehaviour
{
    public AudioClip source_clip;                 // clip whose length is reported in video_Length
    public float[] freqData;                      // spectrum buffer filled by GetSpectrumData
    int nSamples = 256;                           // FFT size; must be a power of two (64..8192)
    int fMax = 24000;                             // assumed Nyquist frequency (48 kHz output) — TODO confirm against AudioSettings.outputSampleRate
    public Transform upmouth0_M, upmouth01_L, upmouth02_R, downmouth1_M, downmouth11_L, downmouth12_R;
    float volume = 1000;                          // gain applied to the averaged band volume
    // Approximate human-voice band in Hz; widen/narrow to tune sensitivity.
    float freqLow = 200;
    float freqHigh = 1600;
    int sizeFilter = 5;                           // moving-average window length (frames)
    float[] filter;                               // circular buffer for the moving average
    float filterSum;                              // running sum of the window contents
    int posFilter = 0;                            // next write index into filter
    int qSample = 0;                              // number of valid samples in the window so far
    int video_Length, secCounter;
    // Rest (closed-mouth) local Y positions, one per transform.
    // BUG FIX: the original reused a single y0/y1 for all three upper/lower
    // transforms, so only the LAST assignment survived and every mouth part
    // animated around the wrong baseline.
    float y0M, y0L, y0R, y1M, y1L, y1R;
    AudioSource audioSource;                      // cached; avoids GetComponent every frame
    float limValue;                               // clamped mouth displacement for this frame

    void OnEnable()
    {
        secCounter = 0;
        y0M = upmouth0_M.localPosition.y;
        y0L = upmouth01_L.localPosition.y;
        y0R = upmouth02_R.localPosition.y;
        y1M = downmouth1_M.localPosition.y;
        y1L = downmouth11_L.localPosition.y;
        y1R = downmouth12_R.localPosition.y;
        freqData = new float[nSamples];
        audioSource = GetComponent<AudioSource>();
        audioSource.clip = Rec_voice.instance.voiceFeed.clip;
        audioSource.Play();
        // BUG FIX: measure the clip that is actually playing. The original read
        // source_clip.length, which throws NullReferenceException when
        // source_clip was never assigned in the Inspector.
        source_clip = audioSource.clip;
        video_Length = Mathf.CeilToInt(source_clip.length);
    }

    /// <summary>
    /// Returns the summed spectrum energy between fLow and fHigh (Hz) of the
    /// currently playing AudioSource.
    /// </summary>
    float BandVol(float fLow, float fHigh)
    {
        fLow = Mathf.Clamp(fLow, 20, fMax);           // audible floor
        fHigh = Mathf.Clamp(fHigh, fLow, fMax);
        audioSource.GetSpectrumData(freqData, 0, FFTWindow.BlackmanHarris);
        // Map frequencies to FFT bin indices.
        int n1 = Mathf.FloorToInt(fLow * nSamples / fMax);
        int n2 = Mathf.FloorToInt(fHigh * nSamples / fMax);
        float sum = 0;
        // BUG FIX: the original used `sum = freqData[i]`, which kept only the
        // last bin instead of accumulating the whole band — this is the main
        // reason quiet/distant voices produced almost no mouth movement.
        // Also guard against n2 running past the buffer.
        for (int i = n1; i <= n2 && i < freqData.Length; i++)
        {
            sum += freqData[i];
        }
        return sum;
    }

    /// <summary>
    /// Smooths the band volume with a circular-buffer moving average over the
    /// last sizeFilter samples (fewer while the window is still filling).
    /// </summary>
    float MovingAverage(float sample)
    {
        if (qSample == 0)
            filter = new float[sizeFilter];
        filterSum += sample - filter[posFilter];      // replace oldest sample in the running sum
        filter[posFilter++] = sample;
        if (posFilter > qSample)
        {
            qSample = posFilter;                      // window not yet full — grow the divisor
        }
        posFilter = posFilter % sizeFilter;           // wrap the write cursor
        return filterSum / qSample;
    }

    void Start()
    {
        // Initialisation moved to OnEnable so it re-runs every time the
        // component is re-enabled with a new recording.
    }

    void Update()
    {
        // Compute this frame's mouth displacement; applied in LateUpdate so it
        // overrides any Animator pose for the frame.
        float band_vol = BandVol(freqLow, freqHigh);
        float val = MovingAverage(band_vol) * volume;
        // Clamp so loud peaks cannot tear the mouth open unnaturally.
        limValue = Mathf.Clamp(val, 0, 25f);
        if (Input.GetKeyDown(KeyCode.Escape))
        {
            Application.Quit();
        }
    }

    void LateUpdate()
    {
        // Upper lip moves down, lower lip moves up, each from its OWN rest
        // position (see the per-transform baseline fix in OnEnable).
        upmouth0_M.localPosition = new Vector3(upmouth0_M.localPosition.x, y0M - limValue, upmouth0_M.localPosition.z);
        upmouth01_L.localPosition = new Vector3(upmouth01_L.localPosition.x, y0L - limValue, upmouth01_L.localPosition.z);
        upmouth02_R.localPosition = new Vector3(upmouth02_R.localPosition.x, y0R - limValue, upmouth02_R.localPosition.z);
        downmouth1_M.localPosition = new Vector3(downmouth1_M.localPosition.x, y1M + limValue, downmouth1_M.localPosition.z);
        downmouth11_L.localPosition = new Vector3(downmouth11_L.localPosition.x, y1L + limValue, downmouth11_L.localPosition.z);
        downmouth12_R.localPosition = new Vector3(downmouth12_R.localPosition.x, y1R + limValue, downmouth12_R.localPosition.z);
    }
}
Here I'm facing some issue like below
1) How can I recognise a human voice? If another sound (music, noise, etc.) is playing, it is also detected — how can I prevent that? I want the lips to sync only with human speech.
2) When recording, if the speaker is close to the device it works perfectly, but if they move a little further away the lips stop syncing.
Could you suggest where I'm going wrong, and how to solve the issues above?
Comment