Sunday, 22 April 2012
PSOLA.CPP TD-Psola TTS C++ Code
///////////////////////////////////////////////////////////////////////
// CopyRight : Copyright (c) 2004, IRLab, All rights reserved
// File Name : Psola.cpp
// File Summary : This file provides the modules that adjust the wav
// with TD-PSOLA
// Author : elevens
// Create Time : 2004/8/25
// Project Name : TTS
// Version : 1.0
// History :
////////////////////////////////////////////////////////////////////////
#include "stdafx.h"
#include <fstream>
#include <iostream>
#include <assert.h>
#include <algorithm>
#include <string>
#include <math.h>
#include <vector>
#include "Psola.h"
#include ".\psola.h"
//#include ".\psola.h"
// Enables the debug-only code guarded by #ifdef __DEBUG in this file.
#define __DEBUG
using namespace std;
#ifdef __DEBUG
// Debug log stream: a global ofstream constructed on the file name "out.txt".
// NOTE(review): PSOLA(float dur, ...) writes to `out` outside any #ifdef __DEBUG
// guard, so removing the #define above looks like it would break the build - confirm.
ofstream out("out.txt");
#endif
///////////////////////////////////////////////////////////////////////
// Function Name : CPsola
// Function func : default constructor; no wav data is attached yet, so all
//                 buffer pointers start as NULL and all lengths as 0
// Author : elevens
// Create Time : 2004/8/16
// Class Name : CPsola
// input :
// output :
////////////////////////////////////////////////////////////////////////
CPsola::CPsola()
:m_dPitch(NULL),m_dNewPitch(NULL),flag(NULL),m_Data(NULL),m_InData(NULL)
{
    // The data length used to be left uninitialised here, so any method that
    // looped over m_uDataLen before SetData() was called read garbage.
    m_uDataLen = 0;
    m_uSamFre = 16000;   // sample frequency
    m_dFrameLen = 0.02;  // length of one frame
    m_dX1 = 0.02;        // length of the first frame
    m_dAmpMul = -1;      // -1 : AdjustAmplitude() does nothing
    m_dDuration = -1;    // -1 : AdjustDuration() only copies the data
    m_uPitchLen = 0;
}
///////////////////////////////////////////////////////////////////////
// Function Name : CPsola
// Function func : constructor that attaches an existing wav buffer; the
//                 same pointer is stored as both the working data and the
//                 input data
// Author : elevens
// Create Time : 2004/8/16
// Class Name : CPsola
// input : _Data: wav data
// _DataLen: the length of the data
// output :
////////////////////////////////////////////////////////////////////////
CPsola::CPsola(short* _Data,unsigned _DataLen)
:m_Data(_Data),m_uDataLen(_DataLen),m_dPitch(NULL),m_dNewPitch(NULL),flag(NULL),m_InData(_Data)
{
    // No pitch contour has been supplied yet.
    m_uPitchLen = 0;

    // -1 marks "no amplitude / duration change requested".
    m_dDuration = -1;
    m_dAmpMul = -1;

    // Default analysis settings.
    m_dX1 = 0.02;
    m_dFrameLen = 0.02;
    m_uSamFre = 16000;
}
///////////////////////////////////////////////////////////////////////
// Function Name : FindMax
// Function func : find the max value of data between begin and end
//                 (both ends inclusive); the first maximum wins on ties
// Author : elevens
// Create Time : 2004/8/16
// Class Name : CPsola
// input : begin: which position to begin with
// end: the end position (inclusive)
// data: the array of wav data
// output : the position of the max value; begin when the range is empty
//          or when no sample is greater than -32768
////////////////////////////////////////////////////////////////////////
unsigned CPsola::FindMax(unsigned begin,unsigned end,short* data)
{
    // Start from a defined position: previously the result was an
    // uninitialised variable whenever begin > end or every sample in the
    // range was -32768, and callers use it directly as an array index.
    unsigned position = begin;
    short max = -32768;
    for(unsigned k=begin; k<=end; ++k)
    {
        if(data[k] > max)
        {
            max = data[k];
            position = k;
        }
    }
    return position;
}
///////////////////////////////////////////////////////////////////////
// Function Name : Approximate
// Function func : round a float to the nearest int; exact halves are
//                 rounded away from zero
// Author : elevens
// Create Time : 2004/8/16
// Class Name : CPsola
// input : temp: the float value
// output : the int value
////////////////////////////////////////////////////////////////////////
int CPsola::Approximate(float temp)
{
    // Truncate toward zero, then look at what was cut off.
    const int whole = static_cast<int>(temp);
    const float frac = temp - whole;

    const bool closerToWhole = (frac > -0.5) && (frac < 0.5);
    if(closerToWhole)
        return whole;

    // At least half a unit away: step one further from zero.
    return (frac < 0) ? (whole - 1) : (whole + 1);
}
///////////////////////////////////////////////////////////////////////
// Function Name : Middle
// Function func : find the middle (median) value of the five samples
//                 data[mid] .. data[mid+4]
// Author : elevens
// Create Time : 2004/8/16
// Class Name : CPsola
// input : mid: the position to start with
// data: the wav data; data[mid+4] must be a valid sample
// output : the middle value (third largest of the five samples)
////////////////////////////////////////////////////////////////////////
short CPsola::Middle(unsigned mid,short* data)
{
    // Track the three largest samples seen so far; the third largest of
    // five values is their median.
    short max1,max2,max3;
    max1 = max2 = max3 = -32768;
    // The loop used to start at k=1, so data[mid] was skipped and the result
    // was the third largest of only four samples rather than a 5-point median.
    for(int k=0; k<=4; k++)
    {
        const short sample = data[mid+k];
        if(sample > max1)
        {
            max3 = max2;
            max2 = max1;
            max1 = sample;
        }
        else if(sample > max2)
        {
            max3 = max2;
            max2 = sample;
        }
        else if(sample > max3)
        {
            max3 = sample;
        }
    }
    return max3;
}
///////////////////////////////////////////////////////////////////////
// Function Name : SetData
// Function func : attach a new wav buffer; drops the old pitch-mark flags
//                 and frees the working buffer when it is not the input
//                 buffer
// Author : elevens
// Create Time : 2004/8/16
// Class Name : CPsola
// input : data: a pointer to the data
// length: the length of the data set
// output :
////////////////////////////////////////////////////////////////////////
void CPsola::SetData(short* data,unsigned length)
{
    // The flags belong to the previous data set (delete [] NULL is a no-op).
    delete [] flag;
    flag = NULL;

    // Only free the working buffer when it differs from the input buffer.
    const bool differsFromInput = (m_Data != NULL) && (m_Data != m_InData);
    if(differsFromInput)
        delete [] m_Data;

    m_InData = data;
    m_Data = data;
    m_uDataLen = length;
}
///////////////////////////////////////////////////////////////////////
// Function Name : SetAmplitudeMultiple
// Function func : set the multiple applied to the amplitude; values that
//                 are not >= 0 are ignored
// Author : elevens
// Create Time : 2004/8/16
// Class Name : CPsola
// input : temp: the multiple to be set
// output :
////////////////////////////////////////////////////////////////////////
void CPsola::SetAmplitudeMultiple(float temp)
{
    // Keep the previous setting unless the request is non-negative.
    if(!(temp >= 0))
        return;
    m_dAmpMul = temp;
}
///////////////////////////////////////////////////////////////////////
// Function Name : SetDuration
// Function func : set the duration the wav should be adjusted to; values
//                 that are not >= 0 are ignored
// Author : elevens
// Create Time : 2004/8/16
// Class Name : CPsola
// input : temp: the duration to be set
// output :
////////////////////////////////////////////////////////////////////////
void CPsola::SetDuration(float temp)
{
    // Keep the previous setting unless the request is non-negative.
    if(!(temp >= 0))
        return;
    m_dDuration = temp;
}
///////////////////////////////////////////////////////////////////////
// Function Name : SetFrameLength
// Function func : set the length of one frame; values that are not >= 0
//                 are ignored
// Author : elevens
// Create Time : 2004/8/16
// Class Name : CPsola
// input : temp: the length of frame to be set
// output :
////////////////////////////////////////////////////////////////////////
void CPsola::SetFrameLength(float temp)
{
    // Keep the previous setting unless the request is non-negative.
    if(!(temp >= 0))
        return;
    m_dFrameLen = temp;
}
///////////////////////////////////////////////////////////////////////
// Function Name : SetSampleFrequency
// Function func : Set the sample frequency
// Author : elevens
// Create Time : 2004/8/16
// Class Name : CPsola
// input : temp: the sample frequency to be set
// output :
////////////////////////////////////////////////////////////////////////
void CPsola::SetSampleFrequency(unsigned temp)
{
m_uSamFre = temp;
}
///////////////////////////////////////////////////////////////////////
// Function Name : SetX1
// Function func : set the length of the first frame; values that are not
//                 >= 0 are ignored
// Author : elevens
// Create Time : 2004/8/16
// Class Name : CPsola
// input : temp: the length of the first frame
// output :
////////////////////////////////////////////////////////////////////////
void CPsola::SetX1(float temp)
{
    // Keep the previous setting unless the request is non-negative.
    if(!(temp >= 0))
        return;
    m_dX1 = temp;
}
///////////////////////////////////////////////////////////////////////
// Function Name : SetPitch
// Function func : store the pitch contour of the wav (the pointer is kept,
//                 not copied) and mark the pitches on the current data
// Author : elevens
// Create Time : 2004/8/16
// Class Name : CPsola
// input : pitch: the array of the pitches
// size: the length of the pitches
// X1: the length of the first frame (ignored unless >= 0)
// output :
////////////////////////////////////////////////////////////////////////
void CPsola::SetPitch(float* pitch,unsigned size,float X1)
{
    m_uPitchLen = size;
    m_dPitch = pitch;

    const bool haveFirstFrameLen = (X1 >= 0);
    if(haveFirstFrameLen)
        m_dX1 = X1;

    // The outcome of the marking is not reported to the caller.
    (void)MarkPitch();
}
///////////////////////////////////////////////////////////////////////
// Function Name : SetNewPitch
// Function func : store the target pitch contour (the pointer is kept, not
//                 copied); the request is ignored unless its length equals
//                 the length of the current pitch contour
// Author : elevens
// Create Time : 2004/8/16
// Class Name : CPsola
// input : newpitch: the array of the target pitches
// newsize: the length of the target pitches
// output :
////////////////////////////////////////////////////////////////////////
void CPsola::SetNewPitch(float* newpitch,unsigned newsize)
{
    const bool sameLength = (newsize == m_uPitchLen);
    if(sameLength)
        m_dNewPitch = newpitch;
}
///////////////////////////////////////////////////////////////////////
// Function Name : MarkPitch
// Function func : (re)build the flag array, one bool per sample, in which
//                 true marks a pitch mark. Voiced frames (pitch > 0) are
//                 marked at their peak and then period by period through
//                 MarkOneFrame; other frames get marks half a frame apart.
// Author : elevens
// Create Time : 2004/8/16
// Class Name : CPsola
// input :
// output : false (after printing an error) if there is no data or pitch
//          contour, or if the array of pitches is longer than the data;
//          true otherwise
////////////////////////////////////////////////////////////////////////
bool CPsola::MarkPitch()
{
    // Without samples or a contour there is nothing to mark; flag[0] below
    // would otherwise be written into a zero-sized array.
    if(m_Data == NULL || m_dPitch == NULL || m_uDataLen == 0)
    {
        cout<<"error in markpitch"<<endl;
        return false;
    }
    // NOTE(review): (m_uPitchLen-2) is unsigned arithmetic, so a contour with
    // fewer than two frames wraps to a huge value and is rejected here with
    // the default frame length - confirm that this is intended.
    if( (m_dX1+(m_uPitchLen-2)*m_dFrameLen)*m_uSamFre >= m_uDataLen)
    {
        cout<<"error in markpitch"<<endl;
        return false;
    }

    // Drop the marks of any previous run and start from an empty array.
    delete [] flag;
    flag = NULL;
    flag = new bool[m_uDataLen];
    memset(flag,0,m_uDataLen*sizeof(bool));
    flag[0] = 1;   // the first sample is always a mark

    const unsigned first = static_cast<unsigned>(m_dX1*m_uSamFre);
    const unsigned halfFrame = static_cast<unsigned>(m_dFrameLen*m_uSamFre/2);

    for(unsigned i=0 ;i<m_uPitchLen ;++i)
    {
        // Frame i starts where frame i-1 ended; frame 0 starts at sample 0.
        const unsigned begin = (i == 0)
            ? 0
            : static_cast<unsigned>((i-1)*m_dFrameLen*m_uSamFre) + first;

        if(m_dPitch[i] > 0)
        {
            // Voiced frame: the last frame extends to the end of the data.
            unsigned end = static_cast<unsigned>(i*m_dFrameLen*m_uSamFre) + first;
            if(i == m_uPitchLen-1 || end >= m_uDataLen)
                end = m_uDataLen - 1;
            if(begin > end)
                continue;   // empty frame: nothing to search
            const unsigned Pmax = FindMax(begin,end,m_Data);
            flag[Pmax] = 1;
            MarkOneFrame(i,Pmax);
        }
        else
        {
            // Frame without a positive pitch: marks at the middle and the end
            // of the frame, as long as they fall inside the data.
            unsigned Pmax = begin + halfFrame;
            if(Pmax < m_uDataLen-1)
                flag[Pmax] = 1;
            Pmax += halfFrame;
            if(Pmax < m_uDataLen-1)
                flag[Pmax] = 1;
        }
    }
    return true;
}
///////////////////////////////////////////////////////////////////////
// Function Name : MarkOneFrame
// Function func : Mark pitches on one frame: starting from the frame's peak
//                 sample, step one pitch period (T*m_uSamFre samples) at a
//                 time towards the end and then towards the start of the
//                 frame, flagging the largest sample within +/-5 samples of
//                 each expected position
// Author : elevens
// Create Time : 2004/8/16
// Class Name : CPsola
// input : j: the jth Frame
// Pmax: the position of the max value
// output : none; marks are written into flag[]
////////////////////////////////////////////////////////////////////////
void CPsola::MarkOneFrame(unsigned j,unsigned Pmax)
{
unsigned first = static_cast<unsigned>(m_dX1*m_uSamFre);
// NOTE(review): only negative pitch is rejected here; a pitch of exactly 0
// would reach the division below. The visible caller (MarkPitch) only passes
// frames whose pitch is > 0.
if(m_dPitch[j] < 0)
return;
// Pitch period of this frame (reciprocal of the frame's pitch value).
float T = 1/m_dPitch[j];
int p,p1,p2,i;
p2 = p1 = Pmax;
p = 0;
int temp;
// Last position from which a whole period still fits before the end of the
// frame (or before the end of the data for the last frame).
int end = (j == (m_uPitchLen-1) )?static_cast<int>(m_uDataLen-1-T*m_uSamFre):static_cast<int>(j*m_dFrameLen*m_uSamFre+first-T*m_uSamFre);
// Walk from the peak towards the end of the frame, one period per step.
while(p1 <= (end - 5) )
{
p = FindMax(static_cast<unsigned>(p1+T*m_uSamFre-5),static_cast<unsigned>(p1+T*m_uSamFre+5),m_Data);
flag[p] = 1;
p1 = p;
}
// One extra mark past the loop when the last mark is close to `end` and the
// next search window still lies inside the data.
if (p!=0 && p-end<=5 && p+T*m_uSamFre+5<m_uDataLen )
{
p = FindMax(static_cast<unsigned>(p+T*m_uSamFre-5),static_cast<unsigned>(p+T*m_uSamFre+5),m_Data);
flag[p] = 1;
}
// From the maximum peak, mark towards the start of the frame
// (original comment: "mark forward from the max peak point").
unsigned begin = (j == 0)?static_cast<unsigned>(T*m_uSamFre):static_cast<unsigned>((j-1)*m_dFrameLen*m_uSamFre+first+T*m_uSamFre);
while(p2 >= (begin + 5) )
{
p = FindMax(static_cast<unsigned>(p2-T*m_uSamFre-5),static_cast<unsigned>(p2-T*m_uSamFre+5),m_Data);
flag[p] = 1;
p2 = p;
}
// NOTE(review): begin is unsigned and p is int, so begin-p is evaluated in
// unsigned arithmetic - confirm the intended behaviour when p > begin.
if (p!=0 && begin-p<=5 && p-T*m_uSamFre-5>0 )
{
p = FindMax(static_cast<unsigned>(p-T*m_uSamFre-5),static_cast<unsigned>(p-T*m_uSamFre+5),m_Data);
flag[p] = 1;
// If another mark lies within the next 20 samples, keep only the one with
// the larger sample value.
temp = p+20 >= m_uDataLen ? m_uDataLen-1 : p+20;
for( i = p+1 ; i <= temp ; i++ )
{
if(flag[i]==1)
{
if(m_Data[i]>m_Data[p]) flag[p] = 0;
else flag[i] = 0;
break;
}
}
}
}
///////////////////////////////////////////////////////////////////////
// Function Name : Adjust
// Function func : Adjust the wav
// Author : elevens
// Create Time : 2004/8/16
// Class Name : CPsola
// input :
// output :
////////////////////////////////////////////////////////////////////////
void CPsola::Adjust()
{
AdjustAmplitude();
AdjustDuration();
// AdjustPitch();
}
///////////////////////////////////////////////////////////////////////
// Function Name : AdjustAmplitude
// Function func : multiply every sample of the working data, in place, by
//                 the amplitude multiple; results are saturated to the
//                 16-bit sample range
// Author : elevens
// Create Time : 2004/8/16
// Class Name : CPsola
// input :
// output :
////////////////////////////////////////////////////////////////////////
void CPsola::AdjustAmplitude()
{
    // -1 means no multiple was set; without data there is nothing to scale.
    if(m_dAmpMul == -1 || m_Data == NULL)
        return;
    for(unsigned i=0 ;i<m_uDataLen ;++i)
    {
        // Converting an out-of-range floating value to short is undefined
        // behaviour, so clip loud samples instead of letting them wrap.
        if(m_dAmpMul*m_Data[i] > 32767)
            m_Data[i] = 32767;
        else if(m_dAmpMul*m_Data[i] < -32768)
            m_Data[i] = -32768;
        else
            m_Data[i] = static_cast<short>(m_dAmpMul*m_Data[i]);
    }
}
///////////////////////////////////////////////////////////////////////
// Function Name : AdjustDuration
// Function func : Adjust the Duration of the wav: build a new buffer in
//                 which the segments between pitch marks are repeated
//                 (lengthening) or dropped (shortening), then make that
//                 buffer the working data
// Author : elevens
// Create Time : 2004/8/16
// Class Name : CPsola
// input : none (uses m_dDuration, m_dPitch, flag, m_Data and related members)
// output : none (m_Data / m_uDataLen are replaced)
////////////////////////////////////////////////////////////////////////
void CPsola::AdjustDuration()
{
// No duration requested: the working data is only copied into a new buffer.
// NOTE(review): the copy size uses *2 rather than sizeof(short), and the
// previous m_Data pointer is overwritten without being freed here.
if(m_dDuration == -1)
{
short* NewData = new short[m_uDataLen];
memcpy(NewData,m_Data,m_uDataLen*2);
m_Data = NewData;
return;
}
// Pitch marks are required for everything below.
if(flag == NULL)
return;
// Sum the positive pitch values of all frames except the last one, skipping
// the first positive frame found.
// NOTE(review): nFirstVowel == 0 doubles as "not found yet", so when frame 0
// itself is positive the next positive frame is skipped as well - confirm.
float sum = 0;
int nframe = 0;
int nFirstVowel = 0;
for(unsigned i=0 ;i<m_uPitchLen-1 ;++i)
// for(unsigned i=0 ;i<m_uPitchLen ;++i)
{
if(m_dPitch[i] > 0)
{
if( nFirstVowel == 0 )
{
nFirstVowel = i;
continue;
}
sum += m_dPitch[i];
++nframe;
}
}
if(nframe == 0)
return;
// The output buffer holds max(NewLen, m_uDataLen) samples; only the first
// NewLen samples are zeroed.
short* NewData;
unsigned NewLen = static_cast<unsigned>(m_dDuration*m_uSamFre);
if(NewLen < m_uDataLen)
NewData = new short[m_uDataLen];
else
NewData = new short[NewLen];
memset(NewData,0,NewLen*2);
float avg = sum/nframe; // average pitch of this syllable
// total number of periods that must be added (positive) or removed (negative)
int p_add = Approximate((static_cast<int>(NewLen) - static_cast<int>(m_uDataLen))*avg/m_uSamFre);
// how many times each period is copied
int every1 = static_cast<int>(static_cast<float>(p_add)/(sum*m_dFrameLen));
int every2 = every1;
// how many periods are removed per frame
int every3 = -(p_add/nframe);
int every4 = every3;
unsigned end = 0; // write position in NewData
int oldpos = 0;
int n = 1;
unsigned t=0; // start of the segment not yet copied from m_Data
int pos = 0; // remainder
if(p_add > 0)
{
pos = Approximate(p_add - every1*sum*m_dFrameLen);
p_add = p_add + every1 + 1;
}
else
{
pos = p_add + every3*nframe;
p_add = p_add - every3;
}
// NOTE(review): the frame length is hard-coded as 0.02 here instead of using
// m_dFrameLen - confirm.
unsigned uLastFrameLen = static_cast<unsigned>(m_uDataLen - m_dX1*m_uSamFre)%static_cast<unsigned>(0.02*m_uSamFre);
// skip the first vowel frame
unsigned skip = m_dX1*m_uSamFre + nFirstVowel*m_dFrameLen*m_uSamFre;
unsigned k = 0 ;
// Position from which the remainder periods are handled.
unsigned pos_start;
if(pos>=0)
pos_start = m_uDataLen - uLastFrameLen - (m_uSamFre/avg)*pos;
else
pos_start = m_uDataLen - uLastFrameLen + (m_uSamFre*m_dFrameLen)*pos;
for(unsigned i=0 ;i<m_uDataLen-uLastFrameLen/*skip the last frame*/ ;++i)
{
if(flag[i] != 0)
{
// Segments before `skip`, or at least 150 or at most 10 samples long,
// are copied once without modification.
if( i < skip || ((i-t) >= 150) || ((i-t) <= 10) )
{
if(end < NewLen-i+t)
{
memcpy(NewData+end ,m_Data+t ,(i-t)*2);
end = end+i-t;
t = i;
}
}
else // voiced
{
every1 = every2;
if(p_add > 0) // lengthen
{
// Copy the segment every1+1 times while it still fits.
while(every1 >= 0)
{
if(end < NewLen-i+t)
{
memcpy(NewData+end ,m_Data+t ,(i-t)*2);
end = end + i - t;
}
--every1;
}
if(pos != 0 && pos_start-i <= m_uSamFre/avg) // the remainder is added from the back
{
if(end < NewLen-i+t)
{
memcpy(NewData+end ,m_Data+t ,(i-t)*2);
--pos;
end = end + i - t;
}
--p_add;
}
p_add = p_add - every2;
t=i;
}
// shorten
else if(p_add < 0)
{
oldpos = Approximate(m_dFrameLen*avg);
if(every3 <= 0)
{
// Quota for this frame used up: keep the segment.
if(end < NewLen-i+t)
{
memcpy(NewData+end ,m_Data+t ,(i-t)*2);
end = end + i - t;
}
++n;
}
else
{
// Drop the segment by moving t past it without copying.
t = i;
--every3;
++p_add;
++n;
}
// controls the deletion from each frame
if(n == oldpos)
{
// handle the remainder part of p_add
if(pos <= 0 && i>=pos_start)
{
end = end - (i - t);
++pos;
++p_add;
}
every3 = every4;
n = 1;
}
t = i;
}
}
}
}
// the last frame: copy what is left, truncated to NewLen
if( end+m_uDataLen-t < NewLen )
{
memcpy(NewData+end,m_Data+t,(m_uDataLen-t)*2);
end += m_uDataLen-t;
}
else if(end < NewLen)
{
memcpy(NewData+end,m_Data+t,(NewLen-end)*2);
end = NewLen;
}
// NOTE(review): the previous m_Data pointer is overwritten without being
// freed, and flag is not rebuilt for the new length.
m_Data = NewData;
m_uDataLen = end ;
}
///////////////////////////////////////////////////////////////////////
// Function Name : AdjustPitch
// Function func : Adjust the Pitches of the wav towards m_dNewPitch by
//                 overlap-adding Hanning-windowed segments, then smooth the
//                 result and replace m_Data / flag with the new buffers
// Author : elevens
// Create Time : 2004/8/16
// Class Name : CPsola
// input : none (uses m_dPitch, m_dNewPitch, flag, m_Data and related members)
// output : none
// NOTE(review): the call to this function in Adjust() is commented out, and
// the body contains several suspicious constructs flagged inline below.
////////////////////////////////////////////////////////////////////////
void CPsola::AdjustPitch()
{
if(m_dNewPitch == NULL)
return;
if(flag == NULL)
return;
// Size the output buffer from the per-frame ratio of new to old pitch.
// NOTE(review): frames with m_dPitch[i] == 0 divide by zero here.
unsigned NewLen = 0;
unsigned i;
for( i=1 ;i<m_uPitchLen ;++i)
// NewLen += static_cast<unsigned>(((m_dNewPitch[i]/m_dPitch[i]) + 1)*m_dFrameLen*m_uSamFre);
NewLen += static_cast<unsigned>((static_cast<float>(m_dNewPitch[i])/static_cast<float>(m_dPitch[i]) + 1)*m_dFrameLen*m_uSamFre);
unsigned x1 = static_cast<unsigned>(m_dX1*m_uSamFre);
NewLen += x1;
short* NewData = new short[NewLen];
bool* NewFlag = new bool[NewLen];
memset(NewFlag,0,NewLen*sizeof(bool));
// The first frame (x1 samples) is copied unchanged.
unsigned end = 0;
memcpy(NewData,m_Data,x1*sizeof(short));
end += x1;
unsigned oldpos = 0;
unsigned pos = 0;
int t = 0;
int pit = 0;
unsigned win_length = 0;
unsigned spos = 0;
unsigned epos = 0;
// Window buffers sized for two periods at 50 pitch units, plus one sample.
short* FrontWin = new short[Approximate((2.0/50)*m_uSamFre) + 1];
short* BehindWin = new short[Approximate((2.0/50)*m_uSamFre) + 1];
float hanning = 0;
vector<unsigned> vecPos;
for( i=1 ;i<m_uPitchLen ;++i)
{
if(m_dNewPitch[i] == 0)
continue;
oldpos = static_cast<unsigned>((m_dX1 + (i - 1)*m_dFrameLen)*m_uSamFre);
pos = static_cast<unsigned>((m_dX1 + i*m_dFrameLen)*m_uSamFre);
// NOTE(review): i < m_uPitchLen inside this loop, so this condition can
// never be true; m_uPitchLen-1 may have been intended.
if(i == m_uPitchLen)
pos = m_uDataLen;
// frames whose pitch is unchanged, or that were originally unvoiced
// NOTE(review): the memcpy size below is pos-oldpos bytes, not samples,
// while `end` advances by pos-oldpos samples.
if((m_dNewPitch[i] == m_dPitch[i]) || (m_dPitch[i] == 0) )
{
memcpy(NewData+end ,m_Data+oldpos ,pos-oldpos);
end = end + pos - oldpos;
continue;
}
// NOTE(review): this collects the positions that are NOT flagged; collecting
// the pitch marks (flag[k] set) may have been intended. vecPos[0] is also
// read below without checking that vecPos is non-empty.
vecPos.clear();
for(unsigned k = oldpos ;k<pos ;++k)
if(!flag[k])
vecPos.push_back(k);
memcpy(NewData+end ,m_Data+oldpos ,vecPos[0]-oldpos);
end = end + vecPos[0] - oldpos;
NewFlag[end] = 1;
t = Approximate((1/m_dPitch[i])*m_uSamFre); // old period in samples
win_length = 2*t + 1;
pit = Approximate((1/m_dNewPitch[i] - 1/m_dPitch[i])*m_uSamFre); // period change in samples
spos = vecPos[0] - t;
epos = vecPos[0] + t;
if(vecPos[0] < t)
spos = 0;
if(epos > (m_uDataLen - 1) )
epos = m_uDataLen - 1;
// NOTE(review): this loop and the inner loops below reuse `i`, which is also
// the counter of the enclosing per-frame loop.
for( i=spos ;i<epos+1 ;++i)
{
hanning = 0.5 - 0.5*cos(2*(i-spos)*3.1415926/(win_length-1));
BehindWin[i-spos] = static_cast<short>(m_Data[i]*hanning);
}
if(pit > 0)
{
for(size_t k=1 ;k<vecPos.size()-1 ;++k)
{
// NOTE(review): `temp` is never used; after these two assignments both
// pointers refer to the same buffer, so this is not a swap and the two
// delete [] calls at the end of the function would free the same buffer.
short* temp = FrontWin;
FrontWin = BehindWin;
BehindWin = FrontWin;
spos = vecPos[k] - t;
epos = vecPos[k] + t;
if(vecPos[k] < t)
spos = 0;
if(epos > (m_uDataLen - 1) )
epos = m_uDataLen - 1;
for( i=spos ;i<epos+1 ;++i)
{
hanning = 0.5 - 0.5*cos(2*(i-spos)*3.1415926/(win_length-1));
BehindWin[i-spos] = static_cast<short>(m_Data[i]*hanning);
}
// if(end > 1) // end<2 is unlikely
NewData[end-1] = 0.15*NewData[end-2] + 0.4*NewData[end-1] + 0.3*(FrontWin[t+pit] + BehindWin[0]) + 0.15*(FrontWin[t+pit+1] + BehindWin[1]);
NewData[end] = 0.15*NewData[end-2] + 0.3*NewData[end-1] + 0.4*(FrontWin[t+pit] + BehindWin[0]) + 0.15*(FrontWin[t+pit+1] + BehindWin[1]);
++end;
// overlap-add of the first half
// there are still some problems here for the first and the last frame
for( i=1 ;i<=t-pit ;++i)
{
NewData[end] = FrontWin[t+pit+i] + BehindWin[i];
++end;
}
// the central part needs no overlap-add
// smooth the neighbouring points of the two parts
NewData[end-1] = 0.15*NewData[end-2] + 0.4*NewData[end-1] + 0.3*BehindWin[t-pit+1] + 0.15*BehindWin[t-pit+2];
NewData[end] = 0.15*NewData[end-2] + 0.3*NewData[end-1] + 0.4*BehindWin[t-pit+1] + 0.15*BehindWin[t-pit+2];
++end;
// the second half
for( i=t-pit+2 ;i<t+pit ;++i)
{
NewData[end] = BehindWin[i];
++end;
}
NewFlag[end] = 1;
}
// process the data of the last window
for( i=1 ;i<t-pit-1 ;++i)
{
NewData[end] = FrontWin[t+pit+i] + BehindWin[i];
++end;
}
NewFlag[end] = 1;
}
else
{
// From here on pit holds the magnitude of the (non-positive) period change.
pit = -pit;
for(size_t k=1 ;k<vecPos.size() ;++k)
{
// NOTE(review): same non-swap as in the branch above.
short* temp = FrontWin;
FrontWin = BehindWin;
BehindWin = FrontWin;
spos = vecPos[k] - t;
epos = vecPos[k] + t;
if(vecPos[k] < t)
spos = 0;
if(epos > (m_uDataLen - 1) )
epos = m_uDataLen - 1;
for( i=spos ;i<epos+1 ;++i)
{
hanning = 0.5 - 0.5*cos(2*(i-spos)*3.1415926/(win_length-1));
BehindWin[i-spos] = static_cast<short>(m_Data[i]*hanning);
}
NewData[end-1] = 0.15*NewData[end-2] + 0.4*NewData[end-1] + 0.3*(FrontWin[t] + BehindWin[pit]) + 0.15*(FrontWin[t+1] + BehindWin[pit+1]);
NewData[end] = 0.15*NewData[end-2] + 0.3*NewData[end-1] + 0.4*(FrontWin[t] + BehindWin[pit]) + 0.15*(FrontWin[t+1] + BehindWin[pit+1]);
++end;
for( i=1 ;i<t-pit-1 ;++i)
{
NewData[end] = FrontWin[t-pit+i] + BehindWin[i];
++end;
}
NewFlag[end] = 1;
}
// Blend into the unmodified tail of the frame, then copy that tail.
NewData[end-1] = 0.15*NewData[end-2] + 0.4*NewData[end-1] + 0.3*m_Data[vecPos.back()] + 0.15*m_Data[vecPos.back()+1];
NewData[end] = 0.15*NewData[end-2] + 0.3*NewData[end-1] + 0.4*m_Data[vecPos.back()] + 0.15*m_Data[vecPos.back()+1];
++end;
for( i=vecPos.back() ;i<pos ;++i)
{
NewData[end] = m_Data[i];
++end;
}
}
}
Smooth(NewData,end);
delete [] FrontWin;
delete [] BehindWin;
// The new flags and data replace the old ones.
// NOTE(review): the previous m_Data pointer is overwritten without being freed.
delete [] flag;
flag = NewFlag;
m_Data = NewData;
m_uDataLen = end;
}
///////////////////////////////////////////////////////////////////////
// Function Name : Smooth
// Function func : Smooth the new data after pitch adjusting
// Author : elevens
// Create Time : 2004/8/16
// Class Name : CPsola
// input : data; pointer to input data
// length: the length of the input data
// output : pointer to the data after smoothing
////////////////////////////////////////////////////////////////////////
void CPsola::Smooth(short* data,unsigned length)
{
short* temp = new short[length];
unsigned i;
//ÖÐֵƽ»¬
for( i=2 ;i<length-2 ;++i)
temp[i] = Middle(i-2,data);
temp[length-2] = data[length-2];
temp[length-1] = data[length-1];
//ÏßÐÔƽ»¬
for( i=2 ;i<length-2 ;++i)
data[i] = 0.15*temp[i-2] + 0.2*temp[i-1] + 0.3*temp[i] + 0.2*temp[i+1] + 0.15*temp[i+2];
data[length-2] = temp[length-2];
data[length-1] = temp[length-1];
delete [] temp;
}
///////////////////////////////////////////////////////////////////////
// Function Name : GetNewLen
// Function func : report the current length of the working data
// Author : elevens
// Create Time : 2004/8/16
// Class Name : CPsola
// input :
// output : the length of the new data
////////////////////////////////////////////////////////////////////////
unsigned CPsola::GetNewLen()
{
    const unsigned currentLength = m_uDataLen;
    return currentLength;
}
///////////////////////////////////////////////////////////////////////
// Function Name : GetNewData
// Function func : hand out the pointer to the current working data (the
//                 buffer itself is not copied)
// Author : elevens
// Create Time : 2004/8/16
// Class Name : CPsola
// input :
// output : the pointer to the new data
////////////////////////////////////////////////////////////////////////
short* CPsola::GetNewData()
{
    short* const currentData = m_Data;
    return currentData;
}
// Destructor: releases the pitch-mark flags, and the working buffer when it
// is not the input buffer.
CPsola::~CPsola()
{
    // delete [] on a NULL pointer is a no-op, so no check is needed.
    delete [] flag;

    const bool differsFromInput = (m_Data != NULL) && (m_Data != m_InData);
    if(differsFromInput)
        delete [] m_Data;
}
///////////////////////////////////////////////////////////////////////
// Function Name : TD_PSOLA
// Function func :
// Author : Taliux
// Create Time : 2004/12/1
// Class Name : CPsola
// input : float tscale,float pscale
// output :
////////////////////////////////////////////////////////////////////////
void CPsola::TD_PSOLA(float tscale,float pscale)
{
if(tscale==1 && pscale==1)
return ;
// MarkPitch();
vector<unsigned> vPM,vNewPM;
GetPitchMarks(vPM);
vNewPM=vPM;
int pos;
unsigned i,T0,pshift=0;
if(pscale!=1)
{
for(i=1;i<vPM.size();i++)
{
T0=vPM.at(i)-vPM.at(i-1);
if (IsVowel(vPM.at(i-1)))
{
if (pscale>1)
pshift=pshift-Approximate(T0*(pscale-1.0)/pscale); //»ùƵ¸Ä±ä¶ÔӦʱ³¤±ä»¯
else
pshift=pshift+Approximate(T0*(1.0/pscale-1.0));
}
vNewPM.at(i)=vPM.at(i)+pshift; //Éú³ÉÄ¿±ê»ùÒô±ê×¢ÐòÁÐ
}
}
float new_tscale=tscale*(double)vPM.back()/(double)vNewPM.back(); //Éú³ÉеĻùÒô±ê×¢ÐòÁкó¶Ôʱ³¤Ð޸IJÎÊýµÄµ÷Õû
// vector<unsigned>::iterator iVowelBegin;
int avg=GetAvgPitchLen(vNewPM,pos);
if(avg==0)
return;
int tot = (vNewPM.back()*new_tscale - vNewPM.at(pos)) / avg;
if(tot<=0)
return;
int orl = vNewPM.size()-pos-1;
vector<int> useds;
GetUseds(orl,tot,vNewPM.size()-2,useds);
vector<vector<unsigned> > final;
GetFinal(vPM,vNewPM,pos,useds,final);
unsigned ylen=final.back()[0]+(final.back()[2]-final.back()[1]);
short* y = new short[ylen];
memset(y,0,ylen*2);
memcpy(y,m_Data,vNewPM.at(pos)*2);
vector<float> w;
OverlapAdd(final,y,ylen,w);
//ƽ»¬
if(pscale>1)
smooth(y,ylen,w);
m_Data=y;
m_uDataLen=ylen;
}
// Collects, in ascending order, the index of every sample whose pitch-mark
// flag is set. vPM is cleared first and stays empty when no marks have been
// computed yet. With __DEBUG defined, each index is also written to the
// debug stream `out`.
void CPsola::GetPitchMarks(vector<unsigned>& vPM)
{
    vPM.clear();
    // flag is only allocated by MarkPitch(); it used to be dereferenced
    // here even when NULL.
    if(flag == NULL)
        return;
    for(unsigned i=0;i<m_uDataLen;i++ )
    {
        if(!flag[i])
            continue;
#ifdef __DEBUG
        out<<i<<endl;
#endif
        vPM.push_back(i);
    }
}
// Tells whether sample position i lies in a frame with a positive pitch
// value. Positions up to the end of the first frame map to frame 0, later
// positions to frame (i - firstFrameSamples) / frameSamples + 1. Positions
// beyond the pitch contour, and any query made without a pitch contour,
// yield false.
bool CPsola::IsVowel(unsigned i)
{
    // No contour: m_dPitch[0] must not be read.
    if(m_dPitch == NULL || m_uPitchLen == 0)
        return false;
    if(i<=m_dX1*m_uSamFre)
        return m_dPitch[0]>0;
    // A frame shorter than one sample would make the division below an
    // integer division by zero.
    const unsigned frameSamples = unsigned(m_dFrameLen*m_uSamFre);
    if(frameSamples == 0)
        return false;
    const unsigned j=(i-unsigned(m_dX1*m_uSamFre))/frameSamples+1;
    if(j>=m_uPitchLen) return false;
    return m_dPitch[j]>0;
}
// Computes the average distance, in samples, between a voiced pitch mark and
// the mark that follows it. The first and the last mark are not considered.
// input : vPM: pitch-mark positions
// output : pos: index in vPM of the first voiced mark considered (0 if none)
// return : the average distance, or 0 when no voiced mark was found
int CPsola::GetAvgPitchLen(vector<unsigned>& vPM,int& pos)
{
    unsigned sum=0;
    int count=0;
    pos = 0;
    // At least three marks are needed for the loop to have any work. The
    // check also stops vPM.size()-1 from wrapping around on an empty vector,
    // which used to send the loop out of bounds.
    if(vPM.size() < 3)
        return 0;
    const int last = static_cast<int>(vPM.size()) - 1;
    for(int i=1;i<last;i++)
    {
        if ( !IsVowel(vPM[i]) )
            continue;
        if ( pos==0 )
            pos = i;
        sum += vPM[i+1] - vPM[i];
        count++;
    }
    if(count==0)
        return 0;
    return sum/count;
}
///////////////////////////////////////////////////////////////////////
// Function Name : PSOLA
// Function func :
// Author : Taliux
// Create Time : 2004/12/1
// Class Name : CPsola
// input : float tscale,float pscale
// output :
////////////////////////////////////////////////////////////////////////
void CPsola::PSOLA(float tscale,float pscale,bool bFD = false)
{
if(tscale==1 && pscale==1)
return ;
// MarkPitch();
vector<unsigned> vPM,vNewPM;
GetPitchMarks(vPM);
vNewPM = vPM;
int pos;
unsigned i,j,T0,pshift=0;
if(pscale!=1)
{
for(i=1;i<vPM.size();i++)
{
T0=vPM.at(i)-vPM.at(i-1);
if (IsVowel(vPM.at(i-1)))
{
if (pscale>1)
pshift=pshift-Approximate(T0*(pscale-1.0)/pscale); //»ùƵ¸Ä±ä¶ÔӦʱ³¤±ä»¯
else
pshift=pshift+Approximate(T0*(1.0/pscale-1.0));
}
vNewPM.at(i)=vPM.at(i)+pshift; //Éú³ÉÄ¿±ê»ùÒô±ê×¢ÐòÁÐ
}
}
float new_tscale=tscale*(float)vPM.back()/(float)vNewPM.back(); //Éú³ÉеĻùÒô±ê×¢ÐòÁкó¶Ôʱ³¤Ð޸IJÎÊýµÄµ÷Õû
int avg = GetAvgPitchLen(vNewPM,pos);
if ( avg == 0 )
return;
int tot = (vNewPM.back()*new_tscale - vNewPM.at(pos)) / avg;
if ( tot <= 0 )
return;
int orl = vNewPM.size()-pos-1;
vector<int> useds;
GetUseds(orl,tot,vNewPM.size()-2,useds);
vector<vector<unsigned> > final;
GetFinal(vPM,vNewPM,pos,useds,final);
unsigned ylen=final.back()[0]+(final.back()[2]-final.back()[1]);
short* y = new short[ylen];
memset(y,0,ylen*2);
vector<float> w;
memcpy(y,m_Data,vNewPM.at(pos)*2);
float* pBeta = NULL;
if ( bFD)
{
pBeta = new float[final.size()];
for ( i = 0; i < final.size(); i++ )
pBeta[i] = pscale;
}
OverlapAdd(final,y,ylen,w,pBeta);
if ( pBeta ) delete [] pBeta;
//ƽ»¬
if(pscale>1)
smooth(y,ylen,w);
m_Data=y;
m_uDataLen=ylen;
}
void CPsola::PSOLA(float dur, float* pNewPitch, int nPitchLen, float dX1)
{
vector<unsigned> vPM,vNewPM;
int pos;
GetPitchMarks(vPM);
if(vPM.empty()) return;
vNewPM=vPM;
GetAvgPitchLen(vNewPM,pos);
unsigned i,j,k,T0,uvl,t=1;
int pshift=0;
float alpha,beta;
while ( pNewPitch[t] == 0 && t < nPitchLen ) t++;
uvl = (dX1+(t-1)*m_dFrameLen)*m_uSamFre;
int nvc = vNewPM.size() - pos -1;
beta = (float)uvl/(float)(vNewPM.at(pos));
// out<<"pos\t"<<pos<<"uvl\t"<<uvl<<endl;
vector<short> unvoiced(uvl);
//ÏßÐÔ²îÖµµ÷Õû¸¨Òô³¤¶È
for( i = 0; i < uvl; i++ )
{
j = i / beta;
alpha = (float)i/beta - j;
unvoiced.at(i) = (1-alpha)*m_Data[j]+alpha*m_Data[j+1];
}
pshift = uvl - vNewPM.at(pos);
vNewPM.at(pos) = uvl;
float pscale;
for ( i = pos+1; i < vPM.size(); i++ )
{
T0=vPM.at(i)-vPM.at(i-1);
k = (float)(i-pos)/(float)nvc * (nPitchLen-t) + t;
pscale = pNewPitch[k]/(m_uSamFre/(float)T0);
if ( pscale < 0.5 || pscale > 3 )
pscale = 1;
if (IsVowel(vPM.at(i-1)))
{
out<<pscale<<"\t";
if (pscale>1)
pshift=pshift-Approximate(T0*(pscale-1.0)/pscale); //»ùƵ¸Ä±ä¶ÔӦʱ³¤±ä»¯
else
pshift=pshift+Approximate(T0*(1.0/pscale-1.0));
}
// pshift += Approximate(1.0/pNewPitch[k]*m_uSamFre - T0);
vNewPM.at(i)=vPM.at(i)+pshift; //Éú³ÉÄ¿±ê»ùÒô±ê×¢ÐòÁÐ
}
out<<endl;
//ÎÒ²»ÏëÔÙÍæÁË£¡
int orl = nvc;
int avg = (vNewPM.back()-vNewPM.at(pos))/orl;
int tot = (dur*m_uSamFre-uvl)/avg;
out<<vNewPM.back()<<"\t"<<vNewPM.at(pos)<<endl;
vector<int> useds;
GetUseds(orl,tot,vNewPM.size()-2,useds);
out<<tot<<"\t"<<orl<<"\t"<<avg<<endl;
for(i=0;i<useds.size();i++) out<<useds[i]<<"\t";
out<<endl;
vector<vector<unsigned> > final;
GetFinal(vPM,vNewPM,pos,useds,final);
out<<"\t"<<final.size()<<endl;
unsigned ylen=final.back()[0]+(final.back()[2]-final.back()[1]);
short* y = new short[ylen];
memset(y,0,ylen*2);
memcpy(y,&unvoiced[0],unvoiced.size()*2);
vector<float> w;
float* pBeta = new float[final.size()];
for ( i = 0; i < final.size()-1; i++ )
pBeta[i] =
(float)(final.at(i)[2]-final.at(i)[1])/2/(float)(final.at(i+1)[0]-final.at(i)[0]);
pBeta[i] = pBeta[i-1];
OverlapAdd(final,y,ylen,w/*,pBeta*/);
delete[] pBeta;
smooth(y,ylen,w);
m_Data=y;
m_uDataLen=ylen;
}
// Decides how many times each pitch period is used during synthesis.
// input : orl: number of periods available in the original
// tot: number of periods wanted in the result
// size: number of entries to produce
// output : useds: `size` entries. When tot > orl every entry gets tot/orl,
//          and the remainder is spread by incrementing every n-th entry
//          counted from the back. Otherwise every entry is 1 and every n-th
//          entry counted from the back is set to 0.
void CPsola::GetUseds(int orl, int tot, int size, vector<int>& useds)
{
    // A negative size would be converted to a huge vector length.
    if(size <= 0)
    {
        useds.clear();
        return;
    }
    // Without original periods there is nothing to distribute, and tot/orl
    // would divide by zero: keep every entry once.
    if(orl <= 0)
    {
        useds = vector<int>(size,1);
        return;
    }
    // A negative request used to give n == 0 below and then index
    // useds.at(size), which throws. Treat it as "keep as little as possible".
    if(tot < 0)
        tot = 0;
    int m,n,i;
    if(tot>orl)
    {
        n = tot/orl;
        m = tot%orl;
        useds = vector<int>(size,n);
        if(m>0)
        {
            n = orl / m; // add one every n periods
            for(i=1;i<=m;i++)
            {
                if(i*n>=orl)
                    break;
                useds.at(size-i*n)++;
            }
        }
    }
    else
    {
        useds = vector<int>(size,1);
        m = orl - tot; // how many pitch periods have to be cut
        if(m>0)
        {
            n = orl / m; // remove one every n periods
            for(i=1;i<=m;i++)
            {
                if(i*n>=orl)
                    break;
                useds.at(size-i*n)=0;
            }
        }
    }
}
// Builds the synthesis plan. For every mark index i from pos onwards, one
// entry is appended per use listed in useds[i]. Each entry is a triple:
// [0] start position in the output,
// [1] start of the source segment (vPM[i]),
// [2] end of the source segment (vPM[i+2], i.e. two periods).
// The output position advances by the target period vNewPM[i+1]-vNewPM[i]
// after each entry.
void CPsola::GetFinal(vector<unsigned>& vPM,vector<unsigned>& vNewPM,
                      int pos, vector<int>& useds, vector<vector<unsigned> >& final)
{
    final.clear();
    int outStart = vNewPM.at(pos);
    for(int idx = pos; idx < useds.size(); idx++)
    {
        // useds[idx] repetitions of the same source segment; none when the
        // count is zero or negative.
        for(int rep = 1; rep <= useds.at(idx); rep++)
        {
            vector<unsigned> entry(3);
            entry[0] = outStart;
            entry[1] = vPM.at(idx);
            entry[2] = vPM.at(idx+2);
            final.push_back(entry);
            outStart = outStart+vNewPM.at(idx+1)-vNewPM.at(idx);
        }
    }
}
// Normalises the overlap-added signal: each of the first ylen samples of y
// is divided by its accumulated window weight. Weights below 0.1 are
// replaced by 1 (also inside w), so those samples pass through unchanged.
// w.at() throws if w holds fewer than ylen weights.
void CPsola::smooth(short* y, unsigned ylen, vector<float>& w)
{
    unsigned idx = 0;
    while ( idx < ylen )
    {
        float& weight = w.at(idx);
        if ( weight < 0.1 )
            weight = 1;
        y[idx] = y[idx] / weight;
        ++idx;
    }
}
// Overlap-adds Hanning-windowed source segments into the output buffer.
// input : final: synthesis plan, one triple per frame:
//         [0] output start, [1] source start, [2] source end
// y: output buffer (frames are added onto its current contents)
// ylen: length of y
// pBeta: optional per-frame factors, one per entry of final. When not NULL,
//        each frame is additionally processed through m_filter (LPC / FFT)
//        and its excitation spectrum is resampled by the frame's factor
// output : w: resized to ylen, holds the accumulated window weight per
//          output sample
void CPsola::OverlapAdd(vector<vector<unsigned> >& final, short* y, unsigned ylen,
                        vector<float>& w, float* pBeta)
{
    unsigned i,j;
    int k,kv;
    vector<short> frm;
    vector<float> wgt;
    float alpha,beta;
    w = vector<float>(ylen,0);
    int len;
    int numfrm = final.size();
    float re,te;
    int p=16;   // order handed to Wave2LPC
    vector<float> a,r;
    vector<complex<float> > fft_spec,lpc_spec,exc_spec,new_exc;
    unsigned start;
    int temp = 0;
    for ( i = 0; i < numfrm; i++ )
    {
        // Cut the frame out of the source data and window it.
        start=final.at(i)[0];
        len=final.at(i)[2]-final.at(i)[1];
        m_filter.window(wgt,len,"hanning");
        frm=vector<short>(m_Data+final.at(i)[1],m_Data+final.at(i)[2]);
        for(j=0;j<frm.size();j++)
            frm.at(j) *= wgt.at(j);
        if ( pBeta != NULL )
        {
            m_filter.Wave2LPC(frm,a,r,p,&re,&te);
            m_filter.FFT(frm,fft_spec);
            m_filter.LPCSpec(a,fft_spec.size(),sqrt(re),lpc_spec);
            m_filter.ExcitationSpec(fft_spec,lpc_spec,exc_spec);
            new_exc.clear();
            new_exc.resize(exc_spec.size());
            beta = pBeta[i];
            // Factors outside [0.5, 2] fall back to 1. This line used to read
            // "beta == 1;", a comparison with no effect, so out-of-range
            // factors (including 0) were applied unchanged.
            if (beta < 0.5 || beta > 2) beta = 1;
            for( k = 0; k < new_exc.size()/2; k++ )
            {
                kv = k / beta;
                alpha = (float)k/beta - kv;
                if(kv >= exc_spec.size()/2) //if beta<1 copy the tail of the spectrum
                {
                    if ( temp == 0 ) temp = k-1;
                    if ( 2*temp-k < 0) temp = k-1;
                    new_exc.at(k) = new_exc.at(2*temp-k);
                }
                else
                    new_exc.at(k) = (1-alpha)*exc_spec.at(kv)+alpha*exc_spec.at(kv+1);
                // Mirror the bin into the upper half as its complex conjugate.
                new_exc.at(new_exc.size()-1-k) = conj(new_exc.at(k));
            }
            for( k = 0; k < lpc_spec.size(); k++ )
                fft_spec.at(k) = lpc_spec.at(k)*new_exc.at(k);
            m_filter.FFT(frm,fft_spec,true);
            // The frame length changes with the factor.
            len /= beta;
            frm.clear();
            frm.resize(len);
            for( k = 0 ; k < len; k++ )
                frm.at(k) = real(fft_spec.at(k));
            m_filter.window(wgt,len,"hanning");
        }
        // Accumulate the frame and its window weights, clipped to the output.
        for(j=start;j<start+len && j<ylen;j++)
        {
            y[j]=y[j]+frm.at(j-start);
            w.at(j)=w.at(j)+wgt.at(j-start);
        }
    }
}
Subscribe to:
Post Comments (Atom)
Hi
ReplyDelete In your function SetPitch(float* pitch, unsigned size, float X1),
I wonder what the pitch is.
Can the pitch be found from m_Data, or does it come from somewhere else?