Sunday, 22 April 2012
WavFile.h
////////////////////////////////////////////////////////////////////////////////
///
/// Classes for easy reading & writing of WAV sound files.
///
/// For big-endian CPUs, define _BIG_ENDIAN_ at compile time (WavFile.cpp also
/// defines it automatically when BYTE_ORDER == BIG_ENDIAN) so that WAV files
/// are parsed correctly on such processors.
///
/// Admittedly, more complete WAV reader routines may exist in public domain, but
/// the reason for 'yet another' one is that those generic WAV reader libraries are
/// exhaustingly large and cumbersome! Wanted to have something simpler here, i.e.
/// something that's not already larger than rest of the SoundTouch/SoundStretch program...
///
/// Author : Copyright (c) Olli Parviainen
/// Author e-mail : oparviai 'at' iki.fi
/// SoundTouch WWW: http://www.surina.net/soundtouch
///
////////////////////////////////////////////////////////////////////////////////
//
// Last changed : $Date: 2006/02/05 16:44:06 $
// File revision : $Revision: 1.7 $
//
// $Id: WavFile.h,v 1.7 2006/02/05 16:44:06 Olli Exp $
//
////////////////////////////////////////////////////////////////////////////////
//
// License :
//
// SoundTouch audio processing library
// Copyright (c) Olli Parviainen
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
//
////////////////////////////////////////////////////////////////////////////////
#ifndef WAVFILE_H
#define WAVFILE_H
#include <stdio.h>
// Shorthand for 'unsigned int' used by the WAV structures and classes below.
// NOTE(review): '#ifndef uint' only tests for a preprocessor macro named
// 'uint'; it does not detect an already existing 'uint' typedef.
#ifndef uint
typedef unsigned int uint;
#endif
/// Leading 'RIFF' section of a WAV file. WavInFile reads it from the file as
/// one raw block, so the field order must match the on-disk layout.
struct WavRiff
{
    char riff_char[4];  ///< Section tag; "RIFF" in a valid file (not zero-terminated).
    int package_len;    ///< Length field; WavOutFile stores 'bytes written + 36' here.
    char wave[4];       ///< Form tag; "WAVE" in a valid file (not zero-terminated).
};
/// 'fmt ' section of a WAV file. Everything from 'fixed' onwards is read from
/// the file as one raw block, so the field order must match the on-disk layout.
struct WavFormat
{
    char fmt[4];            ///< Section tag; "fmt " in a valid file (not zero-terminated).
    int format_len;         ///< Length of the format data; WavOutFile writes 0x10.
    short fixed;            ///< Encoding tag; WavInFile accepts only the value 1.
    short channel_number;   ///< Number of channels (1=mono, 2=stereo).
    int sample_rate;        ///< Sample rate (e.g. 44100).
    int byte_rate;          ///< byte_per_sample * sample_rate.
    short byte_per_sample;  ///< Bytes per sample over all channels (bits * channels / 8).
    short bits_per_sample;  ///< Bits per single-channel sample (8 or 16).
};
/// Header of the 'data' section of a WAV file; the sample data follows it.
struct WavData
{
    char data_field[4];  ///< Section tag; "data" in a valid file (not zero-terminated).
    uint data_len;       ///< Size of the sample data in bytes.
};
/// Complete WAV header as WavOutFile writes it: the 'RIFF', 'fmt ' and 'data'
/// sections back to back.
struct WavHeader
{
    WavRiff riff;      ///< 'RIFF' section.
    WavFormat format;  ///< 'fmt ' section.
    WavData data;      ///< 'data' section header.
};
/// Reader for WAV audio files.
///
/// The constructor opens the file and parses its headers; the read()
/// overloads then deliver the sample data as 8-bit integers, 16-bit integers
/// or floating point values.
class WavInFile
{
public:
    /// Opens the given WAV file for reading and parses its headers.
    /// Throws a 'runtime_error' exception if the file can't be opened, is not
    /// a valid WAV file, or uses an unsupported encoding.
    WavInFile(const char *filename);

    /// Closes the file.
    ~WavInFile();

    /// Closes the file. The destructor closes the file as well, so calling
    /// this explicitly is optional.
    void close();

    /// Returns to the beginning of the sample data by re-reading the headers.
    void rewind();

    /// \return Sample rate of the file.
    uint getSampleRate() const;

    /// \return Number of bits per sample, i.e. 8 or 16.
    uint getNumBits() const;

    /// \return Total size of the sample data in bytes, as stated in the
    /// 'data' section header.
    uint getDataSizeInBytes() const;

    /// \return Total number of samples in the file.
    uint getNumSamples() const;

    /// \return Number of bytes per audio sample over all channels
    /// (e.g. 16bit stereo = 4 bytes/sample).
    uint getBytesPerSample() const;

    /// \return Number of audio channels in the file (1=mono, 2=stereo).
    uint getNumChannels() const;

    /// \return Length of the audio in milliseconds.
    uint getLengthMS() const;

    /// Reads 8-bit samples. Works only for files with 8 bit samples; throws a
    /// 'runtime_error' exception otherwise. Reads 'maxElems' elements, or as
    /// many as are left in the file if fewer remain.
    ///
    /// \return Number of 8-bit integers read from the file.
    int read(char *buffer, int maxElems);

    /// Reads samples as 16 bit integers (8 bit files are widened). Reads
    /// 'maxElems' elements, or as many as are left in the file if fewer remain.
    ///
    /// \param buffer   Pointer to buffer where to read data.
    /// \param maxElems Size of 'buffer' array (number of array elements).
    /// \return Number of 16-bit integers read from the file.
    int read(short *buffer, int maxElems);

    /// Reads samples as floating point values scaled to range [-1,1[. Reads
    /// 'maxElems' elements, or as many as are left in the file if fewer remain.
    ///
    /// \param buffer   Pointer to buffer where to read data.
    /// \param maxElems Size of 'buffer' array (number of array elements).
    /// \return Number of elements read from the file.
    int read(float *buffer, int maxElems);

    /// Checks for end of data.
    ///
    /// \return Nonzero if all sample data has been read or end-of-file reached.
    int eof() const;

private:
    /// File pointer.
    FILE *fptr;

    /// Number of sample data bytes read from the file so far.
    uint dataRead;

    /// WAV header information parsed from the file.
    WavHeader header;

    /// Reads all WAV file headers.
    /// \return zero if all ok, nonzero if file format is invalid.
    int readWavHeaders();

    /// Checks the 'fmt ' and 'data' header tags.
    /// \return zero if all ok, nonzero if file format is invalid.
    int checkCharTags();

    /// Reads a single WAV file header block.
    /// \return zero if a non-data block was read, positive once the 'data'
    /// block header has been read, negative if file format is invalid.
    int readHeaderBlock();

    /// Reads the WAV file 'riff' block.
    /// \return zero if all ok, nonzero if file format is invalid.
    int readRIFFBlock();
};
/// Writer for WAV audio files.
///
/// The constructor creates the file and writes a provisional header; the
/// write() overloads append sample data, and close() (or the destructor)
/// rewrites the header with the final data length.
class WavOutFile
{
public:
    /// Creates a new WAV file. Throws a 'runtime_error' exception if file
    /// creation fails.
    ///
    /// \param fileName   Filename
    /// \param sampleRate Sample rate (e.g. 44100 etc)
    /// \param bits       Bits per sample (8 or 16 bits)
    /// \param channels   Number of channels (1=mono, 2=stereo)
    WavOutFile(const char *fileName, int sampleRate, int bits, int channels);

    /// Finalizes & closes the WAV file.
    ~WavOutFile();

    /// Writes 8-bit samples. Works only with files created with 8 bits per
    /// sample. Throws a 'runtime_error' exception if writing to file fails.
    ///
    /// \param buffer   Pointer to sample data buffer.
    /// \param numElems How many array items are to be written to file.
    void write(const char *buffer, int numElems);

    /// Writes 16-bit samples (narrowed to 8 bits for 8-bit files). Throws a
    /// 'runtime_error' exception if writing to file fails.
    ///
    /// \param buffer   Pointer to sample data buffer.
    /// \param numElems How many array items are to be written to file.
    void write(const short *buffer, int numElems);

    /// Writes floating point samples, saturating sample values to range
    /// [-1..+1[. Throws a 'runtime_error' exception if writing to file fails.
    ///
    /// \param buffer   Pointer to sample data buffer.
    /// \param numElems How many array items are to be written to file.
    void write(const float *buffer, int numElems);

    /// Finalizes & closes the WAV file, completing the header according to
    /// the amount of data written. The destructor does this as well, so
    /// calling it explicitly is optional.
    void close();

private:
    /// Pointer to the WAV file.
    FILE *fptr;

    /// WAV file header data.
    WavHeader header;

    /// Number of sample data bytes written to the file so far.
    int bytesWritten;

    /// Fills in the WAV file header for the given format.
    void fillInHeader(const uint sampleRate, const uint bits, const uint channels);

    /// Completes the header with the amount of data written and rewrites it.
    void finishHeader();

    /// Writes the WAV file header to the beginning of the file.
    void writeHeader();
};
#endif
Easy reading & writing of WAV sound files.
////////////////////////////////////////////////////////////////////////////////
///
/// Classes for easy reading & writing of WAV sound files.
///
/// For big-endian CPU, define _BIG_ENDIAN_ during compile-time to correctly
/// parse the WAV files with such processors.
///
/// Admittedly, more complete WAV reader routines may exist in public domain,
/// but the reason for 'yet another' one is that those generic WAV reader
/// libraries are exhaustingly large and cumbersome! Wanted to have something
/// simpler here, i.e. something that's not already larger than rest of the
/// SoundTouch/SoundStretch program...
///
/// Author : Copyright (c) Olli Parviainen
/// Author e-mail : oparviai 'at' iki.fi
/// SoundTouch WWW: http://www.surina.net/soundtouch
///
////////////////////////////////////////////////////////////////////////////////
//
// Last changed : $Date: 2006/02/05 16:44:06 $
// File revision : $Revision: 1.15 $
//
// $Id: WavFile.cpp,v 1.15 2006/02/05 16:44:06 Olli Exp $
//
////////////////////////////////////////////////////////////////////////////////
//
// License :
//
// SoundTouch audio processing library
// Copyright (c) Olli Parviainen
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
//
////////////////////////////////////////////////////////////////////////////////
#include <assert.h>
#include <limits.h>
#include <stdio.h>
#include <string.h>
#include <stdexcept>
#include <string>
#include <vector>
#include "WavFile.h"
using namespace std;
// Four-character section tags used when parsing and writing WAV headers.
static const char riffStr[] = "RIFF";
static const char waveStr[] = "WAVE";
static const char fmtStr[] = "fmt ";
static const char dataStr[] = "data";
//////////////////////////////////////////////////////////////////////////////
//
// Helper functions for swapping byte order to correctly read/write WAV files
// with big-endian CPU's: Define compile-time definition _BIG_ENDIAN_ to
// turn-on the conversion if it appears necessary.
//
// For example, Intel x86 is little-endian and doesn't require conversion,
// while PowerPC of Mac's and many other RISC cpu's are big-endian.
// Automatic byte-order detection: when the BYTE_ORDER macro is available and
// equals BIG_ENDIAN, define _BIG_ENDIAN_ so that the real byte-swapping
// helpers are compiled instead of the no-op ones.
#ifdef BYTE_ORDER
// In gcc compiler detect the byte order automatically
#if BYTE_ORDER == BIG_ENDIAN
// big-endian platform.
#define _BIG_ENDIAN_
#endif
#endif
#ifdef _BIG_ENDIAN_
// big-endian CPU, swap bytes in 16 & 32 bit words
// Reverses the order of the four low bytes of 'dwData' in place
// (big-endian build).
static inline void _swap32(unsigned int &dwData)
{
    const unsigned int byte0 = dwData & 0xFFu;
    const unsigned int byte1 = (dwData >> 8) & 0xFFu;
    const unsigned int byte2 = (dwData >> 16) & 0xFFu;
    const unsigned int byte3 = (dwData >> 24) & 0xFFu;

    dwData = (byte0 << 24) | (byte1 << 16) | (byte2 << 8) | byte3;
}
// Exchanges the two bytes of a 16-bit value in place (big-endian build).
static inline void _swap16(unsigned short &wData)
{
    const unsigned int highByte = (wData >> 8) & 0xFFu;
    const unsigned int lowByte = wData & 0xFFu;

    wData = (unsigned short)((lowByte << 8) | highByte);
}
// Byte-swaps each of the 'dwNumWords' 16-bit values starting at 'pData'
// in place (big-endian build).
static inline void _swap16Buffer(unsigned short *pData, unsigned int dwNumWords)
{
    unsigned short *const pEnd = pData + dwNumWords;

    for (unsigned short *pWord = pData; pWord != pEnd; ++pWord)
    {
        _swap16(*pWord);
    }
}
#else // BIG_ENDIAN
// little-endian CPU, WAV file is ok as such
// No-op stand-in for the 32-bit byte swap: on little-endian builds the WAV
// byte order already matches the CPU.
static inline void _swap32(unsigned int &dwData)
{
    (void)dwData;   // deliberately left untouched
}
// No-op stand-in for the 16-bit byte swap on little-endian builds.
static inline void _swap16(unsigned short &wData)
{
    (void)wData;    // deliberately left untouched
}
// No-op stand-in for the 16-bit buffer byte swap on little-endian builds.
static inline void _swap16Buffer(unsigned short *pData, unsigned int dwNumBytes)
{
    (void)pData;        // buffer deliberately left untouched
    (void)dwNumBytes;
}
#endif // BIG_ENDIAN
//////////////////////////////////////////////////////////////////////////////
//
// Class WavInFile
//
/// Opens 'fileName' for binary reading and parses its WAV headers.
///
/// Throws 'runtime_error' if the file cannot be opened, if the headers are
/// invalid, or if the encoding tag is not 1. The file is closed again before
/// any of the header-related exceptions is thrown.
WavInFile::WavInFile(const char *fileName)
{
    dataRead = 0;

    // Try to open the file for reading
    fptr = fopen(fileName, "rb");
    if (fptr == NULL)
    {
        // didn't succeed
        std::string msg = "Error : Unable to open file \"";
        msg += fileName;
        msg += "\" for reading.";
        throw std::runtime_error(msg);
    }

    // Read the file headers and validate the encoding
    const char *problem = NULL;
    if (readWavHeaders() != 0)
    {
        // Something didn't match in the wav file headers
        problem = "\" is corrupt or not a WAV file";
    }
    else if (header.format.fixed != 1)
    {
        problem = "\" uses unsupported encoding.";
    }

    if (problem != NULL)
    {
        // The destructor does not run for an object whose constructor throws,
        // so the file has to be released here or the handle would leak.
        fclose(fptr);
        fptr = NULL;

        std::string msg = "File \"";
        msg += fileName;
        msg += problem;
        throw std::runtime_error(msg);
    }
}
/// Destructor: releases the file by delegating to close().
WavInFile::~WavInFile()
{
    this->close();
}
/// Seeks back to the start of the file, parses the headers again (which
/// leaves the file positioned at the sample data) and resets the read counter.
void WavInFile::rewind()
{
    fseek(fptr, 0, SEEK_SET);

    const int hdrsOk = readWavHeaders();
    assert(hdrsOk == 0);
    (void)hdrsOk;   // only inspected by the assert

    dataRead = 0;
}
/// Verifies that the parsed header carries both the 'fmt ' and the 'data' tag.
/// \return 0 if both tags are present, -1 otherwise.
int WavInFile::checkCharTags()
{
    // header.format.fmt should equal 'fmt '
    const bool fmtTagOk = (memcmp(fmtStr, header.format.fmt, 4) == 0);
    // header.data.data_field should equal 'data'
    const bool dataTagOk = (memcmp(dataStr, header.data.data_field, 4) == 0);

    return (fmtTagOk && dataTagOk) ? 0 : -1;
}
/// Reads up to 'maxElems' 8-bit samples into 'buffer'.
///
/// Never reads past the data length stated in the header. Throws
/// 'runtime_error' if the file does not hold 8-bit samples.
///
/// \return Number of bytes actually read; 0 if 'maxElems' is not positive.
int WavInFile::read(char *buffer, int maxElems)
{
    // ensure it's 8 bit format
    if (header.format.bits_per_sample != 8)
    {
        throw std::runtime_error("Error: WavInFile::read(char*, int) works only with 8bit samples.");
    }
    assert(sizeof(char) == 1);

    // A negative count would otherwise reach fread() as a huge unsigned size.
    if (maxElems <= 0) return 0;

    // dataRead only grows by amounts clamped to the remaining length, so this
    // subtraction cannot wrap; comparing against it avoids the overflow that
    // 'dataRead + maxElems' could produce.
    const uint remaining = header.data.data_len - dataRead;
    uint numBytes = (uint)maxElems;
    if (numBytes > remaining)
    {
        // Don't read more samples than are marked available in header
        numBytes = remaining;
    }

    const int numRead = (int)fread(buffer, 1, numBytes, fptr);
    dataRead += numRead;
    return numRead;
}
int WavInFile::read(short *buffer, int maxElems)
{
unsigned int afterDataRead;
int numBytes;
int numElems;
if (header.format.bits_per_sample == 8)
{
// 8 bit format
char *temp = new char[maxElems];
int i;
numElems = read(temp, maxElems);
// convert from 8 to 16 bit
for (i = 0; i < numElems; i ++)
{
buffer[i] = temp[i] << 8;
}
delete[] temp;
}
else
{
// 16 bit format
assert(header.format.bits_per_sample == 16);
assert(sizeof(short) == 2);
numBytes = maxElems * 2;
afterDataRead = dataRead + numBytes;
if (afterDataRead > header.data.data_len)
{
// Don't read more samples than are marked available in header
numBytes = header.data.data_len - dataRead;
assert(numBytes >= 0);
}
numBytes = fread(buffer, 1, numBytes, fptr);
dataRead += numBytes;
numElems = numBytes / 2;
// 16bit samples, swap byte order if necessary
_swap16Buffer((unsigned short *)buffer, numElems);
}
return numElems;
}
int WavInFile::read(float *buffer, int maxElems)
{
short *temp = new short[maxElems];
int num;
int i;
double fscale;
num = read(temp, maxElems);
fscale = 1.0 / 32768.0;
// convert to floats, scale to range [-1..+1[
for (i = 0; i < num; i ++)
{
buffer[i] = (float)(fscale * (double)temp[i]);
}
delete[] temp;
return num;
}
/// Tells whether there is nothing more to read.
/// \return 1 if all sample data announced in the header has been read or the
/// underlying file reports end-of-file, otherwise 0.
int WavInFile::eof() const
{
    if (dataRead == header.data.data_len)
    {
        return 1;
    }
    return feof(fptr) ? 1 : 0;
}
/// Closes the file if it is still open. Safe to call more than once: the
/// destructor calls close() too, so an explicit close() followed by
/// destruction must not hand a NULL pointer to fclose().
void WavInFile::close()
{
    if (fptr == NULL) return;   // already closed

    fclose(fptr);
    fptr = NULL;
}
// Returns 1 if the character code lies between white space ' ' and little 'z'
// (both inclusive), otherwise 0.
static int isAlpha(char c)
{
    if (c < ' ') return 0;
    if (c > 'z') return 0;
    return 1;
}
// Returns 1 if every character of the zero-terminated string lies between
// white space ' ' and little 'z', otherwise 0. An empty string yields 1.
static int isAlphaStr(char *str)
{
    for (; *str != 0; ++str)
    {
        if (isAlpha(*str) == 0) return 0;
    }
    return 1;
}
/// Reads the leading 'RIFF' section into header.riff and checks its tags.
/// \return 0 if both the 'RIFF' and the 'WAVE' tag are present, -1 otherwise.
int WavInFile::readRIFFBlock()
{
    fread(&(header.riff), sizeof(WavRiff), 1, fptr);

    // swap 32bit data byte order if necessary
    _swap32((unsigned int &)header.riff.package_len);

    // header.riff.riff_char should equal 'RIFF'
    const bool riffTagOk = (memcmp(riffStr, header.riff.riff_char, 4) == 0);
    // header.riff.wave should equal 'WAVE'
    const bool waveTagOk = (memcmp(waveStr, header.riff.wave, 4) == 0);

    return (riffTagOk && waveTagOk) ? 0 : -1;
}
int WavInFile::readHeaderBlock()
{
char label[5];
string sLabel;
// lead label string
fread(label, 1, 4, fptr);
label[4] = 0;
if (isAlphaStr(label) == 0) return -1; // not a valid label
// Decode blocks according to their label
if (strcmp(label, fmtStr) == 0)
{
int nLen, nDump;
// 'fmt ' block
memcpy(header.format.fmt, fmtStr, 4);
// read length of the format field
fread(&nLen, sizeof(int), 1, fptr);
// swap byte order if necessary
_swap32((unsigned int &)nLen); // int format_len;
header.format.format_len = nLen;
// calculate how much length differs from expected
nDump = nLen - (sizeof(header.format) - 8);
// if format_len is larger than expected, read only as much data as we've space for
if (nDump > 0)
{
nLen = sizeof(header.format) - 8;
}
// read data
fread(&(header.format.fixed), nLen, 1, fptr);
// swap byte order if necessary
_swap16((unsigned short &)header.format.fixed); // short int fixed;
_swap16((unsigned short &)header.format.channel_number); // short int channel_number;
_swap32((unsigned int &)header.format.sample_rate); // int sample_rate;
_swap32((unsigned int &)header.format.byte_rate); // int byte_rate;
_swap16((unsigned short &)header.format.byte_per_sample); // short int byte_per_sample;
_swap16((unsigned short &)header.format.bits_per_sample); // short int bits_per_sample;
// if format_len is larger than expected, skip the extra data
if (nDump > 0)
{
fseek(fptr, nDump, SEEK_CUR);
}
return 0;
}
else if (strcmp(label, dataStr) == 0)
{
// 'data' block
memcpy(header.data.data_field, dataStr, 4);
fread(&(header.data.data_len), sizeof(uint), 1, fptr);
// swap byte order if necessary
_swap32((unsigned int &)header.data.data_len);
return 1;
}
else
{
uint len, i;
uint temp;
// unknown block
// read length
fread(&len, sizeof(len), 1, fptr);
// scan through the block
for (i = 0; i < len; i ++)
{
fread(&temp, 1, 1, fptr);
if (feof(fptr)) return -1; // unexpected eof
}
}
return 0;
}
/// Clears the header structure and parses the file headers: first the 'RIFF'
/// block, then header blocks until the 'data' block is found.
/// \return 0 if the headers are valid, nonzero otherwise.
int WavInFile::readWavHeaders()
{
    memset(&header, 0, sizeof(header));

    if (readRIFFBlock() != 0) return 1;

    // read header blocks until data block is found
    for (;;)
    {
        const int blockResult = readHeaderBlock();
        if (blockResult < 0) return 1;      // error in file structure
        if (blockResult != 0) break;        // 'data' block reached
    }

    // check that all required tags are legal
    return checkCharTags();
}
/// \return Channel count stored in the format header.
uint WavInFile::getNumChannels() const
{
    const uint channels = header.format.channel_number;
    return channels;
}
/// \return Bits per sample stored in the format header.
uint WavInFile::getNumBits() const
{
    const uint bits = header.format.bits_per_sample;
    return bits;
}
/// \return Bytes per sample over all channels, derived as channels * bits / 8.
uint WavInFile::getBytesPerSample() const
{
    const uint channels = getNumChannels();
    const uint bits = getNumBits();
    return channels * bits / 8;
}
/// \return Sample rate stored in the format header.
uint WavInFile::getSampleRate() const
{
    const uint rate = header.format.sample_rate;
    return rate;
}
/// \return Size of the sample data in bytes as stated in the 'data' header.
uint WavInFile::getDataSizeInBytes() const
{
    const uint sizeInBytes = header.data.data_len;
    return sizeInBytes;
}
/// \return Total number of samples in the file: the data length divided by
/// the header's bytes-per-sample value, or 0 if that value is not positive
/// (it comes from the file, so a malformed header must not cause a division
/// by zero).
uint WavInFile::getNumSamples() const
{
    const int bytesPerSample = header.format.byte_per_sample;
    if (bytesPerSample <= 0) return 0;

    return header.data.data_len / (uint)bytesPerSample;
}
/// \return Length of the audio in milliseconds (truncated), 0 if the header
/// states a zero sample rate, or UINT_MAX if the result does not fit 'uint'.
///
/// The product is computed in 64 bits: with 32-bit arithmetic
/// '1000 * numSamples' overflows as soon as the file holds more than
/// UINT_MAX / 1000 samples, i.e. roughly 97 seconds at 44100Hz.
uint WavInFile::getLengthMS() const
{
    const uint sampleRate = getSampleRate();
    if (sampleRate == 0) return 0;      // malformed header, avoid division by zero

    const unsigned long long lengthMs = 1000ULL * getNumSamples() / sampleRate;
    return (lengthMs > UINT_MAX) ? UINT_MAX : (uint)lengthMs;
}
//////////////////////////////////////////////////////////////////////////////
//
// Class WavOutFile
//
/// Creates 'fileName' for binary writing and writes a provisional header for
/// the given format. Throws 'runtime_error' if the file cannot be created.
WavOutFile::WavOutFile(const char *fileName, int sampleRate, int bits, int channels)
{
    bytesWritten = 0;

    fptr = fopen(fileName, "wb");
    if (fptr == NULL)
    {
        throw std::runtime_error(std::string("Error : Unable to open file \"") + fileName + "\" for writing.");
    }

    fillInHeader(sampleRate, bits, channels);
    writeHeader();
}
/// Destructor: finalizes and closes the file by delegating to close().
WavOutFile::~WavOutFile()
{
    this->close();
}
/// Initializes the in-memory header for the given format. Both length fields
/// are left at zero here; finishHeader() supplies them.
void WavOutFile::fillInHeader(uint sampleRate, uint bits, uint channels)
{
    // 'riff' part: tags 'RIFF' and 'WAVE', package length unknown so far
    memcpy(&(header.riff.riff_char), riffStr, 4);
    memcpy(&(header.riff.wave), waveStr, 4);
    header.riff.package_len = 0;

    // 'format' part: tag 'fmt ' followed by the format description
    memcpy(&(header.format.fmt), fmtStr, 4);
    header.format.format_len = 16;
    header.format.fixed = 1;
    header.format.channel_number = (short)channels;
    header.format.bits_per_sample = (short)bits;
    header.format.sample_rate = sampleRate;
    header.format.byte_per_sample = (short)(bits * channels / 8);
    header.format.byte_rate = header.format.byte_per_sample * sampleRate;

    // 'data' part: tag 'data', data length unknown so far
    memcpy(&(header.data.data_field), dataStr, 4);
    header.data.data_len = 0;
}
/// Stores the amount of data written so far into the header length fields
/// and writes the header to the file again.
void WavOutFile::finishHeader()
{
    header.data.data_len = bytesWritten;
    header.riff.package_len = bytesWritten + 36;

    writeHeader();
}
/// Writes the header to the beginning of the file and then moves the file
/// position back to the end. A byte-swapped copy is written, so the
/// in-memory header keeps its native byte order.
void WavOutFile::writeHeader()
{
    WavHeader hdrTemp = header;

    // swap byte order if necessary, section by section
    _swap32((unsigned int &)hdrTemp.riff.package_len);

    _swap32((unsigned int &)hdrTemp.format.format_len);
    _swap16((unsigned short &)hdrTemp.format.fixed);
    _swap16((unsigned short &)hdrTemp.format.channel_number);
    _swap32((unsigned int &)hdrTemp.format.sample_rate);
    _swap32((unsigned int &)hdrTemp.format.byte_rate);
    _swap16((unsigned short &)hdrTemp.format.byte_per_sample);
    _swap16((unsigned short &)hdrTemp.format.bits_per_sample);

    _swap32((unsigned int &)hdrTemp.data.data_len);

    // write the header at the beginning of the file
    fseek(fptr, 0, SEEK_SET);
    fwrite(&hdrTemp, sizeof(hdrTemp), 1, fptr);

    // jump back to the end of the file
    fseek(fptr, 0, SEEK_END);
}
/// Completes the header and closes the file if it is still open. Safe to
/// call more than once: the destructor calls close() too, so an explicit
/// close() followed by destruction must not use a NULL file pointer.
void WavOutFile::close()
{
    if (fptr == NULL) return;   // already closed

    finishHeader();
    fclose(fptr);
    fptr = NULL;
}
/// Appends 'numElems' 8-bit samples from 'buffer' to the file.
///
/// Throws 'runtime_error' if the file was not created with 8 bits per sample
/// or if writing fails. A non-positive 'numElems' writes nothing, matching
/// write(const short*, int).
void WavOutFile::write(const char *buffer, int numElems)
{
    if (header.format.bits_per_sample != 8)
    {
        throw std::runtime_error("Error: WavOutFile::write(const char*, int) accepts only 8bit samples.");
    }
    assert(sizeof(char) == 1);

    // A negative count would otherwise reach fwrite() as a huge unsigned size.
    if (numElems < 1) return;   // nothing to do

    const size_t written = fwrite(buffer, 1, (size_t)numElems, fptr);
    if (written != (size_t)numElems)
    {
        throw std::runtime_error("Error while writing to a wav file.");
    }

    bytesWritten += numElems;
}
void WavOutFile::write(const short *buffer, int numElems)
{
int res;
// 16 bit samples
if (numElems < 1) return; // nothing to do
if (header.format.bits_per_sample == 8)
{
int i;
char *temp = new char[numElems];
// convert from 16bit format to 8bit format
for (i = 0; i < numElems; i ++)
{
temp[i] = buffer[i] >> 8;
}
// write in 8bit format
write(temp, numElems);
delete[] temp;
}
else
{
// 16bit format
unsigned short *pTemp = new unsigned short[numElems];
assert(header.format.bits_per_sample == 16);
// allocate temp buffer to swap byte order if necessary
memcpy(pTemp, buffer, numElems * 2);
_swap16Buffer(pTemp, numElems);
res = fwrite(pTemp, 2, numElems, fptr);
delete[] pTemp;
if (res != numElems)
{
throw runtime_error("Error while writing to a wav file.");
}
bytesWritten += 2 * numElems;
}
}
void WavOutFile::write(const float *buffer, int numElems)
{
int i;
short *temp = new short[numElems];
int iTemp;
// convert to 16 bit integer
for (i = 0; i < numElems; i ++)
{
// convert to integer
iTemp = (int)(32768.0f * buffer[i]);
// saturate
if (iTemp < -32768) iTemp = -32768;
if (iTemp > 32767) iTemp = 32767;
temp[i] = (short)iTemp;
}
write(temp, numElems);
delete[] temp;
}
Simple SOLA algorithm Main.cpp
/////////////////////////////////////////////////////////////////////
//
// Simple SOLA algorithm example. The example reads a .wav sound
// file with mono-16bit-44100Hz sample format, process it with SOLA
// and writes output into another .wav file.
//
// Copyright (c) Olli Parviainen 2006 <oparviai@iki.fi>
//
/////////////////////////////////////////////////////////////////////
#include <stdexcept>
#include "wavfile.h"
using namespace std;
// Tuning parameters of the SOLA example. All sizes are in samples.
// NOTE(review): the active sizes correspond to the stated durations at
// 8000Hz, while main() only accepts 44100Hz input; the values after '//' are
// the 44100Hz equivalents — confirm which set is intended.
// Time scaling factor, values > 1.0 increase, values < 1.0 decrease tempo
#define TIME_SCALE 1 // 1 = tempo unchanged
// Processing sequence size (100 msec: 800 samples at 8000Hz, 4410 at 44100Hz)
#define SEQUENCE 800//4410
// Overlapping size (20 msec: 160 samples at 8000Hz, 882 at 44100Hz)
#define OVERLAP 160//882
// Best overlap offset seeking window (15 msec: 120 samples at 8000Hz, 662 at 44100Hz)
#define SEEK_WINDOW 120//662
// Processing sequence flat mid-section duration
#define FLAT_DURATION (SEQUENCE - 2 * (OVERLAP))
// Theoretical interval between the processing sequences
#define SEQUENCE_SKIP ((int)((SEQUENCE - OVERLAP) * (TIME_SCALE)))
typedef short SAMPLE; // sample type, 16bit signed integer
// Use cross-correlation function to find best overlapping offset
// where input_prev and input_new match best with each other.
//
// 'input_prev' must hold at least OVERLAP samples and 'input_new' at least
// SEEK_WINDOW + OVERLAP - 1 samples. Returns the offset within
// [0..SEEK_WINDOW[ that gives the highest correlation (the first one on ties).
int seek_best_overlap(const SAMPLE *input_prev, const SAMPLE *input_new)
{
    int bestoffset = 0;
    float bestcorr = -1e30f;
    float temp[OVERLAP];

    // Precalculate overlapping slopes with input_prev. The product is formed
    // in floating point: as an 'int' expression it overflows for larger
    // OVERLAP values (e.g. 32767 * 441 * 441 with the 44100Hz size of 882).
    for (int i = 0; i < OVERLAP; i ++)
    {
        temp[i] = (float)input_prev[i] * (float)(i * (OVERLAP - i));
    }

    // Find best overlap offset within [0..SEEK_WINDOW[
    for (int i = 0; i < SEEK_WINDOW; i ++)
    {
        float crosscorr = 0;

        for (int j = 0; j < OVERLAP; j ++)
        {
            crosscorr += (float)input_new[i + j] * temp[j];
        }

        if (crosscorr > bestcorr)
        {
            // found new best offset candidate
            bestcorr = crosscorr;
            bestoffset = i;
        }
    }

    return bestoffset;
}
// Cross-fades 'input_prev' into 'input_new' over OVERLAP samples: the weight
// of 'input_prev' falls linearly from OVERLAP to 1 while the weight of
// 'input_new' rises from 0 to OVERLAP - 1. The result goes to 'output'.
void overlap(SAMPLE *output, const SAMPLE *input_prev, const SAMPLE *input_new)
{
    for (int pos = 0; pos < OVERLAP; ++pos)
    {
        const int fading_out = input_prev[pos] * (OVERLAP - pos);
        const int fading_in = input_new[pos] * pos;

        output[pos] = (fading_out + fading_in) / OVERLAP;
    }
}
// SOLA algorithm. Time-scales the 'num_in_samples' samples in 'input' and
// writes the result to 'output', which the caller must have made large
// enough. Returns the number of output samples written.
int sola(SAMPLE *output, const SAMPLE *input, int num_in_samples)
{
    const int out_step = SEQUENCE - OVERLAP;    // samples produced per round
    const SAMPLE *seq_offset = input;
    int num_out_samples = 0;

    for (; num_in_samples > SEQUENCE_SKIP + SEEK_WINDOW; num_in_samples -= SEQUENCE_SKIP)
    {
        // copy flat mid-sequence from current processing sequence to output
        memcpy(output, seq_offset, FLAT_DURATION * sizeof(SAMPLE));

        // the overlap region sits at the end of the current processing sequence
        const SAMPLE *prev_offset = seq_offset + FLAT_DURATION;

        // move input pointer to the theoretical begin of the next sequence
        input += SEQUENCE_SKIP - OVERLAP;

        // seek actual best matching offset using cross-correlation
        seq_offset = input + seek_best_overlap(prev_offset, input);

        // cross-fade previous & new sequence right after the flat part
        overlap(output + FLAT_DURATION, prev_offset, seq_offset);

        // step input & sequence pointers over the overlapped part
        seq_offset += OVERLAP;
        input += OVERLAP;

        // advance output pointer & output sample counter
        output += out_step;
        num_out_samples += out_step;
    }

    return num_out_samples;
}
// Buffers for input/output sample data. For sake of simplicity, these are
// just made 'big enough' for the example purpose: main() fills 'inbuffer'
// with at most 10240000 samples and sola() writes its result to 'outbuffer'.
SAMPLE inbuffer[10240000];
SAMPLE outbuffer[20240000];
// Example entry point: reads the WAV file named by the first argument,
// processes it with sola() and writes the result to the file named by the
// second argument.
//
// Returns 0 on success and -1 on a usage error, an unsupported input format
// or any error reported by the WAV classes.
int main(int numstr, char **pstr)
{
    if (numstr < 3)
    {
        printf("usage: solatest input.wav output.wav\n");
        return -1;
    }

    try
    {
        // Open input file
        WavInFile infile(pstr[1]);

        if ((infile.getSampleRate() != 44100) || (infile.getNumChannels() != 1))
        {
            printf("Sorry, this example processes mono audio sampled at 44100Hz.\n");
            return -1;
        }

        // Read data from input file; the limit is derived from the buffer
        // itself so that it cannot get out of sync with the array size
        const int capacity = (int)(sizeof(inbuffer) / sizeof(inbuffer[0]));
        const int insamples = infile.read(inbuffer, capacity);

        // Process
        const int outsamples = sola(outbuffer, inbuffer, insamples);

        // Write result to output file
        WavOutFile outfile(pstr[2], infile.getSampleRate(), infile.getNumBits(), infile.getNumChannels());
        outfile.write(outbuffer, outsamples);
    }
    catch (const std::exception &e)
    {
        printf("Error: %s\n", e.what());
        return -1;      // report the failure to the caller instead of returning 0
    }

    return 0;
}
SOLA.M MATLAB
% SOLA time-scale modification of a speech file, followed by resampling by
% the factor L/M using linear interpolation.
sa=585;ss=438; % analysis shift Sa and synthesis shift Ss in samples (the original note appears to say these settings lower the pitch)
w=512; % analysis window length in samples
wov=w-ss;kmax=500; % overlap length between windows; number of candidate offsets searched
x=wavread('v017');
%*********** time scaling **************
xst=1;yout=[];
xbuff=x(sa:sa+w-1);
st=sa:sa:length(x); % analysis positions: the first one at Sa, each following one Sa samples later
r=mod(length(x),sa);
num=(length(x)-r)/sa; % total number of iterations
x=[x; zeros(w+kmax,1)]; % zero-pad the tail, because the analysis window of length w is shifted by up to kmax samples
for j=1:(num)
y=x(xst:1:xst+w-1); % reference segment of w samples starting at xst
start=st(j):st(j)+kmax-1; % candidate start positions of the analysis window (at most kmax shifts)
cy=y(end:-1:end-wov+1); % last wov samples of y, in reversed order...
cy=cy(end:-1:1); % ...put back into their original order
km_buf=zeros(1,kmax); % correlation score for each of the kmax candidates
for i=1:kmax
xbuff=x(start(i):start(i)+w-1); % the w samples covered by the analysis window
cx=xbuff(1:wov); % its first wov samples
rxx_k=sum(cx.^2);
rxy_k=sum(cx.*cy);
if ( rxx_k==0) % zero energy: treated as having reached the zero-padded tail, stop searching
km_buf(i)=0; % fixed: was 'kmbuf(i)', which created an unrelated variable
break;
else
km_buf(i)=(rxy_k.^2)./rxx_k;
end
end
[kmval,km]=max(km_buf); % position of the (first) largest score; always a scalar, unlike find(km_buf==max(km_buf))
yout=[yout; x(start(km)+wov:start(km)+w+1)]; % append the samples after the overlap of the best match. NOTE(review): this range holds ss+2 samples, not ss - confirm the end index
xst=xst+sa;
end
%************ sample-rate conversion ************************
L=sa;M=ss;
data=[];
data_out=[];
y_end=length(yout);
y_st=0;
for j=1:L;
for i=1:M;
y_st=y_st+1;
if y_st<y_end
invert=linspace(yout(y_st),yout(y_st+1),L+1); % linearly insert L-1 points between two neighbouring samples
elseif y_st==y_end
invert=linspace(yout(y_st),0,L+1); % last sample: insert L-1 points between it and zero
else
break;
end
data=[data invert(1:end-1)]; % append these L points to data
end
data_out=[data_out data(1:M:end)]; % keep every M-th point of data and append it to data_out
data=[]; % clear data for the next round
end
Pitch Synchronous Overlap Add Method (PSOLA.CPP)
PSOLA.CPP
#include "../common/tdpsola.h"
#include "psola.h"
// File-scope CPSOLA object; every PSOLA_* function in this file forwards to it.
CPSOLA instance;
/// Forwards 'enable' to enableCosineSmooth() of the file-scope CPSOLA object.
void PSOLA_EnableCosineSmooth(bool enable)
{
instance.enableCosineSmooth(enable);
}
/// Forwards all arguments unchanged to setSpectralMapping() of the file-scope
/// CPSOLA object.
void PSOLA_SetSpectralMapping(bool useBezier, int x1, int y1, int x2, int y2)
{
instance.setSpectralMapping(useBezier, x1, y1, x2, y2);
}
/// Returns the result of isCosineSmoothEnabled() of the file-scope CPSOLA object.
bool PSOLA_IsCosineSmoothEnabled()
{
return instance.isCosineSmoothEnabled();
}
/// Forwards 'method' to setVoicelessExtension() of the file-scope CPSOLA object.
void PSOLA_EnableVoicelessExtension(int method)
{
instance.setVoicelessExtension(method);
}
/// Returns the result of getVoicelessExtension() of the file-scope CPSOLA object.
int PSOLA_GetVoicelessExtension()
{
return instance.getVoicelessExtension();
}
/// Forwards to modifyPitchContour() of the file-scope CPSOLA object and
/// returns its result. Note that the argument order differs from this
/// function's parameter order: 'trgWave' and 'trgBufferLength' are passed
/// right after 'tagNumber'.
unsigned PSOLA_ModifyPitchContour(
const short * srcWave,
unsigned srcLength,
const unsigned *srcTags,
unsigned tagNumber,
const unsigned *trgPeriods,
unsigned periodNumber,
unsigned trgDuration,
float specRatio,
short * trgWave,
unsigned trgBufferLength,
unsigned sampleRate)
{
return instance.modifyPitchContour(srcWave, srcLength, srcTags, tagNumber, trgWave, trgBufferLength, trgPeriods, periodNumber, trgDuration, specRatio, sampleRate);
}
/// Forwards to modify() of the file-scope CPSOLA object and returns its
/// result. Note that the argument order differs from this function's
/// parameter order: 'trgWave' and 'trgBufferLength' are passed right after
/// 'tagNumber'.
unsigned PSOLA_Modify(
const short * srcWave,
unsigned srcLength,
const unsigned *srcTags,
unsigned tagNumber,
unsigned trgPitch,
unsigned trgDuration,
float specRatio,
short * trgWave,
unsigned trgBufferLength,
unsigned sampleRate)
{
return instance.modify(srcWave, srcLength, srcTags, tagNumber, trgWave, trgBufferLength, trgPitch, trgDuration, specRatio, sampleRate);
}
/// Forwards to modifyRatio() of the file-scope CPSOLA object and returns its
/// result. Note that the argument order differs from this function's
/// parameter order: 'trgWave' and 'trgBufferLength' are passed right after
/// 'tagNumber'.
unsigned PSOLA_ModifyRatio(
const short * srcWave,
unsigned srcLength,
const unsigned * srcTags,
unsigned tagNumber,
float pitchRatio,
float durationRatio,
float specRatio,
short * trgWave,
unsigned trgBufferLength,
unsigned sampleRate
)
{
return instance.modifyRatio(srcWave, srcLength, srcTags, tagNumber, trgWave, trgBufferLength, pitchRatio, durationRatio, specRatio, sampleRate);
}
PSOLA.H
///
/// Modify wave using TP-PSOLA algorithm
///
/// @version 1.0.0
/// @author Jun Xu
/// @date 2007/07/18
///
#ifndef _CST_PSOLA_PSOLA_H_
#define _CST_PSOLA_PSOLA_H_
// Import/export decoration for the PSOLA_* functions. When PSOLA_EXPORTS is
// not defined the functions are declared as DLL imports and the matching
// import library is requested through '#pragma comment' (psolad.lib when
// _DEBUG is defined, psola.lib otherwise); when PSOLA_EXPORTS is defined the
// functions are declared as DLL exports.
#ifndef PSOLA_EXPORTS
# define PSOLA_DLL_EXPORTS __declspec(dllimport)
# ifdef _DEBUG
# pragma comment(lib, "psolad.lib")
# pragma message("Linking with psolad.dll")
# else
# pragma comment(lib, "psola.lib")
# pragma message("Linking with psola.dll")
# endif
#else
# define PSOLA_DLL_EXPORTS __declspec(dllexport)
#endif
// Opens an extern "C" block around the declarations below.
// NOTE(review): the macro predefined by C++ compilers is '__cplusplus' (two
// leading underscores); unless '_cplusplus' is defined elsewhere this guard
// is never taken. Confirm, and change it together with the matching guard
// that closes the block.
#ifdef _cplusplus
extern "C" {
#endif
// Pitch-period extension methods for voiceless segments (comments translated
// from the original Chinese).
#define PSOLA_VLPPMETHOD_NONE 0 ///< no pitch-period extension for voiceless segments
#define PSOLA_VLPPMETHOD_FIXED 1 ///< extend voiceless segments with a fixed period
#define PSOLA_VLPPMETHOD_EQUAL 2 ///< extend voiceless segments with equal periods, each as long as the first period
#define PSOLA_VLPPMETHOD_PEAK 3 ///< extend voiceless segments according to the peak points of the speech
#define PSOLA_VLPPMETHOD_AUTO 4 ///< extend voiceless segments automatically (decided by the duration modification ratio)
#define PSOLA_VLPPMETHOD_MAX 4 ///< presumably the highest valid method value - confirm
///
/// Sets the spectral mapping mode used during modification. The original
/// author notes that the results are poor and that it should be used with
/// caution.
///
/// @param useBezier true: use a Bezier curve, false: use a polyline
/// @param x1,y1 coordinates of the first control point
/// @param x2,y2 coordinates of the second control point
///
PSOLA_DLL_EXPORTS void PSOLA_SetSpectralMapping(bool useBezier, int x1, int y1, int x2, int y2);
///
/// Sets the period extension method for voiceless segments.
///
/// @param method 0-3, see the PSOLA_VLPPMETHOD_* macro definitions
/// (NOTE(review): the macros also define the value 4, PSOLA_VLPPMETHOD_AUTO -
/// confirm the valid range)
///
PSOLA_DLL_EXPORTS void PSOLA_EnableVoicelessExtension(int method);
/// Gets the period extension method for voiceless segments.
PSOLA_DLL_EXPORTS int PSOLA_GetVoicelessExtension();
///
/// Enables smoothing of concatenation boundaries with a cosine function.
///
/// @param enable true: enable, false: disable
///
PSOLA_DLL_EXPORTS void PSOLA_EnableCosineSmooth(bool enable);
/// Tells whether cosine boundary smoothing is enabled.
PSOLA_DLL_EXPORTS bool PSOLA_IsCosineSmoothEnabled();
///
/// Modify wave using PSOLA model
/// Modifies speech with the PSOLA model, given the target average pitch
/// period and the target speech length.
///
/// @param srcWave[in] wave buffer read from speech database
/// speech data; must have 16-bit sample precision
/// @param srcLength[in] wave buffer length, in short count
/// number of samples in the speech data
/// @param srcTags[in] peak tags read from speech database
/// each tag indicate the peak position offset to the first sample of wave
/// array of peak-point tags of the speech data;
/// each entry stores the offset of a peak point relative to the start of the speech
/// @param tagNumber[in] peak tag count of srcTags
/// number of peak tags
/// @param trgPitch[in] predicted average pitch period
/// trgPitch=0 means keeping pitch no change
/// size of the target pitch period; 0 means no modification
/// @param trgDuration[in] predicted wave duration, in short
/// target number of speech samples; 0 means no modification
/// @param specRatio [in] modification ratio of spectra
/// spectral modification ratio; 0 means no modification
/// @param trgWave[out] modified wave, buffer should be allocated outside
/// target speech data buffer, allocated by the caller; it must be somewhat larger than trgDuration
/// @param sampleRate[in] Sample count per second, default is 16000
/// sample rate; normally use 16000
///
/// @return true if modified successfully
/// false if not, then the content of trgWave if un-defined
/// NOTE(review): the declared return type is 'unsigned', not a boolean, and
/// 'trgBufferLength' is not documented here - confirm the actual contract.
///
PSOLA_DLL_EXPORTS unsigned PSOLA_Modify(
const short * srcWave,
unsigned srcLength,
const unsigned *srcTags,
unsigned tagNumber,
unsigned trgPitch,
unsigned trgDuration,
float specRatio,
short * trgWave,
unsigned trgBufferLength,
unsigned sampleRate);
///
/// Modify wave using PSOLA model, specifying the target pitch contour.
///
/// @param srcWave[in] wave buffer read from speech database;
///        must be 16-bit samples
/// @param srcLength[in] wave buffer length, in short count
///        (number of samples)
/// @param srcTags[in] peak tags read from speech database;
///        each tag is the offset of a peak relative to the first
///        sample of the wave
/// @param tagNumber[in] peak tag count of srcTags
/// @param trgPeriods[in] predicted pitch periods (array of target
///        pitch periods)
/// @param periodNumber[in] pitch period count of target
/// @param trgDuration[in] predicted wave duration, in short count
///        (number of samples); 0 means the duration is not modified
/// @param specRatio[in] modification ratio of spectra; 0 means no modification
/// @param trgWave[out] modified wave; the buffer is allocated by the caller
///        and must be somewhat larger than trgDuration
/// @param trgBufferLength[in] not described by the original comment;
///        presumably the capacity of trgWave in samples - TODO confirm
/// @param sampleRate[in] sample count per second; the original note
///        recommends 16000 (the declaration has no default argument)
///
/// @return per the original note: true if modified successfully, false if
///         not, in which case the content of trgWave is undefined.
///         NOTE(review): the declared return type is `unsigned`, not bool;
///         confirm what value is actually returned.
///
PSOLA_DLL_EXPORTS unsigned PSOLA_ModifyPitchContour(
const short * srcWave,
unsigned srcLength,
const unsigned *srcTags,
unsigned tagNumber,
const unsigned *trgPeriods,
unsigned periodNumber,
unsigned trgDuration,
float specRatio,
short * trgWave,
unsigned trgBufferLength,
unsigned sampleRate);
///
/// Modify wave using PSOLA model, specifying modification ratios for the
/// pitch period and the duration.
///
/// @param srcWave[in] wave buffer read from speech database;
///        must be 16-bit samples
/// @param srcLength[in] wave buffer length, in short count
///        (number of samples)
/// @param srcTags[in] peak tags read from speech database;
///        each tag is the offset of a peak relative to the first
///        sample of the wave
/// @param tagNumber[in] peak tag count of srcTags
/// @param pitchRatio[in] modification ratio of the pitch period;
///        0 means the pitch is not modified
/// @param durationRatio[in] modification ratio of the duration;
///        0 means the duration is not modified
/// @param specRatio[in] modification ratio of spectra; 0 means no modification
/// @param trgWave[out] modified wave; the buffer is allocated by the caller.
///        The original note says it must be somewhat larger than
///        "trgDuration", which is not a parameter of this function;
///        presumably the resulting duration is meant - TODO confirm
/// @param trgBufferLength[in] not described by the original comment;
///        presumably the capacity of trgWave in samples - TODO confirm
/// @param sampleRate[in] sample count per second; the original note
///        recommends 16000 (the declaration has no default argument)
///
/// @return per the original note: true if modified successfully, false if
///         not, in which case the content of trgWave is undefined.
///         NOTE(review): the declared return type is `unsigned`, not bool;
///         confirm what value is actually returned.
///
PSOLA_DLL_EXPORTS unsigned PSOLA_ModifyRatio(
const short * srcWave,
unsigned srcLength,
const unsigned *srcTags,
unsigned tagNumber,
float pitchRatio,
float durationRatio,
float specRatio,
short * trgWave,
unsigned trgBufferLength,
unsigned sampleRate);
#ifdef _cplusplus
}
#endif
#endif
PSOLA. h Header
#ifndef PSOLA_H_
#define PSOLA_H_
#include <vector>
#include "DSP.h"
using namespace std;
///
/// Time-domain PSOLA prosody modifier working on a buffer of 16-bit samples.
///
/// Ownership: the buffer handed to the constructor / SetData() is borrowed
/// (remembered in m_InData) and never freed by this class. Processing
/// routines replace m_Data with buffers allocated by the class; such a
/// buffer (m_Data != m_InData) is released by SetData() and the destructor.
/// The pitch arrays passed to SetPitch()/SetNewPitch() are borrowed as well.
///
/// NOTE(review): the class holds raw owning pointers but declares no copy
/// constructor / assignment operator, so copying an instance would lead to
/// a double delete. Do not copy CPsola objects.
///
class CPsola{
public:
    /// Create an object with default settings and no audio attached.
    CPsola();
    /// Create an object working on `data` (borrowed), `dataLen` samples long.
    CPsola(short* data, unsigned dataLen);
    /// Attach a new borrowed sample buffer; drops pitch marks and any owned buffer.
    void SetData(short* data, unsigned length);
    /// Set the amplitude factor used by Adjust(); negative values are ignored.
    void SetAmplitudeMultiple(float multiple);
    /// Set the target duration used by Adjust(); negative values are ignored.
    void SetDuration(float duration);
    /// Set the per-frame pitch array (borrowed), optionally the first-frame
    /// length (ignored when negative), and compute the pitch marks.
    void SetPitch(float* pitch, unsigned size, float X1);
    /// Set the target pitch array (borrowed); ignored unless `newsize`
    /// equals the length given to SetPitch().
    void SetNewPitch(float* newpitch, unsigned newsize);
    /// Set the sample frequency (samples per second).
    void SetSampleFrequency(unsigned frequency);
    /// Set the frame length in seconds; negative values are ignored.
    void SetFrameLength(float frameLength);
    /// Set the length of the first frame in seconds; negative values are ignored.
    void SetX1(float x1);
    /// Apply the amplitude and duration adjustments.
    void Adjust();
    /// TD-PSOLA with a duration scale and a pitch scale.
    void TD_PSOLA(float tscale, float pscale);
    /// As TD_PSOLA; `bFD` additionally hands a per-segment pitch-scale array
    /// to OverlapAdd. The out-of-class definition supplies the default
    /// argument for `bFD`, so none may be given here.
    void PSOLA(float tscale, float pscale, bool bFD);
    /// Length in samples of the current (possibly processed) data.
    unsigned GetNewLen();
    /// Pointer to the current (possibly processed) data; owned by the object
    /// unless it is still the caller's input buffer.
    short* GetNewData();
    ~CPsola();
private:
    unsigned FindMax(unsigned begin, unsigned end, short* data);
    int Approximate(float value);
    short Middle(unsigned mid, short* data);
    bool MarkPitch();
    void MarkOneFrame(unsigned j, unsigned Pmax);
    void AdjustAmplitude();
    void AdjustDuration();
    void AdjustPitch();
    void Smooth(short* data, unsigned length);
private:
    unsigned m_uSamFre;       // sample frequency, samples per second
    float m_dFrameLen;        // frame length in seconds
    float m_dX1;              // length of the first frame in seconds
    float m_dAmpMul;          // amplitude factor, -1 = leave amplitude alone
    float m_dDuration;        // target duration in seconds, -1 = leave duration alone
    unsigned m_uPitchLen;     // number of entries in m_dPitch / m_dNewPitch
    float* m_dPitch;          // borrowed per-frame pitch values; > 0 is treated as a vowel frame
    float* m_dNewPitch;       // borrowed per-frame target pitch values
    unsigned m_uDataLen;      // number of samples in m_Data
    // unsigned m_uNewPitchLen;
    short* m_Data;            // current samples; owned when different from m_InData
    bool* flag;               // one entry per sample, true at a pitch mark; owned
    short* m_InData;          // the caller's input buffer (borrowed)
    CDSP m_filter;
    void GetPitchMarks(std::vector<unsigned>& vPM);
    bool IsVowel(unsigned i);
    int GetAvgPitchLen(std::vector<unsigned>& vPM, int& pos);
    void GetFinal(std::vector<unsigned>&, std::vector<unsigned>&,
                  int, std::vector<int>&, std::vector<std::vector<unsigned> >&);
    void GetUseds(int, int, int, std::vector<int>&);
    void smooth(short*, unsigned, std::vector<float>&);
    void OverlapAdd(std::vector<std::vector<unsigned> >& final, short* y, unsigned ylen,
                    std::vector<float>& w, float* pBeta = NULL);
public:
    /// PSOLA driven by a target duration and a target pitch contour.
    void PSOLA(float dur, float* pNewPitch, int nPitchLen, float dX1);
};
#endif
DSP.h Digital Signal Processing C++ Header
#pragma once
#include <vector>
#include <cmath>
#include <complex>
#include <iostream>
#include <algorithm>
using namespace std;
#define PI 3.1415926
#define FLOAT_MAX (float)1.0e37
// Collection of signal-processing helpers (FFT, LPC analysis and conversions
// between LPC-related representations, windowing, spectra).
// The member functions are only declared here; the per-method notes below
// are inferred from names and signatures and should be confirmed against
// the implementation file.
class CDSP
{
public:
    CDSP();
    ~CDSP();

    // Transform `s` into `spec`; `invert` presumably selects the inverse transform.
    void FFT(const std::vector<short>& s,
             std::vector<std::complex<float> >& spec,
             bool invert = false);
    // void FFT (float *x, float *y, int n, int m);

    // Presumably autocorrelation of `s` up to lag `p`, written to `r`.
    float AutoCorrelate(const std::vector<short>& s, std::vector<float>& r, int p);

    // Presumably order-`p` LPC analysis of `s` (coefficients `a`, reflection
    // coefficients `k`); the meaning of `re` / `te` is not visible here.
    void Wave2LPC(const std::vector<short>& s,
                  std::vector<float>& a,
                  std::vector<float>& k,
                  int p,
                  float* re,
                  float* te);

    // Conversions between LPC-related representations (input first, output second).
    void LPC2RefC(const std::vector<float>& a, std::vector<float>& k);
    void RefC2LPC(const std::vector<float>& k, std::vector<float>& a);
    void LPC2Cepstrum(const std::vector<float>& a, std::vector<float>& c);
    void Cepstrum2LPC(const std::vector<float>& c, std::vector<float>& a);
    void SpecModulus(const std::vector<std::complex<float> >& spec, std::vector<float>& m);
    void LSF2LPC(const std::vector<float>& lsf, std::vector<float>& a_coef);
    void LPC2LSF(const std::vector<float>& a, std::vector<float>& lsf);

    // Presumably fills `wgt` with `len` weights of the window named by `type`.
    void window(std::vector<float>& wgt, int len, std::string type);

    void LPCSpec(const std::vector<float>& a,
                 int len,
                 float G,
                 std::vector<std::complex<float> >& spec);
    float GetG(const std::vector<short>& s, const std::vector<float>& a);
    void ExcitationSpec(const std::vector<std::complex<float> >& WavSpec,
                        const std::vector<std::complex<float> >& LpcSpec,
                        std::vector<std::complex<float> >& ExcSpec);

private:
    float Durbin(std::vector<float>& k,
                 std::vector<float>& a,
                 std::vector<float>& r,
                 float E,
                 int p);
    // void cheby(vector<float>& g, int order);
    // void cacm283(vector<float>& a,int order,vector<float>& r);

    // Exchange two values through a temporary copy.
    template <typename T>
    inline void swap(T& a, T& b)
    {
        T saved = a;
        a = b;
        b = saved;
    }
};
PSOLA.CPP TD-Psola TTS C++ Code
///////////////////////////////////////////////////////////////////////
// CopyRight : Copyright (c) 2004, IRLab, All rights reserved
// File Name : Psola.cpp
// File Summary : This file provides the routines that adjust a waveform
// with TD-PSOLA
// Author : elevens
// Create Time : 2004/8/25
// Project Name : TTS
// Version : 1.0
// History :
////////////////////////////////////////////////////////////////////////
#include "stdafx.h"
#include <fstream>
#include <iostream>
#include <assert.h>
#include <algorithm>
#include <string>
#include <math.h>
#include <vector>
#include "Psola.h"
#include ".\psola.h"
//#include ".\psola.h"
#define __DEBUG
using namespace std;
#ifdef __DEBUG
ofstream out("out.txt");
#endif
///////////////////////////////////////////////////////////////////////
// Function Name : CPsola
// Function func : default constructor; creates an object with default
//                 settings (16000 samples per second, 0.02 s frames,
//                 0.02 s first frame, no amplitude / duration change)
//                 and no audio or pitch data attached
// Author : elevens
// Create Time : 2004/8/16
// Class Name : CPsola
// input :
// output :
// Note : every data member is initialised here, in declaration
//        order; m_uDataLen used to be left uninitialised.
////////////////////////////////////////////////////////////////////////
CPsola::CPsola()
    : m_uSamFre(16000),
      m_dFrameLen(0.02f),
      m_dX1(0.02f),
      m_dAmpMul(-1),      // -1: AdjustAmplitude() does nothing
      m_dDuration(-1),    // -1: AdjustDuration() only copies the data
      m_uPitchLen(0),
      m_dPitch(NULL),
      m_dNewPitch(NULL),
      m_uDataLen(0),
      m_Data(NULL),
      flag(NULL),
      m_InData(NULL)
{
}
///////////////////////////////////////////////////////////////////////
// Function Name : CPsola
// Function func : constructor; attaches a caller-owned sample buffer and
//                 applies the default settings
// Author : elevens
// Create Time : 2004/8/16
// Class Name : CPsola
// input : _Data: wav data (borrowed, remembered as the input buffer)
//         _DataLen: the number of samples in _Data
// output :
////////////////////////////////////////////////////////////////////////
CPsola::CPsola(short* _Data,unsigned _DataLen)
    : m_uSamFre(16000),
      m_dFrameLen(0.02),
      m_dX1(0.02),
      m_dAmpMul(-1),
      m_dDuration(-1),
      m_uPitchLen(0),
      m_dPitch(NULL),
      m_dNewPitch(NULL),
      m_uDataLen(_DataLen),
      m_Data(_Data),
      flag(NULL),
      m_InData(_Data)
{
    // All members are set by the initialiser list above.
}
///////////////////////////////////////////////////////////////////////
// Function Name : FindMax
// Function func : find the largest sample of data in [begin, end]
// Author : elevens
// Create Time : 2004/8/16
// Class Name : CPsola
// input : begin: first position to look at
//         end: last position to look at (inclusive)
//         data: the array of wav data
// output : the position of the first occurrence of the largest value;
//          `begin` when the range is empty (begin > end) or when every
//          sample equals -32768 (the result used to be uninitialised
//          in both cases)
////////////////////////////////////////////////////////////////////////
unsigned CPsola::FindMax(unsigned begin,unsigned end,short* data)
{
    unsigned position = begin;
    short max = -32768;
    for(unsigned k = begin; k <= end; ++k)
    {
        // Strict comparison keeps the earliest position among equal peaks.
        if(data[k] > max)
        {
            max = data[k];
            position = k;
        }
    }
    return position;
}
///////////////////////////////////////////////////////////////////////
// Function Name : Approximate
// Function func : round a float to the nearest int; exact halves move
//                 away from zero (2.5 -> 3, -2.5 -> -3)
// Author : elevens
// Create Time : 2004/8/16
// Class Name : CPsola
// input : temp: the float value
// output : the rounded int value
////////////////////////////////////////////////////////////////////////
int CPsola::Approximate(float temp)
{
    const int whole = static_cast<int>(temp);   // truncates toward zero
    const float fraction = temp - whole;
    const bool keepsWhole = (fraction > -0.5) && (fraction < 0.5);
    if(keepsWhole)
        return whole;
    return (fraction < 0) ? (whole - 1) : (whole + 1);
}
///////////////////////////////////////////////////////////////////////
// Function Name : Middle
// Function func : find the middle value (median) of five numbers
// Author : elevens
// Create Time : 2004/8/16
// Class Name : CPsola
// input : mid: the position to start with; the five samples
//              data[mid] .. data[mid+4] are examined
//         data: the wav data
// output : the middle value
// Note : the previous loop ran over data[mid+1] .. data[mid+4] only
//        and so returned the third largest of four samples.
////////////////////////////////////////////////////////////////////////
short CPsola::Middle(unsigned mid,short* data)
{
    short window[5];
    std::copy(data + mid, data + mid + 5, window);
    // After nth_element the element at index 2 is the one a full sort would
    // put there, i.e. the median of the five samples.
    std::nth_element(window, window + 2, window + 5);
    return window[2];
}
///////////////////////////////////////////////////////////////////////
// Function Name : SetData
// Function func : attach a new sample buffer; the pitch marks and any
//                 buffer owned by the object are released first
// Author : elevens
// Create Time : 2004/8/16
// Class Name : CPsola
// input : data: a pointer to the data (borrowed, not copied)
//         length: the number of samples in data
// output :
////////////////////////////////////////////////////////////////////////
void CPsola::SetData(short* data,unsigned length)
{
    // delete [] on a null pointer does nothing, so no check is needed.
    delete [] flag;

    // m_Data is owned by the object only when it is not the input buffer.
    const bool ownsBuffer = (m_Data != m_InData) && (m_Data != NULL);
    if(ownsBuffer)
        delete [] m_Data;

    flag = NULL;
    m_uDataLen = length;
    m_InData = m_Data = data;
}
///////////////////////////////////////////////////////////////////////
// Function Name : SetAmplitudeMultiple
// Function func : set the factor the amplitude is multiplied by
// Author : elevens
// Create Time : 2004/8/16
// Class Name : CPsola
// input : temp: the factor; anything that is not >= 0 is ignored
// output :
////////////////////////////////////////////////////////////////////////
void CPsola::SetAmplitudeMultiple(float temp)
{
    if(!(temp >= 0))
        return;
    m_dAmpMul = temp;
}
///////////////////////////////////////////////////////////////////////
// Function Name : SetDuration
// Function func : set the duration the wav is adjusted to
// Author : elevens
// Create Time : 2004/8/16
// Class Name : CPsola
// input : temp: the duration; anything that is not >= 0 is ignored
// output :
////////////////////////////////////////////////////////////////////////
void CPsola::SetDuration(float temp)
{
    if(!(temp >= 0))
        return;
    m_dDuration = temp;
}
///////////////////////////////////////////////////////////////////////
// Function Name : SetFrameLength
// Function func : set the length of one frame
// Author : elevens
// Create Time : 2004/8/16
// Class Name : CPsola
// input : temp: the frame length; anything that is not >= 0 is ignored
// output :
////////////////////////////////////////////////////////////////////////
void CPsola::SetFrameLength(float temp)
{
    if(!(temp >= 0))
        return;
    m_dFrameLen = temp;
}
///////////////////////////////////////////////////////////////////////
// Function Name : SetSampleFrequency
// Function func : Set the sample frequency
// Author : elevens
// Create Time : 2004/8/16
// Class Name : CPsola
// input : temp: the sample frequency to be set
// output :
////////////////////////////////////////////////////////////////////////
void CPsola::SetSampleFrequency(unsigned temp)
{
m_uSamFre = temp;
}
///////////////////////////////////////////////////////////////////////
// Function Name : SetX1
// Function func : set the length of the first frame
// Author : elevens
// Create Time : 2004/8/16
// Class Name : CPsola
// input : temp: the length of the first frame; anything that is not
//               >= 0 is ignored
// output :
////////////////////////////////////////////////////////////////////////
void CPsola::SetX1(float temp)
{
    if(!(temp >= 0))
        return;
    m_dX1 = temp;
}
///////////////////////////////////////////////////////////////////////
// Function Name : SetPitch
// Function func : store the per-frame pitches of the wav and mark the
//                 pitch positions on the current data
// Author : elevens
// Create Time : 2004/8/16
// Class Name : CPsola
// input : pitch: the array of the pitches (borrowed, not copied)
//         size: the number of entries in pitch
//         X1: the length of the first frame; ignored unless >= 0
// output :
////////////////////////////////////////////////////////////////////////
void CPsola::SetPitch(float* pitch,unsigned size,float X1)
{
    m_uPitchLen = size;
    m_dPitch = pitch;
    if(X1 >= 0)
    {
        m_dX1 = X1;
    }
    // A failed marking is not reported to the caller.
    (void)MarkPitch();
}
///////////////////////////////////////////////////////////////////////
// Function Name : SetNewPitch
// Function func : store the target pitches of the wav
// Author : elevens
// Create Time : 2004/8/16
// Class Name : CPsola
// input : newpitch: the array of the target pitches (borrowed)
//         newsize: the number of entries in newpitch; the call is
//                  ignored unless it equals the current pitch count
// output :
////////////////////////////////////////////////////////////////////////
void CPsola::SetNewPitch(float* newpitch,unsigned newsize)
{
    const bool sameLength = (newsize == m_uPitchLen);
    if(sameLength)
    {
        m_dNewPitch = newpitch;
    }
}
///////////////////////////////////////////////////////////////////////
// Function Name : MarkPitch
// Function func : build the per-sample pitch-mark array `flag` for the
//                 current data from the per-frame pitch values
// Author : elevens
// Create Time : 2004/8/16
// Class Name : CPsola
// input :
// output : false (after printing a message) when the pitch array covers
//          more time than the data holds, true otherwise
////////////////////////////////////////////////////////////////////////
bool CPsola::MarkPitch()
{
    // The frames before the last one must start inside the data.
    const bool tooManyFrames =
        (m_dX1+(m_uPitchLen-2)*m_dFrameLen)*m_uSamFre >= m_uDataLen;
    if(tooManyFrames)
    {
        cout<<"error in markpitch"<<endl;
        return false;
    }

    // Start from a fresh, all-false mark array; sample 0 is always marked.
    if(flag != NULL)
    {
        delete [] flag;
        flag = NULL;
    }
    flag = new bool[m_uDataLen];
    std::fill(flag, flag + m_uDataLen, false);
    flag[0] = true;

    const unsigned firstFrameLen = static_cast<unsigned>(m_dX1*m_uSamFre);
    for(unsigned frame = 0; frame < m_uPitchLen; ++frame)
    {
        // Frame 0 starts at sample 0; frame n starts after the first frame
        // plus n-1 regular frames.
        unsigned frameBegin = 0;
        if(frame != 0)
            frameBegin = static_cast<unsigned>((frame-1)*m_dFrameLen*m_uSamFre) + firstFrameLen;

        if(m_dPitch[frame] > 0)
        {
            // Pitched frame: mark its largest sample and let MarkOneFrame
            // place the remaining marks around it.
            unsigned frameEnd = static_cast<unsigned>(frame*m_dFrameLen*m_uSamFre) + firstFrameLen;
            if(frame == m_uPitchLen-1)
                frameEnd = m_uDataLen - 1;   // the last frame runs to the end of the data
            const unsigned peak = FindMax(frameBegin, frameEnd, m_Data);
            flag[peak] = true;
            MarkOneFrame(frame, peak);
            continue;
        }

        // Frame without pitch: two marks, half a frame and a full frame
        // after the frame start, skipped when they fall at or past the
        // last sample.
        const unsigned halfFrame = static_cast<unsigned>(m_dFrameLen*m_uSamFre/2);
        unsigned mark = frameBegin + halfFrame;
        if(mark < m_uDataLen-1)
            flag[mark] = true;
        mark += halfFrame;
        if(mark < m_uDataLen-1)
            flag[mark] = true;
    }
    return true;
}
///////////////////////////////////////////////////////////////////////
// Function Name : MarkOneFrame
// Function func : Mark the pitch peaks of one frame. Starting from the
//                 position of the frame's largest sample, further marks
//                 are searched one pitch period apart (within +/-5
//                 samples), first toward the end of the frame and then
//                 toward its start.
// Author : elevens
// Create Time : 2004/8/16
// Class Name : CPsola
// input : j: the jth Frame
// Pmax: the position of the max value
// output :
////////////////////////////////////////////////////////////////////////
void CPsola::MarkOneFrame(unsigned j,unsigned Pmax)
{
// Length of the first frame in samples.
unsigned first = static_cast<unsigned>(m_dX1*m_uSamFre);
// NOTE(review): only a negative pitch is rejected here; a pitch of exactly 0
// would reach the division below. MarkPitch calls this function only for
// frames with m_dPitch[j] > 0.
if(m_dPitch[j] < 0)
return;
// Pitch period in seconds; T*m_uSamFre is the period in samples.
float T = 1/m_dPitch[j];
int p,p1,p2,i;
// p1 walks forward from the peak, p2 walks backward from it.
p2 = p1 = Pmax;
p = 0;
int temp;
// Last position from which a full period still fits: one period before the
// end of the data for the last frame, one period before the frame end otherwise.
int end = (j == (m_uPitchLen-1) )?static_cast<int>(m_uDataLen-1-T*m_uSamFre):static_cast<int>(j*m_dFrameLen*m_uSamFre+first-T*m_uSamFre);
// Forward pass: the next mark is the largest sample within +/-5 samples of
// one period after the current mark.
while(p1 <= (end - 5) )
{
p = FindMax(static_cast<unsigned>(p1+T*m_uSamFre-5),static_cast<unsigned>(p1+T*m_uSamFre+5),m_Data);
flag[p] = 1;
p1 = p;
}
// One more forward mark when the last one is no more than 5 samples past
// `end` and the search window still lies inside the data.
if (p!=0 && p-end<=5 && p+T*m_uSamFre+5<m_uDataLen )
{
p = FindMax(static_cast<unsigned>(p+T*m_uSamFre-5),static_cast<unsigned>(p+T*m_uSamFre+5),m_Data);
flag[p] = 1;
}
// Mark backwards (toward the start) from the maximum peak point.
// `begin` is one period after the start of the frame.
unsigned begin = (j == 0)?static_cast<unsigned>(T*m_uSamFre):static_cast<unsigned>((j-1)*m_dFrameLen*m_uSamFre+first+T*m_uSamFre);
while(p2 >= (begin + 5) )
{
p = FindMax(static_cast<unsigned>(p2-T*m_uSamFre-5),static_cast<unsigned>(p2-T*m_uSamFre+5),m_Data);
flag[p] = 1;
p2 = p;
}
// NOTE(review): `begin-p` mixes unsigned and int, so it is evaluated as
// unsigned and wraps to a large value when p > begin; `p` may also still
// hold a mark from the forward pass if the backward loop did not run.
if (p!=0 && begin-p<=5 && p-T*m_uSamFre-5>0 )
{
p = FindMax(static_cast<unsigned>(p-T*m_uSamFre-5),static_cast<unsigned>(p-T*m_uSamFre+5),m_Data);
flag[p] = 1;
// Look at most 20 samples ahead (clamped to the last sample) for another
// mark; of the two, only the one with the larger sample value is kept.
temp = p+20 >= m_uDataLen ? m_uDataLen-1 : p+20;
for( i = p+1 ; i <= temp ; i++ )
{
if(flag[i]==1)
{
if(m_Data[i]>m_Data[p]) flag[p] = 0;
else flag[i] = 0;
break;
}
}
}
}
///////////////////////////////////////////////////////////////////////
// Function Name : Adjust
// Function func : Adjust the wav
// Author : elevens
// Create Time : 2004/8/16
// Class Name : CPsola
// input :
// output :
////////////////////////////////////////////////////////////////////////
void CPsola::Adjust()
{
AdjustAmplitude();
AdjustDuration();
// AdjustPitch();
}
///////////////////////////////////////////////////////////////////////
// Function Name : AdjustAmplitude
// Function func : multiply every sample of the current data by the
//                 amplitude factor, in place. Does nothing while the
//                 factor still has its initial value -1.
// Author : elevens
// Create Time : 2004/8/16
// Class Name : CPsola
// input :
// output :
// Note : results outside the 16-bit range are clipped to
//        [-32768, 32767]; converting an out-of-range float to short
//        is undefined behaviour, which the old code could hit.
////////////////////////////////////////////////////////////////////////
void CPsola::AdjustAmplitude()
{
    if(m_dAmpMul == -1)
        return;
    for(unsigned i=0 ;i<m_uDataLen ;++i)
    {
        float scaled = m_dAmpMul*m_Data[i];
        if(scaled > 32767.0f)
            scaled = 32767.0f;
        else if(scaled < -32768.0f)
            scaled = -32768.0f;
        m_Data[i] = static_cast<short>(scaled);
    }
}
///////////////////////////////////////////////////////////////////////
// Function Name : AdjustDuration
// Function func : Adjust the Duration of the wav
// Author : elevens
// Create Time : 2004/8/16
// Class Name : CPsola
// input :
// output :
////////////////////////////////////////////////////////////////////////
void CPsola::AdjustDuration()
{
if(m_dDuration == -1)
{
short* NewData = new short[m_uDataLen];
memcpy(NewData,m_Data,m_uDataLen*2);
m_Data = NewData;
return;
}
if(flag == NULL)
return;
float sum = 0;
int nframe = 0;
int nFirstVowel = 0;
for(unsigned i=0 ;i<m_uPitchLen-1 ;++i)
// for(unsigned i=0 ;i<m_uPitchLen ;++i)
{
if(m_dPitch[i] > 0)
{
if( nFirstVowel == 0 )
{
nFirstVowel = i;
continue;
}
sum += m_dPitch[i];
++nframe;
}
}
if(nframe == 0)
return;
short* NewData;
unsigned NewLen = static_cast<unsigned>(m_dDuration*m_uSamFre);
if(NewLen < m_uDataLen)
NewData = new short[m_uDataLen];
else
NewData = new short[NewLen];
memset(NewData,0,NewLen*2);
float avg = sum/nframe; //¸ÃÒô½Ú»ùƵµÄƽ¾ùÖµ
//Ò»¹²ÐèÒªÔö¼Ó»ò¼õÉÙµÄÖÜÆÚ¸öÊý
int p_add = Approximate((static_cast<int>(NewLen) - static_cast<int>(m_uDataLen))*avg/m_uSamFre);
//ÿ¸öÖÜÆÚ¸´ÖƼ¸´Î
int every1 = static_cast<int>(static_cast<float>(p_add)/(sum*m_dFrameLen));
int every2 = every1;
//ÿFrame¼õ¼¸¸öÖÜÆÚ
int every3 = -(p_add/nframe);
int every4 = every3;
unsigned end = 0;
int oldpos = 0;
int n = 1;
unsigned t=0;
int pos = 0; //ÓàÊý
if(p_add > 0)
{
pos = Approximate(p_add - every1*sum*m_dFrameLen);
p_add = p_add + every1 + 1;
}
else
{
pos = p_add + every3*nframe;
p_add = p_add - every3;
}
unsigned uLastFrameLen = static_cast<unsigned>(m_uDataLen - m_dX1*m_uSamFre)%static_cast<unsigned>(0.02*m_uSamFre);
//Ìø¹ýµÚÒ»¸öÔªÒôFrame
unsigned skip = m_dX1*m_uSamFre + nFirstVowel*m_dFrameLen*m_uSamFre;
unsigned k = 0 ;
unsigned pos_start;
if(pos>=0)
pos_start = m_uDataLen - uLastFrameLen - (m_uSamFre/avg)*pos;
else
pos_start = m_uDataLen - uLastFrameLen + (m_uSamFre*m_dFrameLen)*pos;
for(unsigned i=0 ;i<m_uDataLen-uLastFrameLen/*Ìø¹ý×îºóÒ»¸öFrame*/ ;++i)
{
if(flag[i] != 0)
{
if( i < skip || ((i-t) >= 150) || ((i-t) <= 10) )
{
if(end < NewLen-i+t)
{
memcpy(NewData+end ,m_Data+t ,(i-t)*2);
end = end+i-t;
t = i;
}
}
else //×ÇÒô
{
every1 = every2;
if(p_add > 0) //ÑÓ³¤
{
while(every1 >= 0)
{
if(end < NewLen-i+t)
{
memcpy(NewData+end ,m_Data+t ,(i-t)*2);
end = end + i - t;
}
--every1;
}
if(pos != 0 && pos_start-i <= m_uSamFre/avg) //ÓàÊý´ÓºóÃæ¼Ó
{
if(end < NewLen-i+t)
{
memcpy(NewData+end ,m_Data+t ,(i-t)*2);
--pos;
end = end + i - t;
}
--p_add;
}
p_add = p_add - every2;
t=i;
}
//Ëõ¶Ì
else if(p_add < 0)
{
oldpos = Approximate(m_dFrameLen*avg);
if(every3 <= 0)
{
if(end < NewLen-i+t)
{
memcpy(NewData+end ,m_Data+t ,(i-t)*2);
end = end + i - t;
}
++n;
}
else
{
t = i;
--every3;
++p_add;
++n;
}
//¿ØÖÆ´Óÿ֡ÖÐɾ³ý
if(n == oldpos)
{
//´¦Àíp_addµÄÓàÊý²¿·Ö
if(pos <= 0 && i>=pos_start)
{
end = end - (i - t);
++pos;
++p_add;
}
every3 = every4;
n = 1;
}
t = i;
}
}
}
}
//×îºóÒ»¸öFrame
if( end+m_uDataLen-t < NewLen )
{
memcpy(NewData+end,m_Data+t,(m_uDataLen-t)*2);
end += m_uDataLen-t;
}
else if(end < NewLen)
{
memcpy(NewData+end,m_Data+t,(NewLen-end)*2);
end = NewLen;
}
m_Data = NewData;
m_uDataLen = end ;
}
///////////////////////////////////////////////////////////////////////
// Function Name : AdjustPitch
// Function func : Adjust the Pitches of the wav: rebuild the data frame
//                 by frame from Hanning-windowed segments that are
//                 overlap-added with a spacing derived from the new pitch,
//                 then smooth the result.
// Author : elevens
// Create Time : 2004/8/16
// Class Name : CPsola
// input :
// output :
// NOTE(review): the call to this function in Adjust() is commented out.
// The body contains several apparent defects that are flagged inline;
// review them before re-enabling it.
////////////////////////////////////////////////////////////////////////
void CPsola::AdjustPitch()
{
if(m_dNewPitch == NULL)
return;
if(flag == NULL)
return;
// Size estimate for the output: for every frame after the first,
// (new/old pitch ratio + 1) frames worth of samples, plus the first frame.
unsigned NewLen = 0;
unsigned i;
for( i=1 ;i<m_uPitchLen ;++i)
// NewLen += static_cast<unsigned>(((m_dNewPitch[i]/m_dPitch[i]) + 1)*m_dFrameLen*m_uSamFre);
NewLen += static_cast<unsigned>((static_cast<float>(m_dNewPitch[i])/static_cast<float>(m_dPitch[i]) + 1)*m_dFrameLen*m_uSamFre);
unsigned x1 = static_cast<unsigned>(m_dX1*m_uSamFre);
NewLen += x1;
short* NewData = new short[NewLen];
bool* NewFlag = new bool[NewLen];
memset(NewFlag,0,NewLen*sizeof(bool));
// The first frame is copied unchanged.
unsigned end = 0;
memcpy(NewData,m_Data,x1*sizeof(short));
end += x1;
unsigned oldpos = 0;
unsigned pos = 0;
int t = 0;
int pit = 0;
unsigned win_length = 0;
unsigned spos = 0;
unsigned epos = 0;
// Two window buffers sized for a 2/50 s window plus one sample.
short* FrontWin = new short[Approximate((2.0/50)*m_uSamFre) + 1];
short* BehindWin = new short[Approximate((2.0/50)*m_uSamFre) + 1];
float hanning = 0;
vector<unsigned> vecPos;
for( i=1 ;i<m_uPitchLen ;++i)
{
if(m_dNewPitch[i] == 0)
continue;
// Sample range [oldpos, pos) of frame i.
oldpos = static_cast<unsigned>((m_dX1 + (i - 1)*m_dFrameLen)*m_uSamFre);
pos = static_cast<unsigned>((m_dX1 + i*m_dFrameLen)*m_uSamFre);
// NOTE(review): never true inside a loop bounded by i<m_uPitchLen;
// m_uPitchLen-1 was probably intended.
if(i == m_uPitchLen)
pos = m_uDataLen;
// frames whose pitch is unchanged or that were originally unvoiced
if((m_dNewPitch[i] == m_dPitch[i]) || (m_dPitch[i] == 0) )
{
// NOTE(review): memcpy takes a byte count; pos-oldpos is a sample count,
// so only half of the frame is copied while `end` advances by all of it.
memcpy(NewData+end ,m_Data+oldpos ,pos-oldpos);
end = end + pos - oldpos;
continue;
}
vecPos.clear();
// NOTE(review): this collects the positions that are NOT pitch marks;
// `flag[k]` was probably intended.
for(unsigned k = oldpos ;k<pos ;++k)
if(!flag[k])
vecPos.push_back(k);
// NOTE(review): vecPos[0] is used without checking that vecPos is not
// empty, and the memcpy size is again a sample count, not a byte count.
memcpy(NewData+end ,m_Data+oldpos ,vecPos[0]-oldpos);
end = end + vecPos[0] - oldpos;
NewFlag[end] = 1;
// t: old pitch period in samples; pit: change of the period in samples.
t = Approximate((1/m_dPitch[i])*m_uSamFre);
win_length = 2*t + 1;
pit = Approximate((1/m_dNewPitch[i] - 1/m_dPitch[i])*m_uSamFre);
// Window of +/- t samples around the first position, clamped to the data.
spos = vecPos[0] - t;
epos = vecPos[0] + t;
if(vecPos[0] < t)
spos = 0;
if(epos > (m_uDataLen - 1) )
epos = m_uDataLen - 1;
// NOTE(review): this loop and the inner loops below reuse `i`, the index of
// the enclosing frame loop, so the frame loop does not continue with the
// next frame afterwards.
for( i=spos ;i<epos+1 ;++i)
{
hanning = 0.5 - 0.5*cos(2*(i-spos)*3.1415926/(win_length-1));
BehindWin[i-spos] = static_cast<short>(m_Data[i]*hanning);
}
if(pit > 0)
{
for(size_t k=1 ;k<vecPos.size()-1 ;++k)
{
// NOTE(review): broken swap - `temp` is never used and both pointers end
// up referring to the same buffer, which is later deleted twice while the
// other buffer leaks. `BehindWin = temp;` was probably intended.
short* temp = FrontWin;
FrontWin = BehindWin;
BehindWin = FrontWin;
spos = vecPos[k] - t;
epos = vecPos[k] + t;
if(vecPos[k] < t)
spos = 0;
if(epos > (m_uDataLen - 1) )
epos = m_uDataLen - 1;
for( i=spos ;i<epos+1 ;++i)
{
hanning = 0.5 - 0.5*cos(2*(i-spos)*3.1415926/(win_length-1));
BehindWin[i-spos] = static_cast<short>(m_Data[i]*hanning);
}
// if(end > 1) // end<2 is unlikely
// Weighted blend of the two samples at the junction.
NewData[end-1] = 0.15*NewData[end-2] + 0.4*NewData[end-1] + 0.3*(FrontWin[t+pit] + BehindWin[0]) + 0.15*(FrontWin[t+pit+1] + BehindWin[1]);
NewData[end] = 0.15*NewData[end-2] + 0.3*NewData[end-1] + 0.4*(FrontWin[t+pit] + BehindWin[0]) + 0.15*(FrontWin[t+pit+1] + BehindWin[1]);
++end;
// overlap-add of the first half
// there are still some problems here with the first and the last frame
for( i=1 ;i<=t-pit ;++i)
{
NewData[end] = FrontWin[t+pit+i] + BehindWin[i];
++end;
}
// the centre part needs no overlap-add
// smooth the adjacent points of the two parts
NewData[end-1] = 0.15*NewData[end-2] + 0.4*NewData[end-1] + 0.3*BehindWin[t-pit+1] + 0.15*BehindWin[t-pit+2];
NewData[end] = 0.15*NewData[end-2] + 0.3*NewData[end-1] + 0.4*BehindWin[t-pit+1] + 0.15*BehindWin[t-pit+2];
++end;
// the second half
for( i=t-pit+2 ;i<t+pit ;++i)
{
NewData[end] = BehindWin[i];
++end;
}
NewFlag[end] = 1;
}
// process the data of the last window
for( i=1 ;i<t-pit-1 ;++i)
{
NewData[end] = FrontWin[t+pit+i] + BehindWin[i];
++end;
}
NewFlag[end] = 1;
}
else
{
// From here on pit holds the magnitude of the (non-positive) change.
pit = -pit;
for(size_t k=1 ;k<vecPos.size() ;++k)
{
// NOTE(review): same broken swap as in the branch above.
short* temp = FrontWin;
FrontWin = BehindWin;
BehindWin = FrontWin;
spos = vecPos[k] - t;
epos = vecPos[k] + t;
if(vecPos[k] < t)
spos = 0;
if(epos > (m_uDataLen - 1) )
epos = m_uDataLen - 1;
for( i=spos ;i<epos+1 ;++i)
{
hanning = 0.5 - 0.5*cos(2*(i-spos)*3.1415926/(win_length-1));
BehindWin[i-spos] = static_cast<short>(m_Data[i]*hanning);
}
NewData[end-1] = 0.15*NewData[end-2] + 0.4*NewData[end-1] + 0.3*(FrontWin[t] + BehindWin[pit]) + 0.15*(FrontWin[t+1] + BehindWin[pit+1]);
NewData[end] = 0.15*NewData[end-2] + 0.3*NewData[end-1] + 0.4*(FrontWin[t] + BehindWin[pit]) + 0.15*(FrontWin[t+1] + BehindWin[pit+1]);
++end;
for( i=1 ;i<t-pit-1 ;++i)
{
NewData[end] = FrontWin[t-pit+i] + BehindWin[i];
++end;
}
NewFlag[end] = 1;
}
// Blend into the untouched rest of the frame and copy it.
NewData[end-1] = 0.15*NewData[end-2] + 0.4*NewData[end-1] + 0.3*m_Data[vecPos.back()] + 0.15*m_Data[vecPos.back()+1];
NewData[end] = 0.15*NewData[end-2] + 0.3*NewData[end-1] + 0.4*m_Data[vecPos.back()] + 0.15*m_Data[vecPos.back()+1];
++end;
for( i=vecPos.back() ;i<pos ;++i)
{
NewData[end] = m_Data[i];
++end;
}
}
}
Smooth(NewData,end);
delete [] FrontWin;
delete [] BehindWin;
// The new mark array and data replace the old ones.
// NOTE(review): the previous m_Data is not released here even when the
// object owns it (m_Data != m_InData).
delete [] flag;
flag = NewFlag;
m_Data = NewData;
m_uDataLen = end;
}
///////////////////////////////////////////////////////////////////////
// Function Name : Smooth
// Function func : smooth the new data after pitch adjusting, in place:
//                 a median pass (Middle) followed by a 5-tap weighted
//                 average. The first two and last two samples are left
//                 unchanged.
// Author : elevens
// Create Time : 2004/8/16
// Class Name : CPsola
// input : data: pointer to the data, smoothed in place
//         length: the length of the data
// output :
// Note : inputs shorter than 5 samples have no interior sample and
//        are left untouched (length-2 used to wrap for lengths 0 and
//        1). The first two entries of the scratch buffer used to be
//        read uninitialised; they now hold the original samples.
////////////////////////////////////////////////////////////////////////
void CPsola::Smooth(short* data,unsigned length)
{
    if(length < 5)
        return;
    // Scratch copy: the border entries keep the original samples, the
    // interior is overwritten by the median pass.
    std::vector<short> temp(data, data + length);
    unsigned i;
    // median smoothing
    for( i=2 ;i<length-2 ;++i)
        temp[i] = Middle(i-2,data);
    // linear smoothing
    for( i=2 ;i<length-2 ;++i)
        data[i] = static_cast<short>(0.15*temp[i-2] + 0.2*temp[i-1] + 0.3*temp[i] + 0.2*temp[i+1] + 0.15*temp[i+2]);
}
///////////////////////////////////////////////////////////////////////
// Function Name : GetNewLen
// Function func : get the length of the current data
// Author : elevens
// Create Time : 2004/8/16
// Class Name : CPsola
// input :
// output : the number of samples in the current data
////////////////////////////////////////////////////////////////////////
unsigned CPsola::GetNewLen()
{
    const unsigned currentLength = m_uDataLen;
    return currentLength;
}
///////////////////////////////////////////////////////////////////////
// Function Name : GetNewData
// Function func : get the pointer to the current data
// Author : elevens
// Create Time : 2004/8/16
// Class Name : CPsola
// input :
// output : the pointer to the current data; the buffer is released by
//          the object unless it is still the caller's input buffer
////////////////////////////////////////////////////////////////////////
short* CPsola::GetNewData()
{
    short* const current = m_Data;
    return current;
}
// Destructor: releases the pitch-mark array and, when the object owns it,
// the current sample buffer. The caller's input buffer is never freed.
CPsola::~CPsola()
{
    // delete [] on a null pointer does nothing, so no check is needed.
    delete [] flag;

    // m_Data is owned by the object only when it is not the input buffer.
    const bool ownsBuffer = (m_Data != m_InData) && (m_Data != NULL);
    if(ownsBuffer)
        delete [] m_Data;
}
///////////////////////////////////////////////////////////////////////
// Function Name : TD_PSOLA
// Function func :
// Author : Taliux
// Create Time : 2004/12/1
// Class Name : CPsola
// input : float tscale,float pscale
// output :
////////////////////////////////////////////////////////////////////////
void CPsola::TD_PSOLA(float tscale,float pscale)
{
if(tscale==1 && pscale==1)
return ;
// MarkPitch();
vector<unsigned> vPM,vNewPM;
GetPitchMarks(vPM);
vNewPM=vPM;
int pos;
unsigned i,T0,pshift=0;
if(pscale!=1)
{
for(i=1;i<vPM.size();i++)
{
T0=vPM.at(i)-vPM.at(i-1);
if (IsVowel(vPM.at(i-1)))
{
if (pscale>1)
pshift=pshift-Approximate(T0*(pscale-1.0)/pscale); //»ùƵ¸Ä±ä¶ÔӦʱ³¤±ä»¯
else
pshift=pshift+Approximate(T0*(1.0/pscale-1.0));
}
vNewPM.at(i)=vPM.at(i)+pshift; //Éú³ÉÄ¿±ê»ùÒô±ê×¢ÐòÁÐ
}
}
float new_tscale=tscale*(double)vPM.back()/(double)vNewPM.back(); //Éú³ÉеĻùÒô±ê×¢ÐòÁкó¶Ôʱ³¤Ð޸IJÎÊýµÄµ÷Õû
// vector<unsigned>::iterator iVowelBegin;
int avg=GetAvgPitchLen(vNewPM,pos);
if(avg==0)
return;
int tot = (vNewPM.back()*new_tscale - vNewPM.at(pos)) / avg;
if(tot<=0)
return;
int orl = vNewPM.size()-pos-1;
vector<int> useds;
GetUseds(orl,tot,vNewPM.size()-2,useds);
vector<vector<unsigned> > final;
GetFinal(vPM,vNewPM,pos,useds,final);
unsigned ylen=final.back()[0]+(final.back()[2]-final.back()[1]);
short* y = new short[ylen];
memset(y,0,ylen*2);
memcpy(y,m_Data,vNewPM.at(pos)*2);
vector<float> w;
OverlapAdd(final,y,ylen,w);
//ƽ»¬
if(pscale>1)
smooth(y,ylen,w);
m_Data=y;
m_uDataLen=ylen;
}
// Collect the positions of all pitch marks (entries of `flag` that are set)
// in ascending order into vPM; vPM is cleared first. vPM stays empty when no
// mark array exists yet, i.e. before a successful SetPitch() - this used to
// dereference a null pointer. With __DEBUG defined every position is also
// written to the debug stream `out`.
void CPsola::GetPitchMarks(vector<unsigned>& vPM)
{
    vPM.clear();
    if(flag == NULL)
        return;
    // vPM.push_back(0);
    for(unsigned i=0;i<m_uDataLen;i++ )
    {
        if(!flag[i])
            continue;
#ifdef __DEBUG
        out<<i<<endl;
#endif
        vPM.push_back(i);
    }
}
bool CPsola::IsVowel(unsigned i)
{
if(i<=m_dX1*m_uSamFre)
return m_dPitch[0]>0;
unsigned j;
j=(i-unsigned(m_dX1*m_uSamFre))/unsigned(m_dFrameLen*m_uSamFre)+1;
if(j>=m_uPitchLen) return false;
return m_dPitch[j]>0;
}
// Average distance, in samples, between consecutive pitch marks that start
// in a vowel frame. The first and the last mark are not used as a start.
//   vPM : pitch-mark positions in ascending order
//   pos : receives the index of the first mark that lies in a vowel frame,
//         0 when there is none
// Returns 0 when no such mark exists; this includes vectors with fewer than
// three marks (an empty vector used to wrap size()-1 and read out of bounds).
int CPsola::GetAvgPitchLen(vector<unsigned>& vPM,int& pos)
{
    unsigned sum=0;
    int count=0;
    pos = 0;
    // `i + 1 < size()` cannot wrap, unlike `i < size() - 1`.
    for(size_t i=1; i+1<vPM.size(); i++)
    {
        if ( !IsVowel(vPM[i]) )
            continue;
        if ( pos==0 )
            pos = static_cast<int>(i);
        sum += vPM[i+1] - vPM[i];
        count++;
    }
    if(count==0)
        return 0;
    return static_cast<int>(sum/static_cast<unsigned>(count));
}
///////////////////////////////////////////////////////////////////////
// Function Name : PSOLA
// Function func : Rebuilds m_Data by overlap-adding two-period frames at
//                 re-spaced pitch marks, changing duration and/or pitch.
// Author : Taliux
// Create Time : 2004/12/1
// Class Name : CPsola
// input : float tscale - duration modification factor
//         float pscale - pitch modification factor
//         bool bFD     - when true, an array holding pscale for every
//                        frame is handed to OverlapAdd; otherwise NULL
// output : on success m_Data / m_uDataLen refer to the newly allocated
//          signal; the function returns without touching them when both
//          factors are 1, when there are no pitch marks, when no voiced
//          period is found, or when nothing would be synthesised.
////////////////////////////////////////////////////////////////////////
void CPsola::PSOLA(float tscale,float pscale,bool bFD = false)
{
    if(tscale==1 && pscale==1)
        return ;

    vector<unsigned> vPM,vNewPM;
    GetPitchMarks(vPM);
    // back() on an empty vector is undefined; without marks there is
    // nothing to resynthesise.
    if(vPM.empty())
        return;
    vNewPM = vPM;

    int pos;
    // pshift is unsigned on purpose-or-not: decrements wrap modulo 2^N, and
    // adding the wrapped value to vPM.at(i) still yields the shifted index.
    unsigned i,T0,pshift=0;
    if(pscale!=1)
    {
        for(i=1;i<vPM.size();i++)
        {
            T0=vPM.at(i)-vPM.at(i-1);
            if (IsVowel(vPM.at(i-1)))
            {
                // Duration change that corresponds to the pitch change of
                // this period.
                if (pscale>1)
                    pshift=pshift-Approximate(T0*(pscale-1.0)/pscale);
                else
                    pshift=pshift+Approximate(T0*(1.0/pscale-1.0));
            }
            // Build the target pitch-mark sequence.
            vNewPM.at(i)=vPM.at(i)+pshift;
        }
    }

    // Moving the marks already changed the overall length, so the duration
    // factor is corrected by the ratio of old to new last mark.
    float new_tscale=tscale*(float)vPM.back()/(float)vNewPM.back();

    int avg = GetAvgPitchLen(vNewPM,pos);
    if ( avg == 0 )
        return;
    // Number of average-length periods needed after the first voiced mark.
    int tot = (vNewPM.back()*new_tscale - vNewPM.at(pos)) / avg;
    if ( tot <= 0 )
        return;
    // Number of periods available after the first voiced mark.
    int orl = vNewPM.size()-pos-1;

    vector<int> useds;
    GetUseds(orl,tot,vNewPM.size()-2,useds);
    vector<vector<unsigned> > final;
    GetFinal(vPM,vNewPM,pos,useds,final);
    // GetFinal yields no frame when every selected period has a zero
    // repeat count; final.back() would then be undefined.
    if ( final.empty() )
        return;

    unsigned ylen=final.back()[0]+(final.back()[2]-final.back()[1]);
    short* y = new short[ylen];
    memset(y,0,ylen*sizeof(short));

    // Copy the samples in front of the first voiced mark unchanged, but
    // never more than either buffer holds.
    unsigned head = vNewPM.at(pos);
    if ( head > ylen )
        head = ylen;
    if ( head > m_uDataLen )
        head = m_uDataLen;
    memcpy(y,m_Data,head*sizeof(short));

    vector<float> w;
    // The per-frame factors live in a vector so they are released on every
    // exit path, including an exception thrown by OverlapAdd.
    vector<float> betas;
    float* pBeta = NULL;
    if ( bFD )
    {
        betas.assign(final.size(),pscale);   // final is non-empty here
        pBeta = &betas[0];
    }
    OverlapAdd(final,y,ylen,w,pBeta);

    // Smoothing: normalise by the accumulated window weight.
    if(pscale>1)
        smooth(y,ylen,w);

    // NOTE(review): the previous m_Data buffer is not released here --
    // confirm who owns it.
    m_Data=y;
    m_uDataLen=ylen;
}
// Rebuilds m_Data so that it spans about dur*m_uSamFre samples and its
// voiced periods follow the pitch contour pNewPitch.
//
// dur       : target length; multiplied by m_uSamFre to get samples
//             (presumably seconds -- confirm against callers).
// pNewPitch : target pitch per frame, nPitchLen entries.  Entries from
//             index 1 on that equal 0 are skipped as the leading unvoiced
//             part.  Each value is compared with m_uSamFre/T0, so it is
//             presumably in Hz -- confirm.
// nPitchLen : number of entries in pNewPitch.
// dX1       : offset added to the frame position before conversion to
//             samples (same role as m_dX1 in IsVowel, for the new contour).
//
// Returns without touching m_Data when there are no pitch marks, no
// contour, no period after the first voiced mark, or nothing to synthesise.
// Progress values are written to `out` exactly as before.
void CPsola::PSOLA(float dur, float* pNewPitch, int nPitchLen, float dX1)
{
    vector<unsigned> vPM,vNewPM;
    int pos;
    GetPitchMarks(vPM);
    if(vPM.empty()) return;
    if(pNewPitch == NULL || nPitchLen <= 0) return;
    vNewPM=vPM;
    GetAvgPitchLen(vNewPM,pos);

    // Number of periods after the first voiced mark.  It is used as a
    // divisor below, so a signal with a single mark cannot be processed.
    int nvc = vNewPM.size() - pos -1;
    if(nvc <= 0) return;

    unsigned i,j,k,T0,uvl,t=1;
    int pshift=0;
    float alpha,beta;

    // Find the first non-zero contour entry.  The bound is tested first so
    // pNewPitch[nPitchLen] is never read.
    while ( t < (unsigned)nPitchLen && pNewPitch[t] == 0 ) t++;

    // Length in samples of the new unvoiced lead-in.
    uvl = (dX1+(t-1)*m_dFrameLen)*m_uSamFre;
    beta = (float)uvl/(float)(vNewPM.at(pos));

    // Stretch the leading (consonant) part to uvl samples by linear
    // interpolation.
    vector<short> unvoiced(uvl);
    for( i = 0; i < uvl; i++ )
    {
        j = i / beta;
        alpha = (float)i/beta - j;
        // Keep both interpolation taps inside m_Data; m_uDataLen is
        // non-zero here because at least one pitch mark exists.
        if ( j >= m_uDataLen )
            j = m_uDataLen - 1;
        unsigned next = ( j + 1 < m_uDataLen ) ? j + 1 : j;
        unvoiced.at(i) = (1-alpha)*m_Data[j]+alpha*m_Data[next];
    }

    pshift = uvl - vNewPM.at(pos);
    vNewPM.at(pos) = uvl;

    float pscale;
    for ( i = pos+1; i < vPM.size(); i++ )
    {
        T0=vPM.at(i)-vPM.at(i-1);
        // Map the position among the voiced marks onto the contour.  For
        // the last mark the ratio is 1 and k would equal nPitchLen, so it
        // is clamped to the last valid entry.
        k = (float)(i-pos)/(float)nvc * (nPitchLen-t) + t;
        if ( k >= (unsigned)nPitchLen )
            k = nPitchLen - 1;
        pscale = pNewPitch[k]/(m_uSamFre/(float)T0);
        // Ratios outside [0.5, 3] are treated as "leave the pitch alone".
        if ( pscale < 0.5 || pscale > 3 )
            pscale = 1;
        if (IsVowel(vPM.at(i-1)))
        {
            out<<pscale<<"\t";
            // Duration change that corresponds to the pitch change of
            // this period.
            if (pscale>1)
                pshift=pshift-Approximate(T0*(pscale-1.0)/pscale);
            else
                pshift=pshift+Approximate(T0*(1.0/pscale-1.0));
        }
        // Build the target pitch-mark sequence.
        vNewPM.at(i)=vPM.at(i)+pshift;
    }
    out<<endl;

    int orl = nvc;
    int avg = (vNewPM.back()-vNewPM.at(pos))/orl;
    // avg is a divisor; a zero average period leaves nothing to repeat.
    if ( avg == 0 ) return;
    int tot = (dur*m_uSamFre-uvl)/avg;
    out<<vNewPM.back()<<"\t"<<vNewPM.at(pos)<<endl;
    // Same rule as the (tscale, pscale) overload: no periods requested
    // means no output.
    if ( tot <= 0 ) return;

    vector<int> useds;
    GetUseds(orl,tot,vNewPM.size()-2,useds);
    out<<tot<<"\t"<<orl<<"\t"<<avg<<endl;
    for(i=0;i<useds.size();i++) out<<useds[i]<<"\t";
    out<<endl;

    vector<vector<unsigned> > final;
    GetFinal(vPM,vNewPM,pos,useds,final);
    out<<"\t"<<final.size()<<endl;
    // final.back() is undefined on an empty vector.
    if ( final.empty() ) return;

    unsigned ylen=final.back()[0]+(final.back()[2]-final.back()[1]);
    short* y = new short[ylen];
    memset(y,0,ylen*sizeof(short));

    // Place the stretched lead-in at the start, bounded by the new buffer.
    unsigned head = unvoiced.size() < ylen ? unvoiced.size() : ylen;
    if ( head > 0 )
        memcpy(y,&unvoiced[0],head*sizeof(short));

    vector<float> w;
    // A per-frame factor array used to be computed here, but it was never
    // passed on (the argument was commented out) and its last-element copy
    // read before the array when only one frame existed, so it is gone.
    OverlapAdd(final,y,ylen,w);
    smooth(y,ylen,w);

    // NOTE(review): the previous m_Data buffer is not released here --
    // confirm who owns it.
    m_Data=y;
    m_uDataLen=ylen;
}
// Decides how many times each pitch period is used so that orl available
// periods become about tot output periods.
//
// orl   : number of periods available.
// tot   : number of periods wanted; negative values are treated as 0.
// size  : number of entries to produce in useds.
// useds : replaced by `size` repeat counts.
//
// tot > orl  : every entry starts at tot/orl; the remaining tot%orl
//              repetitions are handed out one per step of orl/(tot%orl)
//              entries, counting back from the end.
// tot <= orl : every entry starts at 1; orl-tot entries are set to 0, one
//              per step of orl/(orl-tot) entries, counting back from the end.
//
// With nothing to distribute (orl <= 0 or size <= 0) every entry is 1.
void CPsola::GetUseds(int orl, int tot, int size, vector<int>& useds)
{
    // orl is a divisor below and size is a vector length, so neither may be
    // zero or negative once the distribution starts.
    if(size <= 0 || orl <= 0)
    {
        useds = vector<int>(size > 0 ? size : 0,1);
        return;
    }
    // A negative request would make the step below 0 and index one past
    // the end; dropping "as many as possible" is the same as tot == 0.
    if(tot < 0)
        tot = 0;

    int m,n,i;
    if(tot>orl)
    {
        n = tot/orl;
        m = tot%orl;
        useds = vector<int>(size,n);
        if(m>0)
        {
            n = orl / m;    // add one extra repetition every n entries
            for(i=1;i<=m;i++)
            {
                if(i*n>=orl)
                    break;
                useds.at(size-i*n)++;
            }
        }
    }
    else
    {
        useds = vector<int>(size,1);
        m = orl - tot;      // how many pitch periods have to be cut
        if(m>0)
        {
            n = orl / m;    // drop one period every n entries
            for(i=1;i<=m;i++)
            {
                if(i*n>=orl)
                    break;
                useds.at(size-i*n)=0;
            }
        }
    }
}
// Expands the repeat counts in useds into the list of frames to overlap-add.
//
// Starting at mark index pos, mark i is emitted useds[i] times.  Every
// emitted frame is a triple:
//   [0] output position of the frame (begins at vNewPM[pos] and advances by
//       vNewPM[i+1] - vNewPM[i] after each emitted frame),
//   [1] vPM[i]   - first source sample,
//   [2] vPM[i+2] - end of the source range.
// final is cleared first; marks whose count is zero or negative emit nothing.
void CPsola::GetFinal(vector<unsigned>& vPM,vector<unsigned>& vNewPM,
int pos, vector<int>& useds, vector<vector<unsigned> >& final)
{
    final.clear();

    int cursor = vNewPM.at(pos);
    for (int mark = pos; mark < useds.size(); ++mark)
    {
        for (int rep = 1; rep <= useds.at(mark); ++rep)
        {
            final.push_back(vector<unsigned>(3));
            vector<unsigned>& frame = final.back();
            frame[0] = cursor;
            frame[1] = vPM.at(mark);
            frame[2] = vPM.at(mark+2);
            cursor = cursor + vNewPM.at(mark+1) - vNewPM.at(mark);
        }
    }
}
// Divides each of the ylen samples in y by its accumulated weight in w.
//
// Weights below 0.1 are first overwritten with 1 (in w itself), so those
// samples pass through unchanged.  w must hold at least ylen entries;
// at() throws otherwise.
void CPsola::smooth(short* y, unsigned ylen, vector<float>& w)
{
    unsigned idx = 0;
    while (idx < ylen)
    {
        float& weight = w.at(idx);
        if (weight < 0.1)
            weight = 1;
        y[idx] = y[idx] / weight;
        ++idx;
    }
}
// Overlap-adds the frames listed in `final` into y and accumulates the
// window weights into w.
//
// final : one entry per frame; [0] is the first output sample and
//         [1]..[2] the source range taken from m_Data.
// y     : output buffer of ylen samples; frames are added onto whatever it
//         already holds.
// ylen  : number of samples in y; samples that would fall past it are
//         dropped.
// w     : reset to ylen zeros, then receives the sum of the window values
//         applied at each output sample (consumed by smooth()).
// pBeta : NULL, or one factor per frame.  When non-NULL the windowed frame
//         goes through m_filter's LPC / FFT helpers: the excitation
//         spectrum is resampled by the factor, multiplied back onto the LPC
//         spectrum, and the frame length is divided by the factor.  Factors
//         outside [0.5, 2] are replaced by 1.
void CPsola::OverlapAdd(vector<vector<unsigned> >& final, short* y, unsigned ylen,
vector<float>& w, float* pBeta)
{
    unsigned i,j;
    int k,kv;
    vector<short> frm;
    vector<float> wgt;
    float alpha,beta;
    w = vector<float>(ylen,0);
    int len;
    int numfrm = final.size();
    float re,te;
    int p=16;   // LPC order handed to Wave2LPC
    vector<float> a,r;
    vector<complex<float> > fft_spec,lpc_spec,exc_spec,new_exc;
    unsigned start;
    // NOTE(review): temp (the mirror index used when the resampled
    // spectrum runs out) is never reset between frames -- confirm that
    // carrying it across frames is intended.
    int temp = 0;
    for ( i = 0; i < numfrm; i++ )
    {
        start=final.at(i)[0];
        len=final.at(i)[2]-final.at(i)[1];
        m_filter.window(wgt,len,"hanning");
        frm=vector<short>(m_Data+final.at(i)[1],m_Data+final.at(i)[2]);
        for(j=0;j<frm.size();j++)
            frm.at(j) *= wgt.at(j);
        if ( pBeta != NULL )
        {
            m_filter.Wave2LPC(frm,a,r,p,&re,&te);
            m_filter.FFT(frm,fft_spec);
            m_filter.LPCSpec(a,fft_spec.size(),sqrt(re),lpc_spec);
            m_filter.ExcitationSpec(fft_spec,lpc_spec,exc_spec);
            new_exc.clear();
            new_exc.resize(exc_spec.size());
            beta = pBeta[i];
            // This used to read "beta == 1;", a comparison whose result was
            // thrown away, so out-of-range factors were applied unchanged.
            if (beta < 0.5 || beta > 2) beta = 1;
            for( k = 0; k < new_exc.size()/2; k++ )
            {
                kv = k / beta;
                alpha = (float)k/beta - kv;
                if(kv >= exc_spec.size()/2) //if beta<1 copy the tail of the spectrum
                {
                    if ( temp == 0 ) temp = k-1;
                    if ( 2*temp-k < 0) temp = k-1;
                    new_exc.at(k) = new_exc.at(2*temp-k);
                }
                else
                    new_exc.at(k) = (1-alpha)*exc_spec.at(kv)+alpha*exc_spec.at(kv+1);
                // Fill the upper half with the conjugate of the lower half.
                new_exc.at(new_exc.size()-1-k) = conj(new_exc.at(k));
            }
            for( k = 0; k < lpc_spec.size(); k++ )
                fft_spec.at(k) = lpc_spec.at(k)*new_exc.at(k);
            m_filter.FFT(frm,fft_spec,true);
            len /= beta;
            frm.clear();
            frm.resize(len);
            for( k = 0 ; k < len; k++ )
                frm.at(k) = real(fft_spec.at(k));
            m_filter.window(wgt,len,"hanning");
        }
        for(j=start;j<start+len && j<ylen;j++)
        {
            y[j]=y[j]+frm.at(j-start);
            w.at(j)=w.at(j)+wgt.at(j-start);
        }
    }
}
Subscribe to:
Posts (Atom)